aid-installer 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2105 @@
1
+ # dashboard/reader/parsers.py
2
+ # LC-2 Parsers: per-source structural parse for the AID state reader.
3
+ #
4
+ # Responsibility: parse file bytes into typed model fields.
5
+ # No derivation, no write, no I/O side-effects.
6
+ # Every rule is a single anchored grep / line-scan expressible in either runtime
7
+ # (Python or Node) with zero third-party deps.
8
+ #
9
+ # Read-only by construction: all open() calls are read-only (mode 'r' / 'rb').
10
+ # No open(..., 'w'), no open(..., 'a'), no lock primitive exists here.
11
+ #
12
+ # Python 3.11+ stdlib only. Zero third-party deps.
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import re
18
+ from pathlib import Path
19
+ from typing import Optional
20
+
21
+ from .models import (
22
+ DeliverableRef,
23
+ DeferredIssue,
24
+ FeatureRef,
25
+ Finding,
26
+ KbBaseline,
27
+ KbStateRef,
28
+ Lifecycle,
29
+ LogAvailability,
30
+ PendingInput,
31
+ Phase,
32
+ RawStateRef,
33
+ SourceMode,
34
+ TaskDetail,
35
+ TaskLedger,
36
+ TaskModel,
37
+ TaskStatus,
38
+ ToolInfo,
39
+ )
40
+ from .derivation import derive_lifecycle
41
+
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # Parse result containers (plain dicts/values; models assembled in reader.py)
45
+ # ---------------------------------------------------------------------------
46
+
47
class ParsedWork:
    """Mutable holder for what was parsed out of one work folder's STATE.md.

    The attribute names mirror the WorkModel fields; reader.py assembles the
    final WorkModel from an instance of this class.
    """

    __slots__ = (
        "lifecycle",
        "phase",
        "active_skill",
        "updated",
        "pause_reason",
        "block_reason",
        "block_artifact",
        "tasks",
        "pending_inputs",
        "source_mode",
        "parse_warnings",
        "bytes_read",
        # prototype: work-overview header fields
        "work_path",
        "recipe",
        "features",
        "deliverables",
        "created",
    )

    def __init__(self) -> None:
        # Enum-valued fields and their defaults.
        self.lifecycle: Lifecycle = Lifecycle.Unknown
        self.source_mode: SourceMode = SourceMode.Fallback

        # Optional scalars: None until a matching line is parsed.
        self.phase: Optional[Phase] = None
        self.active_skill: Optional[str] = None
        self.updated: Optional[str] = None
        self.pause_reason: Optional[str] = None
        self.block_reason: Optional[str] = None
        self.block_artifact: Optional[str] = None

        # Collections: a fresh list per instance.
        self.tasks: list[TaskModel] = []
        self.pending_inputs: list[PendingInput] = []
        self.parse_warnings: list[str] = []

        # Read accounting.
        self.bytes_read: int = 0

        # prototype: work-overview header fields
        self.work_path: Optional[str] = None
        self.recipe: Optional[str] = None
        self.created: Optional[str] = None
        self.features: list[FeatureRef] = []
        self.deliverables: list[DeliverableRef] = []
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # Level-0: ToolInfo from .aid-manifest.json (+ .aid-version fallback)
96
+ # ---------------------------------------------------------------------------
97
+
98
def parse_tool_info(
    manifest_path: Path,
    version_path: Path,
) -> tuple[ToolInfo, int]:
    """Parse .aid/.aid-manifest.json into a ToolInfo.

    Resolution order:
      1. If ``manifest_path`` is a file, read it as JSON. A readable manifest
         whose top-level value is a JSON object yields
         ``ToolInfo(manifest_present=True, ...)``.
      2. Otherwise, if ``version_path`` is a file, its stripped text becomes
         ``aid_version`` (``manifest_present=False``).
      3. Otherwise every field is left at its default (DM-2).

    An unreadable manifest, malformed JSON, or JSON whose top-level value is
    not an object all produce ``ToolInfo(manifest_present=False)``; the
    version-file fallback is not consulted in that case.

    Returns:
        (ToolInfo, bytes_read) where bytes_read counts the bytes actually read.
    """
    bytes_read = 0

    # Try manifest JSON first.
    if manifest_path.is_file():
        try:
            raw = manifest_path.read_bytes()
            bytes_read += len(raw)
            data = json.loads(raw.decode("utf-8", errors="replace"))
        except (OSError, json.JSONDecodeError, ValueError):
            return ToolInfo(manifest_present=False), bytes_read

        # Valid JSON is not necessarily an object ("[]", "3", '"x"', "null").
        # Without this guard data.get(...) would raise AttributeError.
        if not isinstance(data, dict):
            return ToolInfo(manifest_present=False), bytes_read

        aid_version = data.get("aid_version")
        installed_at = data.get("installed_at")
        tools_dict = data.get("tools", {})
        tools_installed = list(tools_dict) if isinstance(tools_dict, dict) else []

        return ToolInfo(
            manifest_present=True,
            aid_version=str(aid_version) if aid_version is not None else None,
            installed_at=str(installed_at) if installed_at is not None else None,
            tools_installed=tools_installed,
        ), bytes_read

    # Fallback: .aid/.aid-version (plain string with the version).
    if version_path.is_file():
        try:
            raw = version_path.read_bytes()
            bytes_read += len(raw)
            version_str = raw.decode("utf-8", errors="replace").strip()
        except OSError:
            version_str = None

        return ToolInfo(
            manifest_present=False,
            # An empty or unreadable version file is reported as "no version".
            aid_version=version_str or None,
        ), bytes_read

    # No manifest, no version file.
    return ToolInfo(manifest_present=False), bytes_read
149
+
150
+
151
+ # ---------------------------------------------------------------------------
152
+ # Level-1: RepoInfo helpers
153
+ # ---------------------------------------------------------------------------
154
+
155
def parse_project_name(settings_path: Path) -> tuple[str, int]:
    """Extract project.name from .aid/settings.yml.

    Uses a simple line-scan: find the 'project:' line, then return the first
    indented 'name:' scalar before the next top-level key. No YAML parser is
    involved.

    Returns (name, bytes_read). A missing or unreadable file gives ("", 0);
    a readable file without a project name gives ("", bytes_read).

    This is display-only: only the literal name scalar is read, not
    grade-resolution semantics (read-setting.sh is the contract for resolution).
    """
    if not settings_path.is_file():
        return "", 0

    try:
        raw = settings_path.read_bytes()
    except OSError:
        return "", 0

    bytes_read = len(raw)
    text = raw.decode("utf-8", errors="replace")

    name_re = re.compile(r"^\s+name:\s+(.+)")
    in_project = False
    for line in text.splitlines():
        stripped = line.strip()
        if stripped == "project:" or stripped.startswith("project: "):
            in_project = True
            continue
        if not in_project:
            continue

        # Any non-indented, non-comment "key:" line is the next top-level
        # section and ends the project block. This includes a top-level
        # "name:" key, whose nested keys must not be mistaken for project.name.
        if line and not line[0].isspace() and line[0] != "#" and ":" in line:
            break

        m = name_re.match(line)
        if m:
            # PF-6: drop an inline YAML comment, then surrounding quotes.
            val = _strip_yaml_inline_comment(m.group(1))
            return val.strip().strip('"').strip("'"), bytes_read

    return "", bytes_read


def _strip_yaml_inline_comment(scalar: str) -> str:
    """Strip an inline YAML comment from a scalar value (PF-6).

    Quoted values (single or double): if the text after the closing quote
    starts with '#', everything after the closing quote is dropped; otherwise
    the value is returned unchanged. A value with an opening quote but no
    closing quote is treated as unquoted.

    Unquoted values: the comment starts at the first '#' that is at the start
    of the value or is preceded by a space or tab. A '#' glued to the preceding
    text is part of the value, so "C#-tools" and "issue#42" are kept intact.
    Whitespace before the removed comment is trimmed.

    Examples:
      'AID # set during /aid-config INIT' -> 'AID'
      '"Foo Bar" # comment'               -> '"Foo Bar"'
      'C#-tools'                          -> 'C#-tools'
      'plain'                             -> 'plain'
    """
    s = scalar
    # If the value starts with a quote, find the closing quote first.
    if s and s[0] in ('"', "'"):
        quote = s[0]
        end = s.find(quote, 1)
        if end != -1:
            # Everything after the closing quote is potentially a comment.
            after = s[end + 1:].lstrip()
            if after.startswith("#"):
                s = s[:end + 1]
            return s

    # Unquoted: only a '#' at the start or after whitespace opens a comment.
    for idx, ch in enumerate(s):
        if ch == "#" and (idx == 0 or s[idx - 1] in " \t"):
            return s[:idx].rstrip()
    return s
229
+
230
+
231
def parse_kb_baseline(settings_path: Path) -> tuple[Optional["KbBaseline"], int]:
    """Read the kb_baseline block of .aid/settings.yml (DM-A4, task-064).

    Tolerant line-scan in the same posture as parse_project_name:
      - locate the 'kb_baseline:' key
      - inside that block, take the first non-empty 'branch:' and 'tip_date:'
        scalars (inline comment and surrounding quotes removed)
      - stop at the next non-indented, non-comment 'key:' line

    Returns (KbBaseline or None, bytes_read). The result is None when the file
    is missing or unreadable, or when neither scalar was found (skip
    freshness, stay approved; FF-A2).
    Never raises (NFR7). Never writes.
    """
    if not settings_path.is_file():
        return None, 0

    try:
        blob = settings_path.read_bytes()
    except OSError:
        return None, 0

    size = len(blob)

    # Field name -> pattern of the indented "key: value" line that carries it.
    wanted = (
        ("branch", r"^\s+branch:\s+(.+)"),
        ("tip_date", r"^\s+tip_date:\s+(.+)"),
    )
    captured: dict[str, Optional[str]] = {"branch": None, "tip_date": None}
    inside = False

    for row in blob.decode("utf-8", errors="replace").splitlines():
        bare = row.strip()
        if bare == "kb_baseline:" or bare.startswith("kb_baseline: "):
            inside = True
            continue
        if not inside:
            continue

        # Another top-level key (no leading whitespace) ends the block.
        at_top_level = bool(row) and not row[0].isspace()
        if at_top_level and ":" in row and not bare.startswith("#"):
            break

        for field, pattern in wanted:
            if captured[field] is not None:
                continue  # first non-empty value wins
            hit = re.match(pattern, row)
            if hit is None:
                continue
            cleaned = _strip_yaml_inline_comment(hit.group(1)).strip().strip('"').strip("'")
            if cleaned:
                captured[field] = cleaned
            break

    if captured["branch"] is None and captured["tip_date"] is None:
        return None, size

    return KbBaseline(branch=captured["branch"], tip_date=captured["tip_date"]), size
286
+
287
+
288
def parse_kb_state(
    kb_dir: Path,
    dashboard_dir: Optional[Path] = None,
) -> tuple[Optional["KbStateRef"], int]:
    """Build a KbStateRef hook from .aid/knowledge/STATE.md and README.md.

    When kb_dir is not a directory the result is (None, 0): the repo never
    ran /aid-discover and callers render gracefully.

    dashboard_dir: when given, kb.html inside it is stat-ed (not read) to
    decide summary_present. The status field and kb_baseline are filled in by
    the caller (reader.py) after derivation (FF-A3) and parse_kb_baseline.

    Fields populated here:
      summary_approved  -- STATE.md "**User Approved:** yes ..." line
      last_summary_date -- parenthesized date on that same line
      doc_count         -- data rows of the README.md ## Completeness table
      summary_present   -- True if dashboard_dir/kb.html is a file

    Returns (KbStateRef or None, bytes_read).
    """
    if not kb_dir.is_dir():
        return None, 0

    total = 0

    def _read_text(path: Path) -> Optional[str]:
        """Return the file's text, "" if unreadable, or None if not a file."""
        nonlocal total
        if not path.is_file():
            return None
        try:
            blob = path.read_bytes()
        except OSError:
            return ""
        total += len(blob)
        return blob.decode("utf-8", errors="replace")

    # STATE.md drives the approval flag and its date.
    approved = False
    approved_on: Optional[str] = None
    state_text = _read_text(kb_dir / "STATE.md")
    if state_text is not None:
        approved, approved_on = _parse_kb_summary_approval(state_text)

    # README.md drives the document count.
    docs: Optional[int] = None
    readme_text = _read_text(kb_dir / "README.md")
    if readme_text is not None:
        docs = _parse_kb_doc_count(readme_text)

    # kb.html is only stat-ed, never read.
    has_summary = False
    if dashboard_dir is not None:
        try:
            has_summary = (dashboard_dir / "kb.html").is_file()
        except OSError:
            has_summary = False

    # status and kb_baseline are set by reader.py after derivation.
    ref = KbStateRef(
        summary_approved=approved,
        last_summary_date=approved_on,
        doc_count=docs,
        summary_present=has_summary,
    )
    return ref, total
353
+
354
+
355
def _parse_kb_summary_approval(text: str) -> tuple[bool, Optional[str]]:
    """Read the '**User Approved:**' line of '## Knowledge Summary Status'.

    Only the first such line inside that section is considered; the section
    ends at the next '## ' heading.

    Returns (approved, date):
      approved -- True when the value starts with "yes" (case-insensitive)
      date     -- the first '(YYYY-MM-DD' found in the value, else None
    (False, None) is returned when the section or the line is absent.
    """
    status_heading = re.compile(r"^##\s+Knowledge Summary Status")
    any_heading = re.compile(r"^##\s+")

    rows = iter(text.splitlines())

    # Phase 1: skip forward to the section heading.
    for row in rows:
        if status_heading.match(row):
            break
    else:
        return False, None

    # Phase 2: scan the section body (the same iterator continues).
    for row in rows:
        if status_heading.match(row):
            continue  # a repeated heading keeps the section open
        if any_heading.match(row):
            break  # next section header
        hit = re.match(r"\*\*User Approved:\*\*\s+(.+)", row.strip())
        if hit is None:
            continue
        answer = hit.group(1).strip()
        stamp = re.search(r"\((\d{4}-\d{2}-\d{2})", answer)
        return answer.lower().startswith("yes"), (stamp.group(1) if stamp else None)

    return False, None
379
+
380
+
381
def _parse_kb_doc_count(text: str) -> Optional[int]:
    """Count data rows of the table under '## Completeness' in README.md.

    Within the section (which ends at the next '## ' heading), only lines
    whose stripped form starts with '|' are considered:
      - the delimiter row (every cell is hyphens with optional leading or
        trailing ':') is skipped
      - the first remaining table line is the header row and is skipped
      - every later row with at least 2 columns and a non-empty first column
        counts as a data row

    The delimiter row is recognised cell by cell, so a data row that merely
    contains '---' in one cell (e.g. '| b.md | --- |') is still counted.

    Returns the row count, or None when there is no '## Completeness' heading.
    """
    delimiter_cell = re.compile(r":?-+:?")

    in_completeness = False
    count = 0
    header_seen = False

    for line in text.splitlines():
        if re.match(r"^##\s+Completeness", line):
            in_completeness = True
            header_seen = False
            count = 0
            continue
        if not in_completeness:
            continue
        if re.match(r"^##\s+", line):
            break

        stripped = line.strip()
        if not stripped.startswith("|"):
            continue

        cols = [c.strip() for c in stripped.strip("|").split("|")]

        if all(delimiter_cell.fullmatch(c) for c in cols):
            header_seen = True  # delimiter row; whatever precedes it was the header
            continue
        if not header_seen:
            header_seen = True  # first non-delimiter table line = header
            continue

        # Data row
        if len(cols) >= 2 and cols[0]:
            count += 1

    return count if in_completeness else None
416
+
417
+
418
+ # ---------------------------------------------------------------------------
419
+ # Prototype: REQUIREMENTS.md parser (work-overview header, delivery-002)
420
+ # ---------------------------------------------------------------------------
421
+
422
def parse_requirements_md(path: Path) -> tuple[Optional[str], Optional[str], Optional[str], int]:
    """Pull the identity block out of REQUIREMENTS.md.

    Returns (title, description, objective, bytes_read). Every field is None
    when the file is absent/unreadable or its pattern is not found.
    Never raises (NFR7).

    Recognised shapes:
      - **Name:** value         -> title
      - **Description:** value  -> description
      ## 1. Objective (or ## Objective) -> objective: the body up to the next
        '## ' heading, with surrounding blank space stripped

    The template placeholder *(pending)* counts as absent (PF-7), so a later
    line of the same kind may still supply the value.

    PF-2: lines of the form '> _..._' (status blockquote footer) are dropped
    from the Objective body so '> _Status: ..._' never appears in objective.
    """
    if not path.is_file():
        return None, None, None, 0

    try:
        blob = path.read_bytes()
    except OSError:
        return None, None, None, 0

    size = len(blob)
    text = blob.decode("utf-8", errors="replace")

    # Template seed placeholder: treat as absent (PF-7).
    pending = "*(pending)*"

    bullet_res = (
        ("title", re.compile(r"^\s*-\s*\*\*Name:\*\*\s*(.+)", re.IGNORECASE)),
        ("description", re.compile(r"^\s*-\s*\*\*Description:\*\*\s*(.+)", re.IGNORECASE)),
    )
    objective_heading = re.compile(r"^##\s+(?:\d+\.\s+)?Objective\s*$", re.IGNORECASE)
    any_section = re.compile(r"^##\s+\S")
    # PF-2: wholly italic blockquote, e.g. "> _Status: draft_".
    status_footer = re.compile(r"^>\s*_.*_\s*$")

    identity: dict[str, Optional[str]] = {"title": None, "description": None}
    body: list[str] = []
    capturing = False

    for row in text.splitlines():
        if capturing:
            if not any_section.match(row):
                if not status_footer.match(row.strip()):
                    body.append(row)
                continue
            # A '## ' heading closes the Objective body; the heading itself is
            # still examined below (it may open another Objective section).
            capturing = False

        consumed = False
        for field, pattern in bullet_res:
            hit = pattern.match(row)
            if hit and identity[field] is None:
                value = hit.group(1).strip()
                identity[field] = None if value == pending else value
                consumed = True
                break
        if consumed:
            continue

        if objective_heading.match(row):
            capturing = True
            body = []

    # Strip leading/trailing blank lines; an empty body means "no objective".
    objective = "\n".join(body).strip() or None

    return identity["title"], identity["description"], objective, size
499
+
500
+
501
+ # ---------------------------------------------------------------------------
502
+ # PF-8: SPEC.md parser (Lite-path identity fallback source)
503
+ # ---------------------------------------------------------------------------
504
+
505
def parse_spec_md(spec_path: Path) -> tuple[Optional[str], Optional[str], Optional[str], int]:
    """Read identity fields from a work-root SPEC.md (PF-8 Lite-path fallback).

    Returns (title, description, h1_title, bytes_read):
      - title:       value of the first usable '- **Name:**' line
      - description: value of the first usable '- **Description:**' line
      - h1_title:    text of the first '# ' heading line
      - bytes_read:  number of bytes read
    A field is None when its line is absent; a *(pending)* value is treated as
    absent (PF-7), matching parse_requirements_md. A missing or unreadable
    file gives (None, None, None, 0). Never raises (NFR7).
    """
    if not spec_path.is_file():
        return None, None, None, 0

    try:
        blob = spec_path.read_bytes()
    except OSError:
        return None, None, None, 0

    size = len(blob)
    text = blob.decode("utf-8", errors="replace")

    # Template seed placeholder: treat as absent (PF-7).
    pending = "*(pending)*"

    heading_re = re.compile(r"^#\s+(.+)$")
    bullet_res = (
        ("title", re.compile(r"^\s*-\s*\*\*Name:\*\*\s*(.+)", re.IGNORECASE)),
        ("description", re.compile(r"^\s*-\s*\*\*Description:\*\*\s*(.+)", re.IGNORECASE)),
    )

    found: dict[str, Optional[str]] = {"title": None, "description": None}
    heading: Optional[str] = None

    for row in text.splitlines():
        # Only the first '# ' line is taken as the H1.
        if heading is None:
            top = heading_re.match(row)
            if top:
                heading = top.group(1).strip()
                continue

        consumed = False
        for field, pattern in bullet_res:
            hit = pattern.match(row)
            if hit and found[field] is None:
                value = hit.group(1).strip()
                found[field] = None if value == pending else value
                consumed = True
                break
        if consumed:
            continue

        # Nothing left to look for once all three fields are known.
        if heading is not None and None not in found.values():
            break

    return found["title"], found["description"], heading, size
562
+
563
+
564
+ # ---------------------------------------------------------------------------
565
+ # PF-3: Task short-name from tasks/task-NNN.md first line
566
+ # ---------------------------------------------------------------------------
567
+
568
def parse_task_short_name(task_path: Path) -> tuple[Optional[str], int]:
    """Parse the short-name from the first non-blank line of a task file.

    Only the first 4096 bytes of the file are read (bounded read); the title
    line is expected to sit at the top of the file, so the rest is never
    needed. A multi-byte character cut at the boundary is decoded with
    errors="replace" and cannot raise.

    Returns (short_name, bytes_read):
      short_name -- captured title with trailing periods removed, or None when
                    the file is absent/unreadable, or its first non-blank line
                    is not a task heading (PF-7 graceful)
      bytes_read -- number of bytes actually read (at most 4096)
    Never raises (NFR7).

    Parse rule (PF-3): ^#\\s+task-0*\\d+\\s*:\\s*(.+)$  (case-insensitive)
    """
    read_limit = 4096  # generous for a heading line, independent of file size

    if not task_path.is_file():
        return None, 0

    try:
        with task_path.open("rb") as fh:
            raw = fh.read(read_limit)
    except OSError:
        return None, 0

    bytes_read = len(raw)
    text = raw.decode("utf-8", errors="replace")

    title_re = re.compile(r"^#\s+task-0*\d+\s*:\s*(.+)$", re.IGNORECASE)

    for line in text.splitlines():
        stripped = line.strip()
        if not stripped:
            continue
        # Only the first non-blank line is eligible.
        m = title_re.match(stripped)
        if m:
            title = m.group(1).strip().rstrip(".")
            return (title or None), bytes_read
        break

    return None, bytes_read
604
+
605
+
606
+ # ---------------------------------------------------------------------------
607
+ # PF-5: Execution graph from PLAN.md (wave-map + legacy prose fallback)
608
+ # ---------------------------------------------------------------------------
609
+
610
def parse_execution_graph(plan_path: Path) -> tuple[dict, int]:
    """Derive task lanes from PLAN.md: wave-map blocks (PF-5a), prose fallback (PF-5b).

    Returns (task_lane_map, bytes_read) where task_lane_map maps a lower-cased
    task id ("task-001") to its lane (wave) number.

    Note: the delivery of a task comes from the STATE Wave column (PF-5c);
    this function only derives the lane number.

    PF-5a (normalized): inside each ```wave-map fence, 'delivery: NNN' records
      that the delivery has a wave-map and 'wave N: task-001, ...' assigns
      lane N to every task id on the line.
    PF-5b (prose fallback): under a '### delivery-NNN execution graph' heading
      whose delivery has no wave-map, '- Wave N' bullets and their
      more-indented sub-bullets assign lane N to the task ids they mention.
      Ids already placed by a wave-map are never overwritten by prose.

    Never raises (NFR7). Returns ({}, 0) when PLAN.md is absent or unreadable.
    """
    if not plan_path.is_file():
        return {}, 0

    try:
        blob = plan_path.read_bytes()
    except OSError:
        return {}, 0

    size = len(blob)
    plan_lines = blob.decode("utf-8", errors="replace").splitlines()

    fence_open = re.compile(r"^```wave-map\s*$")
    fence_close = re.compile(r"^```\s*$")
    delivery_decl = re.compile(r"^delivery:\s*(\d+)\s*$")
    wave_decl = re.compile(r"^wave\s+(\d+)\s*:\s*(.+)$", re.IGNORECASE)
    task_token = re.compile(r"\btask-\d+\b", re.IGNORECASE)
    section_heading = re.compile(
        r"^###\s+delivery-(\d+)\s+execution\s+graph", re.IGNORECASE
    )
    prose_wave = re.compile(r"^(\s*)-\s*Wave\s+(\d+)\b", re.IGNORECASE)

    lanes: dict[str, int] = {}

    # --- PF-5a: wave-map fenced blocks ---
    mapped_deliveries: set[int] = set()  # deliveries declared inside a fence
    inside_fence = False
    for entry in plan_lines:
        content = entry.strip()
        if not inside_fence:
            inside_fence = bool(fence_open.match(content))
            continue
        if fence_close.match(content):
            inside_fence = False
            continue
        declared = delivery_decl.match(content)
        if declared:
            mapped_deliveries.add(int(declared.group(1)))
            continue
        wave = wave_decl.match(content)
        if wave:
            lane_no = int(wave.group(1))
            for token in task_token.finditer(wave.group(2)):
                lanes[token.group(0).lower()] = lane_no

    # --- PF-5b: prose fallback for delivery sections with no wave-map ---
    # Snapshot of ids placed by wave-maps; prose must not overwrite them.
    pinned = frozenset(lanes)

    def _claim(source: str, lane_no: int) -> None:
        """Assign lane_no to every non-pinned task id mentioned in source."""
        for token in task_token.finditer(source):
            tid = token.group(0).lower()
            if tid not in pinned:
                lanes[tid] = lane_no

    delivery_no: Optional[int] = None
    wave_no: Optional[int] = None
    bullet_indent: Optional[int] = None  # indent of the current "- Wave N" bullet

    for entry in plan_lines:
        heading = section_heading.match(entry)
        if heading:
            delivery_no = int(heading.group(1))
            wave_no = bullet_indent = None
            continue

        # Prose only applies inside a delivery section that has no wave-map.
        if delivery_no is None or delivery_no in mapped_deliveries:
            wave_no = bullet_indent = None
            continue

        bullet = prose_wave.match(entry)
        if bullet:
            bullet_indent = len(bullet.group(1))
            wave_no = int(bullet.group(2))
            _claim(entry, wave_no)  # ids on the heading line itself
            continue

        if wave_no is None or bullet_indent is None:
            continue
        if not entry.strip():
            continue  # blank line: keep the wave open for later sub-bullets

        depth = len(entry) - len(entry.lstrip())
        if depth > bullet_indent:
            _claim(entry, wave_no)  # sub-bullet of the current wave
        else:
            # Dedented non-blank line ends the current wave's sub-bullets.
            wave_no = bullet_indent = None

    return lanes, size
744
+
745
+
746
+ # ---------------------------------------------------------------------------
747
+ # Level-2: STATE.md parser -- normalized path (LC-2 levels 0-3)
748
+ # ---------------------------------------------------------------------------
749
+
750
# --- "## " section headings of STATE.md (anchored; case-insensitive) ---
# The renamed "State" headings (work-004) and the legacy "Status" headings are
# both accepted (Pillar 3 / Pillar 6 coexistence: new works write "State",
# older works keep "Status").
_RE_PIPELINE_STATUS = re.compile(r"^##\s+Pipeline (?:State|Status)\s*$", re.IGNORECASE)
_RE_TASKS_STATUS = re.compile(r"^##\s+Tasks (?:State|Status)\s*$", re.IGNORECASE)
_RE_CROSSPHASE_QA = re.compile(r"^##\s+Cross-phase Q&A", re.IGNORECASE)
_RE_TRIAGE = re.compile(r"^##\s+Triage\s*$", re.IGNORECASE)
_RE_FEATURES_STATUS = re.compile(r"^##\s+Features (?:State|Status)\s*$", re.IGNORECASE)
_RE_PLAN_DELIVERIES = re.compile(r"^##\s+Plan\s*/\s*Deliveries\s*$", re.IGNORECASE)
_RE_LIFECYCLE_HISTORY = re.compile(r"^##\s+Lifecycle History\s*$", re.IGNORECASE)
# Any "## " heading at all; used to close whichever section was open.
_RE_SECTION = re.compile(r"^##\s+\S")

# --- Triage bullets: "- **Field:** value" ---
_RE_TRIAGE_PATH = re.compile(r"^\s*-\s*\*\*Path:\*\*\s*(.+)", re.IGNORECASE)
_RE_TRIAGE_RECIPE = re.compile(r"^\s*-\s*\*\*Recipe:\*\*\s*(.+)", re.IGNORECASE)

# --- Pipeline Status bullets: "- **Field:** value" ---
_RE_PS_LIFECYCLE = re.compile(r"^\s*-\s*\*\*Lifecycle:\*\*\s*(.+)", re.IGNORECASE)
_RE_PS_PHASE = re.compile(r"^\s*-\s*\*\*Phase:\*\*\s*(.+)", re.IGNORECASE)
_RE_PS_SKILL = re.compile(r"^\s*-\s*\*\*Active Skill:\*\*\s*(.+)", re.IGNORECASE)
_RE_PS_UPDATED = re.compile(r"^\s*-\s*\*\*Updated:\*\*\s*(.+)", re.IGNORECASE)
_RE_PS_PAUSE_REASON = re.compile(r"^\s*-\s*\*\*Pause Reason:\*\*\s*(.+)", re.IGNORECASE)
_RE_PS_BLOCK_REASON = re.compile(r"^\s*-\s*\*\*Block Reason:\*\*\s*(.+)", re.IGNORECASE)
_RE_PS_BLOCK_ART = re.compile(r"^\s*-\s*\*\*Block Artifact:\*\*\s*(.+)", re.IGNORECASE)

# --- Cross-phase Q&A: "### Q{N}" heading followed by its bullets ---
# The Q{N} heading pattern is case-sensitive; the bullet patterns are not.
_RE_QN_HEADER = re.compile(r"^###\s+(Q\d+)\s*$")
_RE_QN_STATUS = re.compile(r"^\s*-\s*\*\*Status:\*\*\s*(.+)", re.IGNORECASE)
_RE_QN_CAT = re.compile(r"^\s*-\s*\*\*Category:\*\*\s*(.+)", re.IGNORECASE)
_RE_QN_IMPACT = re.compile(r"^\s*-\s*\*\*Impact:\*\*\s*(.+)", re.IGNORECASE)
_RE_QN_CONTEXT = re.compile(r"^\s*-\s*\*\*Context:\*\*\s*(.+)", re.IGNORECASE)
_RE_QN_SUGGEST = re.compile(r"^\s*-\s*\*\*Suggested:\*\*\s*(.+)", re.IGNORECASE)

# --- Tasks table helpers ---
# Separator row: pipes, dashes and whitespace only, e.g. "|---|---|".
_RE_TABLE_SEP = re.compile(r"^\|[\s\-|]+\|$")
# Placeholder cell text used for an empty table.
_NONE_YET = "_none yet_"
788
+
789
+ def parse_state_md(
790
+ text: str,
791
+ work_id: str = "",
792
+ work_dir: Optional[Path] = None,
793
+ ) -> ParsedWork:
794
+ """Parse a STATE.md file text into a ParsedWork.
795
+
796
+ Single-pass line scan. Three phases in a single pass:
797
+ - ## Pipeline Status -> normalized WorkModel fields (source_mode=normalized)
798
+ - ## Tasks Status -> tasks[] (DM-5); skip _none yet_
799
+ - ## Cross-phase Q&A -> pending_inputs (Status: Pending only)
800
+
801
+ When ## Pipeline Status is absent, the LC-3 fallback adapter (derive_lifecycle)
802
+ is invoked to reconstruct lifecycle from legacy signals (SM-2 fallback path).
803
+ source_mode=fallback is recorded for all works that use the fallback.
804
+
805
+ work_dir is required for the fallback IMPEDIMENT scan (KI-003); if absent,
806
+ the IMPEDIMENT check is skipped (IMPEDIMENT file detection does not fire).
807
+
808
+ This function is pure (text-only) when work_dir is None. When work_dir is
809
+ supplied it performs one filesystem scan for IMPEDIMENT files; no writes.
810
+ """
811
+ pw = ParsedWork()
812
+ lines = text.splitlines()
813
+
814
+ # State machine over sections
815
+ in_pipeline_status = False
816
+ pipeline_status_found = False
817
+ in_tasks = False
818
+ in_crossphase = False
819
+ in_triage = False
820
+ in_features = False
821
+ in_deliveries = False
822
+ in_lifecycle_history = False
823
+ lifecycle_history_header_seen = False
824
+ tasks_header_seen = False
825
+ features_header_seen = False
826
+ deliveries_header_seen = False
827
+
828
+ # Q&A tracking
829
+ current_q_id: Optional[str] = None
830
+ current_q: dict = {} # accumulator for current Q{N} block
831
+
832
+ def _flush_q() -> None:
833
+ nonlocal current_q, current_q_id
834
+ if current_q_id and current_q.get("status", "").lower() == "pending":
835
+ pw.pending_inputs.append(PendingInput(
836
+ question_id=current_q_id,
837
+ category=current_q.get("category"),
838
+ impact=current_q.get("impact"),
839
+ context=current_q.get("context"),
840
+ suggested=current_q.get("suggested"),
841
+ ))
842
+ current_q_id = None
843
+ current_q = {}
844
+
845
+ def _reset_sections() -> None:
846
+ nonlocal in_pipeline_status, in_tasks, in_crossphase
847
+ nonlocal in_triage, in_features, in_deliveries, in_lifecycle_history
848
+ in_pipeline_status = False
849
+ in_tasks = False
850
+ in_crossphase = False
851
+ in_triage = False
852
+ in_features = False
853
+ in_deliveries = False
854
+ in_lifecycle_history = False
855
+
856
+ for line in lines:
857
+ # Detect section boundaries (## headers)
858
+ if _RE_PIPELINE_STATUS.match(line):
859
+ _flush_q()
860
+ _reset_sections()
861
+ in_pipeline_status = True
862
+ continue
863
+
864
+ if _RE_TASKS_STATUS.match(line):
865
+ _flush_q()
866
+ _reset_sections()
867
+ in_tasks = True
868
+ tasks_header_seen = False
869
+ continue
870
+
871
+ if _RE_CROSSPHASE_QA.match(line):
872
+ _flush_q()
873
+ _reset_sections()
874
+ in_crossphase = True
875
+ continue
876
+
877
+ if _RE_TRIAGE.match(line):
878
+ _flush_q()
879
+ _reset_sections()
880
+ in_triage = True
881
+ continue
882
+
883
+ if _RE_FEATURES_STATUS.match(line):
884
+ _flush_q()
885
+ _reset_sections()
886
+ in_features = True
887
+ features_header_seen = False
888
+ continue
889
+
890
+ if _RE_PLAN_DELIVERIES.match(line):
891
+ _flush_q()
892
+ _reset_sections()
893
+ in_deliveries = True
894
+ deliveries_header_seen = False
895
+ continue
896
+
897
+ if _RE_LIFECYCLE_HISTORY.match(line):
898
+ _flush_q()
899
+ _reset_sections()
900
+ in_lifecycle_history = True
901
+ lifecycle_history_header_seen = False
902
+ continue
903
+
904
+ # Any other ## section resets active section
905
+ if _RE_SECTION.match(line):
906
+ _flush_q()
907
+ _reset_sections()
908
+ continue
909
+
910
+ # --- Process active section ---
911
+
912
+ if in_pipeline_status:
913
+ _parse_pipeline_status_line(line, pw)
914
+ pipeline_status_found = True
915
+ continue
916
+
917
+ if in_tasks:
918
+ _parse_tasks_line(line, pw, tasks_header_seen)
919
+ if line.strip().startswith("|") and not _RE_TABLE_SEP.match(line.strip()):
920
+ tasks_header_seen = True
921
+ continue
922
+
923
+ if in_crossphase:
924
+ # ### Q{N} header
925
+ m = _RE_QN_HEADER.match(line)
926
+ if m:
927
+ _flush_q()
928
+ current_q_id = m.group(1)
929
+ current_q = {}
930
+ continue
931
+ if current_q_id:
932
+ # Accept both "Status:" (legacy) and "State:" (new, Pillar 3) for Q&A state
933
+ m2 = _RE_QN_STATUS.match(line)
934
+ if m2:
935
+ current_q["status"] = m2.group(1).strip()
936
+ continue
937
+ # New name "State:" -- map to same "status" key for unified flush logic
938
+ m2 = re.match(r"^\s*-\s*\*\*State:\*\*\s*(.+)", line, re.IGNORECASE)
939
+ if m2:
940
+ current_q["status"] = m2.group(1).strip()
941
+ continue
942
+ m2 = _RE_QN_CAT.match(line)
943
+ if m2:
944
+ current_q["category"] = m2.group(1).strip()
945
+ continue
946
+ m2 = _RE_QN_IMPACT.match(line)
947
+ if m2:
948
+ current_q["impact"] = m2.group(1).strip()
949
+ continue
950
+ m2 = _RE_QN_CONTEXT.match(line)
951
+ if m2:
952
+ current_q["context"] = m2.group(1).strip()
953
+ continue
954
+ m2 = _RE_QN_SUGGEST.match(line)
955
+ if m2:
956
+ current_q["suggested"] = m2.group(1).strip()
957
+ continue
958
+
959
+ if in_triage:
960
+ _parse_triage_line(line, pw)
961
+ continue
962
+
963
+ if in_features:
964
+ _parse_features_line(line, pw, features_header_seen)
965
+ if line.strip().startswith("|") and not _RE_TABLE_SEP.match(line.strip()):
966
+ features_header_seen = True
967
+ continue
968
+
969
+ if in_deliveries:
970
+ _parse_deliveries_line(line, pw, deliveries_header_seen)
971
+ if line.strip().startswith("|") and not _RE_TABLE_SEP.match(line.strip()):
972
+ deliveries_header_seen = True
973
+ continue
974
+
975
+ if in_lifecycle_history:
976
+ _parse_lifecycle_history_line(line, pw, lifecycle_history_header_seen)
977
+ if line.strip().startswith("|") and not _RE_TABLE_SEP.match(line.strip()):
978
+ lifecycle_history_header_seen = True
979
+ continue
980
+
981
+ # Flush any trailing Q block
982
+ _flush_q()
983
+
984
+ # Normalized path: if ## Pipeline Status was found, set source_mode=normalized.
985
+ if pipeline_status_found:
986
+ pw.source_mode = SourceMode.Normalized
987
+ else:
988
+ # LC-3 FALLBACK ADAPTER (task-011, audited task-013 M6):
989
+ # ## Pipeline Status block absent -- apply SM-2 fallback derivation from
990
+ # legacy signals (IMPEDIMENT scan, task status rollup, Q&A pending,
991
+ # Lifecycle History, Deploy Status). source_mode=fallback is recorded.
992
+ # All signals except Canceled are now LEGACY-COMPAT (live works use the
993
+ # normalized ## Pipeline Status path). Canceled remains the legitimate path
994
+ # since it has no automatic producer. KI-003 RESOLVED (task-013).
995
+ _wd = work_dir if work_dir is not None else Path(".")
996
+ (
997
+ pw.lifecycle,
998
+ pw.source_mode,
999
+ pw.pause_reason,
1000
+ pw.block_reason,
1001
+ pw.block_artifact,
1002
+ fallback_updated,
1003
+ extra_warnings,
1004
+ ) = derive_lifecycle(
1005
+ work_dir=_wd,
1006
+ tasks=pw.tasks,
1007
+ pending_inputs=pw.pending_inputs,
1008
+ state_text=text,
1009
+ work_id=work_id,
1010
+ )
1011
+ # Only update updated from fallback if not already set (normalized path may
1012
+ # have set it before the fallback; in practice the block is absent here so
1013
+ # pw.updated is always None, but guard explicitly for mixed-mode safety).
1014
+ if pw.updated is None:
1015
+ pw.updated = fallback_updated
1016
+ pw.parse_warnings.extend(extra_warnings)
1017
+
1018
+ return pw
1019
+
1020
+
1021
+ # ---------------------------------------------------------------------------
1022
+ # Hierarchical per-unit STATE.md parsers (work-004 Pillar 1/2/6)
1023
+ #
1024
+ # These parsers read the TASK-LEVEL and DELIVERY-LEVEL STATE.md files
1025
+ # produced by the new uniform unit hierarchy:
1026
+ # delivery-NNN/tasks/task-NNN/STATE.md -- task mutable cells
1027
+ # delivery-NNN/STATE.md -- delivery lifecycle + gate + Q&A
1028
+ #
1029
+ # They are ONLY called when hierarchy detection fires (_detect_hierarchy in reader.py).
1030
+ # Legacy (monolithic) works continue to use parse_state_md().
1031
+ # ---------------------------------------------------------------------------
1032
+
1033
# Task-level STATE.md section patterns
# Header of the section read by parse_task_state_md(); matched per line,
# case-insensitively, with optional trailing whitespace.
_RE_TASK_STATE_SECTION = re.compile(r"^##\s+Task State\s*$", re.IGNORECASE)

# Task state field patterns (- **Field:** value)
# group(1) is the raw value after the bold label; callers strip it.
_RE_TS_STATE = re.compile(r"^\s*-\s*\*\*State:\*\*\s*(.+)", re.IGNORECASE)
_RE_TS_REVIEW = re.compile(r"^\s*-\s*\*\*Review:\*\*\s*(.+)", re.IGNORECASE)
_RE_TS_ELAPSED = re.compile(r"^\s*-\s*\*\*Elapsed:\*\*\s*(.+)", re.IGNORECASE)
_RE_TS_NOTES = re.compile(r"^\s*-\s*\*\*Notes:\*\*\s*(.+)", re.IGNORECASE)

# Delivery-level STATE.md section patterns
# Note: the Cross-phase Q&A pattern has no end anchor, so any header that
# merely starts with "## Cross-phase Q&A" also matches.
_RE_DELIVERY_LIFECYCLE_SECTION = re.compile(r"^##\s+Delivery Lifecycle\s*$", re.IGNORECASE)
_RE_DELIVERY_GATE_SECTION = re.compile(r"^##\s+Delivery Gate\s*$", re.IGNORECASE)
_RE_DELIVERY_CROSSPHASE_QA = re.compile(r"^##\s+Cross-phase Q&A", re.IGNORECASE)
_RE_DELIVERY_TASKS_STATE = re.compile(r"^##\s+Tasks State\s*$", re.IGNORECASE)

# Delivery lifecycle field patterns
# Same "- **Field:** value" shape as the task state fields above.
_RE_DL_STATE = re.compile(r"^\s*-\s*\*\*State:\*\*\s*(.+)", re.IGNORECASE)
_RE_DL_UPDATED = re.compile(r"^\s*-\s*\*\*Updated:\*\*\s*(.+)", re.IGNORECASE)
_RE_DL_BLOCK_REASON = re.compile(r"^\s*-\s*\*\*Block Reason:\*\*\s*(.+)", re.IGNORECASE)
_RE_DL_BLOCK_ART = re.compile(r"^\s*-\s*\*\*Block Artifact:\*\*\s*(.+)", re.IGNORECASE)

# Delivery Gate field patterns
# NOTE(review): no use of _RE_DG_ISSUE_LIST is visible in
# parse_delivery_state_md() -- confirm whether it is consumed elsewhere.
_RE_DG_REVIEWER_TIER = re.compile(r"^\s*-\s*\*\*Reviewer Tier:\*\*\s*(.+)", re.IGNORECASE)
_RE_DG_GRADE = re.compile(r"^\s*-\s*\*\*Grade:\*\*\s*(.+)", re.IGNORECASE)
_RE_DG_ISSUE_LIST = re.compile(r"^\s*-\s*\*\*Issue List:\*\*\s*(.+)", re.IGNORECASE)
_RE_DG_TIMESTAMP = re.compile(r"^\s*-\s*\*\*Timestamp:\*\*\s*(.+)", re.IGNORECASE)

# Valid SD-8 delivery lifecycle enum values (Pillar 1 / SD-8)
# Membership is tested with the exact (case-sensitive) string, even though the
# field-label patterns above are case-insensitive.
_DELIVERY_STATE_VALUES = frozenset({
    "Pending-Spec", "Specified", "Executing", "Gated", "Done", "Blocked",
})
1064
+
1065
+
1066
class ParsedTaskState:
    """Holder for the mutable cells read from one task-level STATE.md.

    Populated by parse_task_state_md() from the ## Task State section:
    State, Review, Elapsed and Notes, plus any warnings raised while parsing.
    Only the hierarchical reader path produces instances of this class.
    """

    __slots__ = (
        "state",
        "review",
        "elapsed",
        "notes",
        "parse_warnings",
    )

    def __init__(self) -> None:
        # Fresh list per instance so warnings are never shared between results.
        self.parse_warnings: list[str] = []
        # Optional cells stay None until a non-null value is parsed.
        self.notes: Optional[str] = None
        self.elapsed: Optional[str] = None
        self.review: Optional[str] = None
        # Unknown is the default until a State line is parsed.
        self.state: TaskStatus = TaskStatus.Unknown
1080
+
1081
+
1082
class ParsedDeliveryState:
    """Holder for everything read from one delivery-level STATE.md.

    Filled in by parse_delivery_state_md():
      - delivery_state / updated / block_reason / block_artifact
        from ## Delivery Lifecycle
      - gate_grade / gate_reviewer_tier / gate_timestamp from ## Delivery Gate
      - pending_inputs from ## Cross-phase Q&A (pending entries only)
      - tasks from the ## Tasks State table, when present inline
      - parse_warnings collected along the way
    Only the hierarchical reader path produces instances of this class.
    """

    __slots__ = (
        "delivery_state",
        "updated",
        "block_reason",
        "block_artifact",
        "gate_grade",
        "gate_reviewer_tier",
        "gate_timestamp",
        "pending_inputs",
        "tasks",
        "parse_warnings",
    )

    def __init__(self) -> None:
        # Collections: a fresh list per instance, never shared.
        self.parse_warnings: list[str] = []
        self.tasks: list[TaskModel] = []
        self.pending_inputs: list[PendingInput] = []

        # ## Delivery Gate cells.
        self.gate_timestamp: Optional[str] = None
        self.gate_reviewer_tier: Optional[str] = None
        self.gate_grade: Optional[str] = None

        # ## Delivery Lifecycle cells.
        self.block_artifact: Optional[str] = None
        self.block_reason: Optional[str] = None
        self.updated: Optional[str] = None
        self.delivery_state: Optional[str] = None
1110
+
1111
+
1112
def parse_task_state_md(
    text: str,
    task_id: str = "",
) -> ParsedTaskState:
    """Build a ParsedTaskState from the text of a task-level STATE.md.

    Only lines inside the ## Task State section are inspected; a line matching
    _RE_SECTION closes the section again. Within it, four "- **Field:** value"
    cells are recognised:
      - State: mapped through _parse_task_status (unrecognised -> Unknown)
      - Review / Elapsed / Notes: kept verbatim, or None for a null sentinel

    Never raises: any exception is converted into a parse_warnings entry
    prefixed with task_id, and whatever was parsed so far is returned.
    """
    result = ParsedTaskState()

    try:
        # Review / Elapsed / Notes all follow the same null-or-verbatim rule.
        nullable_cells = (
            (_RE_TS_REVIEW, "review"),
            (_RE_TS_ELAPSED, "elapsed"),
            (_RE_TS_NOTES, "notes"),
        )
        inside_section = False

        for row in text.splitlines():
            if _RE_TASK_STATE_SECTION.match(row):
                inside_section = True
            elif _RE_SECTION.match(row):
                inside_section = False
            elif inside_section:
                state_hit = _RE_TS_STATE.match(row)
                if state_hit:
                    result.state = _parse_task_status(state_hit.group(1).strip())
                    continue

                for pattern, attr in nullable_cells:
                    hit = pattern.match(row)
                    if hit:
                        cell = hit.group(1).strip()
                        setattr(result, attr, None if _is_null(cell) else cell)
                        break

    except Exception as exc:  # noqa: BLE001 -- never throws (NFR7)
        result.parse_warnings.append(
            f"{task_id}: error parsing task STATE.md ({exc}); "
            f"returning best-effort task state"
        )

    return result
1176
+
1177
+
1178
def parse_delivery_state_md(
    text: str,
    delivery_id: str = "",
) -> ParsedDeliveryState:
    """Build a ParsedDeliveryState from the text of a delivery-level STATE.md.

    The text is scanned line by line. A tracked header switches the active
    section; any other line matching _RE_SECTION leaves all tracked sections.
      - ## Delivery Lifecycle: State is stored only when it is exactly one of
        _DELIVERY_STATE_VALUES (otherwise a warning is recorded, unless the
        value is empty or contains "|"); Updated / Block Reason /
        Block Artifact are stored verbatim or as None for a null sentinel.
      - ## Delivery Gate: Reviewer Tier, Grade and Timestamp are filled only
        while still None; tier and grade keep their first whitespace-separated
        token, and a "Pending" grade is treated as absent.
      - ## Cross-phase Q&A: each Q block whose State/Status value equals
        "pending" (case-insensitive) becomes one PendingInput.
      - ## Tasks State: table rows are handed to _parse_tasks_line.

    delivery_state is the authored value from the file; this function does not
    derive it from the task rows.

    Never raises: any exception is converted into a parse_warnings entry
    prefixed with delivery_id, and whatever was parsed so far is returned.
    """
    pds = ParsedDeliveryState()

    try:
        # At most one tracked section is active at a time; None means the
        # scanner is outside every tracked section.
        section: Optional[str] = None
        tasks_header_seen = False

        tracked_headers = (
            (_RE_DELIVERY_LIFECYCLE_SECTION, "lifecycle"),
            (_RE_DELIVERY_GATE_SECTION, "gate"),
            (_RE_DELIVERY_CROSSPHASE_QA, "crossphase"),
            (_RE_DELIVERY_TASKS_STATE, "tasks"),
        )
        lifecycle_nullable = (
            (_RE_DL_UPDATED, "updated"),
            (_RE_DL_BLOCK_REASON, "block_reason"),
            (_RE_DL_BLOCK_ART, "block_artifact"),
        )
        question_cells = (
            (_RE_QN_CAT, "category"),
            (_RE_QN_IMPACT, "impact"),
            (_RE_QN_CONTEXT, "context"),
            (_RE_QN_SUGGEST, "suggested"),
        )
        # Q&A state line: "State:" (new) and "Status:" (legacy) are both accepted.
        question_state_re = re.compile(
            r"^\s*-\s*\*\*(?:State|Status):\*\*\s*(.+)", re.IGNORECASE
        )

        # Adapter that lets _parse_tasks_line append straight into pds.
        rollup_sink = _TaskAccumulator(pds)

        # Accumulator for the Q{N} block currently being read.
        open_q_id: Optional[str] = None
        open_q: dict = {}

        def _flush_q() -> None:
            """Emit the open Q block if it is pending, then clear it."""
            nonlocal open_q, open_q_id
            if open_q_id and open_q.get("state", "").lower() == "pending":
                pds.pending_inputs.append(PendingInput(
                    question_id=open_q_id,
                    category=open_q.get("category"),
                    impact=open_q.get("impact"),
                    context=open_q.get("context"),
                    suggested=open_q.get("suggested"),
                ))
            open_q_id = None
            open_q = {}

        for line in lines_iter(text):
            # --- Section boundaries ---
            entered = next(
                (name for pattern, name in tracked_headers if pattern.match(line)),
                None,
            )
            if entered is not None:
                _flush_q()
                section = entered
                if entered == "tasks":
                    # A new table starts: its first row is the header again.
                    tasks_header_seen = False
                continue

            if _RE_SECTION.match(line):
                _flush_q()
                section = None
                continue

            # --- Content of the active section ---
            if section == "lifecycle":
                m = _RE_DL_STATE.match(line)
                if m:
                    raw = m.group(1).strip()
                    if raw in _DELIVERY_STATE_VALUES:
                        pds.delivery_state = raw
                    elif "|" not in raw and raw:
                        # A value containing "|" is treated as placeholder
                        # text and ignored silently; anything else is reported.
                        pds.parse_warnings.append(
                            f"{delivery_id}: unknown Delivery Lifecycle State '{raw}'; "
                            f"expected one of {sorted(_DELIVERY_STATE_VALUES)}"
                        )
                    continue

                for pattern, attr in lifecycle_nullable:
                    m = pattern.match(line)
                    if m:
                        cell = m.group(1).strip()
                        setattr(pds, attr, None if _is_null(cell) else cell)
                        break

            elif section == "gate":
                m = _RE_DG_REVIEWER_TIER.match(line)
                if m and pds.gate_reviewer_tier is None:
                    cell = m.group(1).strip()
                    head = cell.split()[0] if cell else None
                    pds.gate_reviewer_tier = head if head and not _is_null(head) else None
                    continue

                m = _RE_DG_GRADE.match(line)
                if m and pds.gate_grade is None:
                    cell = m.group(1).strip()
                    head = cell.split()[0] if cell else None
                    # "Pending" is a placeholder, not a grade.
                    if head and not _is_null(head) and head.lower() != "pending":
                        pds.gate_grade = head
                    continue

                m = _RE_DG_TIMESTAMP.match(line)
                if m and pds.gate_timestamp is None:
                    cell = m.group(1).strip()
                    pds.gate_timestamp = None if _is_null(cell) else cell
                    continue

            elif section == "crossphase":
                m = _RE_QN_HEADER.match(line)
                if m:
                    # A new ### Q{N} header closes the previous block.
                    _flush_q()
                    open_q_id = m.group(1)
                    open_q = {}
                    continue

                if open_q_id:
                    m = question_state_re.match(line)
                    if m:
                        open_q["state"] = m.group(1).strip()
                        continue

                    for pattern, key in question_cells:
                        m = pattern.match(line)
                        if m:
                            open_q[key] = m.group(1).strip()
                            break

            elif section == "tasks":
                _parse_tasks_line(line, rollup_sink, tasks_header_seen)
                trimmed = line.strip()
                if trimmed.startswith("|") and not _RE_TABLE_SEP.match(trimmed):
                    tasks_header_seen = True

        # The last Q block has no following header to flush it.
        _flush_q()

    except Exception as exc:  # noqa: BLE001 -- never throws (NFR7)
        pds.parse_warnings.append(
            f"{delivery_id}: error parsing delivery STATE.md ({exc}); "
            f"returning best-effort delivery state"
        )

    return pds
1377
+
1378
+
1379
def lines_iter(text: str) -> list[str]:
    """Return the lines of *text* as a list, without line terminators.

    Thin wrapper around ``str.splitlines()``: the whole list is built eagerly
    on every call (nothing is yielded lazily or cached), so callers should
    call it once and iterate the result.
    """
    return text.splitlines()
1382
+
1383
+
1384
+ class _TaskAccumulator:
1385
+ """Minimal duck-type for ParsedWork accepted by _parse_tasks_line.
1386
+
1387
+ Wraps a ParsedDeliveryState so we can reuse _parse_tasks_line for the
1388
+ delivery-level ## Tasks State derived table without duplicating the parser.
1389
+ """
1390
+ __slots__ = ("_pds",)
1391
+
1392
+ def __init__(self, pds: ParsedDeliveryState) -> None:
1393
+ self._pds = pds
1394
+
1395
+ @property
1396
+ def tasks(self) -> list[TaskModel]:
1397
+ return self._pds.tasks
1398
+
1399
+ @property
1400
+ def parse_warnings(self) -> list[str]:
1401
+ return self._pds.parse_warnings
1402
+
1403
+
1404
def _parse_pipeline_status_line(line: str, pw: ParsedWork) -> None:
    """Apply one "- **Field:** value" line of the pipeline section to *pw*.

    The first matching field pattern wins and the function returns at once:
      - Lifecycle / Phase go through _parse_lifecycle / _parse_phase
      - Active skill becomes None for a null sentinel or the literal "none"
      - Updated / Pause Reason / Block Reason / Block Artifact are stored
        verbatim, or as None for a null sentinel
    A line matching none of the patterns leaves *pw* untouched, which keeps
    the parser tolerant of fields it does not know about.
    """
    if (hit := _RE_PS_LIFECYCLE.match(line)):
        pw.lifecycle = _parse_lifecycle(hit.group(1).strip())
        return

    if (hit := _RE_PS_PHASE.match(line)):
        pw.phase = _parse_phase(hit.group(1).strip())
        return

    if (hit := _RE_PS_SKILL.match(line)):
        skill = hit.group(1).strip()
        no_skill = _is_null(skill) or skill == "none"
        pw.active_skill = None if no_skill else skill
        return

    # Remaining fields share the null-or-verbatim rule; order matches the
    # original check order.
    nullable_fields = (
        (_RE_PS_UPDATED, "updated"),
        (_RE_PS_PAUSE_REASON, "pause_reason"),
        (_RE_PS_BLOCK_REASON, "block_reason"),
        (_RE_PS_BLOCK_ART, "block_artifact"),
    )
    for pattern, attr in nullable_fields:
        if (hit := pattern.match(line)):
            value = hit.group(1).strip()
            setattr(pw, attr, None if _is_null(value) else value)
            return
1451
+
1452
+
1453
def _parse_tasks_line(line: str, pw: ParsedWork, header_seen: bool) -> None:
    """Turn one row of the tasks table into a TaskModel appended to pw.tasks.

    Expected column order (read by position, so a "State" or "Status" header
    name in column 4 makes no difference):
        # | Task | Type | Wave | State/Status | Review | Elapsed | Notes
        0   1      2      3      4              5        6         7

    Nothing is appended for:
      - lines that are not table rows, or that are separator rows
      - rows with fewer than two cells
      - a row whose first cell is "#" or empty while header_seen is False
      - rows containing the _NONE_YET placeholder in any cell
      - rows with no usable task id
    Missing cells and null sentinels are read as None.
    """
    row = line.strip()
    if not row.startswith("|") or _RE_TABLE_SEP.match(row):
        return

    cells = [part.strip() for part in row.strip("|").split("|")]
    if len(cells) < 2:
        return

    if not header_seen and cells[0] in ("#", ""):
        return

    if any(_NONE_YET in cell for cell in cells):
        return

    def _cell(index: int) -> Optional[str]:
        """Return cell *index*, or None when it is missing or a null sentinel."""
        if index >= len(cells):
            return None
        value = cells[index].strip()
        return None if _is_null(value) else value

    # Prefer the Task column; fall back to the "#" column when it is empty.
    identifier = _cell(1) or _cell(0) or ""
    if not identifier or identifier == "#":
        return

    parsed_status = _parse_task_status(_cell(4) or "")

    pw.tasks.append(TaskModel(
        task_id=identifier,
        type=_cell(2) or "",
        wave=_cell(3),
        status=parsed_status,
        review_grade=_cell(5),
        elapsed=_cell(6),
        notes=_cell(7),
    ))
1509
+
1510
+
1511
def _parse_triage_line(line: str, pw: ParsedWork) -> None:
    """Apply one line of the ## Triage section to *pw*.

    A path line sets pw.work_path (lower-cased); a recipe line sets pw.recipe
    (verbatim). A null-sentinel value leaves the field as it was. Lines
    matching neither pattern are ignored.
    """
    if (path_hit := _RE_TRIAGE_PATH.match(line)):
        path_value = path_hit.group(1).strip()
        if not _is_null(path_value):
            pw.work_path = path_value.lower()
        return

    if (recipe_hit := _RE_TRIAGE_RECIPE.match(line)):
        recipe_value = recipe_hit.group(1).strip()
        if not _is_null(recipe_value):
            pw.recipe = recipe_value
        return
1526
+
1527
+
1528
def _parse_features_line(line: str, pw: ParsedWork, header_seen: bool) -> None:
    """Turn one row of the ## Features Status table into a FeatureRef.

    Only the first two columns are used: "#" (must parse as an int) and
    Feature. The display name drops a leading "feature-NNN-" prefix and turns
    the remaining dashes into spaces; if that leaves nothing, the raw feature
    cell is used instead.

    Nothing is appended for non-table lines, separator rows, rows with fewer
    than two cells, a row whose first cell is "#" or empty while header_seen
    is False, or rows with a missing/non-numeric number or missing name.
    """
    row = line.strip()
    if not row.startswith("|") or _RE_TABLE_SEP.match(row):
        return

    cells = [part.strip() for part in row.strip("|").split("|")]
    if len(cells) < 2:
        return

    if not header_seen and cells[0] in ("#", ""):
        return

    def _cell(index: int) -> Optional[str]:
        """Return cell *index*, or None when it is missing or a null sentinel."""
        if index >= len(cells):
            return None
        value = cells[index].strip()
        return None if _is_null(value) else value

    ordinal = _cell(0) or ""
    raw_name = _cell(1) or ""
    if not ordinal or ordinal == "#" or not raw_name:
        return

    try:
        number = int(ordinal)
    except ValueError:
        # Non-numeric "#" cell: not a feature row.
        return

    without_prefix = re.sub(r"^feature-\d+-", "", raw_name, flags=re.IGNORECASE)
    display_name = without_prefix.replace("-", " ").strip() or raw_name

    pw.features.append(FeatureRef(number=number, name=display_name))
1570
+
1571
+
1572
def _parse_deliveries_line(line: str, pw: ParsedWork, header_seen: bool) -> None:
    """Turn one row of the ## Plan / Deliveries table into a DeliverableRef.

    Columns read by position: Delivery (0), Tasks (2), Notes (3).
      - number: the digits following "delivery-" at the start of column 0
      - task_count: the leading integer of the Tasks cell, else 0
      - name: the Notes text before the first ";", then before the first
        " - ", then before the first " -- "; falls back to the Delivery cell
        when that is empty

    Nothing is appended for non-table lines, separator rows, rows with fewer
    than three cells, a row whose first cell is "delivery" or empty while
    header_seen is False, or rows whose first cell does not start with
    "delivery-<digits>".
    """
    row = line.strip()
    if not row.startswith("|") or _RE_TABLE_SEP.match(row):
        return

    cells = [part.strip() for part in row.strip("|").split("|")]
    if len(cells) < 3:
        return

    if not header_seen and cells[0].lower() in ("delivery", ""):
        return

    def _cell(index: int) -> Optional[str]:
        """Return cell *index*, or None when it is missing or a null sentinel."""
        if index >= len(cells):
            return None
        value = cells[index].strip()
        return None if _is_null(value) else value

    label = _cell(0) or ""
    task_cell = _cell(2) or ""
    note_cell = _cell(3) or ""

    if not label or label.lower() == "delivery":
        return

    id_match = re.match(r"delivery-(\d+)", label, re.IGNORECASE)
    if not id_match:
        return
    number = int(id_match.group(1))

    # e.g. "13 (task-001-013)" -> 13
    count_match = re.match(r"(\d+)", task_cell)
    task_count = int(count_match.group(1)) if count_match else 0

    # First clause of the notes; an empty result falls back to the label.
    first_clause = (
        note_cell.partition(";")[0]
        .partition(" - ")[0]
        .partition(" -- ")[0]
        .strip()
    )
    name = first_clause or label

    pw.deliverables.append(DeliverableRef(number=number, name=name, task_count=task_count))
1625
+
1626
+
1627
def _parse_lifecycle_history_line(line: str, pw: ParsedWork, header_seen: bool) -> None:
    """Pick the work's creation date out of one ## Lifecycle History row.

    Column 0 is the date, column 1 the transition/gate label. pw.created is
    set to the date of the first row whose label equals "work created"
    (case-insensitive) and whose date cell is non-empty; once set it is never
    overwritten.

    Every table row seen while header_seen is False is skipped (the caller
    flips the flag after the first non-separator row), as are non-table lines,
    separator rows and rows with fewer than two cells.
    """
    row = line.strip()
    if not row.startswith("|") or _RE_TABLE_SEP.match(row):
        return

    cells = [part.strip() for part in row.strip("|").split("|")]
    if len(cells) < 2 or not header_seen or pw.created is not None:
        return

    when = cells[0].strip()
    event = cells[1].strip()
    if when and event.lower() == "work created":
        pw.created = when
1661
+
1662
+
1663
+ # ---------------------------------------------------------------------------
1664
+ # Null-value helper
1665
+ # ---------------------------------------------------------------------------
1666
+
1667
+ # Null/absent sentinels used in the work-state-template.md:
1668
+ # - single dash (early template style)
1669
+ # -- double dash (common in task-status tables)
1670
+ # — em-dash (Unicode U+2014, used in some fields)
1671
+ _NULL_SENTINELS = frozenset(("-", "--", "—", ""))
1672
+
1673
+
1674
+ def _is_null(val: str) -> bool:
1675
+ """Return True when the value represents an absent / not-applicable field."""
1676
+ return val in _NULL_SENTINELS
1677
+
1678
+
1679
+ # ---------------------------------------------------------------------------
1680
+ # Enum parsing helpers
1681
+ # ---------------------------------------------------------------------------
1682
+
1683
# Mapping from on-disk literal -> Lifecycle enum member (verbatim, SM-2)
# Looked up with the exact string by _parse_lifecycle(); no case folding.
_LIFECYCLE_MAP: dict[str, Lifecycle] = {
    "Running": Lifecycle.Running,
    "Paused-Awaiting-Input": Lifecycle.PausedAwaitingInput,
    "Blocked": Lifecycle.Blocked,
    "Completed": Lifecycle.Completed,
    "Canceled": Lifecycle.Canceled,
}

# Phase mapping
# On-disk literal -> Phase enum member; exact-string lookup by _parse_phase().
_PHASE_MAP: dict[str, Phase] = {
    "Interview": Phase.Interview,
    "Specify": Phase.Specify,
    "Plan": Phase.Plan,
    "Detail": Phase.Detail,
    "Execute": Phase.Execute,
    "Deploy": Phase.Deploy,
    "Monitor": Phase.Monitor,
}

# TaskStatus mapping (feature-001 M3 closed enum)
# On-disk literal -> TaskStatus enum member; exact-string lookup by
# _parse_task_status(). Multi-word literals contain a single space.
_TASK_STATUS_MAP: dict[str, TaskStatus] = {
    "Pending": TaskStatus.Pending,
    "In Progress": TaskStatus.InProgress,
    "In Review": TaskStatus.InReview,
    "Blocked": TaskStatus.Blocked,
    "Done": TaskStatus.Done,
    "Failed": TaskStatus.Failed,
    "Canceled": TaskStatus.Canceled,
}
1713
+
1714
+
1715
def _parse_lifecycle(raw: str) -> Lifecycle:
    """Map an on-disk lifecycle literal to its Lifecycle member.

    The lookup is an exact match against _LIFECYCLE_MAP; anything not listed
    there yields Lifecycle.Unknown (reader-only sentinel; DM-6).
    """
    try:
        return _LIFECYCLE_MAP[raw]
    except KeyError:
        return Lifecycle.Unknown
1721
+
1722
+
1723
def _parse_phase(raw: str) -> Phase:
    """Map an on-disk phase literal to its Phase member.

    The lookup is an exact match against _PHASE_MAP; anything not listed
    there yields Phase.Unknown (reader-only sentinel; DM-6).
    """
    try:
        return _PHASE_MAP[raw]
    except KeyError:
        return Phase.Unknown
1729
+
1730
+
1731
def _parse_task_status(raw: str) -> TaskStatus:
    """Map an on-disk task status literal to its TaskStatus member.

    The lookup is an exact match against _TASK_STATUS_MAP; anything not listed
    there yields TaskStatus.Unknown (reader-only sentinel; DM-6).
    """
    try:
        return _TASK_STATUS_MAP[raw]
    except KeyError:
        return TaskStatus.Unknown
1737
+
1738
+
1739
+ # ---------------------------------------------------------------------------
1740
+ # LC-TR: TaskDetail sub-parsers (feature-008, task-069)
1741
+ # Detail-only: these run ONLY when detail_task_ids is supplied to read_repo_detail().
1742
+ # The always-on read_repo() path does NOT call any function below.
1743
+ # No write / no LLM / no subprocess (NFR2/NFR7).
1744
+ # ---------------------------------------------------------------------------
1745
+
1746
# Section header patterns for the forensic sections
# Both are matched case-insensitively and allow trailing whitespace.
_RE_QUICK_CHECK_FINDINGS = re.compile(r"^##\s+Quick Check Findings\s*$", re.IGNORECASE)
_RE_DELIVERY_GATES_SECTION = re.compile(r"^##\s+Delivery Gates\s*$", re.IGNORECASE)
# Task block header under ## Quick Check Findings: ### task-NNN
# group(1) is the whole "task-..." token (any non-whitespace after "task-").
_RE_TASK_BLOCK_HEADER = re.compile(r"^###\s+(task-\S+)\s*$", re.IGNORECASE)
# Delivery sub-section under ## Delivery Gates: ### delivery-NNN
# group(1) is "delivery-<digits>" plus any directly attached non-space suffix.
_RE_DELIVERY_BLOCK_HEADER = re.compile(r"^###\s+(delivery-\d+[^\s]*)\s*$", re.IGNORECASE)

# Per-task block field patterns
_RE_FINDINGS_REVIEWER_TIER = re.compile(r"^\s*-\s*\*\*Reviewer Tier:\*\*\s*(.+)", re.IGNORECASE)
# Bullet with a leading bracketed tag: group(1) is the "[...]" tag (shortest
# match), group(2) is the remainder of the line.
_RE_FINDINGS_BULLET = re.compile(r"^\s*-\s*(\[.+?\])\s+(.*)")
# Per-delivery gate field patterns
_RE_GATE_GRADE = re.compile(r"^\s*-\s*\*\*Grade:\*\*\s*(.+)", re.IGNORECASE)
_RE_GATE_REVIEWER_TIER = re.compile(r"^\s*-\s*\*\*Reviewer Tier:\*\*\s*(.+)", re.IGNORECASE)
_RE_GATE_TIMESTAMP = re.compile(r"^\s*-\s*\*\*Timestamp:\*\*\s*(.+)", re.IGNORECASE)

# Severity normalization: only CRITICAL and HIGH; all others -> MINOR neutral
# NOTE(review): _parse_severity() spells these two tags out inline instead of
# reading this set -- keep both in sync, or confirm where this set is used.
_KNOWN_SEVERITIES = frozenset({"[CRITICAL]", "[HIGH]"})

# Location pattern: {file:line} or {source-file:line} segments
# group(1) is the text between the braces; it must contain a ":".
_RE_LOCATION = re.compile(r"\{([^}]+:[^}]*)\}")

# Disposition tokens (verbatim from the template)
_DISPOSITION_TOKENS = ("Fixed-on-spot", "Deferred-to-gate")
1770
+
1771
+
1772
+ def _parse_severity(tag: str) -> str:
1773
+ """Normalize a severity tag to [CRITICAL], [HIGH], or [MINOR] (neutral fallback).
1774
+
1775
+ Mirrors feature-002 DM-6: lower/unknown -> [MINOR] neutral, never throws (NFR7).
1776
+ """
1777
+ normalized = tag.upper().strip()
1778
+ if normalized in ("[CRITICAL]", "[HIGH]"):
1779
+ return normalized
1780
+ return "[MINOR]"
1781
+
1782
+
1783
+ def _parse_finding_bullet(
1784
+ bullet_text: str,
1785
+ reviewer_tier: Optional[str],
1786
+ ) -> Optional[Finding]:
1787
+ """Parse one **Findings:** bullet into a Finding.
1788
+
1789
+ Bullet shape (DR-2):
1790
+ - [SEVERITY] description — {file:line} — Disposition
1791
+
1792
+ Field separator: the canonical em-dash ' — ' (space U+2014 space); the
1793
+ legacy ASCII ' -- ' (space dash-dash space) is also accepted. Location and
1794
+ disposition are optional. Never throws (NFR7); returns None only if the
1795
+ bullet is blank.
1796
+ """
1797
+ text = bullet_text.strip()
1798
+ if not text:
1799
+ return None
1800
+
1801
+ # Extract leading bracketed tag (severity)
1802
+ m = _RE_FINDINGS_BULLET.match("- " + text)
1803
+ if not m:
1804
+ # No bracketed tag -- treat whole text as description with MINOR severity
1805
+ return Finding(
1806
+ severity="[MINOR]",
1807
+ description=text,
1808
+ location=None,
1809
+ disposition=None,
1810
+ reviewer_tier=reviewer_tier,
1811
+ )
1812
+
1813
+ tag = m.group(1)
1814
+ rest = m.group(2).strip()
1815
+ severity = _parse_severity(tag)
1816
+
1817
+ # Split on em-dash ' — ' (canonical) or legacy ' -- ' (ASCII double-dash).
1818
+ # The canonical findings template uses U+2014 em-dash; accept both for back-compat.
1819
+ segments = re.split(r" (?:—|--) ", rest)
1820
+
1821
+ description = segments[0].strip() if segments else rest
1822
+
1823
+ # Extract location from any segment: {file:line}
1824
+ location: Optional[str] = None
1825
+ for seg in segments[1:]:
1826
+ lm = _RE_LOCATION.search(seg)
1827
+ if lm:
1828
+ location = lm.group(1).strip()
1829
+ break
1830
+
1831
+ # Extract disposition: last segment matching a known token
1832
+ disposition: Optional[str] = None
1833
+ for seg in segments:
1834
+ stripped_seg = seg.strip()
1835
+ for token in _DISPOSITION_TOKENS:
1836
+ if stripped_seg == token or stripped_seg.startswith(token):
1837
+ disposition = token
1838
+ break
1839
+ if disposition:
1840
+ break
1841
+
1842
+ return Finding(
1843
+ severity=severity,
1844
+ description=description,
1845
+ location=location,
1846
+ disposition=disposition,
1847
+ reviewer_tier=reviewer_tier,
1848
+ )
1849
+
1850
+
1851
def parse_quick_check_findings(
    state_text: str,
    task_id: str,
    parse_warnings: list[str],
) -> list[Finding]:
    """DR-2: Parse ## Quick Check Findings -> ### task-NNN -> **Findings:** bullets.

    Args:
        state_text: Full text of the STATE.md being inspected.
        task_id: Task whose block should be read; compared case-insensitively
            against the ``### task-...`` sub-section headings.
        parse_warnings: Mutable list that receives a message if parsing fails
            unexpectedly.

    Returns:
        The Finding objects listed under the task's **Findings:** bullet.  A
        task with no section, no block, or an empty list yields ``[]`` and no
        warning.  An unexpected error appends a parse warning and returns
        whatever was collected up to that point (never throws, NFR7).
    """
    findings: list[Finding] = []
    in_findings_section = False
    in_task_block = False
    in_findings_list = False
    reviewer_tier: Optional[str] = None

    # Normalize task_id for comparison (case-insensitive)
    task_id_lower = task_id.lower()

    try:
        for line in state_text.splitlines():
            # Entering (or re-entering) the ## Quick Check Findings section
            if _RE_QUICK_CHECK_FINDINGS.match(line):
                in_findings_section = True
                in_task_block = False
                in_findings_list = False
                reviewer_tier = None
                continue

            if not in_findings_section:
                continue

            # Any other level-2 heading ends the section.  The mandatory
            # whitespace after '##' already excludes '###' headings.
            if re.match(r"^##\s+\S", line):
                in_findings_section = False
                in_task_block = False
                in_findings_list = False
                continue

            # Every level-3 heading is a sub-section boundary.  Only a
            # well-formed '### task-<id>' heading for the requested task opens
            # the block; any other heading closes it so that bullets under a
            # different sub-section are never attributed to this task.
            if re.match(r"^###(?:\s|$)", line):
                tm = _RE_TASK_BLOCK_HEADER.match(line)
                in_task_block = (
                    tm is not None and tm.group(1).lower() == task_id_lower
                )
                in_findings_list = False
                reviewer_tier = None
                continue

            if not in_task_block:
                continue

            # **Reviewer Tier:** line
            rtm = _RE_FINDINGS_REVIEWER_TIER.match(line)
            if rtm:
                reviewer_tier = rtm.group(1).strip()
                continue

            # **Findings:** line (heading for the bullet list)
            if re.match(r"^\s*-\s*\*\*Findings:\*\*\s*$", line, re.IGNORECASE):
                in_findings_list = True
                continue

            if not in_findings_list:
                continue

            stripped = line.strip()
            # A findings bullet: a dash bullet whose text opens with '['
            if stripped.startswith(("- [", "-[")):
                # Parse the bullet (strip the leading '- ')
                bullet_body = re.sub(r"^-\s*", "", stripped, count=1)
                finding = _parse_finding_bullet(bullet_body, reviewer_tier)
                if finding is not None:
                    findings.append(finding)
                continue
            # Blank lines and other dash bullets are skipped without closing
            # the list; only non-bullet text ends it.
            if stripped and not stripped.startswith("-"):
                in_findings_list = False

    except Exception as exc:  # noqa: BLE001 -- never throws (NFR7)
        parse_warnings.append(
            f"{task_id}: error parsing ## Quick Check Findings ({exc}); "
            f"returning best-effort findings"
        )

    return findings
1932
+
1933
+
1934
def parse_delivery_gate(
    state_text: str,
    delivery_id: str,
    parse_warnings: list[str],
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    """DR-3: Parse ## Delivery Gates -> ### delivery-NNN for grade/tier/timestamp.

    Args:
        state_text: Full text of the STATE.md being inspected.
        delivery_id: Delivery whose gate block should be read; compared
            case-insensitively against the ``### delivery-...`` headings.
        parse_warnings: Mutable list that receives a message if parsing fails
            unexpectedly.

    Returns:
        ``(grade, reviewer_tier, gate_timestamp)``.  Grade and tier are the
        first whitespace-delimited word of their field; the timestamp is the
        stripped field text.  The first occurrence of each field wins.  All
        None if the block is absent.  Values are reported as written -- never
        re-graded (NFR7).  Never throws (error -> parse_warning + best-effort).
    """
    grade: Optional[str] = None
    reviewer_tier: Optional[str] = None
    gate_timestamp: Optional[str] = None

    in_gates = False
    in_delivery_block = False

    # Normalize for comparison
    delivery_id_lower = delivery_id.lower()

    try:
        for line in state_text.splitlines():
            if _RE_DELIVERY_GATES_SECTION.match(line):
                in_gates = True
                in_delivery_block = False
                continue

            if not in_gates:
                continue

            # Any other level-2 heading ends the section.  The mandatory
            # whitespace after '##' already excludes '###' headings.
            if re.match(r"^##\s+\S", line):
                in_gates = False
                in_delivery_block = False
                continue

            # Every level-3 heading is a sub-section boundary.  Only a
            # well-formed '### delivery-<id>' heading for the requested
            # delivery opens the block; any other heading closes it so another
            # sub-section's fields cannot fill gaps in this delivery's gate.
            if re.match(r"^###(?:\s|$)", line):
                dm = _RE_DELIVERY_BLOCK_HEADER.match(line)
                in_delivery_block = (
                    dm is not None and dm.group(1).lower() == delivery_id_lower
                )
                continue

            if not in_delivery_block:
                continue

            gm = _RE_GATE_GRADE.match(line)
            rtm = _RE_GATE_REVIEWER_TIER.match(line)
            tsm = _RE_GATE_TIMESTAMP.match(line)

            if gm and grade is None:
                raw = gm.group(1).strip()
                # Grade is the first word (e.g. "A+ (cycle 2 ...)" -> "A+")
                grade = raw.split()[0] if raw else None
            elif rtm and reviewer_tier is None:
                raw = rtm.group(1).strip()
                # Tier is the first word (e.g. "Large (complexity score ...)" -> "Large")
                reviewer_tier = raw.split()[0] if raw else None
            elif tsm and gate_timestamp is None:
                gate_timestamp = tsm.group(1).strip() or None

            # Nothing later can change the result once all three are known.
            if grade and reviewer_tier and gate_timestamp:
                break

    except Exception as exc:  # noqa: BLE001 -- never throws (NFR7)
        parse_warnings.append(
            f"{delivery_id}: error parsing ## Delivery Gates ({exc}); "
            f"returning best-effort gate fields"
        )

    return grade, reviewer_tier, gate_timestamp
2006
+
2007
+
2008
def parse_deferred_issues(
    issues_path: Path,
    task_id: str,
    parse_warnings: list[str],
) -> list[DeferredIssue]:
    """DR-4: Parse delivery-NNN-issues.md and filter rows to Source task == task_id.

    File schema (schemas.md §12): 4-col markdown table
      Source task | Severity | Description | Status

    Args:
        issues_path: Location of the issues file; only stat'ed and read.
        task_id: Task whose rows are wanted; compared case-insensitively with
            the first column.
        parse_warnings: Mutable list that receives a message when the file
            cannot be inspected/read or a row cannot be parsed.

    Returns:
        list[DeferredIssue] filtered to this task.  An absent file (or a path
        that is not a regular file) -> ``[]`` without a warning.  An OS error
        while stat'ing or reading -> parse_warning + ``[]``.  An empty Severity
        cell defaults to ``[HIGH]`` and an empty Status cell to ``Open``.
        Never throws (NFR7).
    """
    try:
        # is_file() itself can raise (e.g. permission denied on a parent
        # directory), so it is guarded together with the read.
        if not issues_path.is_file():
            return []
        text = issues_path.read_bytes().decode("utf-8", errors="replace")
    except OSError as exc:
        parse_warnings.append(
            f"{task_id}: could not read {issues_path.name} ({exc}); "
            f"deferred_issues will be empty"
        )
        return []

    deferred: list[DeferredIssue] = []
    header_seen = False
    task_id_lower = task_id.lower()

    try:
        for line in text.splitlines():
            row = line.strip()
            if not row.startswith("|"):
                continue
            if _RE_TABLE_SEP.match(row):
                header_seen = True
                continue
            # Drop exactly one delimiter from each end.  strip("|") would also
            # swallow the extra pipe of a trailing empty cell ("... ||") and
            # make the row look one column short.
            cells = row.removeprefix("|").removesuffix("|").split("|")
            cols = [cell.strip() for cell in cells]
            if len(cols) < 4:
                continue
            # Skip header row (first column is 'Source task' or similar)
            if not header_seen:
                header_seen = True
                continue

            source_task, severity, description, status = cols[:4]

            # Filter to this task_id (case-insensitive comparison)
            if source_task.lower() == task_id_lower:
                deferred.append(DeferredIssue(
                    source_task=source_task,
                    severity=severity if severity else "[HIGH]",
                    description=description,
                    status=status if status else "Open",
                ))

    except Exception as exc:  # noqa: BLE001 -- never throws (NFR7)
        parse_warnings.append(
            f"{task_id}: error parsing {issues_path.name} ({exc}); "
            f"returning best-effort deferred issues"
        )

    return deferred
2074
+
2075
+
2076
def parse_log_availability(aid_dir: Path) -> LogAvailability:
    """DR-5: Report which log/heartbeat artefacts exist (honest DM-4 inventory).

    Fields of the returned LogAvailability:
      task_logs           -- always 'none' (AID persists no per-task execution log, DM-4)
      server_log_present  -- whether .aid/.temp/dashboard.log is a regular file
                             (expected-false on Windows)
      heartbeat_present   -- whether .aid/.heartbeat/ is a directory
                             (liveness signal, corroborating-only, KI-004)

    Only stat-style checks are made: nothing is read and nothing is written.
    An OSError from either check is reported as "not present", so the function
    never throws (NFR7).
    """

    def _exists(check) -> bool:
        # Run one pathlib predicate, mapping an OS-level failure to False.
        try:
            return check()
        except OSError:
            return False

    dashboard_log = aid_dir / ".temp" / "dashboard.log"
    heartbeat = aid_dir / ".heartbeat"

    return LogAvailability(
        task_logs="none",
        server_log_present=_exists(dashboard_log.is_file),
        heartbeat_present=_exists(heartbeat.is_dir),
    )