docassert 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. docassert/__init__.py +8 -0
  2. docassert/__main__.py +6 -0
  3. docassert/_data/consistency.yaml +51 -0
  4. docassert/_data/criteria/adr.criteria.yaml +36 -0
  5. docassert/_data/criteria/benefits-realization.criteria.yaml +30 -0
  6. docassert/_data/criteria/brd.criteria.yaml +30 -0
  7. docassert/_data/criteria/business-case.criteria.yaml +23 -0
  8. docassert/_data/criteria/charter.criteria.yaml +73 -0
  9. docassert/_data/criteria/data-migration-plan.criteria.yaml +28 -0
  10. docassert/_data/criteria/frnfr.criteria.yaml +31 -0
  11. docassert/_data/criteria/hypercare-plan.criteria.yaml +27 -0
  12. docassert/_data/criteria/post-implementation-review.criteria.yaml +24 -0
  13. docassert/_data/criteria/prd.criteria.yaml +31 -0
  14. docassert/_data/criteria/project.criteria.yaml +32 -0
  15. docassert/_data/criteria/qa-test-plan.criteria.yaml +27 -0
  16. docassert/_data/criteria/raci-stakeholder.criteria.yaml +24 -0
  17. docassert/_data/criteria/release-cutover-plan.criteria.yaml +30 -0
  18. docassert/_data/criteria/risk-register.criteria.yaml +32 -0
  19. docassert/_data/criteria/rollback-plan.criteria.yaml +29 -0
  20. docassert/_data/criteria/runbook.criteria.yaml +30 -0
  21. docassert/_data/criteria/status-report.criteria.yaml +26 -0
  22. docassert/_data/criteria/test-cases.criteria.yaml +28 -0
  23. docassert/_data/criteria/user-story.criteria.yaml +32 -0
  24. docassert/_data/profiles/agile-delivery.yaml +20 -0
  25. docassert/_data/profiles/lean-startup.yaml +19 -0
  26. docassert/_data/profiles/regulated-industry.yaml +31 -0
  27. docassert/_data/schema/adr.schema.json +45 -0
  28. docassert/_data/schema/benefits-realization.schema.json +45 -0
  29. docassert/_data/schema/brd.schema.json +45 -0
  30. docassert/_data/schema/business-case.schema.json +45 -0
  31. docassert/_data/schema/charter.schema.json +84 -0
  32. docassert/_data/schema/data-migration-plan.schema.json +45 -0
  33. docassert/_data/schema/frnfr.schema.json +45 -0
  34. docassert/_data/schema/hypercare-plan.schema.json +45 -0
  35. docassert/_data/schema/post-implementation-review.schema.json +45 -0
  36. docassert/_data/schema/prd.schema.json +45 -0
  37. docassert/_data/schema/project.schema.json +32 -0
  38. docassert/_data/schema/qa-test-plan.schema.json +45 -0
  39. docassert/_data/schema/raci-stakeholder.schema.json +45 -0
  40. docassert/_data/schema/release-cutover-plan.schema.json +45 -0
  41. docassert/_data/schema/risk-register.schema.json +45 -0
  42. docassert/_data/schema/rollback-plan.schema.json +45 -0
  43. docassert/_data/schema/runbook.schema.json +45 -0
  44. docassert/_data/schema/status-report.schema.json +58 -0
  45. docassert/_data/schema/test-cases.schema.json +45 -0
  46. docassert/_data/schema/user-story.schema.json +45 -0
  47. docassert/_data/templates/adr.template.md +17 -0
  48. docassert/_data/templates/benefits-realization.template.md +25 -0
  49. docassert/_data/templates/brd.template.md +22 -0
  50. docassert/_data/templates/business-case.template.md +27 -0
  51. docassert/_data/templates/charter.template.md +46 -0
  52. docassert/_data/templates/data-migration-plan.template.md +35 -0
  53. docassert/_data/templates/frnfr.template.md +19 -0
  54. docassert/_data/templates/hypercare-plan.template.md +29 -0
  55. docassert/_data/templates/post-implementation-review.template.md +31 -0
  56. docassert/_data/templates/prd.template.md +23 -0
  57. docassert/_data/templates/project.template.md +17 -0
  58. docassert/_data/templates/qa-test-plan.template.md +31 -0
  59. docassert/_data/templates/raci-stakeholder.template.md +21 -0
  60. docassert/_data/templates/release-cutover-plan.template.md +28 -0
  61. docassert/_data/templates/risk-register.template.md +18 -0
  62. docassert/_data/templates/rollback-plan.template.md +24 -0
  63. docassert/_data/templates/runbook.template.md +28 -0
  64. docassert/_data/templates/status-report.template.md +27 -0
  65. docassert/_data/templates/test-cases.template.md +17 -0
  66. docassert/_data/templates/user-story.template.md +17 -0
  67. docassert/cli.py +291 -0
  68. docassert/config.py +104 -0
  69. docassert/consistency.py +167 -0
  70. docassert/graph.py +68 -0
  71. docassert/loader.py +116 -0
  72. docassert/models.py +99 -0
  73. docassert/profiles.py +111 -0
  74. docassert/projects.py +49 -0
  75. docassert/report.py +83 -0
  76. docassert/rtm.py +70 -0
  77. docassert/semantic.py +124 -0
  78. docassert/status.py +538 -0
  79. docassert/structural.py +406 -0
  80. docassert-0.1.0.dist-info/METADATA +125 -0
  81. docassert-0.1.0.dist-info/RECORD +86 -0
  82. docassert-0.1.0.dist-info/WHEEL +5 -0
  83. docassert-0.1.0.dist-info/entry_points.txt +2 -0
  84. docassert-0.1.0.dist-info/licenses/LICENSE +201 -0
  85. docassert-0.1.0.dist-info/licenses/NOTICE +4 -0
  86. docassert-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,167 @@
1
+ """Cross-document consistency checks over the traceable-item graph.
2
+
3
+ Structural checks are deterministic and blocking:
4
+ - item-id-uniqueness : IDs unique across the repo (always blocks)
5
+ - referential-integrity: every link target exists (always blocks)
6
+ - required-links : downstream items declare their upstream link
7
+ (blocks only when the item's doc is approved)
8
+ - coverage : every parent item has >=1 downstream child
9
+ (blocks only when the parent's doc is approved)
10
+
11
+ Semantic alignment is AI-graded and advisory (never blocks).
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ from pathlib import Path
17
+
18
+ import yaml
19
+
20
+ from . import config as config_mod
21
+ from .graph import build_graph
22
+ from .models import CheckResult
23
+ from .semantic import run_alignment
24
+
25
+ CONFIG_PATH = Path("consistency.yaml")
26
+ APPROVED_STATES = {"approved", "baselined"}
27
+
28
+
29
def load_config(path: str | Path = CONFIG_PATH) -> dict:
    """Read the consistency configuration stored at `path`.

    Returns an empty dict when `path` is not an existing file, or when the
    parsed YAML document is empty/falsy.
    """
    config_file = Path(path)
    if config_file.is_file():
        with open(path, encoding="utf-8") as handle:
            parsed = yaml.safe_load(handle)
        return parsed or {}
    return {}
34
+
35
+
36
def _approved(item) -> bool:
    """Tell whether the item's document status is one of APPROVED_STATES.

    The comparison is case-insensitive: the status is stringified and
    lower-cased before the membership test.
    """
    status = str(item.doc_status).lower()
    return status in APPROVED_STATES
38
+
39
+
40
+ # ── structural (deterministic) ─────────────────────────────────────────────
41
def check_unique_item_ids(graph) -> CheckResult:
    """Blocking check that no item id is defined more than once.

    On failure the detail lists each duplicated id together with the paths
    reported for it by `graph.duplicates()`.
    """
    duplicates = graph.duplicates()
    if duplicates:
        listing = []
        for item_id, paths in duplicates.items():
            listing.append(f"{item_id} in {', '.join(paths)}")
        return CheckResult("item-id-uniqueness", False, True,
                           f"Duplicate item IDs: {'; '.join(listing)}")
    return CheckResult("item-id-uniqueness", True, True,
                       f"All {len(graph.occurrences)} item IDs are unique.")
49
+
50
+
51
def check_referential_integrity(graph) -> CheckResult:
    """Blocking check that every link target names an item in the graph.

    Walks the links of each item returned by `graph.all_items()` and reports
    every (source, relation, target) whose target does not exist.
    """
    dangling = [
        f"{item.id} —{relation}→ {target} (missing)"
        for item in graph.all_items()
        for relation, targets in item.links.items()
        for target in targets
        if not graph.exists(target)
    ]
    if not dangling:
        return CheckResult("referential-integrity", True, True, "All references resolve.")
    summary = f"{len(dangling)} broken reference(s): " + "; ".join(dangling)
    return CheckResult("referential-integrity", False, True, summary)
62
+
63
+
64
def check_required_links(graph, config) -> CheckResult:
    """Check that items declare the upstream link required for their type.

    `config["required_links"]` maps an item type to the relation that items of
    that type must declare. An item with no target for its required relation
    is an orphan. Orphans for which `_approved` is true fail the check; the
    remaining orphans are only counted in the detail text.

    Returns a blocking CheckResult that passes when there are no approved
    orphans.
    """
    # A bare `required_links:` key parses to None in YAML; treat that the same
    # as an absent key instead of crashing on `None.get(...)`.
    required = config.get("required_links") or {}
    approved_orphans, draft_orphans = [], []
    for item in graph.all_items():
        relation = required.get(item.type)
        if not relation or item.targets(relation):
            continue
        entry = f"{item.id} (missing '{relation}')"
        if _approved(item):
            approved_orphans.append(entry)
        else:
            draft_orphans.append(entry)

    parts = []
    if approved_orphans:
        parts.append("approved items missing a required link: " + "; ".join(approved_orphans))
    if draft_orphans:
        parts.append(f"{len(draft_orphans)} draft item(s) not yet linked (ok until approved)")
    detail = " · ".join(parts) if parts else "All required upstream links present."
    return CheckResult("required-links", not approved_orphans, True, detail)
79
+
80
+
81
def check_coverage(graph, config) -> CheckResult:
    """Check that parent items have at least one downstream child.

    Each rule under `config["coverage"]` supplies `parent` (looked up in
    `graph.by_type`), `relation`, and optionally `by_prefix` and `label`.
    A parent for which `graph.children(parent.id, relation, by_prefix)` is
    empty is a gap. Gaps for which `_approved` is true fail the check; the
    remaining gaps are only counted in the detail text.

    Returns a blocking CheckResult that passes when there are no approved gaps.
    """
    approved_gaps, draft_gaps = [], []
    # A bare `coverage:` key parses to None in YAML; treat it as "no rules".
    for rule in config.get("coverage") or []:
        parent_prefix, relation = rule["parent"], rule["relation"]
        by_prefix = rule.get("by_prefix")
        # `by_prefix` is optional; avoid rendering the literal "None" in the
        # fallback label when the rule does not restrict the child type.
        if by_prefix is None:
            default_label = f"{parent_prefix} → any"
        else:
            default_label = f"{parent_prefix} → {by_prefix}"
        label = rule.get("label", default_label)
        for parent in graph.by_type.get(parent_prefix, []):
            if graph.children(parent.id, relation, by_prefix):
                continue
            bucket = approved_gaps if _approved(parent) else draft_gaps
            bucket.append(f"{parent.id} ({label})")

    parts = []
    if approved_gaps:
        parts.append("approved items with no downstream coverage: " + "; ".join(approved_gaps))
    if draft_gaps:
        parts.append(f"{len(draft_gaps)} draft item(s) not yet covered (ok until approved)")
    detail = " · ".join(parts) if parts else "All approved items are covered."
    return CheckResult("coverage", not approved_gaps, True, detail)
99
+
100
+
101
def check_profile_completeness(documents_dir: str | Path = "documents") -> CheckResult:
    """Check each profiled project against the documents its profile requires.

    Rows come from `status.completeness_report(documents_dir)`. A row flagged
    `unknown` (profile not found) or `blocks` fails the check. Rows that have
    missing or incomplete required kinds without blocking are only counted as
    advisory gaps in the detail text.
    """
    from . import status as status_mod

    blockers, unknowns, advisories = [], [], []
    for row in status_mod.completeness_report(documents_dir):
        if row.get("unknown"):
            unknowns.append(f"{row['id']} → unknown profile '{row['profile']}'")
            continue
        if row["blocks"]:
            missing = ", ".join(row["missing_required"])
            blockers.append(f"{row['id']} ({row['profile']}) missing required: " + missing)
            continue
        if row["missing_required"] or row["incomplete_required"]:
            gap_count = len(row["missing_required"]) + len(row["incomplete_required"])
            advisories.append(f"{row['id']} ({gap_count} not yet complete)")

    messages = []
    if blockers:
        messages.append("active projects missing required documents: " + "; ".join(blockers))
    if unknowns:
        messages.append("unknown profiles: " + "; ".join(unknowns))
    if advisories:
        messages.append(f"{len(advisories)} project(s) with advisory gaps (not enforced yet)")

    if messages:
        detail = " · ".join(messages)
    else:
        detail = "All profiled projects carry their required documents."
    passed = not blockers and not unknowns
    return CheckResult("profile-completeness", passed, True, detail)
130
+
131
+
132
+ # ── semantic (advisory) ────────────────────────────────────────────────────
133
def run_alignment_checks(graph, config) -> list[CheckResult]:
    """Grade parent/child pairs joined by each configured alignment relation.

    For every rule under `config["alignment"]`, each item that links through
    the rule's `relation` to a target present in the graph contributes one
    edge.

    Returns:
        [] when there are no edges; a single passing, non-blocking "skipped"
        result when ANTHROPIC_API_KEY is not set; otherwise one
        `run_alignment` result per edge.
    """
    edges = []  # (prompt, parent, child, relation)
    # A bare `alignment:` key parses to None in YAML; treat it as "no rules".
    for rule in config.get("alignment") or []:
        relation = rule["relation"]
        # `prompt:` with no value is None, which has no .strip().
        prompt = (rule.get("prompt") or "").strip()
        for child in graph.all_items():
            for target in child.targets(relation):
                parent = graph.canonical(target)
                if parent is not None:
                    edges.append((prompt, parent, child, relation))

    if not edges:
        return []
    if not os.environ.get("ANTHROPIC_API_KEY"):
        return [CheckResult("alignment", True, False,
                            f"skipped — no ANTHROPIC_API_KEY ({len(edges)} link(s) to grade)",
                            kind="semantic", score=None)]
    return [run_alignment(f"align:{c.id}-{rel}-{p.id}", prompt, p.text, c.text)
            for prompt, p, c, rel in edges]
151
+
152
+
153
def run_consistency(documents_dir: str | Path = "documents",
                    config_path: str | Path | None = None,
                    with_semantic: bool = True) -> list[CheckResult]:
    """Run every consistency check over the documents under `documents_dir`.

    The config is read from `config_path` when one is given, otherwise from
    `config.read_consistency_config()`. The five structural results come
    first, in a fixed order; alignment results are appended only when
    `with_semantic` is true.
    """
    graph = build_graph(documents_dir)
    if config_path is None:
        cfg = config_mod.read_consistency_config()
    else:
        cfg = load_config(config_path)

    results = []
    results.append(check_unique_item_ids(graph))
    results.append(check_referential_integrity(graph))
    results.append(check_required_links(graph, cfg))
    results.append(check_coverage(graph, cfg))
    results.append(check_profile_completeness(documents_dir))
    if with_semantic:
        results += run_alignment_checks(graph, cfg)
    return results
docassert/graph.py ADDED
@@ -0,0 +1,68 @@
1
+ """Build the cross-document item graph used by the consistency engine."""
2
+ from __future__ import annotations
3
+
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+
7
+ from . import config
8
+ from .loader import load, parse_items
9
+ from .models import Item
10
+
11
+ DOCUMENTS_DIR = Path("documents")
12
+
13
+
14
class Graph:
    """Index of traceable items: by id, by type, by project, and by incoming link."""

    def __init__(self) -> None:
        # id -> every item defined with that id (more than one means a duplicate)
        self.occurrences: dict[str, list[Item]] = defaultdict(list)
        # item type (BR/PR/…) -> items
        self.by_type: dict[str, list[Item]] = defaultdict(list)
        # project code (AUR/ATL/…) -> items
        self.by_project: dict[str, list[Item]] = defaultdict(list)
        # link target id -> (relation, source item) pairs pointing at it
        self.incoming: dict[str, list[tuple[str, Item]]] = defaultdict(list)

    def add(self, item: Item) -> None:
        """Register `item` in every index, including one incoming edge per link target."""
        self.occurrences[item.id].append(item)
        self.by_type[item.type].append(item)
        self.by_project[item.project].append(item)
        for relation, targets in item.links.items():
            for target_id in targets:
                edge = (relation, item)
                self.incoming[target_id].append(edge)

    def exists(self, item_id: str) -> bool:
        """True when at least one item was added under `item_id`."""
        return item_id in self.occurrences

    def canonical(self, item_id: str) -> Item | None:
        """The first item added under `item_id`, or None when there is none."""
        found = self.occurrences.get(item_id)
        if not found:
            return None
        return found[0]

    def duplicates(self) -> dict[str, list[str]]:
        """id -> document paths, for every id that was added more than once."""
        repeated: dict[str, list[str]] = {}
        for item_id, items in self.occurrences.items():
            if len(items) > 1:
                repeated[item_id] = [entry.doc_path for entry in items]
        return repeated

    def all_items(self) -> list[Item]:
        """One item per id: the first occurrence of each."""
        firsts = []
        for items in self.occurrences.values():
            firsts.append(items[0])
        return firsts

    def children(self, target_id: str, relation: str,
                 by_type: str | None = None) -> list[Item]:
        """Items linking to `target_id` via `relation`, optionally limited to one item type."""
        matches = []
        for edge_relation, source in self.incoming.get(target_id, []):
            if edge_relation != relation:
                continue
            if by_type is not None and source.type != by_type:
                continue
            matches.append(source)
        return matches
51
+
52
+
53
def _item_sections_for(kind: str) -> list[dict]:
    """The `item_sections` list from the criteria for `kind`.

    Returns [] when no criteria exist for the kind, or when the criteria
    carry no (or an empty) `item_sections` entry.
    """
    if config.criteria_exists(kind):
        criteria = config.read_criteria(kind)
        return criteria.get("item_sections", []) or []
    return []
57
+
58
+
59
def build_graph(documents_dir: str | Path = DOCUMENTS_DIR) -> Graph:
    """Build a Graph from every `*.md` file found recursively under `documents_dir`.

    Files are visited in sorted path order. A file whose load raises
    ValueError is skipped; malformed frontmatter is caught by per-document
    validation instead.
    """
    graph = Graph()
    markdown_files = sorted(Path(documents_dir).rglob("*.md"))
    for md_path in markdown_files:
        try:
            doc = load(md_path)
        except ValueError:
            continue
        item_sections = _item_sections_for(doc.kind or "")
        for item in parse_items(doc, item_sections):
            graph.add(item)
    return graph
docassert/loader.py ADDED
@@ -0,0 +1,116 @@
1
+ """Load and parse a business document into frontmatter + sections."""
2
+ from __future__ import annotations
3
+
4
+ import re
5
+ from pathlib import Path
6
+
7
+ import frontmatter
8
+ import yaml
9
+
10
+ from .models import Document, Item, Section
11
+
12
+ # A traceable item bullet, e.g.
13
+ # **AUR-PR-014** (traces: AUR-BR-001): The flow shall be self-serve.
14
+ # The id is <CODE>-<TYPE>-<NNN>: project code, item type, number.
15
+ ITEM_RE = re.compile(
16
+ r"^\*\*(?P<id>(?P<project>[A-Z]{2,6})-(?P<type>[A-Z]{2,6})-\d+)\*\*" # **CODE-TYPE-123**
17
+ r"(?:\s*\((?P<links>[^)]*)\))?" # optional (relation: id, …)
18
+ r"\s*:\s*(?P<text>.+)$" # : text
19
+ )
20
+
21
+
22
def parse_link_clause(clause: str) -> dict[str, list[str]]:
    """Turn `traces: BR-001, BR-002; verifies: FR-3` into {relation: [ids]}.

    Groups are separated by `;`, and each group is `relation: id, id, …`.
    Relation names are lower-cased. Groups lacking a colon, a relation name,
    or any id are ignored. Repeated relations accumulate their ids in order.
    """
    parsed: dict[str, list[str]] = {}
    for chunk in (clause or "").split(";"):
        head, colon, tail = chunk.partition(":")
        if not colon:
            continue
        relation = head.strip().lower()
        targets = [part for part in (raw.strip() for raw in tail.split(",")) if part]
        if not relation or not targets:
            continue
        parsed.setdefault(relation, []).extend(targets)
    return parsed
34
+
35
+
36
def parse_sections(body: str) -> dict[str, Section]:
    """Split a markdown body into its H2 (`## `) sections, keyed by title.

    Lines before the first H2 heading are discarded. Each section body is the
    text up to the next H2 heading, stripped of surrounding whitespace. When a
    title repeats, the later section replaces the earlier one.
    """
    collected: list[tuple[str, list[str]]] = []
    for line in body.splitlines():
        if line.startswith("## "):
            collected.append((line[3:].strip(), []))
        elif collected:
            collected[-1][1].append(line)

    sections: dict[str, Section] = {}
    for title, lines in collected:
        sections[title] = Section(title=title, body="\n".join(lines).strip())
    return sections
58
+
59
+
60
def load(path: str | Path) -> Document:
    """Read the file at `path` and parse it into a Document.

    Raises:
        ValueError: the YAML frontmatter could not be parsed.
    """
    path = Path(path)
    raw_text = path.read_text(encoding="utf-8")
    try:
        post = frontmatter.loads(raw_text)
    except yaml.YAMLError as exc:
        message = f"{path}: invalid YAML frontmatter: {exc}"
        raise ValueError(message) from exc

    body = post.content
    return Document(
        path=str(path),
        frontmatter=dict(post.metadata),
        sections=parse_sections(body),
        raw_body=body,
    )
75
+
76
+
77
def load_criteria(path: str | Path) -> dict:
    """Load a criteria YAML file (e.g. criteria/charter.criteria.yaml).

    Returns the parsed document. An empty file yields {} rather than None, so
    the declared `dict` return type holds.
    """
    with open(path, encoding="utf-8") as fh:
        # safe_load returns None for an empty document.
        return yaml.safe_load(fh) or {}
81
+
82
+
83
def iter_item_lines(section: Section):
    """Yield (bullet_text, ITEM_RE match or None) for each bullet in `section.list_items`."""
    for bullet in section.list_items:
        match = ITEM_RE.match(bullet)
        yield bullet, match
87
+
88
+
89
def parse_items(doc: Document, item_sections: list[dict]) -> list[Item]:
    """Collect the traceable items declared in a document's item sections.

    Each entry of `item_sections` names a section through its "section" key;
    sections the document does not have are skipped. Bullets that do not
    match the item pattern are skipped here, since the `items-well-formed`
    structural check is what flags them. Every item carries the document's
    frontmatter `status`, which defaults to "draft".
    """
    doc_status = str(doc.frontmatter.get("status", "draft"))
    collected: list[Item] = []
    for spec in item_sections or []:
        title = spec["section"]
        section = doc.section(title)
        if section is None:
            continue
        matches = (match for _bullet, match in iter_item_lines(section) if match)
        for match in matches:
            item = Item(
                id=match.group("id"),
                project=match.group("project"),
                type=match.group("type"),
                text=match.group("text").strip(),
                links=parse_link_clause(match.group("links") or ""),
                doc_path=doc.path,
                doc_kind=doc.kind or "",
                doc_status=doc_status,
                section=title,
            )
            collected.append(item)
    return collected
docassert/models.py ADDED
@@ -0,0 +1,99 @@
1
+ """Shared data types for docassert."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass, field
5
+
6
+
7
@dataclass
class Section:
    """A single H2 section of a document body."""
    title: str
    body: str

    @property
    def list_items(self) -> list[str]:
        """Markdown bullet items (`- ` or `* `) in this section.

        Each line is stripped before matching, so indented bullets are
        included as well. Bullets that are empty, or whose text starts with
        an HTML comment (template placeholders), are dropped.
        """
        items: list[str] = []
        for line in self.body.splitlines():
            stripped = line.strip()
            if not stripped.startswith(("- ", "* ")):
                continue
            text = stripped[2:].strip()
            # drop HTML-comment-only placeholder bullets from the template
            if not text or text.startswith("<!--"):
                continue
            items.append(text)
        return items

    @property
    def is_empty(self) -> bool:
        """True when nothing is left once HTML comments and whitespace are removed.

        Comments are removed from the body as a whole rather than line by
        line. A multi-line template comment therefore leaves no residue, and
        real text sharing a line with a comment still counts as content. An
        unterminated `<!--` is treated as a comment running to the end of the
        body.
        """
        import re  # local import: this module does not import re at top level

        without_comments = re.sub(r"<!--.*?(?:-->|\Z)", "", self.body, flags=re.DOTALL)
        return not without_comments.strip()
41
+
42
+
43
@dataclass
class Document:
    """One parsed document: its path, frontmatter mapping, H2 sections, and raw body."""
    path: str
    frontmatter: dict
    sections: dict[str, Section] = field(default_factory=dict)
    raw_body: str = ""

    @property
    def id(self) -> str | None:
        """The frontmatter `id` value, or None when absent."""
        return self.frontmatter.get("id", None)

    @property
    def kind(self) -> str | None:
        """The frontmatter `kind` value, or None when absent."""
        return self.frontmatter.get("kind", None)

    def section(self, title: str) -> Section | None:
        """The section stored under exactly `title`, or None when there is none."""
        return self.sections.get(title, None)
61
+
62
+
63
@dataclass
class CheckResult:
    """Outcome of a single check: its id, pass/fail, whether it blocks, and a detail message."""
    check_id: str
    passed: bool
    blocking: bool
    detail: str
    kind: str = "structural"    # structural | semantic
    score: float | None = None  # semantic checks only, 0..1

    @property
    def is_blocking_failure(self) -> bool:
        """True only for a check that is blocking and did not pass."""
        failed = not self.passed
        return self.blocking and failed
76
+
77
+
78
@dataclass
class Item:
    """One traceable item parsed from a document bullet (a requirement, criterion, test, …).

    A bullet is authored like:
        - **AUR-PR-014** (traces: AUR-BR-001): The onboarding flow shall be self-serve.

    Ids have the shape <CODE>-<TYPE>-<NNN>: `project` holds the project code
    (AUR) and `type` holds the item type (PR). Ids are globally unique and
    self-identify their project.
    """
    id: str                      # e.g. "AUR-PR-014"
    project: str                 # project code, e.g. "AUR"
    type: str                    # item type, e.g. "PR"
    text: str
    links: dict[str, list[str]]  # relation -> [target ids], e.g. {"traces": ["AUR-BR-001"]}
    doc_path: str
    doc_kind: str
    doc_status: str
    section: str

    def targets(self, relation: str) -> list[str]:
        """Target ids linked through `relation`; an empty list when the relation is absent."""
        if relation in self.links:
            return self.links[relation]
        return []
docassert/profiles.py ADDED
@@ -0,0 +1,111 @@
1
+ """Profiles — the expected document set for a project.
2
+
3
+ A profile (profiles/<name>.yaml) lists the document kinds a project is expected
4
+ to carry, at two levels:
5
+ - required : must be present and complete; a missing one blocks CI once the
6
+ project reaches the profile's `enforce_when` lifecycle stage.
7
+ - recommended : surfaced as a gap on the project page, but never blocking.
8
+
9
+ A project opts in with `profile: <name>` in its project.md. No profile means no
10
+ completeness expectations (fully backward-compatible).
11
+ """
12
+ from __future__ import annotations
13
+
14
+ from pathlib import Path
15
+
16
+ import yaml
17
+
18
+ from . import config as config_mod
19
+
20
+ APPROVED = {"approved", "baselined"}
21
+
22
+
23
def available(profiles_dir: str | Path | None = None) -> list[str]:
    """List profile names.

    With an explicit `profiles_dir`, only that directory is consulted: the
    sorted stems of its `*.yaml` files, or [] when it is not a directory.
    Without one, the lookup is delegated to `config.available_profiles()`.
    """
    if profiles_dir is None:
        return config_mod.available_profiles()
    root = Path(profiles_dir)
    if not root.is_dir():
        return []
    return sorted(entry.stem for entry in root.glob("*.yaml"))
30
+
31
+
32
def load_profile(name: str, profiles_dir: str | Path | None = None) -> dict | None:
    """Load the profile called `name`, or return None when no such file exists.

    With an explicit `profiles_dir`, only `<profiles_dir>/<name>.yaml` is
    considered; otherwise the path comes from `config.profile_path(name)`.
    The result always has the keys `name`, `enforce_when` (default "active"),
    `required` and `recommended` (both lists, taken from `expects`).
    """
    if profiles_dir is None:
        path = config_mod.profile_path(name)
    else:
        candidate = Path(profiles_dir) / f"{name}.yaml"
        path = candidate if candidate.is_file() else None
    if path is None:
        return None

    raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    expects = raw.get("expects", {}) or {}
    required = list(expects.get("required", []) or [])
    recommended = list(expects.get("recommended", []) or [])
    return {
        "name": raw.get("name", name),
        "enforce_when": raw.get("enforce_when", "active"),
        "required": required,
        "recommended": recommended,
    }
50
+
51
+
52
def _kind_state(kind: str, by_kind: dict[str, list[dict]]) -> str:
    """Classify one expected kind as "complete", "incomplete" or "missing".

    missing    = `by_kind` holds no document of the kind.
    complete   = some document of the kind has a status in APPROVED and is
                 passing (`passing` defaults to True when absent).
    incomplete = documents exist, but none of them is complete.
    """
    docs = by_kind.get(kind, [])
    if not docs:
        return "missing"
    for doc in docs:
        if str(doc.get("status", "")).lower() not in APPROVED:
            continue
        if doc.get("passing", True):
            return "complete"
    return "incomplete"
67
+
68
+
69
def completeness(profile: dict, documents: list[dict], project_status: str) -> dict:
    """Compare a project's documents with what its profile expects.

    `documents` is a list of {kind, status, passing} dicts. Each required and
    recommended kind is classified with `_kind_state`. The profile counts as
    enforced when `project_status` equals the profile's `enforce_when`
    (case-insensitively), and `blocks` is set only when it is enforced and at
    least one required kind is missing.
    """
    docs_by_kind: dict[str, list[dict]] = {}
    for document in documents:
        docs_by_kind.setdefault(document.get("kind"), []).append(document)

    required = []
    for kind in profile["required"]:
        required.append({"kind": kind, "state": _kind_state(kind, docs_by_kind)})
    recommended = []
    for kind in profile["recommended"]:
        recommended.append({"kind": kind, "state": _kind_state(kind, docs_by_kind)})

    missing_required = [row["kind"] for row in required if row["state"] == "missing"]
    incomplete_required = [row["kind"] for row in required if row["state"] == "incomplete"]
    recommended_gaps = [row["kind"] for row in recommended if row["state"] != "complete"]
    complete_count = sum(1 for row in required if row["state"] == "complete")

    enforce_when = profile["enforce_when"]
    enforced = str(project_status).lower() == str(enforce_when).lower()

    result = {
        "profile": profile["name"],
        "enforce_when": enforce_when,
        "enforced": enforced,
        "unknown": False,
        "required": required,
        "recommended": recommended,
        "required_total": len(required),
        "required_complete": complete_count,
        "missing_required": missing_required,
        "incomplete_required": incomplete_required,
        "recommended_gaps": recommended_gaps,
    }
    # A missing required document blocks only once the project is enforced.
    result["blocks"] = enforced and bool(missing_required)
    return result
101
+
102
+
103
def unknown(profile_name: str) -> dict:
    """Completeness placeholder for a project whose named profile does not exist.

    Has the same keys as a `completeness` result, with `unknown` set to True,
    nothing enforced, and every list and counter empty.
    """
    placeholder = {
        "profile": profile_name,
        "enforce_when": None,
        "enforced": False,
        "unknown": True,
        "required": [],
        "recommended": [],
        "required_total": 0,
        "required_complete": 0,
        "missing_required": [],
        "incomplete_required": [],
        "recommended_gaps": [],
        "blocks": False,
    }
    return placeholder
docassert/projects.py ADDED
@@ -0,0 +1,49 @@
1
+ """The project registry — derived from the `project.md` anchors.
2
+
3
+ Each project folder holds a `project.md` (kind: project) declaring the canonical
4
+ id (PRJ-NNN-CODE), the code, name, sponsor, and status. `projects.yaml` at the
5
+ repo root is the generated index of them all.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from collections import Counter
10
+ from pathlib import Path
11
+
12
+ import yaml
13
+
14
+ from .loader import load
15
+
16
+ DOCUMENTS_DIR = Path("documents")
17
+ _FIELDS = ("id", "code", "name", "sponsor", "status")
18
+
19
+
20
def load_projects(documents_dir: str | Path = DOCUMENTS_DIR) -> list[dict]:
    """Collect every project anchor (kind: project), sorted by id.

    Scans `*.md` files recursively under `documents_dir`. Each entry holds the
    `_FIELDS` frontmatter values as strings, plus the document `path`. A field
    that is absent or explicitly null becomes "" so that it is treated as
    "not set" rather than as the literal text "None".
    """
    projects = []
    for path in sorted(Path(documents_dir).rglob("*.md")):
        try:
            doc = load(path)
        except ValueError:
            # Unparseable documents are skipped here.
            continue
        if doc.kind != "project":
            continue
        entry = {}
        for key in _FIELDS:
            value = doc.frontmatter.get(key)
            # str(None) would yield "None", a truthy value that passes
            # emptiness checks and would be written into the generated index.
            entry[key] = "" if value is None else str(value)
        entry["path"] = doc.path
        projects.append(entry)
    projects.sort(key=lambda p: p["id"])
    return projects
34
+
35
+
36
def registry_issues(projects: list[dict]) -> list[str]:
    """Report project ids and codes that occur more than once.

    Empty values are never reported. Id problems are listed before code
    problems.
    """
    problems: list[str] = []
    for field in ("id", "code"):
        tally = Counter(project[field] for project in projects)
        for value, count in tally.items():
            if value and count > 1:
                problems.append(f"duplicate project {field}: {value}")
    return problems
43
+
44
+
45
def render_yaml(projects: list[dict]) -> str:
    """Render the generated project index as YAML text.

    Only the `_FIELDS` keys of each project are written, under a top-level
    `projects` list and preceded by a two-line "do not edit" header.
    """
    rows = []
    for project in projects:
        rows.append({key: project[key] for key in _FIELDS})
    header = ("# Generated by `docassert projects` — do not edit.\n"
              "# The project.md anchors under documents/ are the source of truth.\n")
    body = yaml.safe_dump({"projects": rows}, sort_keys=False, allow_unicode=True)
    return header + body