docassert 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docassert/__init__.py +8 -0
- docassert/__main__.py +6 -0
- docassert/_data/consistency.yaml +51 -0
- docassert/_data/criteria/adr.criteria.yaml +36 -0
- docassert/_data/criteria/benefits-realization.criteria.yaml +30 -0
- docassert/_data/criteria/brd.criteria.yaml +30 -0
- docassert/_data/criteria/business-case.criteria.yaml +23 -0
- docassert/_data/criteria/charter.criteria.yaml +73 -0
- docassert/_data/criteria/data-migration-plan.criteria.yaml +28 -0
- docassert/_data/criteria/frnfr.criteria.yaml +31 -0
- docassert/_data/criteria/hypercare-plan.criteria.yaml +27 -0
- docassert/_data/criteria/post-implementation-review.criteria.yaml +24 -0
- docassert/_data/criteria/prd.criteria.yaml +31 -0
- docassert/_data/criteria/project.criteria.yaml +32 -0
- docassert/_data/criteria/qa-test-plan.criteria.yaml +27 -0
- docassert/_data/criteria/raci-stakeholder.criteria.yaml +24 -0
- docassert/_data/criteria/release-cutover-plan.criteria.yaml +30 -0
- docassert/_data/criteria/risk-register.criteria.yaml +32 -0
- docassert/_data/criteria/rollback-plan.criteria.yaml +29 -0
- docassert/_data/criteria/runbook.criteria.yaml +30 -0
- docassert/_data/criteria/status-report.criteria.yaml +26 -0
- docassert/_data/criteria/test-cases.criteria.yaml +28 -0
- docassert/_data/criteria/user-story.criteria.yaml +32 -0
- docassert/_data/profiles/agile-delivery.yaml +20 -0
- docassert/_data/profiles/lean-startup.yaml +19 -0
- docassert/_data/profiles/regulated-industry.yaml +31 -0
- docassert/_data/schema/adr.schema.json +45 -0
- docassert/_data/schema/benefits-realization.schema.json +45 -0
- docassert/_data/schema/brd.schema.json +45 -0
- docassert/_data/schema/business-case.schema.json +45 -0
- docassert/_data/schema/charter.schema.json +84 -0
- docassert/_data/schema/data-migration-plan.schema.json +45 -0
- docassert/_data/schema/frnfr.schema.json +45 -0
- docassert/_data/schema/hypercare-plan.schema.json +45 -0
- docassert/_data/schema/post-implementation-review.schema.json +45 -0
- docassert/_data/schema/prd.schema.json +45 -0
- docassert/_data/schema/project.schema.json +32 -0
- docassert/_data/schema/qa-test-plan.schema.json +45 -0
- docassert/_data/schema/raci-stakeholder.schema.json +45 -0
- docassert/_data/schema/release-cutover-plan.schema.json +45 -0
- docassert/_data/schema/risk-register.schema.json +45 -0
- docassert/_data/schema/rollback-plan.schema.json +45 -0
- docassert/_data/schema/runbook.schema.json +45 -0
- docassert/_data/schema/status-report.schema.json +58 -0
- docassert/_data/schema/test-cases.schema.json +45 -0
- docassert/_data/schema/user-story.schema.json +45 -0
- docassert/_data/templates/adr.template.md +17 -0
- docassert/_data/templates/benefits-realization.template.md +25 -0
- docassert/_data/templates/brd.template.md +22 -0
- docassert/_data/templates/business-case.template.md +27 -0
- docassert/_data/templates/charter.template.md +46 -0
- docassert/_data/templates/data-migration-plan.template.md +35 -0
- docassert/_data/templates/frnfr.template.md +19 -0
- docassert/_data/templates/hypercare-plan.template.md +29 -0
- docassert/_data/templates/post-implementation-review.template.md +31 -0
- docassert/_data/templates/prd.template.md +23 -0
- docassert/_data/templates/project.template.md +17 -0
- docassert/_data/templates/qa-test-plan.template.md +31 -0
- docassert/_data/templates/raci-stakeholder.template.md +21 -0
- docassert/_data/templates/release-cutover-plan.template.md +28 -0
- docassert/_data/templates/risk-register.template.md +18 -0
- docassert/_data/templates/rollback-plan.template.md +24 -0
- docassert/_data/templates/runbook.template.md +28 -0
- docassert/_data/templates/status-report.template.md +27 -0
- docassert/_data/templates/test-cases.template.md +17 -0
- docassert/_data/templates/user-story.template.md +17 -0
- docassert/cli.py +291 -0
- docassert/config.py +104 -0
- docassert/consistency.py +167 -0
- docassert/graph.py +68 -0
- docassert/loader.py +116 -0
- docassert/models.py +99 -0
- docassert/profiles.py +111 -0
- docassert/projects.py +49 -0
- docassert/report.py +83 -0
- docassert/rtm.py +70 -0
- docassert/semantic.py +124 -0
- docassert/status.py +538 -0
- docassert/structural.py +406 -0
- docassert-0.1.0.dist-info/METADATA +125 -0
- docassert-0.1.0.dist-info/RECORD +86 -0
- docassert-0.1.0.dist-info/WHEEL +5 -0
- docassert-0.1.0.dist-info/entry_points.txt +2 -0
- docassert-0.1.0.dist-info/licenses/LICENSE +201 -0
- docassert-0.1.0.dist-info/licenses/NOTICE +4 -0
- docassert-0.1.0.dist-info/top_level.txt +1 -0
docassert/consistency.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Cross-document consistency checks over the traceable-item graph.
|
|
2
|
+
|
|
3
|
+
Structural checks are deterministic and blocking:
|
|
4
|
+
- item-id-uniqueness : IDs unique across the repo (always blocks)
|
|
5
|
+
- referential-integrity: every link target exists (always blocks)
|
|
6
|
+
- required-links : downstream items declare their upstream link
|
|
7
|
+
(blocks only when the item's doc is approved)
|
|
8
|
+
- coverage : every parent item has >=1 downstream child
|
|
9
|
+
(blocks only when the parent's doc is approved)
|
|
10
|
+
|
|
11
|
+
Semantic alignment is AI-graded and advisory (never blocks).
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
import yaml
|
|
19
|
+
|
|
20
|
+
from . import config as config_mod
|
|
21
|
+
from .graph import build_graph
|
|
22
|
+
from .models import CheckResult
|
|
23
|
+
from .semantic import run_alignment
|
|
24
|
+
|
|
25
|
+
CONFIG_PATH = Path("consistency.yaml")
|
|
26
|
+
APPROVED_STATES = {"approved", "baselined"}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def load_config(path: str | Path = CONFIG_PATH) -> dict:
    """Read a consistency config file.

    Returns the parsed YAML content, or an empty dict when `path` is not an
    existing file or the file parses to a falsy value (e.g. it is empty).
    """
    if Path(path).is_file():
        with open(path, encoding="utf-8") as stream:
            parsed = yaml.safe_load(stream)
        return parsed or {}
    return {}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _approved(item) -> bool:
    """Tell whether the item's document status is one of APPROVED_STATES.

    The status is stringified and lower-cased before the comparison.
    """
    status = str(item.doc_status).lower()
    return status in APPROVED_STATES
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ── structural (deterministic) ─────────────────────────────────────────────
|
|
41
|
+
def check_unique_item_ids(graph) -> CheckResult:
    """Blocking check: fail when any item id is defined more than once.

    The failure detail lists each duplicated id with the paths that
    `graph.duplicates()` reports for it.
    """
    duplicated = graph.duplicates()
    if duplicated:
        listing = []
        for item_id, doc_paths in duplicated.items():
            listing.append(f"{item_id} in {', '.join(doc_paths)}")
        return CheckResult("item-id-uniqueness", False, True,
                           "Duplicate item IDs: " + "; ".join(listing))
    total = len(graph.occurrences)
    return CheckResult("item-id-uniqueness", True, True,
                       f"All {total} item IDs are unique.")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def check_referential_integrity(graph) -> CheckResult:
    """Blocking check: every link target must be an id known to the graph.

    Walks the links of each item returned by `graph.all_items()` and reports
    every (source, relation, target) whose target `graph.exists()` rejects.
    """
    dangling = [
        f"{item.id} —{relation}→ {target} (missing)"
        for item in graph.all_items()
        for relation, targets in item.links.items()
        for target in targets
        if not graph.exists(target)
    ]
    if not dangling:
        return CheckResult("referential-integrity", True, True, "All references resolve.")
    summary = f"{len(dangling)} broken reference(s): "
    return CheckResult("referential-integrity", False, True, summary + "; ".join(dangling))
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def check_required_links(graph, config) -> CheckResult:
    """Check that items declare the upstream link their type requires.

    `config["required_links"]` is read as a mapping of item type -> relation
    name. An item whose type appears there but which has no target for that
    relation is an orphan. Orphans in approved documents fail the check;
    orphans in other documents are only counted in the detail text.

    Returns a blocking CheckResult that passes when no approved item is
    orphaned.
    """
    # `or {}` tolerates a `required_links:` key that is present but null in
    # the YAML, which would otherwise make `.get` below fail on None.
    required = config.get("required_links") or {}
    approved_orphans, draft_orphans = [], []
    for item in graph.all_items():
        relation = required.get(item.type)
        if relation and not item.targets(relation):
            bucket = approved_orphans if _approved(item) else draft_orphans
            bucket.append(f"{item.id} (missing '{relation}')")
    parts = []
    if approved_orphans:
        parts.append("approved items missing a required link: " + "; ".join(approved_orphans))
    if draft_orphans:
        parts.append(f"{len(draft_orphans)} draft item(s) not yet linked (ok until approved)")
    return CheckResult("required-links", not approved_orphans, True,
                       " · ".join(parts) if parts else "All required upstream links present.")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def check_coverage(graph, config) -> CheckResult:
    """Check that parent items have at least one downstream child.

    Each rule in `config["coverage"]` names a `parent` item type and a
    `relation`, and may give `by_prefix` (restrict children to one item type)
    and a `label` used in the report. A parent with no matching child is a
    gap. Gaps in approved documents fail the check; gaps in other documents
    are only counted in the detail text.

    Returns a blocking CheckResult that passes when no approved parent is
    uncovered. Raises KeyError if a rule lacks `parent` or `relation`.
    """
    approved_gaps, draft_gaps = [], []
    # `or []` tolerates a `coverage:` key that is present but null in the YAML.
    for rule in config.get("coverage") or []:
        parent_prefix, relation = rule["parent"], rule["relation"]
        by_prefix = rule.get("by_prefix")
        # Without `by_prefix` children of any type count, so say "any" in the
        # default label instead of rendering the literal "None".
        default_label = f"{parent_prefix} → {by_prefix if by_prefix else 'any'}"
        label = rule.get("label") or default_label
        for parent in graph.by_type.get(parent_prefix, []):
            if graph.children(parent.id, relation, by_prefix):
                continue
            bucket = approved_gaps if _approved(parent) else draft_gaps
            bucket.append(f"{parent.id} ({label})")
    parts = []
    if approved_gaps:
        parts.append("approved items with no downstream coverage: " + "; ".join(approved_gaps))
    if draft_gaps:
        parts.append(f"{len(draft_gaps)} draft item(s) not yet covered (ok until approved)")
    return CheckResult("coverage", not approved_gaps, True,
                       " · ".join(parts) if parts else "All approved items are covered.")
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def check_profile_completeness(documents_dir: str | Path = "documents") -> CheckResult:
    """Check each profiled project against the documents its profile requires.

    Entries of `status.completeness_report(documents_dir)` are sorted into
    three groups: unknown profile, blocking (the entry's `blocks` flag is
    set), and advisory (required kinds missing or incomplete, not blocking).
    The result is blocking and fails when any entry is blocking or names an
    unknown profile; advisory entries are only counted in the detail.
    """
    # Imported here rather than at module level, as in the original.
    from . import status as status_mod

    blockers: list[str] = []
    unknowns: list[str] = []
    advisories: list[str] = []
    for entry in status_mod.completeness_report(documents_dir):
        if entry.get("unknown"):
            unknowns.append(f"{entry['id']} → unknown profile '{entry['profile']}'")
            continue
        if entry["blocks"]:
            head = f"{entry['id']} ({entry['profile']}) missing required: "
            blockers.append(head + ", ".join(entry["missing_required"]))
            continue
        if entry["missing_required"] or entry["incomplete_required"]:
            n = len(entry["missing_required"]) + len(entry["incomplete_required"])
            advisories.append(f"{entry['id']} ({n} not yet complete)")

    parts = []
    if blockers:
        parts.append("active projects missing required documents: " + "; ".join(blockers))
    if unknowns:
        parts.append("unknown profiles: " + "; ".join(unknowns))
    if advisories:
        parts.append(f"{len(advisories)} project(s) with advisory gaps (not enforced yet)")

    if parts:
        detail = " · ".join(parts)
    else:
        detail = "All profiled projects carry their required documents."
    passed = not (blockers or unknowns)
    return CheckResult("profile-completeness", passed, True, detail)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ── semantic (advisory) ────────────────────────────────────────────────────
|
|
133
|
+
def run_alignment_checks(graph, config) -> list[CheckResult]:
    """Grade parent/child alignment for every link named by an alignment rule.

    Each rule in `config["alignment"]` gives a `relation` and an optional
    `prompt`. For every item, each target of that relation that resolves via
    `graph.canonical()` becomes one (prompt, parent, child, relation) edge.

    Returns:
        - `[]` when there are no edges to grade;
        - one passing, non-blocking "alignment" result marked as skipped when
          the ANTHROPIC_API_KEY environment variable is unset or empty;
        - otherwise one `run_alignment(...)` result per edge.

    Raises KeyError if a rule lacks `relation`.
    """
    edges = []  # (prompt, parent, child, relation)
    # `or []` / `or ""` tolerate keys that are present but null in the YAML;
    # a null prompt previously crashed on `.strip()`.
    for rule in config.get("alignment") or []:
        relation = rule["relation"]
        prompt = (rule.get("prompt") or "").strip()
        for child in graph.all_items():
            for target in child.targets(relation):
                parent = graph.canonical(target)
                if parent is not None:  # unresolved targets are not graded here
                    edges.append((prompt, parent, child, relation))

    if not edges:
        return []
    if not os.environ.get("ANTHROPIC_API_KEY"):
        return [CheckResult("alignment", True, False,
                            f"skipped — no ANTHROPIC_API_KEY ({len(edges)} link(s) to grade)",
                            kind="semantic", score=None)]
    return [run_alignment(f"align:{c.id}-{rel}-{p.id}", prompt, p.text, c.text)
            for prompt, p, c, rel in edges]
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def run_consistency(documents_dir: str | Path = "documents",
                    config_path: str | Path | None = None,
                    with_semantic: bool = True) -> list[CheckResult]:
    """Run the cross-document checks and return their results in order.

    The graph is built from `documents_dir`. The config comes from
    `load_config(config_path)` when a path is given, otherwise from
    `config_mod.read_consistency_config()`. The five structural checks always
    run; alignment results are appended only when `with_semantic` is true.
    """
    graph = build_graph(documents_dir)
    if config_path is None:
        cfg = config_mod.read_consistency_config()
    else:
        cfg = load_config(config_path)

    results = []
    results.append(check_unique_item_ids(graph))
    results.append(check_referential_integrity(graph))
    results.append(check_required_links(graph, cfg))
    results.append(check_coverage(graph, cfg))
    results.append(check_profile_completeness(documents_dir))
    if with_semantic:
        results += run_alignment_checks(graph, cfg)
    return results
|
docassert/graph.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Build the cross-document item graph used by the consistency engine."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from . import config
|
|
8
|
+
from .loader import load, parse_items
|
|
9
|
+
from .models import Item
|
|
10
|
+
|
|
11
|
+
DOCUMENTS_DIR = Path("documents")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Graph:
    """Index of traceable items, keyed several ways, plus incoming links.

    Items are registered with `add()`. Several items may share an id; the
    first one registered is the one `canonical()` and `all_items()` return.
    """

    def __init__(self) -> None:
        # item id -> every item registered under that id, in insertion order
        self.occurrences: dict[str, list[Item]] = defaultdict(list)
        # item type (BR/PR/…) -> items
        self.by_type: dict[str, list[Item]] = defaultdict(list)
        # project code (AUR/ATL/…) -> items
        self.by_project: dict[str, list[Item]] = defaultdict(list)
        # link target id -> (relation, source item) for each link pointing at it
        self.incoming: dict[str, list[tuple[str, Item]]] = defaultdict(list)

    def add(self, item: Item) -> None:
        """Register `item` in every index, including one incoming entry per link."""
        self.occurrences[item.id].append(item)
        self.by_type[item.type].append(item)
        self.by_project[item.project].append(item)
        for relation, targets in item.links.items():
            edge = (relation, item)
            for target in targets:
                self.incoming[target].append(edge)

    def exists(self, item_id: str) -> bool:
        """True when at least one item was registered under `item_id`."""
        # Membership test, not indexing: indexing a defaultdict would insert the key.
        return item_id in self.occurrences

    def canonical(self, item_id: str) -> Item | None:
        """First item registered under `item_id`, or None when there is none."""
        registered = self.occurrences.get(item_id)
        if not registered:
            return None
        return registered[0]

    def duplicates(self) -> dict[str, list[str]]:
        """Map each id registered more than once to the doc paths of its items."""
        repeated: dict[str, list[str]] = {}
        for item_id, registered in self.occurrences.items():
            if len(registered) > 1:
                repeated[item_id] = [entry.doc_path for entry in registered]
        return repeated

    def all_items(self) -> list[Item]:
        """The first registered item for each id, in id insertion order."""
        firsts: list[Item] = []
        for registered in self.occurrences.values():
            firsts.append(registered[0])
        return firsts

    def children(self, target_id: str, relation: str,
                 by_type: str | None = None) -> list[Item]:
        """Items linking to `target_id` via `relation`, optionally of one item type."""
        matches: list[Item] = []
        for rel, source in self.incoming.get(target_id, []):
            if rel != relation:
                continue
            if by_type is not None and source.type != by_type:
                continue
            matches.append(source)
        return matches
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _item_sections_for(kind: str) -> list[dict]:
    """Return the `item_sections` entries of the criteria for `kind`.

    Yields an empty list when no criteria exist for the kind, or when the
    criteria carry no (or a falsy) `item_sections` value.
    """
    if config.criteria_exists(kind):
        declared = config.read_criteria(kind).get("item_sections", [])
        return declared or []
    return []
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def build_graph(documents_dir: str | Path = DOCUMENTS_DIR) -> Graph:
    """Parse every `*.md` file under `documents_dir` into a Graph of items.

    Files are visited recursively in sorted path order. A file whose load
    raises ValueError is skipped. Which sections hold items is looked up
    from the criteria for the document's kind.
    """
    graph = Graph()
    markdown_files = sorted(Path(documents_dir).rglob("*.md"))
    for md_path in markdown_files:
        try:
            doc = load(md_path)
        except ValueError:
            # malformed frontmatter is caught by per-document validation
            continue
        kind = doc.kind or ""
        for item in parse_items(doc, _item_sections_for(kind)):
            graph.add(item)
    return graph
|
docassert/loader.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Load and parse a business document into frontmatter + sections."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import frontmatter
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
from .models import Document, Item, Section
|
|
11
|
+
|
|
12
|
+
# A traceable item bullet, e.g.
|
|
13
|
+
# **AUR-PR-014** (traces: AUR-BR-001): The flow shall be self-serve.
|
|
14
|
+
# The id is <CODE>-<TYPE>-<NNN>: project code, item type, number.
|
|
15
|
+
ITEM_RE = re.compile(
    # Bold id **CODE-TYPE-123**: two upper-case runs of 2-6 letters and a
    # number, captured as `id`, `project` and `type`.
    r"^\*\*(?P<id>(?P<project>[A-Z]{2,6})-(?P<type>[A-Z]{2,6})-\d+)\*\*"
    # Optional parenthesised link clause, captured without the parentheses
    # as `links` (None when absent).
    r"(?:\s*\((?P<links>[^)]*)\))?"
    # A colon, then the rest of the line as `text`.
    r"\s*:\s*(?P<text>.+)$"
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def parse_link_clause(clause: str) -> dict[str, list[str]]:
    """Turn `traces: BR-001, BR-002; verifies: FR-3` into {relation: [ids]}.

    Groups are separated by `;`, and each group is `relation: id, id, …`.
    Relation names are stripped and lower-cased, and ids are stripped.
    Groups with no colon, an empty relation, or no ids are ignored. A
    relation that appears in several groups accumulates ids in order.
    """
    parsed: dict[str, list[str]] = {}
    if not clause:
        return parsed
    for chunk in clause.split(";"):
        name, colon, raw_ids = chunk.partition(":")
        if not colon:
            continue
        relation = name.strip().lower()
        targets = [piece.strip() for piece in raw_ids.split(",")]
        targets = [target for target in targets if target]
        if not relation or not targets:
            continue
        if relation in parsed:
            parsed[relation].extend(targets)
        else:
            parsed[relation] = targets
    return parsed
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def parse_sections(body: str) -> dict[str, Section]:
    """Split a markdown body into sections at lines starting with `## `.

    Returns a dict of title -> Section in order of first appearance. Text
    before the first such heading is discarded. A repeated title replaces
    the earlier section of the same name. Each section body is the joined
    lines under the heading with surrounding whitespace stripped.
    """
    found: dict[str, Section] = {}
    title: str | None = None
    buffer: list[str] = []

    for line in body.splitlines():
        if not line.startswith("## "):
            # Lines are collected only once a heading has been seen.
            if title is not None:
                buffer.append(line)
            continue
        if title is not None:
            found[title] = Section(title=title, body="\n".join(buffer).strip())
        title = line[3:].strip()
        buffer = []

    # Close the section that was still open at end of input.
    if title is not None:
        found[title] = Section(title=title, body="\n".join(buffer).strip())
    return found
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def load(path: str | Path) -> Document:
    """Read a document file and split it into frontmatter and sections.

    The file is read as UTF-8 and parsed with `frontmatter.loads`.

    Raises:
        ValueError: when the frontmatter parser raises `yaml.YAMLError`.
    """
    source = Path(path)
    raw = source.read_text(encoding="utf-8")
    try:
        post = frontmatter.loads(raw)
    except yaml.YAMLError as exc:  # malformed YAML frontmatter
        raise ValueError(f"{source}: invalid YAML frontmatter: {exc}") from exc

    metadata = dict(post.metadata)
    sections = parse_sections(post.content)
    return Document(
        path=str(source),
        frontmatter=metadata,
        sections=sections,
        raw_body=post.content,
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def load_criteria(path: str | Path) -> dict:
    """Load a criteria YAML file (e.g. criteria/charter.criteria.yaml).

    Returns the parsed content, or an empty dict when the file parses to a
    falsy value (an empty file makes `yaml.safe_load` return None), so the
    result always matches the declared `dict` return type.

    Raises whatever `open` or `yaml.safe_load` raise for a missing or
    malformed file.
    """
    with open(path, encoding="utf-8") as fh:
        return yaml.safe_load(fh) or {}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def iter_item_lines(section: Section):
    """Yield a `(bullet_text, match)` pair for each of `section.list_items`.

    `match` is the result of `ITEM_RE.match` on the bullet text, so it is
    None for a bullet that does not match the item pattern.
    """
    for bullet in section.list_items:
        match = ITEM_RE.match(bullet)
        yield bullet, match
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def parse_items(doc: Document, item_sections: list[dict]) -> list[Item]:
    """Collect the traceable items declared in a document's item sections.

    Only the "section" key (a section title) of each `item_sections` entry
    is read here. Sections missing from the document are skipped, as are
    bullets that do not match the item pattern. Every item records the
    document's path, kind and status; the status comes from the frontmatter
    and defaults to "draft".
    """
    doc_status = str(doc.frontmatter.get("status", "draft"))
    collected: list[Item] = []
    for spec in item_sections or []:
        title = spec["section"]
        section = doc.section(title)
        if section is not None:
            collected.extend(
                Item(
                    id=match.group("id"),
                    project=match.group("project"),
                    type=match.group("type"),
                    text=match.group("text").strip(),
                    links=parse_link_clause(match.group("links") or ""),
                    doc_path=doc.path,
                    doc_kind=doc.kind or "",
                    doc_status=doc_status,
                    section=title,
                )
                for _bullet, match in iter_item_lines(section)
                if match
            )
    return collected
|
docassert/models.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Shared data types for docassert."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class Section:
    """A single H2 section of a document body."""
    title: str  # section heading text
    body: str   # raw markdown under the heading

    @property
    def list_items(self) -> list[str]:
        """Markdown bullet items in this section, with the marker removed.

        A bullet is any line that, once stripped, starts with `- ` or `* `,
        so nested bullets are included as well as top-level ones. Bullets
        whose text starts with an HTML comment (template placeholders) are
        dropped.
        """
        items: list[str] = []
        for line in self.body.splitlines():
            stripped = line.strip()
            if stripped.startswith(("- ", "* ")):
                text = stripped[2:].strip()
                # drop HTML-comment-only placeholder bullets from the template
                if text.startswith("<!--") or not text:
                    continue
                items.append(text)
        return items

    @property
    def is_empty(self) -> bool:
        """True when nothing but HTML comments, bare list markers and
        whitespace remains in the body.

        Comments are removed as spans, not as whole lines, so prose sharing
        a line with an inline comment still counts as content, and the
        interior lines of a multi-line comment do not.
        """
        # Local import: this module has no top-level `re` import.
        import re

        # An unterminated `<!--` is treated as a comment running to the end.
        visible = re.sub(r"<!--.*?(?:-->|\Z)", "", self.body, flags=re.DOTALL)
        for line in visible.splitlines():
            remainder = line.strip()
            # A bullet left bare by comment removal ("- <!-- … -->") is a
            # template placeholder, not content.
            if remainder and not re.fullmatch(r"[-*+]|\d+[.)]", remainder):
                return False
        return True
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class Document:
    """One parsed document: its frontmatter mapping and its body sections."""
    path: str          # where the document was read from
    frontmatter: dict  # parsed frontmatter key/value pairs
    sections: dict[str, Section] = field(default_factory=dict)  # title -> Section
    raw_body: str = ""  # body text as stored by the loader

    @property
    def id(self) -> str | None:
        """The frontmatter `id` value, or None when it is not set."""
        return self.frontmatter.get("id", None)

    @property
    def kind(self) -> str | None:
        """The frontmatter `kind` value, or None when it is not set."""
        return self.frontmatter.get("kind", None)

    def section(self, title: str) -> Section | None:
        """Look up a section by its exact title; None when there is no such section."""
        found = self.sections.get(title)
        return found
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class CheckResult:
    """Outcome of a single check: whether it passed and whether it can block."""
    check_id: str   # identifier of the check that produced this result
    passed: bool    # whether the check passed
    blocking: bool  # whether a failure of this check counts as blocking
    detail: str     # human-readable explanation
    kind: str = "structural"    # structural | semantic
    score: float | None = None  # semantic checks only, 0..1

    @property
    def is_blocking_failure(self) -> bool:
        """True for a result that is blocking and did not pass."""
        failed = not self.passed
        return self.blocking and failed
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
class Item:
    """One traceable entry of a document (a requirement, criterion, test, …).

    Written in the source document as a bullet such as:
        - **AUR-PR-014** (traces: AUR-BR-001): The onboarding flow shall be self-serve.

    The id has the shape <CODE>-<TYPE>-<NNN>: `project` holds the project
    code (AUR) and `type` the item type (PR). Ids are meant to be globally
    unique and to identify their project by themselves.
    """
    id: str       # e.g. "AUR-PR-014"
    project: str  # project code, e.g. "AUR"
    type: str     # item type, e.g. "PR"
    text: str     # the statement after the colon
    links: dict[str, list[str]]  # relation -> [target ids], e.g. {"traces": ["AUR-BR-001"]}
    doc_path: str    # path of the document that declares the item
    doc_kind: str    # kind of that document
    doc_status: str  # status of that document
    section: str     # title of the section the item was found in

    def targets(self, relation: str) -> list[str]:
        """Ids this item links to through `relation`; an empty list when none."""
        if relation in self.links:
            return self.links[relation]
        return []
|
docassert/profiles.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Profiles — the expected document set for a project.
|
|
2
|
+
|
|
3
|
+
A profile (profiles/<name>.yaml) lists the document kinds a project is expected
|
|
4
|
+
to carry, at two levels:
|
|
5
|
+
- required : must be present and complete; a missing one blocks CI once the
|
|
6
|
+
project reaches the profile's `enforce_when` lifecycle stage.
|
|
7
|
+
- recommended : surfaced as a gap on the project page, but never blocking.
|
|
8
|
+
|
|
9
|
+
A project opts in with `profile: <name>` in its project.md. No profile means no
|
|
10
|
+
completeness expectations (fully backward-compatible).
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
import yaml
|
|
17
|
+
|
|
18
|
+
from . import config as config_mod
|
|
19
|
+
|
|
20
|
+
APPROVED = {"approved", "baselined"}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def available(profiles_dir: str | Path | None = None) -> list[str]:
    """List profile names.

    With no argument the lookup is delegated to
    `config_mod.available_profiles()`. With an explicit directory, only that
    directory is searched: the sorted stems of its `*.yaml` files, or an
    empty list when it is not a directory.
    """
    if profiles_dir is None:
        return config_mod.available_profiles()
    folder = Path(profiles_dir)
    if not folder.is_dir():
        return []
    names = [entry.stem for entry in folder.glob("*.yaml")]
    names.sort()
    return names
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_profile(name: str, profiles_dir: str | Path | None = None) -> dict | None:
    """Load the profile called `name`, or return None when no file is found.

    With an explicit `profiles_dir` only `<profiles_dir>/<name>.yaml` is
    considered; otherwise the path comes from `config_mod.profile_path(name)`.
    The result always has the keys `name`, `enforce_when` (default
    "active"), `required` and `recommended` (lists, default empty).
    """
    if profiles_dir is None:
        path = config_mod.profile_path(name)
    else:
        candidate = Path(profiles_dir) / f"{name}.yaml"
        path = candidate if candidate.is_file() else None
    if path is None:
        return None

    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    expects = data.get("expects", {}) or {}
    profile = {}
    profile["name"] = data.get("name", name)
    profile["enforce_when"] = data.get("enforce_when", "active")
    # `or []` turns a null YAML value into an empty list before copying.
    profile["required"] = list(expects.get("required", []) or [])
    profile["recommended"] = list(expects.get("recommended", []) or [])
    return profile
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _kind_state(kind: str, by_kind: dict[str, list[dict]]) -> str:
|
|
53
|
+
"""complete / incomplete / missing for one expected kind.
|
|
54
|
+
|
|
55
|
+
complete = at least one document of the kind is approved/baselined AND
|
|
56
|
+
passing its audit.
|
|
57
|
+
incomplete = present, but none is complete yet (draft/proposed or failing).
|
|
58
|
+
missing = no document of the kind at all.
|
|
59
|
+
"""
|
|
60
|
+
docs = by_kind.get(kind, [])
|
|
61
|
+
if not docs:
|
|
62
|
+
return "missing"
|
|
63
|
+
if any(str(d.get("status", "")).lower() in APPROVED and d.get("passing", True)
|
|
64
|
+
for d in docs):
|
|
65
|
+
return "complete"
|
|
66
|
+
return "incomplete"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def completeness(profile: dict, documents: list[dict], project_status: str) -> dict:
    """Compare a project's documents with what its profile expects.

    `documents` is a list of dicts; each is grouped by its `kind` value and
    then classified per expected kind by `_kind_state`. The project counts
    as enforced when `project_status` equals the profile's `enforce_when`,
    ignoring case. The report's `blocks` flag is set only when the project
    is enforced and at least one required kind is missing.
    """
    grouped: dict[str, list[dict]] = {}
    for document in documents:
        grouped.setdefault(document.get("kind"), []).append(document)

    required = []
    for kind in profile["required"]:
        required.append({"kind": kind, "state": _kind_state(kind, grouped)})
    recommended = []
    for kind in profile["recommended"]:
        recommended.append({"kind": kind, "state": _kind_state(kind, grouped)})

    missing_required = []
    incomplete_required = []
    complete_count = 0
    for entry in required:
        if entry["state"] == "missing":
            missing_required.append(entry["kind"])
        elif entry["state"] == "incomplete":
            incomplete_required.append(entry["kind"])
        elif entry["state"] == "complete":
            complete_count += 1
    recommended_gaps = [entry["kind"] for entry in recommended
                        if entry["state"] != "complete"]

    wanted_stage = str(profile["enforce_when"]).lower()
    enforced = str(project_status).lower() == wanted_stage

    report = {
        "profile": profile["name"],
        "enforce_when": profile["enforce_when"],
        "enforced": enforced,
        "unknown": False,
        "required": required,
        "recommended": recommended,
        "required_total": len(required),
        "required_complete": complete_count,
        "missing_required": missing_required,
        "incomplete_required": incomplete_required,
        "recommended_gaps": recommended_gaps,
    }
    # A missing required document blocks only once the project is enforced.
    report["blocks"] = enforced and bool(missing_required)
    return report
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def unknown(profile_name: str) -> dict:
    """Build the completeness report for a profile name that does not exist.

    The report has the same keys as a regular completeness report, with
    `unknown` set, every list empty, every count zero, and nothing enforced
    or blocking.
    """
    report = {
        "profile": profile_name,
        "enforce_when": None,
        "enforced": False,
        "unknown": True,
    }
    for list_key in ("required", "recommended"):
        report[list_key] = []
    for count_key in ("required_total", "required_complete"):
        report[count_key] = 0
    for list_key in ("missing_required", "incomplete_required", "recommended_gaps"):
        report[list_key] = []
    report["blocks"] = False
    return report
|
docassert/projects.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""The project registry — derived from the `project.md` anchors.
|
|
2
|
+
|
|
3
|
+
Each project folder holds a `project.md` (kind: project) declaring the canonical
|
|
4
|
+
id (PRJ-NNN-CODE), the code, name, sponsor, and status. `projects.yaml` at the
|
|
5
|
+
repo root is the generated index of them all.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections import Counter
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import yaml
|
|
13
|
+
|
|
14
|
+
from .loader import load
|
|
15
|
+
|
|
16
|
+
DOCUMENTS_DIR = Path("documents")
|
|
17
|
+
_FIELDS = ("id", "code", "name", "sponsor", "status")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def load_projects(documents_dir: str | Path = DOCUMENTS_DIR) -> list[dict]:
    """Collect every document of kind `project` under `documents_dir`.

    Each entry holds the `_FIELDS` frontmatter values as strings (an absent
    field becomes "") plus the document `path`. Files whose load raises
    ValueError are skipped. The result is sorted by `id`.
    """
    anchors = []
    for md_path in sorted(Path(documents_dir).rglob("*.md")):
        try:
            doc = load(md_path)
        except ValueError:
            continue
        if doc.kind != "project":
            continue
        record = {}
        for name in _FIELDS:
            record[name] = str(doc.frontmatter.get(name, ""))
        record["path"] = doc.path
        anchors.append(record)
    return sorted(anchors, key=lambda record: record["id"])
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def registry_issues(projects: list[dict]) -> list[str]:
    """Report project ids and codes that are used by more than one project.

    Returns one message per duplicated non-empty value: all `id` duplicates
    first, then all `code` duplicates, each in order of first appearance.
    """
    problems: list[str] = []
    for field in ("id", "code"):
        tally = Counter()
        for project in projects:
            tally[project[field]] += 1
        for value, count in tally.items():
            # Empty values are not reported as duplicates.
            if value and count > 1:
                problems.append(f"duplicate project {field}: {value}")
    return problems
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def render_yaml(projects: list[dict]) -> str:
    """Render the project index as YAML text under a "generated" header.

    Only the `_FIELDS` keys of each project are written, in that order,
    under a top-level `projects` list.
    """
    rows = []
    for project in projects:
        rows.append({name: project[name] for name in _FIELDS})
    banner_lines = (
        "# Generated by `docassert projects` — do not edit.\n",
        "# The project.md anchors under documents/ are the source of truth.\n",
    )
    body = yaml.safe_dump({"projects": rows}, sort_keys=False, allow_unicode=True)
    return "".join(banner_lines) + body
|