codd-dev 0.2.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codd/validator.py ADDED
@@ -0,0 +1,499 @@
1
+ """CoDD validator — verify frontmatter integrity before scan/impact."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import yaml
11
+
12
+
13
# node_id format: "<prefix>:<name>", e.g. "req:user-login".
NODE_ID_PATTERN = re.compile(r"^(?P<prefix>[a-z_]+):(?P<name>.+)$")
# Prefixes a node_id may use; anything else is rejected as invalid.
ALLOWED_NODE_PREFIXES = {
    "config",
    "db",
    "db_column",
    "db_table",
    "design",
    "doc",
    "endpoint",
    "file",
    "governance",
    "infra",
    "module",
    "operations",
    "plan",
    "req",
    "test",
}
# Issue severity levels, ordered from most to least severe.
LEVEL_ERROR = "ERROR"
LEVEL_BLOCKED = "BLOCKED"
LEVEL_WARNING = "WARNING"
# Prefixes whose documents appear during the implementation phase, so a
# requirement referencing a missing one is downgraded to a warning.
IMPLEMENTATION_NODE_PREFIXES = {
    "config",
    "db",
    "db_column",
    "db_table",
    "endpoint",
    "file",
    "infra",
    "test",
}
# design:* names ending in these suffixes are also treated as implementation-phase.
IMPLEMENTATION_DESIGN_SUFFIXES = ("-service", "-integration", "-delivery")
45
+
46
+
47
@dataclass(frozen=True)
class ValidationIssue:
    """A single validation finding (frozen so instances are hashable and safe to sort/share)."""

    # Severity: one of LEVEL_ERROR / LEVEL_BLOCKED / LEVEL_WARNING.
    level: str
    # Machine-readable issue code, e.g. "dangling_depends_on".
    code: str
    # Project-relative path of the file (or config) the issue was found in.
    location: str
    # Human-readable description printed in the report.
    message: str
53
+
54
+
55
@dataclass
class ValidationResult:
    """Aggregated outcome of a validation run over all scanned documents."""

    documents_checked: int = 0
    issues: list[ValidationIssue] = field(default_factory=list)

    def _count_level(self, level: str) -> int:
        # Single counting helper shared by the per-level properties.
        return len([issue for issue in self.issues if issue.level == level])

    @property
    def error_count(self) -> int:
        """Number of ERROR-level issues."""
        return self._count_level(LEVEL_ERROR)

    @property
    def blocked_count(self) -> int:
        """Number of BLOCKED-level issues."""
        return self._count_level(LEVEL_BLOCKED)

    @property
    def warning_count(self) -> int:
        """Number of WARNING-level issues."""
        return self._count_level(LEVEL_WARNING)

    @property
    def exit_code(self) -> int:
        """Process exit code: non-zero only when errors were recorded."""
        return 0 if self.error_count == 0 else 1

    def add(self, level: str, code: str, location: str, message: str):
        """Record one issue."""
        issue = ValidationIssue(level=level, code=code, location=location, message=message)
        self.issues.append(issue)

    def status(self) -> str:
        """Overall status: the most severe level present, or "OK" when clean."""
        for level in (LEVEL_ERROR, LEVEL_BLOCKED, LEVEL_WARNING):
            if any(issue.level == level for issue in self.issues):
                return level
        return "OK"

    def sorted_issues(self) -> list[ValidationIssue]:
        """Issues ordered by severity, then location, code, and message."""
        severity_rank = {LEVEL_ERROR: 0, LEVEL_BLOCKED: 1, LEVEL_WARNING: 2}

        def sort_key(issue: ValidationIssue):
            return (severity_rank.get(issue.level, 99), issue.location, issue.code, issue.message)

        return sorted(self.issues, key=sort_key)
94
+
95
+
96
@dataclass
class DocumentRecord:
    """Parsed CoDD frontmatter metadata for one Markdown document."""

    # Project-relative POSIX path of the source file.
    path: str
    # Unique "<prefix>:<name>" identifier.
    node_id: str
    # Frontmatter `type` value ("" when absent).
    doc_type: str
    # Node ids this document depends on.
    depends_on: list[str]
    # Node ids expected to hold a reciprocal depends_on reference to this one.
    depended_by: list[str]
    # Convention target node ids.
    conventions: list[str]
104
+
105
+
106
def run_validate(project_root: Path, codd_dir: Path) -> int:
    """Validate CoDD documents, print a human-readable report, return the exit code."""
    result = validate_project(project_root, codd_dir)
    status = result.status()

    # Clean run: a single OK line, exit 0.
    if status == "OK":
        print(f"OK: validated {result.documents_checked} Markdown files under configured doc_dirs")
        return 0

    # Otherwise: one summary line followed by every issue, most severe first.
    print(
        f"{status}: {result.error_count} error(s), "
        f"{result.blocked_count} blocked issue(s), "
        f"{result.warning_count} warning(s), {result.documents_checked} Markdown files checked"
    )
    for issue in result.sorted_issues():
        print(f"[{issue.level}] {issue.location}: {issue.message}")
    return result.exit_code
123
+
124
+
125
def validate_project(project_root: Path, codd_dir: Path | None = None) -> ValidationResult:
    """Validate CoDD frontmatter, references, wave config, and dependency cycles.

    Returns a ValidationResult; per-document problems are recorded as issues
    rather than raised. NOTE(review): a missing or unreadable codd.yaml will
    propagate as an exception — presumably callers guarantee it exists; confirm.
    """
    codd_dir = codd_dir or (project_root / "codd")
    config_path = codd_dir / "codd.yaml"
    config = yaml.safe_load(config_path.read_text()) or {}

    result = ValidationResult()
    documents: dict[str, DocumentRecord] = {}
    # Nodes promised by wave_config may not have documents yet; dangling
    # references to them are reported as BLOCKED instead of ERROR.
    wave_expectations = _extract_wave_config_expectations(config)
    wave_defined_nodes = set(wave_expectations)
    service_boundary_modules = _extract_service_boundary_modules(config)

    # Pass 1: parse every Markdown file's frontmatter and index it by node_id.
    for doc_path in _iter_doc_files(project_root, config):
        result.documents_checked += 1
        relative_path = doc_path.relative_to(project_root).as_posix()
        frontmatter = _parse_codd_frontmatter(doc_path)
        if frontmatter.error:
            result.add("ERROR", frontmatter.error["code"], relative_path, frontmatter.error["message"])
            continue

        codd = frontmatter.codd or {}
        node_id = codd.get("node_id")
        if not isinstance(node_id, str) or not _is_valid_node_id(node_id):
            result.add(
                "ERROR",
                "invalid_node_id",
                relative_path,
                f"node_id must follow CoDD naming rules (<prefix>:<name>), got {node_id!r}",
            )
            continue

        depends_on = _extract_reference_ids(codd.get("depends_on"))
        depended_by = _extract_reference_ids(codd.get("depended_by"))
        conventions = _extract_convention_targets(codd.get("conventions"))

        # Each node_id may be defined by exactly one document.
        existing = documents.get(node_id)
        if existing:
            result.add(
                "ERROR",
                "duplicate_node_id",
                relative_path,
                f"node_id {node_id!r} is already defined in {existing.path}",
            )
            continue

        documents[node_id] = DocumentRecord(
            path=relative_path,
            node_id=node_id,
            doc_type=str(codd.get("type") or ""),
            depends_on=depends_on,
            depended_by=depended_by,
            conventions=conventions,
        )

    defined_nodes = set(documents)

    # Pass 2: flag references whose target has no defining document.
    for record in documents.values():
        for target_id in record.depends_on:
            if target_id not in defined_nodes:
                level, message = _classify_missing_reference(
                    target_id,
                    relation="depends_on",
                    source_doc_type=record.doc_type,
                    wave_defined_nodes=wave_defined_nodes,
                    service_boundary_modules=service_boundary_modules,
                )
                result.add(level, "dangling_depends_on", record.path, message)
        for source_id in record.depended_by:
            if source_id not in defined_nodes:
                level, message = _classify_missing_reference(
                    source_id,
                    relation="depended_by",
                    source_doc_type=record.doc_type,
                    wave_defined_nodes=wave_defined_nodes,
                    service_boundary_modules=service_boundary_modules,
                )
                result.add(level, "dangling_depended_by", record.path, message)
        for target_id in record.conventions:
            if target_id not in defined_nodes:
                result.add(
                    LEVEL_WARNING,
                    "dangling_convention",
                    record.path,
                    f"conventions references undefined node {target_id!r}",
                )

    # Pass 3: every depends_on edge should be mirrored by the target's depended_by.
    for record in documents.values():
        for target_id in record.depends_on:
            target = documents.get(target_id)
            if target and record.node_id not in set(target.depended_by):
                result.add(
                    LEVEL_WARNING,
                    "missing_depended_by",
                    target.path,
                    f"depended_by is missing reciprocal reference to {record.node_id!r}",
                )

    # Pass 4: generated documents must match the dependency plan in wave_config.
    for node_id, expected_depends in wave_expectations.items():
        record = documents.get(node_id)
        if not record:
            result.add(
                LEVEL_BLOCKED,
                "wave_config_missing_node",
                config_path.relative_to(project_root).as_posix(),
                f"wave_config defines {node_id!r}, but the document has not been generated yet",
            )
            continue

        actual_depends = set(record.depends_on)
        if actual_depends != expected_depends:
            missing = sorted(expected_depends - actual_depends)
            unexpected = sorted(actual_depends - expected_depends)
            details = []
            if missing:
                details.append(f"missing {missing}")
            if unexpected:
                details.append(f"unexpected {unexpected}")
            detail_text = ", ".join(details) if details else "dependency mismatch"
            result.add(
                LEVEL_ERROR,
                "wave_config_mismatch",
                record.path,
                f"wave_config mismatch for {node_id!r}: {detail_text}",
            )

    # Pass 5: dependency cycles (over depends_on plus reversed depended_by) are errors.
    adjacency = _build_adjacency(documents)
    for cycle in _find_cycles(adjacency):
        cycle_text = " -> ".join(list(cycle) + [cycle[0]])
        # Cycle members may themselves be undefined (reached via depended_by edges).
        location = documents[cycle[0]].path if cycle[0] in documents else config_path.relative_to(project_root).as_posix()
        result.add(LEVEL_ERROR, "circular_dependency", location, f"circular dependency detected: {cycle_text}")

    return result
257
+
258
+
259
@dataclass
class FrontmatterParseResult:
    """Outcome of parsing one file's frontmatter; exactly one of `codd`/`error` is set."""

    # The `codd:` mapping from the frontmatter on success.
    codd: dict[str, Any] | None = None
    # {"code": ..., "message": ...} describing the failure otherwise.
    error: dict[str, str] | None = None
263
+
264
+
265
+ def _iter_doc_files(project_root: Path, config: dict[str, Any]):
266
+ doc_dirs = ((config.get("scan") or {}).get("doc_dirs") or [])
267
+ for doc_dir in doc_dirs:
268
+ full_path = project_root / doc_dir
269
+ if not full_path.exists():
270
+ continue
271
+ for file_path in sorted(full_path.rglob("*.md")):
272
+ if file_path.is_file():
273
+ yield file_path
274
+
275
+
276
def _parse_codd_frontmatter(file_path: Path) -> FrontmatterParseResult:
    """Read *file_path* and extract the `codd:` mapping from its YAML frontmatter."""
    try:
        content = file_path.read_text(errors="ignore")
    except Exception as exc:
        return FrontmatterParseResult(
            error={"code": "read_error", "message": f"failed to read file: {exc}"}
        )

    # Frontmatter is the block between the leading "---" fence and the next one.
    match = re.match(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
    if match is None:
        return FrontmatterParseResult(
            error={"code": "missing_frontmatter", "message": "missing CoDD YAML frontmatter"}
        )

    try:
        frontmatter = yaml.safe_load(match.group(1))
    except yaml.YAMLError as exc:
        return FrontmatterParseResult(
            error={"code": "invalid_frontmatter", "message": f"invalid YAML frontmatter: {exc}"}
        )

    # The frontmatter must be a mapping whose "codd" value is itself a mapping.
    codd_section = frontmatter.get("codd") if isinstance(frontmatter, dict) else None
    if not isinstance(codd_section, dict):
        return FrontmatterParseResult(
            error={"code": "missing_frontmatter", "message": "missing CoDD YAML frontmatter"}
        )

    return FrontmatterParseResult(codd=codd_section)
315
+
316
+
317
def _is_valid_node_id(node_id: str) -> bool:
    """Return True when *node_id* matches "<prefix>:<name>" with an allowed prefix."""
    parsed = NODE_ID_PATTERN.match(node_id.strip())
    return parsed is not None and parsed.group("prefix") in ALLOWED_NODE_PREFIXES
322
+
323
+
324
+ def _extract_reference_ids(entries: Any) -> list[str]:
325
+ if not entries:
326
+ return []
327
+
328
+ refs = []
329
+ for entry in entries:
330
+ if isinstance(entry, str):
331
+ refs.append(entry)
332
+ continue
333
+ if isinstance(entry, dict):
334
+ ref_id = entry.get("id") or entry.get("node_id")
335
+ if isinstance(ref_id, str):
336
+ refs.append(ref_id)
337
+ return refs
338
+
339
+
340
+ def _extract_convention_targets(entries: Any) -> list[str]:
341
+ if not entries:
342
+ return []
343
+
344
+ targets = []
345
+ for entry in entries:
346
+ if isinstance(entry, str):
347
+ targets.append(entry)
348
+ continue
349
+ if not isinstance(entry, dict):
350
+ continue
351
+ value = entry.get("targets", [])
352
+ if isinstance(value, str):
353
+ targets.append(value)
354
+ elif isinstance(value, list):
355
+ targets.extend(item for item in value if isinstance(item, str))
356
+ return targets
357
+
358
+
359
def _classify_missing_reference(
    target_id: str,
    *,
    relation: str,
    source_doc_type: str,
    wave_defined_nodes: set[str],
    service_boundary_modules: set[str],
) -> tuple[str, str]:
    """Choose a severity level and message for a reference whose document is absent."""
    # Planned in wave_config but not generated yet: blocked, not broken.
    if target_id in wave_defined_nodes:
        message = (
            f"{relation} references planned node {target_id!r} from wave_config, "
            "but it has not been generated yet"
        )
        return LEVEL_BLOCKED, message

    # Requirements may legitimately point forward at implementation-phase nodes.
    is_requirement_dependency = relation == "depends_on" and source_doc_type == "requirement"
    if is_requirement_dependency and _is_requirement_phase_reference(target_id, service_boundary_modules):
        message = f"{relation} references implementation-phase node {target_id!r}; define it later via docs or scan"
        return LEVEL_WARNING, message

    return LEVEL_ERROR, f"{relation} references undefined node {target_id!r}"
381
+
382
+
383
def _is_requirement_phase_reference(target_id: str, service_boundary_modules: set[str]) -> bool:
    """Return True when a requirement may legitimately reference this not-yet-written node."""
    parsed = NODE_ID_PATTERN.match(target_id.strip())
    if parsed is None:
        return False

    prefix, name = parsed.group("prefix"), parsed.group("name")

    if prefix in IMPLEMENTATION_NODE_PREFIXES:
        return True
    if prefix == "module":
        # With no declared service boundaries, any module is acceptable.
        return not service_boundary_modules or name in service_boundary_modules
    if prefix == "design":
        return name.endswith(IMPLEMENTATION_DESIGN_SUFFIXES)
    return False
401
+
402
+
403
+ def _extract_service_boundary_modules(config: dict[str, Any]) -> set[str]:
404
+ boundaries = config.get("service_boundaries")
405
+ if not isinstance(boundaries, list):
406
+ return set()
407
+
408
+ modules: set[str] = set()
409
+ for entry in boundaries:
410
+ if not isinstance(entry, dict):
411
+ continue
412
+ name = entry.get("name")
413
+ if isinstance(name, str) and name:
414
+ modules.add(name)
415
+ return modules
416
+
417
+
418
def _extract_wave_config_expectations(config: dict[str, Any]) -> dict[str, set[str]]:
    """Map each node declared in wave_config to its expected depends_on set."""
    wave_config = config.get("wave_config")
    if not wave_config:
        return {}

    expected: dict[str, set[str]] = {}
    # A node declared in several waves accumulates the union of its dependencies.
    for node_id, depends_on in _walk_wave_entries(wave_config):
        bucket = expected.setdefault(node_id, set())
        bucket.update(depends_on)
    return expected
427
+
428
+
429
def _walk_wave_entries(node: Any):
    """Recursively yield (node_id, depends_on_set) pairs from a wave_config tree.

    A dict carrying a string "node_id"/"id" is treated as a leaf entry and is
    not descended into further; any other dict is walked via the well-known
    container keys first, then via its remaining dict/list values.
    """
    if isinstance(node, list):
        for item in node:
            yield from _walk_wave_entries(item)
        return

    if isinstance(node, dict):
        node_id = node.get("node_id") or node.get("id")
        depends_on = node.get("depends_on")
        if isinstance(node_id, str):
            # Leaf entry: record it and stop descending.
            yield node_id, set(_extract_reference_ids(depends_on))
            return

        for key in ("nodes", "documents", "artifacts", "waves", "items"):
            if key in node:
                yield from _walk_wave_entries(node[key])

        for key, value in node.items():
            # Container keys were already walked above; depends_on is leaf data.
            if key in {"nodes", "documents", "artifacts", "waves", "items", "depends_on"}:
                continue
            if isinstance(value, (dict, list)):
                yield from _walk_wave_entries(value)
451
+
452
+
453
+ def _build_adjacency(documents: dict[str, DocumentRecord]) -> dict[str, set[str]]:
454
+ adjacency: dict[str, set[str]] = {node_id: set() for node_id in documents}
455
+ for record in documents.values():
456
+ for target_id in record.depends_on:
457
+ if target_id in documents:
458
+ adjacency[record.node_id].add(target_id)
459
+ for source_id in record.depended_by:
460
+ if source_id in documents:
461
+ adjacency.setdefault(source_id, set()).add(record.node_id)
462
+ return adjacency
463
+
464
+
465
def _find_cycles(adjacency: dict[str, set[str]]) -> list[tuple[str, ...]]:
    """Return every distinct dependency cycle in *adjacency*, canonicalized and sorted."""
    cycles: set[tuple[str, ...]] = set()
    visited: set[str] = set()  # nodes whose DFS subtree is fully explored
    visiting: dict[str, int] = {}  # node -> its index on the current DFS stack
    stack: list[str] = []

    def dfs(node: str):
        visiting[node] = len(stack)
        stack.append(node)

        for neighbor in adjacency.get(node, set()):
            if neighbor in visiting:
                # Back edge: the stack slice from the neighbor onward is the cycle.
                cycle = stack[visiting[neighbor]:]
                # Canonicalize so the same cycle found from different entry points dedupes.
                cycles.add(_canonicalize_cycle(cycle))
                continue
            if neighbor in visited:
                continue
            dfs(neighbor)

        stack.pop()
        visiting.pop(node, None)
        visited.add(node)

    # Sorted start order makes the traversal (and output) deterministic.
    for node in sorted(adjacency):
        if node not in visited:
            dfs(node)

    return sorted(cycles)
493
+
494
+
495
+ def _canonicalize_cycle(nodes: list[str]) -> tuple[str, ...]:
496
+ if not nodes:
497
+ return tuple()
498
+ rotations = [tuple(nodes[index:] + nodes[:index]) for index in range(len(nodes))]
499
+ return min(rotations)