codd-dev 0.4.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {codd_dev-0.4.0 → codd_dev-0.6.0}/PKG-INFO +1 -1
  2. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/extractor.py +20 -0
  3. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/parsing.py +101 -0
  4. codd_dev-0.6.0/codd/risk.py +100 -0
  5. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/scanner.py +63 -55
  6. codd_dev-0.6.0/codd/schema_refs.py +122 -0
  7. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/synth.py +39 -0
  8. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/extracted/architecture-overview.md.j2 +10 -0
  9. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/extracted/module-detail.md.j2 +33 -0
  10. codd_dev-0.6.0/codd/traceability.py +67 -0
  11. codd_dev-0.6.0/codd/wiring.py +146 -0
  12. {codd_dev-0.4.0 → codd_dev-0.6.0}/pyproject.toml +1 -1
  13. {codd_dev-0.4.0 → codd_dev-0.6.0}/.gitignore +0 -0
  14. {codd_dev-0.4.0 → codd_dev-0.6.0}/LICENSE +0 -0
  15. {codd_dev-0.4.0 → codd_dev-0.6.0}/README.md +0 -0
  16. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/__init__.py +0 -0
  17. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/cli.py +0 -0
  18. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/clustering.py +0 -0
  19. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/config.py +0 -0
  20. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/contracts.py +0 -0
  21. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/defaults.yaml +0 -0
  22. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/generator.py +0 -0
  23. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/graph.py +0 -0
  24. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/hooks.py +0 -0
  25. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/implementer.py +0 -0
  26. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/planner.py +0 -0
  27. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/propagate.py +0 -0
  28. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/codd.yaml.tmpl +0 -0
  29. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/conventions.yaml.tmpl +0 -0
  30. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/data_dependencies.yaml.tmpl +0 -0
  31. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/doc_links.yaml.tmpl +0 -0
  32. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/extracted/api-contract.md.j2 +0 -0
  33. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/extracted/schema-design.md.j2 +0 -0
  34. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/extracted/system-context.md.j2 +0 -0
  35. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/gitignore.tmpl +0 -0
  36. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/templates/overrides.yaml.tmpl +0 -0
  37. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/validator.py +0 -0
  38. {codd_dev-0.4.0 → codd_dev-0.6.0}/codd/verifier.py +0 -0
  39. {codd_dev-0.4.0 → codd_dev-0.6.0}/hooks/pre-commit +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codd-dev
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: CoDD: Coherence-Driven Development — cross-artifact change impact analysis
5
5
  Project-URL: Homepage, https://github.com/yohey-w/codd-dev
6
6
  Project-URL: Repository, https://github.com/yohey-w/codd-dev
@@ -76,6 +76,9 @@ class ModuleInfo:
76
76
  patterns: dict[str, str] = field(default_factory=dict) # pattern_type -> detail
77
77
  call_edges: list[CallEdge] = field(default_factory=list)
78
78
  interface_contract: Any = None # InterfaceContract from contracts.py
79
+ test_coverage: Any = None # TestCoverage from traceability.py
80
+ schema_refs: list[Any] = field(default_factory=list) # SchemaRef from schema_refs.py
81
+ runtime_wires: list[Any] = field(default_factory=list) # RuntimeWire from wiring.py
79
82
 
80
83
 
81
84
  @dataclass
@@ -104,6 +107,7 @@ class ProjectFacts:
104
107
  infra_config: dict[str, ConfigInfo] = field(default_factory=dict)
105
108
  build_deps: BuildDepsInfo | None = None
106
109
  feature_clusters: list[FeatureCluster] = field(default_factory=list)
110
+ change_risks: list[Any] = field(default_factory=list) # ChangeRisk from risk.py
107
111
 
108
112
 
109
113
  @dataclass
@@ -184,6 +188,22 @@ def extract_facts(project_root: Path, language: str | None = None,
184
188
  from codd.clustering import build_feature_clusters
185
189
  build_feature_clusters(facts)
186
190
 
191
+ # R5.1: Test traceability
192
+ from codd.traceability import build_test_traceability
193
+ build_test_traceability(facts, project_root)
194
+
195
+ # R5.2: Schema-code dependency
196
+ from codd.schema_refs import build_schema_refs
197
+ build_schema_refs(facts, project_root)
198
+
199
+ # R5.3: Runtime wiring detection
200
+ from codd.wiring import build_runtime_wires
201
+ build_runtime_wires(facts, project_root)
202
+
203
+ # R5.4: Change risk scoring (depends on R4.3, R5.1)
204
+ from codd.risk import build_change_risks
205
+ build_change_risks(facts)
206
+
187
207
  return facts
188
208
 
189
209
 
@@ -301,6 +301,8 @@ class TreeSitterExtractor:
301
301
  root = self._parse(content)
302
302
  if self.language == "python":
303
303
  return _extract_python_call_graph(root, content, file_path, symbols)
304
+ if self.language in {"typescript", "javascript"}:
305
+ return _extract_ts_call_graph(root, content, file_path, symbols)
304
306
  except Exception:
305
307
  return []
306
308
  return []
@@ -1009,6 +1011,105 @@ def _extract_python_call_graph(root: Any, content: str, file_path: str, symbols:
1009
1011
  return edges
1010
1012
 
1011
1013
 
1014
+ _TS_BUILTIN_NAMES = {
1015
+ "console", "Math", "JSON", "Object", "Array", "Promise",
1016
+ "setTimeout", "setInterval", "clearTimeout", "clearInterval",
1017
+ "require", "parseInt", "parseFloat", "isNaN",
1018
+ "encodeURIComponent", "decodeURIComponent",
1019
+ }
1020
+
1021
+
1022
def _extract_ts_call_graph(root: Any, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Collect call edges from a TypeScript/JavaScript tree-sitter AST.

    Walks every named node, keeps ``call_expression`` and ``new_expression``
    nodes, and records a ``CallEdge`` for each callee that is not a known
    JS/TS builtin and that resolves to one of *symbols* by name.

    Args:
        root: Root node of the parsed tree.
        content: Source text the tree was parsed from.
        file_path: Path used to build the ``call_site`` string.
        symbols: Symbols whose names count as project-internal callees.

    Returns:
        The edges in traversal order; ``call_site`` is ``"<file_path>:<line>"``
        with a 1-based line number.
    """
    from codd.extractor import CallEdge

    source_bytes = content.encode("utf-8", errors="ignore")
    known_symbols = {sym.name for sym in symbols}
    scope_node_types = (
        "function_declaration",
        "function",
        "method_definition",
        "arrow_function",
        "class_declaration",
        "class",
    )

    def _current_scope(node: Any) -> str:
        """Return the dotted names of the enclosing scopes, outermost first."""
        names: list[str] = []
        ancestor = node.parent
        while ancestor is not None:
            if ancestor.type in scope_node_types:
                label = _field_text(source_bytes, ancestor, "name")
                if label:  # scopes without a name contribute nothing
                    names.append(label)
            ancestor = ancestor.parent
        if not names:
            return "<module>"
        return ".".join(reversed(names))

    def _callee_name(func_node: Any) -> str | None:
        """Return the callee text for the function child of a call/new node."""
        if func_node is None:
            return None
        if func_node.type in ("member_expression", "optional_chain"):
            receiver = func_node.child_by_field_name("object")
            member = func_node.child_by_field_name("property")
            if receiver is not None and member is not None:
                receiver_text = _node_text(source_bytes, receiver).strip()
                member_text = _node_text(source_bytes, member).strip()
                return f"{receiver_text}.{member_text}"
        # Identifiers, incomplete member expressions and every other node
        # type all fall back to the node's full source text.
        return _node_text(source_bytes, func_node).strip()

    collected: list[CallEdge] = []
    for node in _iter_named_nodes(root):
        if node.type == "call_expression":
            target_node = node.child_by_field_name("function")
            awaited = node.parent is not None and node.parent.type == "await_expression"
        elif node.type == "new_expression":
            target_node = node.child_by_field_name("constructor")
            awaited = False
        else:
            continue

        callee = _callee_name(target_node)
        if not callee:
            continue

        segments = callee.split(".")
        head, tail = segments[0], segments[-1]

        # Skip known JS/TS builtins, whether they are the receiver or the member.
        if head in _TS_BUILTIN_NAMES or tail in _TS_BUILTIN_NAMES:
            continue

        # Intra-project filter: the callee must match a known symbol, either
        # by its last segment, by its full text, or by the path after "this.".
        if tail not in known_symbols and callee not in known_symbols:
            if not callee.startswith("this.") or callee[5:] not in known_symbols:
                continue

        collected.append(CallEdge(
            caller=_current_scope(node),
            callee=callee,
            call_site=f"{file_path}:{node.start_point.row + 1}",
            is_async=awaited,
        ))

    return collected
1111
+
1112
+
1012
1113
  def _sql_first_object_name(content_bytes: bytes, node: Any) -> str:
1013
1114
  for child in getattr(node, "named_children", []):
1014
1115
  if child.type == "object_reference":
@@ -0,0 +1,100 @@
1
+ """R5.4 — Change risk scoring for codd extract.
2
+
3
+ Computes per-module risk score based on:
4
+ - Number of dependents (import + call + runtime)
5
+ - Test coverage ratio (R5.1)
6
+ - API surface ratio (R4.3)
7
+ - Encapsulation violations (R4.3)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass, field
13
+ from typing import TYPE_CHECKING
14
+
15
+ if TYPE_CHECKING:
16
+ from codd.extractor import ProjectFacts
17
+
18
+
19
@dataclass
class ChangeRisk:
    """Per-module change risk assessment."""

    module: str  # module name the score belongs to
    score: float = 0.0  # weighted sum of the factors below, rounded to 2 decimals
    # Keys written by build_change_risks: "dependents", "uncovered",
    # "api_surface", "violations".
    factors: dict[str, float] = field(default_factory=dict)


def build_change_risks(facts: "ProjectFacts") -> None:
    """Populate ``facts.change_risks`` with per-module risk scores.

    The score is ``0.3 * dependents + 0.3 * uncovered + 0.2 * api_surface
    + 0.2 * violations``. ``dependents`` and ``violations`` are normalised
    by the largest value found across all modules.

    A dependent is a *distinct* other module that imports, calls, or
    runtime-wires the target. A module that calls the same target many
    times counts once, and a module never counts as its own dependent.

    Args:
        facts: Project facts; each module's ``internal_imports``,
            ``call_edges``, ``runtime_wires``, ``interface_contract`` and
            ``test_coverage`` are read.

    Returns:
        None. ``facts.change_risks`` is replaced with a list of
        ``ChangeRisk`` sorted by descending score.
    """
    # Step 1: Collect the distinct modules that depend on each module.
    dependents_of: dict[str, set[str]] = {name: set() for name in facts.modules}

    for mod in facts.modules.values():
        targets: set[str] = set(mod.internal_imports)
        # Only the first dotted segment of a callee / wire target is compared
        # against module names.
        targets.update(edge.callee.split(".")[0] for edge in mod.call_edges)
        targets.update(
            wire.target.split(".")[0] for wire in getattr(mod, "runtime_wires", [])
        )
        for target in targets:
            if target != mod.name and target in dependents_of:
                dependents_of[target].add(mod.name)

    dependents = {name: len(sources) for name, sources in dependents_of.items()}
    # ``or 1`` keeps the normalisation below free of division by zero.
    max_dep = max(dependents.values(), default=0) or 1

    # Step 2: Collect encapsulation-violation counts and their maximum.
    violations: dict[str, int] = {}
    for mod in facts.modules.values():
        ic = mod.interface_contract
        violations[mod.name] = len(ic.encapsulation_violations) if ic else 0
    max_viol = max(violations.values(), default=0) or 1

    # Step 3: Compute the risk score per module.
    risks: list[ChangeRisk] = []
    for mod in facts.modules.values():
        # Coverage ratio (R5.1); a module without coverage data counts as 0.0.
        tc = getattr(mod, "test_coverage", None)
        coverage_ratio = tc.coverage_ratio if tc else 0.0

        # API surface ratio (R4.3); a module without a contract counts as 1.0.
        ic = mod.interface_contract
        api_ratio = ic.api_surface_ratio if ic else 1.0

        dep_factor = dependents.get(mod.name, 0) / max_dep
        cov_factor = 1.0 - coverage_ratio
        api_factor = api_ratio
        viol_factor = violations.get(mod.name, 0) / max_viol

        score = (0.3 * dep_factor
                 + 0.3 * cov_factor
                 + 0.2 * api_factor
                 + 0.2 * viol_factor)

        risks.append(ChangeRisk(
            module=mod.name,
            score=round(score, 2),
            factors={
                "dependents": round(dep_factor, 2),
                "uncovered": round(cov_factor, 2),
                "api_surface": round(api_factor, 2),
                "violations": round(viol_factor, 2),
            },
        ))

    # sorted() is stable, so modules with equal scores keep their input order.
    facts.change_risks = sorted(risks, key=lambda r: -r.score)
@@ -11,10 +11,10 @@ import re
11
11
  from pathlib import Path
12
12
  from typing import Any
13
13
 
14
- import yaml
15
-
16
- from codd.graph import CEG
17
- from codd.parsing import get_extractor
14
+ import yaml
15
+
16
+ from codd.graph import CEG
17
+ from codd.parsing import get_extractor
18
18
 
19
19
 
20
20
  def run_scan(project_root: Path, codd_dir: Path):
@@ -228,6 +228,14 @@ def _load_frontmatter(ceg: CEG, doc_path: str, codd: dict):
228
228
  ceg.add_evidence(edge_id, "frontmatter", "frontmatter", 0.75,
229
229
  detail=data_dep.get("condition", ""))
230
230
 
231
+ # R6.2: source_files bridge edges (extracted design → source file)
232
+ for source_file in codd.get("source_files", []):
233
+ file_node_id = f"file:{source_file}"
234
+ ceg.upsert_node(file_node_id, "file", path=source_file, name=file_node_id)
235
+ edge_id = ceg.add_edge(node_id, file_node_id, "extracted_from", "technical")
236
+ ceg.add_evidence(edge_id, "frontmatter", "source_files", 0.85,
237
+ detail=f"design doc maps to source file {source_file}")
238
+
231
239
 
232
240
  # ═══════════════════════════════════════════════════════════
233
241
  # Legacy: annotations/ YAML support (backward compatibility)
@@ -323,8 +331,8 @@ def _load_legacy_data_dependency(ceg: CEG, dep: dict):
323
331
  # Phase 2: Source code scanning
324
332
  # ═══════════════════════════════════════════════════════════
325
333
 
326
- def _scan_source_directory(ceg: CEG, project_root: Path, src_dir: Path,
327
- language: str, exclude_patterns: list):
334
+ def _scan_source_directory(ceg: CEG, project_root: Path, src_dir: Path,
335
+ language: str, exclude_patterns: list):
328
336
  """Scan source files for import/call dependencies."""
329
337
  extensions = {
330
338
  "python": [".py"],
@@ -343,60 +351,60 @@ def _scan_source_directory(ceg: CEG, project_root: Path, src_dir: Path,
343
351
  full = Path(root) / fname
344
352
  rel = full.relative_to(project_root).as_posix()
345
353
 
346
- if any(_match_glob(rel, pat) for pat in exclude_patterns):
347
- continue
348
-
349
- ceg.upsert_node(f"file:{rel}", "file", path=rel, name=fname)
350
- file_count += 1
351
- _extract_imports_basic(ceg, project_root, src_dir, full, rel, language)
354
+ if any(_match_glob(rel, pat) for pat in exclude_patterns):
355
+ continue
356
+
357
+ ceg.upsert_node(f"file:{rel}", "file", path=rel, name=fname)
358
+ file_count += 1
359
+ _extract_imports_basic(ceg, project_root, src_dir, full, rel, language)
352
360
 
353
361
  if file_count > 0:
354
362
  print(f" Source: {file_count} {language} files in {src_dir.relative_to(project_root)}")
355
363
 
356
364
 
357
- def _extract_imports_basic(ceg: CEG, project_root: Path, src_dir: Path, file_path: Path,
358
- rel_path: str, language: str):
359
- """Basic import extraction using the shared parsing backend."""
360
- try:
361
- content = file_path.read_text(errors="ignore")
362
- except Exception:
363
- return
364
-
365
- source_id = f"file:{rel_path}"
366
- extractor = get_extractor(language, "source")
367
- internal, _ = extractor.extract_imports(content, file_path, project_root, src_dir)
368
-
369
- if language in ("typescript", "javascript"):
370
- for import_lines in internal.values():
371
- for line in import_lines:
372
- match = re.search(r'''(?:import|from)\s+['"]([^'"]+)['"]''', line)
373
- if not match:
374
- continue
375
- target_module = match.group(1)
376
- if not target_module.startswith("."):
377
- continue
378
- resolved = (file_path.parent / target_module).resolve()
379
- extensions = [".ts", ".tsx", ".js", ".jsx", ".mts", ".cts", "/index.ts", "/index.tsx", "/index.js", "/index.jsx"]
380
- for ext in [""] + extensions:
381
- candidate = Path(f"{resolved}{ext}")
382
- if not candidate.exists():
383
- continue
384
- try:
385
- target_rel = candidate.relative_to(project_root).as_posix()
386
- except ValueError:
387
- continue
388
- target_id = f"file:{target_rel}"
389
- ceg.upsert_node(target_id, "file", path=target_rel)
390
- edge_id = ceg.add_edge(source_id, target_id, "imports", "structural")
391
- ceg.add_evidence(edge_id, "static", "ast_import", 0.95)
392
- break
393
-
394
- elif language == "python":
395
- for target_module in internal:
396
- target_id = f"module:{target_module}"
397
- ceg.upsert_node(target_id, "module", name=target_module)
398
- edge_id = ceg.add_edge(source_id, target_id, "imports", "structural")
399
- ceg.add_evidence(edge_id, "static", "ast_import", 0.90)
365
+ def _extract_imports_basic(ceg: CEG, project_root: Path, src_dir: Path, file_path: Path,
366
+ rel_path: str, language: str):
367
+ """Basic import extraction using the shared parsing backend."""
368
+ try:
369
+ content = file_path.read_text(errors="ignore")
370
+ except Exception:
371
+ return
372
+
373
+ source_id = f"file:{rel_path}"
374
+ extractor = get_extractor(language, "source")
375
+ internal, _ = extractor.extract_imports(content, file_path, project_root, src_dir)
376
+
377
+ if language in ("typescript", "javascript"):
378
+ for import_lines in internal.values():
379
+ for line in import_lines:
380
+ match = re.search(r'''(?:import|from)\s+['"]([^'"]+)['"]''', line)
381
+ if not match:
382
+ continue
383
+ target_module = match.group(1)
384
+ if not target_module.startswith("."):
385
+ continue
386
+ resolved = (file_path.parent / target_module).resolve()
387
+ extensions = [".ts", ".tsx", ".js", ".jsx", ".mts", ".cts", "/index.ts", "/index.tsx", "/index.js", "/index.jsx"]
388
+ for ext in [""] + extensions:
389
+ candidate = Path(f"{resolved}{ext}")
390
+ if not candidate.exists():
391
+ continue
392
+ try:
393
+ target_rel = candidate.relative_to(project_root).as_posix()
394
+ except ValueError:
395
+ continue
396
+ target_id = f"file:{target_rel}"
397
+ ceg.upsert_node(target_id, "file", path=target_rel)
398
+ edge_id = ceg.add_edge(source_id, target_id, "imports", "structural")
399
+ ceg.add_evidence(edge_id, "static", "ast_import", 0.95)
400
+ break
401
+
402
+ elif language == "python":
403
+ for target_module in internal:
404
+ target_id = f"module:{target_module}"
405
+ ceg.upsert_node(target_id, "module", name=target_module)
406
+ edge_id = ceg.add_edge(source_id, target_id, "imports", "structural")
407
+ ceg.add_evidence(edge_id, "static", "ast_import", 0.90)
400
408
 
401
409
 
402
410
  # ═══════════════════════════════════════════════════════════
@@ -0,0 +1,122 @@
1
+ """R5.2 — Schema-code dependency detection for codd extract.
2
+
3
+ Detects ORM model definitions (SQLAlchemy, Django, Prisma) and raw SQL
4
+ references in source code. Links source modules to schema tables.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from dataclasses import dataclass, field
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING
13
+
14
+ if TYPE_CHECKING:
15
+ from codd.extractor import ProjectFacts
16
+
17
+
18
@dataclass
class SchemaRef:
    """A reference from source code to a database table or model."""

    table_or_model: str
    kind: str  # "sqlalchemy" | "django" | "prisma" | "raw_sql"
    file: str
    line: int  # 1-based line number within ``file``


# ── Detection patterns ──────────────────────────────────

# SQLAlchemy: __tablename__ = 'users'
_SQLA_TABLENAME_RE = re.compile(
    r"""__tablename__\s*=\s*['"](\w+)['"]""",
)

# Django: class User(models.Model)
_DJANGO_MODEL_RE = re.compile(
    r"""class\s+(\w+)\s*\(\s*(?:models\.Model|AbstractUser|AbstractBaseUser)""",
)

# Prisma client: prisma.user.find_many() (Python client) or
# prisma.user.findMany() (JS/TS client). There is no trailing boundary, so
# "create" also matches create_many / createMany, and likewise for update/delete.
_PRISMA_CLIENT_RE = re.compile(
    r"""prisma\.(\w+)\.\s*(?:find_many|find_first|find_unique"""
    r"""|findMany|findFirst|findUnique"""
    r"""|create|update|delete|upsert|count|aggregate|group_by|groupBy)""",
)

# Raw SQL: SELECT ... FROM / INSERT INTO / UPDATE / DELETE FROM /
# CREATE|ALTER|DROP TABLE [IF [NOT] EXISTS] tablename.
# The leading \b stops identifiers such as "needs_update" from matching.
_RAW_SQL_RE = re.compile(
    r"""\b(?:SELECT\s+.*?\s+FROM|INSERT\s+INTO|UPDATE|DELETE\s+FROM"""
    r"""|(?:CREATE|ALTER|DROP)\s+TABLE(?:\s+IF\s+(?:NOT\s+)?EXISTS)?)"""
    r"""\s+[`"']?(\w+)[`"']?""",
    re.IGNORECASE,
)

# SQL keywords that the raw-SQL pattern can capture in the table position.
_SQL_NON_TABLE_WORDS = frozenset({
    "SET", "INTO", "FROM", "WHERE", "AND", "OR",
    "TABLE", "INDEX", "VIEW", "VALUES", "NULL",
    "NOT", "EXISTS", "AS", "ON", "JOIN", "LEFT",
    "RIGHT", "INNER", "OUTER", "GROUP", "ORDER",
    "BY", "HAVING", "LIMIT", "OFFSET", "UNION",
    "ALL", "DISTINCT", "CASE", "WHEN", "THEN",
    "ELSE", "END", "IF", "BEGIN", "COMMIT",
})


def detect_schema_refs(content: str, file_path: str) -> list[SchemaRef]:
    """Detect schema/model references in source code.

    The scan is line-based. Per line it records at most one SQLAlchemy,
    one Django and one Prisma reference (first match each), plus every
    raw-SQL table name whose captured word is not an SQL keyword.

    Args:
        content: Full text of the source file.
        file_path: Path stored verbatim in each ``SchemaRef.file``.

    Returns:
        References in line order; within a line the order is SQLAlchemy,
        Django, Prisma, raw SQL.
    """
    refs: list[SchemaRef] = []

    for line_no, line in enumerate(content.splitlines(), 1):
        # Single-match detectors, checked in a fixed order.
        for pattern, kind in (
            (_SQLA_TABLENAME_RE, "sqlalchemy"),
            (_DJANGO_MODEL_RE, "django"),
            (_PRISMA_CLIENT_RE, "prisma"),
        ):
            m = pattern.search(line)
            if m:
                refs.append(SchemaRef(
                    table_or_model=m.group(1),
                    kind=kind,
                    file=file_path,
                    line=line_no,
                ))

        # Raw SQL. NOTE: the match is not restricted to string literals, so
        # SQL-looking text in comments is reported as well.
        for m in _RAW_SQL_RE.finditer(line):
            table = m.group(1)
            if table.upper() not in _SQL_NON_TABLE_WORDS:
                refs.append(SchemaRef(
                    table_or_model=table,
                    kind="raw_sql",
                    file=file_path,
                    line=line_no,
                ))

    return refs
108
+
109
+
110
def build_schema_refs(facts: ProjectFacts, project_root: Path) -> None:
    """Attach detected schema references to every module in *facts*.

    Each file listed in a module's ``files`` is read relative to
    *project_root* and passed to ``detect_schema_refs``. Files that cannot
    be read are skipped. The module's ``schema_refs`` is always assigned,
    even when nothing was found.
    """
    for module in facts.modules.values():
        collected: list[SchemaRef] = []
        for relative in module.files:
            try:
                source_text = (project_root / relative).read_text(errors="ignore")
            except Exception:
                # Best effort: an unreadable file contributes no references.
                continue
            collected += detect_schema_refs(source_text, relative)
        module.schema_refs = collected
@@ -167,6 +167,7 @@ def synth_architecture(
167
167
  external_dependencies=_all_external_dependencies(facts),
168
168
  build_deps=_build_deps_context(facts.build_deps),
169
169
  deployment_hints=_deployment_hints(facts),
170
+ change_risks=facts.change_risks,
170
171
  )
171
172
  architecture_path.write_text(content, encoding="utf-8")
172
173
  return architecture_path
@@ -241,6 +242,7 @@ def _render_module_detail(env: Environment, facts: ProjectFacts, module: ModuleI
241
242
  confidence=_module_confidence(module),
242
243
  today=today,
243
244
  depends_on=_module_depends_on(facts, module),
245
+ source_files=sorted(module.files),
244
246
  ),
245
247
  mod=module,
246
248
  layer_name=layer_name,
@@ -266,6 +268,9 @@ def _render_module_detail(env: Environment, facts: ProjectFacts, module: ModuleI
266
268
  tests=_tests_context(module),
267
269
  call_edges=module.call_edges,
268
270
  interface_contract=module.interface_contract,
271
+ test_coverage=module.test_coverage,
272
+ schema_refs=module.schema_refs,
273
+ runtime_wires=module.runtime_wires,
269
274
  )
270
275
  return content
271
276
 
@@ -276,6 +281,7 @@ def _render_schema_design(env: Environment, relative_path: str, schema: Any, tod
276
281
  node_id=_schema_node_id(relative_path),
277
282
  confidence=_schema_confidence(schema),
278
283
  today=today,
284
+ source_files=[relative_path],
279
285
  ),
280
286
  relative_path=relative_path,
281
287
  slug=_slugify(Path(relative_path).with_suffix("").as_posix()),
@@ -298,6 +304,7 @@ def _render_api_contract(env: Environment, relative_path: str, spec: Any, today:
298
304
  node_id=_api_node_id(relative_path),
299
305
  confidence=_api_confidence(spec),
300
306
  today=today,
307
+ source_files=[relative_path],
301
308
  ),
302
309
  relative_path=relative_path,
303
310
  spec=spec,
@@ -314,6 +321,7 @@ def _build_frontmatter(
314
321
  confidence: float,
315
322
  today: str,
316
323
  depends_on: list[dict[str, Any]] | None = None,
324
+ source_files: list[str] | None = None,
317
325
  ) -> str:
318
326
  codd: dict[str, Any] = {
319
327
  "node_id": node_id,
@@ -322,6 +330,8 @@ def _build_frontmatter(
322
330
  "confidence": round(confidence, 2),
323
331
  "last_extracted": today,
324
332
  }
333
+ if source_files:
334
+ codd["source_files"] = source_files
325
335
  if depends_on:
326
336
  codd["depends_on"] = depends_on
327
337
  payload = yaml.safe_dump({"codd": codd}, sort_keys=False, allow_unicode=True)
@@ -367,6 +377,35 @@ def _module_depends_on(facts: ProjectFacts, module: ModuleInfo) -> list[dict[str
367
377
  )
368
378
  seen_ids.add(nid)
369
379
 
380
+ # R5.2: schema_uses edges
381
+ for ref in getattr(module, "schema_refs", []):
382
+ # Link to schema doc if one exists for this table
383
+ for schema_path in facts.schemas:
384
+ schema_obj = facts.schemas[schema_path]
385
+ tables = [t.get("name", "") for t in getattr(schema_obj, "tables", [])]
386
+ models = [m.get("name", "") for m in getattr(schema_obj, "models", [])]
387
+ if ref.table_or_model in tables or ref.table_or_model in models:
388
+ nid = _schema_node_id(schema_path)
389
+ if nid not in seen_ids:
390
+ depends_on.append(
391
+ {"id": nid, "relation": "schema_uses", "semantic": "technical"}
392
+ )
393
+ seen_ids.add(nid)
394
+
395
+ # R5.3: runtime_wires edges
396
+ wire_targets: set[str] = set()
397
+ for wire in getattr(module, "runtime_wires", []):
398
+ target = wire.target.split(".")[0]
399
+ if target in facts.modules and target != module.name:
400
+ wire_targets.add(target)
401
+ for target in sorted(wire_targets):
402
+ nid = _module_node_id(target)
403
+ if nid not in seen_ids:
404
+ depends_on.append(
405
+ {"id": nid, "relation": "runtime_wires", "semantic": "technical"}
406
+ )
407
+ seen_ids.add(nid)
408
+
370
409
  return depends_on
371
410
 
372
411
 
@@ -58,6 +58,16 @@ Evidence: {{ cluster.evidence | join("; ") }}
58
58
  {% endfor %}
59
59
  {% endif %}
60
60
 
61
+ {% if change_risks %}
62
+ ## Change Risk Summary
63
+
64
+ | Module | Risk | Dependents | Uncovered | API Surface | Violations |
65
+ |--------|------|------------|-----------|-------------|------------|
66
+ {% for risk in change_risks -%}
67
+ | `{{ risk.module }}` | {{ risk.score }} | {{ risk.factors.dependents }} | {{ risk.factors.uncovered }} | {{ risk.factors.api_surface }} | {{ risk.factors.violations }} |
68
+ {% endfor %}
69
+ {% endif %}
70
+
61
71
  ## Layer Violations
62
72
 
63
73
  {% if violations %}
@@ -98,6 +98,39 @@
98
98
  {% endfor %}
99
99
  {% endif %}
100
100
 
101
+ {% if test_coverage %}
102
+ ## Test Coverage
103
+
104
+ **Coverage**: {{ test_coverage.coverage_ratio }} ({{ test_coverage.covered_symbols | length }} / {{ test_coverage.covered_symbols | length + test_coverage.uncovered_symbols | length }})
105
+ {% if test_coverage.covering_tests %}
106
+ Tests: {{ test_coverage.covering_tests | join(", ") }}
107
+ {% endif %}
108
+ {% if test_coverage.uncovered_symbols %}
109
+
110
+ **Uncovered symbols**: {% for s in test_coverage.uncovered_symbols %}`{{ s }}`{% if not loop.last %}, {% endif %}{% endfor %}
111
+ {% endif %}
112
+ {% endif %}
113
+
114
+ {% if schema_refs %}
115
+ ## Schema Dependencies
116
+
117
+ | Table/Model | Kind | Location |
118
+ |-------------|------|----------|
119
+ {% for ref in schema_refs -%}
120
+ | `{{ ref.table_or_model }}` | {{ ref.kind }} | `{{ ref.file }}:{{ ref.line }}` |
121
+ {% endfor %}
122
+ {% endif %}
123
+
124
+ {% if runtime_wires %}
125
+ ## Runtime Wiring
126
+
127
+ | Kind | Target | Framework | Location |
128
+ |------|--------|-----------|----------|
129
+ {% for wire in runtime_wires -%}
130
+ | {{ wire.kind }} | `{{ wire.target }}` | {{ wire.framework }} | `{{ wire.source }}` |
131
+ {% endfor %}
132
+ {% endif %}
133
+
101
134
  {% if internal_dependencies %}
102
135
  ## Import Dependencies
103
136
 
@@ -0,0 +1,67 @@
1
+ """R5.1 — Test traceability for codd extract.
2
+
3
+ Maps test files to the source symbols they exercise, via import analysis
4
+ of test code. Enables 'which tests to run' and 'untested symbols' queries.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING
12
+
13
+ if TYPE_CHECKING:
14
+ from codd.extractor import ProjectFacts
15
+
16
+
17
@dataclass
class TestCoverage:
    """Per-module test coverage summary."""

    # The "Test" prefix would otherwise make pytest try to collect this class.
    __test__ = False

    module: str
    covered_symbols: list[str] = field(default_factory=list)  # sorted names
    uncovered_symbols: list[str] = field(default_factory=list)  # sorted names
    coverage_ratio: float = 0.0  # covered / total symbols, rounded to 2 decimals
    covering_tests: list[str] = field(default_factory=list)  # sorted, de-duplicated paths


def build_test_traceability(facts: "ProjectFacts", project_root: Path) -> None:
    """Populate ``test_coverage`` on every module in *facts* that has symbols.

    Strategy: for each of the module's test files, read the file and treat
    a source symbol as covered when its name occurs there as a whole
    identifier, i.e. not directly adjacent to another word character. A
    symbol ``get`` is therefore not marked covered by the word ``target``.

    Args:
        facts: Project facts; each module's ``symbols`` and ``test_details``
            are read and its ``test_coverage`` is written.
        project_root: Directory against which ``test_detail.file_path`` is
            resolved.

    Returns:
        None. Modules without symbols are left untouched.
    """
    import re  # local import: only this function needs it

    for mod in facts.modules.values():
        all_symbols = {s.name for s in mod.symbols}
        if not all_symbols:
            continue

        covered: set[str] = set()
        covering_tests: set[str] = set()
        patterns: dict[str, "re.Pattern[str]"] = {}

        for test_detail in mod.test_details:
            # A test file is listed even when it cannot be read below.
            covering_tests.add(test_detail.file_path)
            test_path = project_root / test_detail.file_path
            try:
                test_content = test_path.read_text(errors="ignore")
            except Exception:
                continue
            # Only symbols not yet covered need checking against this file.
            for sym_name in all_symbols - covered:
                pattern = patterns.get(sym_name)
                if pattern is None:
                    pattern = re.compile(rf"(?<!\w){re.escape(sym_name)}(?!\w)")
                    patterns[sym_name] = pattern
                if pattern.search(test_content):
                    covered.add(sym_name)

        covered_list = sorted(covered)
        uncovered_list = sorted(all_symbols - covered)
        ratio = len(covered_list) / len(all_symbols)

        mod.test_coverage = TestCoverage(
            module=mod.name,
            covered_symbols=covered_list,
            uncovered_symbols=uncovered_list,
            coverage_ratio=round(ratio, 2),
            covering_tests=sorted(covering_tests),
        )
@@ -0,0 +1,146 @@
1
+ """R5.3 — Runtime wiring detection for codd extract.
2
+
3
+ Detects framework-specific implicit dependencies that don't appear
4
+ in import graphs or call graphs: DI injection, middleware chains,
5
+ signal handlers, and decorator-based routing.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+ from typing import TYPE_CHECKING
14
+
15
+ if TYPE_CHECKING:
16
+ from codd.extractor import ProjectFacts
17
+
18
+
19
@dataclass
class RuntimeWire:
    """An implicit runtime dependency detected from framework patterns."""

    kind: str  # "depends" | "middleware" | "signal" | "decorator" | "task"
    source: str  # file:line
    target: str  # the function/class/module wired
    framework: str  # "fastapi" | "django" | "flask" | "celery" | "generic"


# ── Detection patterns ──────────────────────────────────

# FastAPI Depends()
_FASTAPI_DEPENDS_RE = re.compile(
    r"""Depends\(\s*(\w[\w.]*)\s*\)""",
)

# Django MIDDLEWARE list
_DJANGO_MIDDLEWARE_RE = re.compile(
    r"""MIDDLEWARE\s*=\s*\[([^\]]*)\]""",
    re.DOTALL,
)

# Django signals: signal.connect(handler)
_DJANGO_SIGNAL_RE = re.compile(
    r"""(\w+)\s*\.\s*connect\(\s*(\w[\w.]*)\s*""",
)

# Receiver names treated as Django signals; other ``x.connect(...)`` calls
# (for example database connections) are ignored.
_DJANGO_SIGNAL_NAMES = frozenset({
    "post_save", "pre_save", "post_delete",
    "pre_delete", "m2m_changed", "post_init",
    "pre_init", "request_started", "request_finished",
})

# Flask before/after request hooks, behind one or more dotted prefixes
# (@app.before_request, @pkg.bp.before_request).
_FLASK_HOOK_RE = re.compile(
    r"""@\s*(?:\w+\.)+\s*(before_request|after_request|before_first_request|teardown_request|teardown_appcontext)\b""",
)

# Celery task: @app.task / @celery.task(...) and the bare @shared_task.
_CELERY_TASK_RE = re.compile(
    r"""@\s*(?:(?:\w+\.)+\s*task|shared_task)\b""",
)

# Generic event handler registration: on_event, add_event_handler, register
_GENERIC_HANDLER_RE = re.compile(
    r"""(?:on_event|add_event_handler|register_handler|subscribe)\(\s*['"](\w+)['"]\s*,\s*(\w[\w.]*)""",
)

# Quoted entries inside a MIDDLEWARE list body.
_QUOTED_ENTRY_RE = re.compile(r"""['"]([^'"]+)['"]""")


def detect_runtime_wires(content: str, file_path: str) -> list[RuntimeWire]:
    """Detect runtime wiring patterns in source code.

    Line-based detectors run first: every FastAPI ``Depends(...)`` on a
    line, then at most one Django signal, Flask hook, Celery task and
    generic handler per line. Django ``MIDDLEWARE`` lists are matched
    afterwards against the whole text because they usually span lines.

    Args:
        content: Full text of the source file.
        file_path: Path used as the prefix of each wire's ``source``.

    Returns:
        Wires from the line scan in line order, followed by one
        ``"middleware"`` wire per quoted entry of each ``MIDDLEWARE`` list.
        Middleware wires carry the line on which ``MIDDLEWARE`` starts.
    """
    wires: list[RuntimeWire] = []

    for line_no, line in enumerate(content.splitlines(), 1):
        source = f"{file_path}:{line_no}"

        # FastAPI Depends(): a signature line may declare several.
        for m in _FASTAPI_DEPENDS_RE.finditer(line):
            wires.append(RuntimeWire(
                kind="depends",
                source=source,
                target=m.group(1),
                framework="fastapi",
            ))

        # Django signals: only the first ``x.connect(`` on the line is examined.
        m = _DJANGO_SIGNAL_RE.search(line)
        if m and m.group(1) in _DJANGO_SIGNAL_NAMES:
            wires.append(RuntimeWire(
                kind="signal",
                source=source,
                target=m.group(2),
                framework="django",
            ))

        # Flask hooks: the target is the hook name, not the decorated function.
        m = _FLASK_HOOK_RE.search(line)
        if m:
            wires.append(RuntimeWire(
                kind="decorator",
                source=source,
                target=m.group(1),
                framework="flask",
            ))

        # Celery task: the target is a fixed placeholder.
        if _CELERY_TASK_RE.search(line):
            wires.append(RuntimeWire(
                kind="task",
                source=source,
                target="celery_task",
                framework="celery",
            ))

        # Generic event handlers
        m = _GENERIC_HANDLER_RE.search(line)
        if m:
            wires.append(RuntimeWire(
                kind="signal",
                source=source,
                target=m.group(2),
                framework="generic",
            ))

    # Django MIDDLEWARE (multiline)
    for m in _DJANGO_MIDDLEWARE_RE.finditer(content):
        # Line of the MIDDLEWARE assignment, computed once per list.
        list_source = f"{file_path}:{content.count(chr(10), 0, m.start()) + 1}"
        for mw in _QUOTED_ENTRY_RE.findall(m.group(1)):
            wires.append(RuntimeWire(
                kind="middleware",
                source=list_source,
                target=mw,
                framework="django",
            ))

    return wires
132
+
133
+
134
def build_runtime_wires(facts: ProjectFacts, project_root: Path) -> None:
    """Attach detected runtime wires to every module in *facts*.

    Each file listed in a module's ``files`` is read relative to
    *project_root* and passed to ``detect_runtime_wires``. Files that
    cannot be read are skipped. The module's ``runtime_wires`` is always
    assigned, even when nothing was found.
    """
    for module in facts.modules.values():
        collected: list[RuntimeWire] = []
        for relative in module.files:
            try:
                source_text = (project_root / relative).read_text(errors="ignore")
            except Exception:
                # Best effort: an unreadable file contributes no wires.
                continue
            collected += detect_runtime_wires(source_text, relative)
        module.runtime_wires = collected
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "codd-dev"
7
- version = "0.4.0"
7
+ version = "0.6.0"
8
8
  description = "CoDD: Coherence-Driven Development — cross-artifact change impact analysis"
9
9
  readme = "README.md"
10
10
  license = "MIT"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes