codd-dev 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {codd_dev-0.3.0 → codd_dev-0.4.0}/PKG-INFO +1 -1
  2. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/__init__.py +1 -1
  3. codd_dev-0.4.0/codd/clustering.py +168 -0
  4. codd_dev-0.4.0/codd/contracts.py +138 -0
  5. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/extractor.py +77 -0
  6. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/parsing.py +98 -1
  7. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/synth.py +55 -7
  8. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/extracted/architecture-overview.md.j2 +27 -0
  9. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/extracted/module-detail.md.j2 +34 -0
  10. {codd_dev-0.3.0 → codd_dev-0.4.0}/pyproject.toml +1 -1
  11. {codd_dev-0.3.0 → codd_dev-0.4.0}/.gitignore +0 -0
  12. {codd_dev-0.3.0 → codd_dev-0.4.0}/LICENSE +0 -0
  13. {codd_dev-0.3.0 → codd_dev-0.4.0}/README.md +0 -0
  14. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/cli.py +0 -0
  15. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/config.py +0 -0
  16. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/defaults.yaml +0 -0
  17. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/generator.py +0 -0
  18. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/graph.py +0 -0
  19. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/hooks.py +0 -0
  20. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/implementer.py +0 -0
  21. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/planner.py +0 -0
  22. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/propagate.py +0 -0
  23. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/scanner.py +0 -0
  24. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/codd.yaml.tmpl +0 -0
  25. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/conventions.yaml.tmpl +0 -0
  26. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/data_dependencies.yaml.tmpl +0 -0
  27. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/doc_links.yaml.tmpl +0 -0
  28. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/extracted/api-contract.md.j2 +0 -0
  29. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/extracted/schema-design.md.j2 +0 -0
  30. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/extracted/system-context.md.j2 +0 -0
  31. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/gitignore.tmpl +0 -0
  32. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/overrides.yaml.tmpl +0 -0
  33. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/validator.py +0 -0
  34. {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/verifier.py +0 -0
  35. {codd_dev-0.3.0 → codd_dev-0.4.0}/hooks/pre-commit +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codd-dev
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: CoDD: Coherence-Driven Development — cross-artifact change impact analysis
5
5
  Project-URL: Homepage, https://github.com/yohey-w/codd-dev
6
6
  Project-URL: Repository, https://github.com/yohey-w/codd-dev
@@ -1,3 +1,3 @@
1
1
  """CoDD — Coherence-Driven Development."""
2
2
 
3
- __version__ = "0.2.0a1"
3
+ __version__ = "0.4.0"
@@ -0,0 +1,168 @@
1
+ """R4.2 — Feature clustering for codd extract.
2
+
3
+ Groups modules by functional cohesion using call graph edges,
4
+ naming conventions, and cross-reference density.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections import defaultdict
10
+ from typing import TYPE_CHECKING
11
+
12
+ if TYPE_CHECKING:
13
+ from codd.extractor import ProjectFacts
14
+
15
+
16
def build_feature_clusters(facts: ProjectFacts) -> None:
    """Populate ``facts.feature_clusters`` from call edges and module naming.

    Modules are linked into an undirected graph: two modules are linked when
    at least one call edge of either resolves to the other. Every connected
    component with two or more modules becomes a cluster whose confidence is
    ``0.4 + 0.1 * links + 0.2`` (the last term only when the members share a
    name prefix), capped at 1.0. Modules not placed by the call graph are then
    grouped by shared name prefix at a fixed confidence of 0.3.

    The clusters are stored on ``facts.feature_clusters`` sorted by descending
    confidence. Nothing is assigned when fewer than two modules exist.
    """
    from codd.extractor import FeatureCluster

    module_names = list(facts.modules.keys())
    if len(module_names) < 2:
        return

    # Step 1: undirected module adjacency derived from call edges.
    adj: dict[str, set[str]] = defaultdict(set)
    for mod in facts.modules.values():
        for edge in mod.call_edges:
            # edge.callee may be "module.Class.method" — extract target module
            target_mod = _resolve_callee_module(edge.callee, module_names)
            if target_mod and target_mod != mod.name:
                adj[mod.name].add(target_mod)
                adj[target_mod].add(mod.name)

    # Step 2: connected components of that graph.
    components = _connected_components(module_names, adj)

    # Step 3: naming-prefix groups, used for modules the call graph leaves out.
    prefix_groups = _group_by_prefix(module_names)

    # Step 4: combine both sources.
    clusters: list[FeatureCluster] = []
    seen: set[str] = set()

    # First: call-graph components (higher confidence).
    for comp in components:
        if len(comp) < 2:
            continue
        name = _infer_cluster_name(comp)
        evidence: list[str] = []

        common_prefix = _common_prefix(comp)
        if common_prefix:
            evidence.append(f"shared prefix: {common_prefix}")

        # adj stores every link in both directions, so the raw neighbour
        # count sees each linked pair twice; halve it to count pairs once.
        edge_count = sum(len(adj.get(m, set()) & comp) for m in comp) // 2
        if edge_count > 0:
            evidence.append(f"{edge_count} cross-call edges")

        confidence = min(1.0, 0.4 + 0.1 * edge_count + (0.2 if common_prefix else 0.0))

        clusters.append(FeatureCluster(
            name=name,
            modules=sorted(comp),
            confidence=round(confidence, 2),
            evidence=evidence,
        ))
        seen.update(comp)

    # Second: prefix-only groups (lower confidence), skipping placed modules.
    for prefix, members in prefix_groups.items():
        remaining = [m for m in members if m not in seen]
        if len(remaining) < 2:
            continue
        clusters.append(FeatureCluster(
            name=prefix,
            modules=sorted(remaining),
            confidence=0.3,
            evidence=[f"shared prefix: {prefix}"],
        ))
        seen.update(remaining)

    facts.feature_clusters = sorted(clusters, key=lambda c: -c.confidence)
87
+
88
+
89
+ def _resolve_callee_module(callee: str, module_names: list[str]) -> str | None:
90
+ """Map a callee like 'auth.verify_token' to module name 'auth'."""
91
+ # Try exact match first
92
+ if callee in module_names:
93
+ return callee
94
+ # Try first dotted segment
95
+ parts = callee.split(".")
96
+ for i in range(len(parts), 0, -1):
97
+ candidate = ".".join(parts[:i])
98
+ if candidate in module_names:
99
+ return candidate
100
+ # Try just the first part (top-level module)
101
+ if parts[0] in module_names:
102
+ return parts[0]
103
+ return None
104
+
105
+
106
+ def _connected_components(nodes: list[str], adj: dict[str, set[str]]) -> list[set[str]]:
107
+ """Find connected components in an undirected graph."""
108
+ visited: set[str] = set()
109
+ components: list[set[str]] = []
110
+
111
+ for node in nodes:
112
+ if node in visited:
113
+ continue
114
+ # BFS
115
+ component: set[str] = set()
116
+ queue = [node]
117
+ while queue:
118
+ current = queue.pop(0)
119
+ if current in visited:
120
+ continue
121
+ visited.add(current)
122
+ component.add(current)
123
+ for neighbor in adj.get(current, set()):
124
+ if neighbor not in visited:
125
+ queue.append(neighbor)
126
+ components.append(component)
127
+
128
+ return components
129
+
130
+
131
+ def _group_by_prefix(module_names: list[str]) -> dict[str, list[str]]:
132
+ """Group modules sharing a common naming prefix (e.g., 'auth_*')."""
133
+ groups: dict[str, list[str]] = defaultdict(list)
134
+ for name in module_names:
135
+ # Split on underscore or dot
136
+ parts = name.replace(".", "_").split("_")
137
+ if len(parts) >= 2:
138
+ prefix = parts[0]
139
+ if len(prefix) >= 2: # Avoid single-char prefixes
140
+ groups[prefix].append(name)
141
+ # Only return groups with 2+ members
142
+ return {k: v for k, v in groups.items() if len(v) >= 2}
143
+
144
+
145
+ def _common_prefix(names: set[str]) -> str:
146
+ """Find common prefix among module names, if any."""
147
+ if not names:
148
+ return ""
149
+ name_list = sorted(names)
150
+ parts_list = [n.replace(".", "_").split("_") for n in name_list]
151
+ if not parts_list or not parts_list[0]:
152
+ return ""
153
+ prefix_parts: list[str] = []
154
+ for i, part in enumerate(parts_list[0]):
155
+ if all(len(p) > i and p[i] == part for p in parts_list):
156
+ prefix_parts.append(part)
157
+ else:
158
+ break
159
+ return "_".join(prefix_parts) if prefix_parts else ""
160
+
161
+
162
def _infer_cluster_name(modules: set[str]) -> str:
    """Infer a human-readable name for a cluster.

    The members' shared name prefix is used when there is one. Otherwise the
    shortest module name is returned; ties are broken alphabetically so the
    name does not depend on set iteration order.
    """
    prefix = _common_prefix(modules)
    if prefix:
        return prefix
    return min(modules, key=lambda module_name: (len(module_name), module_name))
@@ -0,0 +1,138 @@
1
+ """R4.3 — Interface contract detection for codd extract.
2
+
3
+ Distinguishes public API (symbols in __init__.py / __all__) from internal
4
+ implementation details. Detects encapsulation violations where other modules
5
+ reach into internals.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from typing import TYPE_CHECKING
14
+
15
+ if TYPE_CHECKING:
16
+ from codd.extractor import ProjectFacts
17
+
18
+
19
@dataclass
class InterfaceContract:
    """Split of one module's symbols into public API and internals."""

    # Name of the module this contract describes.
    module: str
    # Names treated as the module's public API.
    public_symbols: list[str] = field(default_factory=list)
    # Names of the module's symbols that are not public.
    internal_symbols: list[str] = field(default_factory=list)
    # Public-to-total symbol ratio; 0.0 until a builder fills it in.
    api_surface_ratio: float = 0.0
    # Human-readable notes about other modules using internal symbols.
    encapsulation_violations: list[str] = field(default_factory=list)
28
+
29
+
30
+ # ── __init__.py / __all__ parsing ────────────────────────
31
+
32
# ``__all__ = [...]`` (group 1) or ``__all__ = (...)`` (group 2).
_ALL_RE = re.compile(
    r"__all__\s*=\s*(?:\[([^\]]*)\]|\(([^)]*)\))",
    re.DOTALL,
)

# ``from .xxx import ...``; a parenthesised name list may span several lines.
_REEXPORT_FROM_RE = re.compile(
    r"^from\s+\.[\w.]*\s+import\s+(\([^)]*\)|.+)",
    re.MULTILINE,
)

_IMPORT_AS_RE = re.compile(r"(\w+)\s+as\s+(\w+)")

# A ``#`` comment up to the end of its line.
_COMMENT_RE = re.compile(r"#.*")


def detect_init_exports(init_content: str) -> list[str]:
    """Parse ``__init__.py`` content and return publicly-exported symbol names.

    The first ``__all__`` list or tuple literal takes priority: when present,
    only its quoted names are returned. Otherwise the names bound by relative
    ``from .xxx import ...`` statements are collected, using the alias for
    ``Foo as Bar`` and handling parenthesised lists that span lines. Comments
    are ignored and star imports contribute nothing.

    Returns:
        Exported names in first-seen order, without duplicates.
    """
    names: list[str] = []

    # 1) __all__ takes priority
    all_match = _ALL_RE.search(init_content)
    if all_match:
        raw = all_match.group(1)
        if raw is None:
            raw = all_match.group(2)
        # Strip comments first so quoted words inside them are not exported.
        for token in re.findall(r"""['"](\w+)['"]""", _COMMENT_RE.sub("", raw)):
            if token not in names:
                names.append(token)
        return names

    # 2) Fall back to ``from .xxx import ...`` re-exports
    for reexport in _REEXPORT_FROM_RE.finditer(init_content):
        # Without this, ``import Foo  # noqa: F401`` would export "F401".
        import_part = _COMMENT_RE.sub("", reexport.group(1))
        for chunk in import_part.split(","):
            chunk = chunk.strip().strip("()").strip()
            if not chunk:
                continue
            # handle "Foo as Bar" → the exported name is "Bar"
            alias_match = _IMPORT_AS_RE.search(chunk)
            name = alias_match.group(2) if alias_match else chunk.split()[-1]
            if name.isidentifier() and name not in names:
                names.append(name)

    return names
75
+
76
+
77
+ # ── Build contracts for every module ─────────────────────
78
+
79
def build_interface_contracts(facts: ProjectFacts, project_root: Path) -> None:
    """Populate ``interface_contract`` on every module in *facts* that has symbols.

    First pass: for a module containing an ``__init__.py``, the public names
    are whatever ``detect_init_exports`` finds in the first such file (an
    unreadable file counts as empty); for a module without one, every symbol
    is public. Symbols not among the public names are internal, and
    ``api_surface_ratio`` is the share of the module's symbols that are public.

    Second pass: each import line a module records for a dependency is checked
    for that dependency's internal symbol names, and every hit is recorded once
    on the importing module's contract as an encapsulation violation.

    Args:
        facts: Project facts whose ``modules`` are updated in place.
        project_root: Directory that the modules' file paths are relative to.
    """
    # First pass: compute public/internal for each module
    for mod in facts.modules.values():
        all_symbol_names = [s.name for s in mod.symbols]
        if not all_symbol_names:
            continue

        init_files = [f for f in mod.files if Path(f).name == "__init__.py"]
        if init_files:
            init_path = project_root / init_files[0]
            try:
                init_content = init_path.read_text(errors="ignore")
            except (OSError, ValueError):
                # Best effort: an unreadable __init__.py exports nothing.
                init_content = ""
            public = detect_init_exports(init_content)
        else:
            # For single-file modules (no __init__.py), treat all symbols as public
            public = list(all_symbol_names)

        public_names = set(public)
        internal = [n for n in all_symbol_names if n not in public_names]
        total = len(all_symbol_names)
        # Count only the module's own symbols: exported names that are not
        # among them must not push the ratio above 1.0.
        ratio = (total - len(internal)) / total

        mod.interface_contract = InterfaceContract(
            module=mod.name,
            public_symbols=public,
            internal_symbols=internal,
            api_surface_ratio=round(ratio, 2),
        )

    # Second pass: detect encapsulation violations.
    # One whole-identifier pattern per internal name, so "get" does not match
    # inside "get_user"; names are sorted to keep the report order stable.
    internal_patterns: dict[str, list[tuple[str, re.Pattern[str]]]] = {}
    for mod in facts.modules.values():
        if mod.interface_contract:
            internal_patterns[mod.name] = [
                (name, re.compile(rf"(?<!\w){re.escape(name)}(?!\w)"))
                for name in sorted(set(mod.interface_contract.internal_symbols))
            ]

    for mod in facts.modules.values():
        contract = mod.interface_contract
        if not contract:
            continue
        # NOTE(review): values of internal_imports are assumed to be iterables
        # of import-statement strings — confirm against the extractor.
        for dep_name, import_lines in mod.internal_imports.items():
            patterns = internal_patterns.get(dep_name)
            if not patterns:
                continue
            for line in import_lines:
                for internal_name, pattern in patterns:
                    if not pattern.search(line):
                        continue
                    violation = f"{mod.name} uses {dep_name}.{internal_name} (internal)"
                    if violation not in contract.encapsulation_violations:
                        contract.encapsulation_violations.append(violation)
@@ -53,6 +53,15 @@ class Symbol:
53
53
  implements: list[str] = field(default_factory=list)
54
54
 
55
55
 
56
@dataclass
class CallEdge:
    """One call from a caller scope to a callee symbol."""

    # Calling scope, e.g. "module.Class.method" or "module.function".
    caller: str
    # Target symbol (resolved to its module if possible).
    callee: str
    # Where the call occurs, formatted as "file:line".
    call_site: str
    # Whether the call is awaited; defaults to a plain call.
    is_async: bool = False
63
+
64
+
56
65
  @dataclass
57
66
  class ModuleInfo:
58
67
  """Aggregated info for a discovered module/package."""
@@ -65,6 +74,17 @@ class ModuleInfo:
65
74
  test_details: list[TestInfo] = field(default_factory=list)
66
75
  line_count: int = 0
67
76
  patterns: dict[str, str] = field(default_factory=dict) # pattern_type -> detail
77
+ call_edges: list[CallEdge] = field(default_factory=list)
78
+ interface_contract: Any = None # InterfaceContract from contracts.py
79
+
80
+
81
@dataclass
class FeatureCluster:
    """A named group of modules that collaborate on a feature."""

    # Display name of the cluster.
    name: str
    # Names of the member modules.
    modules: list[str] = field(default_factory=list)
    # Score for how strongly the grouping is supported; defaults to 0.0.
    confidence: float = 0.0
    # Short human-readable reasons for the grouping.
    evidence: list[str] = field(default_factory=list)
68
88
 
69
89
 
70
90
  @dataclass
@@ -83,6 +103,7 @@ class ProjectFacts:
83
103
  api_specs: dict[str, Any] = field(default_factory=dict)
84
104
  infra_config: dict[str, ConfigInfo] = field(default_factory=dict)
85
105
  build_deps: BuildDepsInfo | None = None
106
+ feature_clusters: list[FeatureCluster] = field(default_factory=list)
86
107
 
87
108
 
88
109
  @dataclass
@@ -151,6 +172,18 @@ def extract_facts(project_root: Path, language: str | None = None,
151
172
  # Detect entry points
152
173
  _detect_entry_points(facts, project_root, language)
153
174
 
175
+ # R4.3: Interface contract detection
176
+ from codd.contracts import build_interface_contracts
177
+ build_interface_contracts(facts, project_root)
178
+
179
+ # R4.1: Call graph extraction + resolution
180
+ _extract_call_graphs(facts, project_root, language, exclude_patterns)
181
+ _resolve_call_graph(facts)
182
+
183
+ # R4.2: Feature clustering
184
+ from codd.clustering import build_feature_clusters
185
+ build_feature_clusters(facts)
186
+
154
187
  return facts
155
188
 
156
189
 
@@ -817,6 +850,50 @@ def _discover_build_deps(project_root: Path) -> BuildDepsInfo | None:
817
850
  return extractor.merge(discovered)
818
851
 
819
852
 
853
+ # ── R4.1 helpers: call-graph extraction & resolution ──────
854
+
855
def _extract_call_graphs(facts: ProjectFacts, project_root: Path,
                         language: str, exclude_patterns: list[str] | None):
    """Append call edges to every module, one source file at a time.

    The "source" extractor for *language* is asked for the edges of each file
    listed on each module; files that cannot be read are skipped. Nothing
    happens when that extractor has no ``extract_call_graph`` method.
    ``exclude_patterns`` is accepted but not consulted here.
    """
    source_extractor = get_extractor(language, "source")
    if hasattr(source_extractor, "extract_call_graph"):
        for module in facts.modules.values():
            for relative_path in module.files:
                source_path = project_root / relative_path
                try:
                    text = source_path.read_text(errors="ignore")
                except Exception:
                    # Best effort: skip the unreadable file, keep the rest.
                    continue
                module.call_edges.extend(
                    source_extractor.extract_call_graph(text, relative_path, module.symbols)
                )
871
+
872
+
873
+ def _resolve_call_graph(facts: ProjectFacts):
874
+ """Resolve callee names to fully-qualified module.symbol references."""
875
+ # Build symbol → module lookup
876
+ symbol_to_module: dict[str, str] = {}
877
+ for mod in facts.modules.values():
878
+ for sym in mod.symbols:
879
+ symbol_to_module[sym.name] = mod.name
880
+
881
+ for mod in facts.modules.values():
882
+ for edge in mod.call_edges:
883
+ callee = edge.callee
884
+ # Strip self. prefix
885
+ if callee.startswith("self."):
886
+ callee = callee[5:]
887
+ # Try to resolve bare name to module.name
888
+ bare = callee.split(".")[-1]
889
+ if bare in symbol_to_module:
890
+ target_mod = symbol_to_module[bare]
891
+ if target_mod != mod.name:
892
+ edge.callee = f"{target_mod}.{bare}"
893
+ else:
894
+ edge.callee = bare
895
+
896
+
820
897
  # ═══════════════════════════════════════════════════════════
821
898
  # Phase 2: Synth Docs (template-based, no AI)
822
899
  # ═══════════════════════════════════════════════════════════
@@ -28,7 +28,7 @@ except ModuleNotFoundError:
28
28
  hcl2 = None
29
29
 
30
30
  if TYPE_CHECKING:
31
- from codd.extractor import ModuleInfo, Symbol
31
+ from codd.extractor import CallEdge, ModuleInfo, Symbol
32
32
 
33
33
 
34
34
  _TREE_SITTER_LANGUAGE_PACKAGES = {
@@ -167,6 +167,9 @@ class LanguageExtractor(Protocol):
167
167
  def extract_schema(self, content: str, file_path: str | Path) -> SqlSchemaInfo | PrismaSchemaInfo | None:
168
168
  """Return schema information when supported by the extractor."""
169
169
 
170
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return the call edges found in *content*.

    *file_path* identifies the file the content came from and *symbols* are
    the symbols already known for it. This protocol member has no body of
    its own; implementers provide the behavior.
    """
172
+
170
173
 
171
174
  class RegexExtractor:
172
175
  """Adapter for regex-based extraction and schema parsing."""
@@ -214,6 +217,9 @@ class RegexExtractor:
214
217
  return _extract_prisma_schema(content, normalized_path)
215
218
  return None
216
219
 
220
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no edges: the regex fallback doesn't support call graphs."""
    no_edges: list[CallEdge] = []
    return no_edges
222
+
217
223
 
218
224
  class TreeSitterExtractor:
219
225
  """Tree-sitter backend for Python and TypeScript/JavaScript source files."""
@@ -288,6 +294,17 @@ class TreeSitterExtractor:
288
294
  def extract_schema(self, content: str, file_path: str | Path) -> SqlSchemaInfo | PrismaSchemaInfo | None:
289
295
  return self._fallback.extract_schema(content, file_path)
290
296
 
297
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return call edges for Python source content, otherwise an empty list.

    Edges are produced only when this extractor's category is ``"source"``
    and its language is ``"python"``. Any exception raised while parsing or
    walking the tree is swallowed and yields an empty list.
    """
    if self.category == "source":
        try:
            tree_root = self._parse(content)
            if self.language == "python":
                return _extract_python_call_graph(tree_root, content, file_path, symbols)
        except Exception:
            # Best effort: a failed parse means "no edges", not a crash.
            pass
    return []
307
+
291
308
  def _parse(self, content: str):
292
309
  return self._parser.parse(content.encode("utf-8", errors="ignore")).root_node
293
310
 
@@ -330,6 +347,9 @@ class SqlDdlExtractor:
330
347
  fallback = self._fallback.extract_schema(content, path)
331
348
  return fallback if isinstance(fallback, SqlSchemaInfo) else None
332
349
 
350
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no edges: this extractor does not produce a call graph."""
    no_edges: list[CallEdge] = []
    return no_edges
352
+
333
353
 
334
354
  class PrismaSchemaExtractor:
335
355
  """Regex extractor for Prisma schema files."""
@@ -355,6 +375,9 @@ class PrismaSchemaExtractor:
355
375
  def extract_schema(self, content: str, file_path: str | Path) -> PrismaSchemaInfo | None:
356
376
  return _extract_prisma_schema(content, Path(file_path).as_posix())
357
377
 
378
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no edges: this extractor does not produce a call graph."""
    no_edges: list[CallEdge] = []
    return no_edges
380
+
358
381
 
359
382
  def _build_parser(language: str):
360
383
  from tree_sitter import Parser
@@ -918,6 +941,74 @@ def _detect_typescript_code_patterns(mod: ModuleInfo, root: Any, content: str) -
918
941
  mod.patterns["api_routes"] = "NestJS controller"
919
942
 
920
943
 
944
def _extract_python_call_graph(root: Any, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Collect call edges from a parsed Python tree.

    Every ``call`` node under *root* is considered. Calls to dunder names and
    to a fixed list of builtins are dropped, as are calls whose target is not
    among *symbols* (matched by the last dotted segment, by the full callee
    text, or by the text after a leading ``self.``). Each kept call yields an
    edge naming the enclosing function/class scope (``"<module>"`` at top
    level), the callee text as written, ``"file:line"`` and whether the call
    sits directly under an ``await`` node.
    """
    from codd.extractor import CallEdge

    source_bytes = content.encode("utf-8", errors="ignore")
    known_symbols = {symbol.name for symbol in symbols}
    ignored_builtins = {
        "print", "len", "range", "enumerate", "zip", "map", "filter",
        "sorted", "reversed", "list", "dict", "set", "tuple", "str",
        "int", "float", "bool", "type", "isinstance", "issubclass",
        "getattr", "setattr", "hasattr", "super", "property",
        "staticmethod", "classmethod", "open", "repr", "id", "vars",
        "dir", "any", "all", "min", "max", "sum", "abs", "round",
        "format", "iter", "next", "hash", "callable",
    }

    def _current_scope(node: Any) -> str:
        """Join the names of enclosing functions/classes, outermost first."""
        enclosing: list[str] = []
        ancestor = node.parent
        while ancestor is not None:
            if ancestor.type in ("function_definition", "class_definition"):
                scope_name = _field_text(source_bytes, ancestor, "name")
                if scope_name:
                    enclosing.append(scope_name)
            ancestor = ancestor.parent
        # Ancestors were gathered innermost first.
        return ".".join(reversed(enclosing)) if enclosing else "<module>"

    found: list[CallEdge] = []
    for call_node in _iter_named_nodes(root):
        if call_node.type != "call":
            continue

        target = call_node.child_by_field_name("function")
        if target is None:
            continue

        callee_text = _node_text(source_bytes, target).strip()
        bare_name = callee_text.rsplit(".", 1)[-1]

        # Skip builtins and dunder calls
        is_dunder = bare_name.startswith("__") and bare_name.endswith("__")
        if is_dunder or bare_name in ignored_builtins:
            continue

        # Only include calls to known symbols (intra-project)
        if not (bare_name in known_symbols or callee_text in known_symbols):
            # Last chance: a method call on self whose remainder is known.
            if not callee_text.startswith("self."):
                continue
            if callee_text[5:] not in known_symbols:
                continue

        parent = call_node.parent
        found.append(CallEdge(
            caller=_current_scope(call_node),
            callee=callee_text,
            call_site=f"{file_path}:{call_node.start_point.row + 1}",
            is_async=parent is not None and parent.type == "await",
        ))

    return found
1010
+
1011
+
921
1012
  def _sql_first_object_name(content_bytes: bytes, node: Any) -> str:
922
1013
  for child in getattr(node, "named_children", []):
923
1014
  if child.type == "object_reference":
@@ -1774,6 +1865,9 @@ class BuildDepsExtractor:
1774
1865
  scripts=scripts,
1775
1866
  )
1776
1867
 
1868
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no edges: this extractor does not produce a call graph."""
    no_edges: list[CallEdge] = []
    return no_edges
1870
+
1777
1871
 
1778
1872
  class TestExtractor:
1779
1873
  """Extract test metadata from test files."""
@@ -1862,6 +1956,9 @@ class TestExtractor:
1862
1956
  fixtures = re.findall(r"^\s*func\s+(TestMain)\s*\(", content, re.MULTILINE)
1863
1957
  return TestInfo(file_path=file_path, test_functions=tests, fixtures=fixtures)
1864
1958
 
1959
+ def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
1960
+ return []
1961
+
1865
1962
 
1866
1963
  def get_extractor(language: str, category: str = "source") -> LanguageExtractor:
1867
1964
  """Select the best available extractor for a language/category pair."""
@@ -158,6 +158,8 @@ def synth_architecture(
158
158
  ],
159
159
  violations=violations,
160
160
  dependency_lines=dependency_lines,
161
+ feature_clusters=facts.feature_clusters,
162
+ interface_contracts=_interface_contracts_summary(facts),
161
163
  schema_rows=_schema_summary_rows(facts),
162
164
  api_rows=_api_summary_rows(facts),
163
165
  infra_rows=_infra_summary_rows(facts),
@@ -262,6 +264,8 @@ def _render_module_detail(env: Environment, facts: ProjectFacts, module: ModuleI
262
264
  related_api_specs=related_api_specs,
263
265
  files=sorted(module.files),
264
266
  tests=_tests_context(module),
267
+ call_edges=module.call_edges,
268
+ interface_contract=module.interface_contract,
265
269
  )
266
270
  return content
267
271
 
@@ -326,16 +330,43 @@ def _build_frontmatter(
326
330
 
327
331
def _module_depends_on(facts: ProjectFacts, module: ModuleInfo) -> list[dict[str, Any]]:
    """Build the ``depends_on`` entries for *module*.

    Entries are emitted in three groups, and a target node id appears only
    once, under the first relation that reaches it:

    1. ``imports``: internal imports that name a known module, sorted.
    2. ``calls``: modules targeted by the module's call edges, sorted. The
       callee's module is its longest dotted prefix that is a known module,
       so dotted module names such as ``pkg.auth`` are recognised.
    3. ``co_feature``: other members of every feature cluster that contains
       the module.

    Every entry has the keys ``id``, ``relation`` and ``semantic`` (always
    ``"technical"``).
    """
    depends_on: list[dict[str, Any]] = []
    seen_ids: set[str] = set()

    def _add(target_name: str, relation: str) -> None:
        """Record *target_name* unless its node id is already listed."""
        nid = _module_node_id(target_name)
        if nid not in seen_ids:
            seen_ids.add(nid)
            depends_on.append({"id": nid, "relation": relation, "semantic": "technical"})

    for dependency_name in sorted(module.internal_imports):
        if dependency_name in facts.modules:
            _add(dependency_name, "imports")

    # R4.1: call-graph edges
    call_targets: set[str] = set()
    for edge in module.call_edges:
        parts = edge.callee.split(".")
        for end in range(len(parts), 0, -1):
            candidate = ".".join(parts[:end])
            if candidate in facts.modules:
                # The longest match decides; a call into the module itself
                # is not a dependency.
                if candidate != module.name:
                    call_targets.add(candidate)
                break
    for target in sorted(call_targets):
        _add(target, "calls")

    # R4.2: co-feature edges
    for cluster in facts.feature_clusters:
        if module.name not in cluster.modules:
            continue
        for peer in cluster.modules:
            if peer != module.name:
                _add(peer, "co_feature")

    return depends_on
340
371
 
341
372
 
@@ -503,6 +534,23 @@ def _all_external_dependencies(facts: ProjectFacts) -> list[str]:
503
534
  return sorted(dependencies)
504
535
 
505
536
 
537
+ def _interface_contracts_summary(facts: ProjectFacts) -> list[dict[str, Any]]:
538
+ """Build template-friendly interface contract rows."""
539
+ rows: list[dict[str, Any]] = []
540
+ for mod in facts.modules.values():
541
+ ic = mod.interface_contract
542
+ if ic is None:
543
+ continue
544
+ rows.append({
545
+ "module": ic.module,
546
+ "public_count": len(ic.public_symbols),
547
+ "internal_count": len(ic.internal_symbols),
548
+ "ratio": ic.api_surface_ratio,
549
+ "violations": ic.encapsulation_violations,
550
+ })
551
+ return sorted(rows, key=lambda r: r["module"])
552
+
553
+
506
554
  def _dependency_lines(facts: ProjectFacts) -> list[str]:
507
555
  lines = []
508
556
  for module_name, module in sorted(facts.modules.items()):
@@ -31,6 +31,33 @@
31
31
  {% endif %}
32
32
  {% endfor %}
33
33
 
34
+ {% if feature_clusters %}
35
+ ## Feature Clusters
36
+
37
+ {% for cluster in feature_clusters -%}
38
+ ### {{ cluster.name }} (confidence: {{ cluster.confidence }})
39
+
40
+ Modules: {{ cluster.modules | join(", ") }}
41
+ {% if cluster.evidence %}
42
+ Evidence: {{ cluster.evidence | join("; ") }}
43
+ {% endif %}
44
+ {% endfor %}
45
+ {% endif %}
46
+
47
+ {% if interface_contracts %}
48
+ ## Interface Contracts Summary
49
+
50
+ | Module | Public | Internal | API Ratio | Violations |
51
+ |--------|--------|----------|-----------|------------|
52
+ {% for row in interface_contracts -%}
53
+ | `{{ row.module }}` | {{ row.public_count }} | {{ row.internal_count }} | {{ row.ratio }} | {{ row.violations | length }} |
54
+ {% endfor %}
55
+ {% for row in interface_contracts if row.violations %}
56
+
57
+ **{{ row.module }}**: {% for v in row.violations %}{{ v }}{% if not loop.last %}; {% endif %}{% endfor %}
58
+ {% endfor %}
59
+ {% endif %}
60
+
34
61
  ## Layer Violations
35
62
 
36
63
  {% if violations %}
@@ -64,6 +64,40 @@
64
64
  {% endfor %}
65
65
  {% endif %}
66
66
 
67
+ {% if interface_contract %}
68
+ ## Public API
69
+
70
+ {% for name in interface_contract.public_symbols -%}
71
+ - `{{ name }}`
72
+ {% endfor %}
73
+ {% if interface_contract.internal_symbols %}
74
+
75
+ ## Internal API
76
+
77
+ {% for name in interface_contract.internal_symbols -%}
78
+ - `{{ name }}`
79
+ {% endfor %}
80
+ {% endif %}
81
+ {% if interface_contract.encapsulation_violations %}
82
+
83
+ ### Encapsulation Violations
84
+
85
+ {% for v in interface_contract.encapsulation_violations -%}
86
+ - {{ v }}
87
+ {% endfor %}
88
+ {% endif %}
89
+ {% endif %}
90
+
91
+ {% if call_edges %}
92
+ ## Call Graph
93
+
94
+ | Caller | Callee | Location | Async |
95
+ |--------|--------|----------|-------|
96
+ {% for edge in call_edges -%}
97
+ | `{{ edge.caller }}` | `{{ edge.callee }}` | `{{ edge.call_site }}` | {{ "yes" if edge.is_async else "no" }} |
98
+ {% endfor %}
99
+ {% endif %}
100
+
67
101
  {% if internal_dependencies %}
68
102
  ## Import Dependencies
69
103
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "codd-dev"
7
- version = "0.3.0"
7
+ version = "0.4.0"
8
8
  description = "CoDD: Coherence-Driven Development — cross-artifact change impact analysis"
9
9
  readme = "README.md"
10
10
  license = "MIT"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes