codd-dev 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codd_dev-0.3.0 → codd_dev-0.4.0}/PKG-INFO +1 -1
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/__init__.py +1 -1
- codd_dev-0.4.0/codd/clustering.py +168 -0
- codd_dev-0.4.0/codd/contracts.py +138 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/extractor.py +77 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/parsing.py +98 -1
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/synth.py +55 -7
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/extracted/architecture-overview.md.j2 +27 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/extracted/module-detail.md.j2 +34 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/pyproject.toml +1 -1
- {codd_dev-0.3.0 → codd_dev-0.4.0}/.gitignore +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/LICENSE +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/README.md +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/cli.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/config.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/defaults.yaml +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/generator.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/graph.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/hooks.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/implementer.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/planner.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/propagate.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/scanner.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/codd.yaml.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/conventions.yaml.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/data_dependencies.yaml.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/doc_links.yaml.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/extracted/api-contract.md.j2 +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/extracted/schema-design.md.j2 +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/extracted/system-context.md.j2 +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/gitignore.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/templates/overrides.yaml.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/validator.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/codd/verifier.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.4.0}/hooks/pre-commit +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codd-dev
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: CoDD: Coherence-Driven Development — cross-artifact change impact analysis
|
|
5
5
|
Project-URL: Homepage, https://github.com/yohey-w/codd-dev
|
|
6
6
|
Project-URL: Repository, https://github.com/yohey-w/codd-dev
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""R4.2 — Feature clustering for codd extract.
|
|
2
|
+
|
|
3
|
+
Groups modules by functional cohesion using call graph edges,
|
|
4
|
+
naming conventions, and cross-reference density.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from codd.extractor import ProjectFacts
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def build_feature_clusters(facts: "ProjectFacts") -> None:
    """Populate ``facts.feature_clusters`` by analysing call edges and naming.

    Two kinds of cluster are produced:

    * Call-graph clusters: connected components (two or more modules) of the
      undirected module graph derived from ``call_edges``.  Confidence is
      ``min(1.0, 0.4 + 0.1 * linked_pairs + 0.2 if the members share a name
      prefix)``, rounded to two decimals.
    * Prefix-only clusters: modules not already placed in a call-graph
      cluster that share a leading name segment.  Confidence is fixed at 0.3.

    The result is stored sorted by descending confidence.  When the project
    has fewer than two modules the function returns without touching
    ``facts.feature_clusters``.
    """
    from codd.extractor import FeatureCluster

    module_names = list(facts.modules.keys())
    if len(module_names) < 2:
        return

    # Step 1: build an undirected module adjacency from call edges.
    adj: dict[str, set[str]] = defaultdict(set)
    for mod in facts.modules.values():
        for edge in mod.call_edges:
            # edge.callee may be "module.Class.method" — extract target module
            target_mod = _resolve_callee_module(edge.callee, module_names)
            if target_mod and target_mod != mod.name:
                adj[mod.name].add(target_mod)
                adj[target_mod].add(mod.name)

    # Step 2: connected components of the call graph.
    components = _connected_components(module_names, adj)

    # Step 3: naming-prefix groups, used as a lower-confidence fallback.
    prefix_groups = _group_by_prefix(module_names)

    # Step 4: combine both sources.
    clusters: list[FeatureCluster] = []
    seen: set[str] = set()

    # First: call-graph components (higher confidence).
    for comp in components:
        if len(comp) < 2:
            continue
        name = _infer_cluster_name(comp)
        evidence: list[str] = []

        common_prefix = _common_prefix(comp)
        if common_prefix:
            evidence.append(f"shared prefix: {common_prefix}")

        # ``adj`` is symmetric, so summing neighbours over every member sees
        # each linked pair twice; halve it to get the number of distinct links.
        edge_count = sum(len(adj.get(m, set()) & comp) for m in comp) // 2
        if edge_count > 0:
            evidence.append(f"{edge_count} cross-call edges")

        confidence = min(1.0, 0.4 + 0.1 * edge_count + (0.2 if common_prefix else 0.0))

        clusters.append(FeatureCluster(
            name=name,
            modules=sorted(comp),
            confidence=round(confidence, 2),
            evidence=evidence,
        ))
        seen.update(comp)

    # Second: prefix-only groups (lower confidence) over still-unclustered modules.
    for prefix, members in prefix_groups.items():
        remaining = [m for m in members if m not in seen]
        if len(remaining) < 2:
            continue
        clusters.append(FeatureCluster(
            name=prefix,
            modules=sorted(remaining),
            confidence=0.3,
            evidence=[f"shared prefix: {prefix}"],
        ))
        seen.update(remaining)

    # sorted() is stable, so equal-confidence clusters keep their build order.
    facts.feature_clusters = sorted(clusters, key=lambda c: -c.confidence)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _resolve_callee_module(callee: str, module_names: list[str]) -> str | None:
|
|
90
|
+
"""Map a callee like 'auth.verify_token' to module name 'auth'."""
|
|
91
|
+
# Try exact match first
|
|
92
|
+
if callee in module_names:
|
|
93
|
+
return callee
|
|
94
|
+
# Try first dotted segment
|
|
95
|
+
parts = callee.split(".")
|
|
96
|
+
for i in range(len(parts), 0, -1):
|
|
97
|
+
candidate = ".".join(parts[:i])
|
|
98
|
+
if candidate in module_names:
|
|
99
|
+
return candidate
|
|
100
|
+
# Try just the first part (top-level module)
|
|
101
|
+
if parts[0] in module_names:
|
|
102
|
+
return parts[0]
|
|
103
|
+
return None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _connected_components(nodes: list[str], adj: dict[str, set[str]]) -> list[set[str]]:
|
|
107
|
+
"""Find connected components in an undirected graph."""
|
|
108
|
+
visited: set[str] = set()
|
|
109
|
+
components: list[set[str]] = []
|
|
110
|
+
|
|
111
|
+
for node in nodes:
|
|
112
|
+
if node in visited:
|
|
113
|
+
continue
|
|
114
|
+
# BFS
|
|
115
|
+
component: set[str] = set()
|
|
116
|
+
queue = [node]
|
|
117
|
+
while queue:
|
|
118
|
+
current = queue.pop(0)
|
|
119
|
+
if current in visited:
|
|
120
|
+
continue
|
|
121
|
+
visited.add(current)
|
|
122
|
+
component.add(current)
|
|
123
|
+
for neighbor in adj.get(current, set()):
|
|
124
|
+
if neighbor not in visited:
|
|
125
|
+
queue.append(neighbor)
|
|
126
|
+
components.append(component)
|
|
127
|
+
|
|
128
|
+
return components
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _group_by_prefix(module_names: list[str]) -> dict[str, list[str]]:
|
|
132
|
+
"""Group modules sharing a common naming prefix (e.g., 'auth_*')."""
|
|
133
|
+
groups: dict[str, list[str]] = defaultdict(list)
|
|
134
|
+
for name in module_names:
|
|
135
|
+
# Split on underscore or dot
|
|
136
|
+
parts = name.replace(".", "_").split("_")
|
|
137
|
+
if len(parts) >= 2:
|
|
138
|
+
prefix = parts[0]
|
|
139
|
+
if len(prefix) >= 2: # Avoid single-char prefixes
|
|
140
|
+
groups[prefix].append(name)
|
|
141
|
+
# Only return groups with 2+ members
|
|
142
|
+
return {k: v for k, v in groups.items() if len(v) >= 2}
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _common_prefix(names: set[str]) -> str:
|
|
146
|
+
"""Find common prefix among module names, if any."""
|
|
147
|
+
if not names:
|
|
148
|
+
return ""
|
|
149
|
+
name_list = sorted(names)
|
|
150
|
+
parts_list = [n.replace(".", "_").split("_") for n in name_list]
|
|
151
|
+
if not parts_list or not parts_list[0]:
|
|
152
|
+
return ""
|
|
153
|
+
prefix_parts: list[str] = []
|
|
154
|
+
for i, part in enumerate(parts_list[0]):
|
|
155
|
+
if all(len(p) > i and p[i] == part for p in parts_list):
|
|
156
|
+
prefix_parts.append(part)
|
|
157
|
+
else:
|
|
158
|
+
break
|
|
159
|
+
return "_".join(prefix_parts) if prefix_parts else ""
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _infer_cluster_name(modules: set[str]) -> str:
    """Infer a human-readable name for a cluster.

    Uses the members' shared name prefix when there is one; otherwise falls
    back to the shortest module name.  Ties on length are broken
    alphabetically so the result does not depend on set iteration order.

    Raises:
        ValueError: If *modules* is empty.
    """
    prefix = _common_prefix(modules)
    if prefix:
        return prefix
    return min(modules, key=lambda name: (len(name), name))
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""R4.3 — Interface contract detection for codd extract.
|
|
2
|
+
|
|
3
|
+
Distinguishes public API (symbols in __init__.py / __all__) from internal
|
|
4
|
+
implementation details. Detects encapsulation violations where other modules
|
|
5
|
+
reach into internals.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from codd.extractor import ProjectFacts
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class InterfaceContract:
    """Public vs internal API surface for a module."""

    # Name of the module this contract describes.
    module: str
    # Symbol names considered part of the module's public API.
    public_symbols: list[str] = field(default_factory=list)
    # Symbol names of the module that are not in ``public_symbols``.
    internal_symbols: list[str] = field(default_factory=list)
    # Number of public symbols divided by the module's total symbol count.
    api_surface_ratio: float = 0.0
    # Human-readable notes about other modules' internals this module imports.
    encapsulation_violations: list[str] = field(default_factory=list)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ── __init__.py / __all__ parsing ────────────────────────
|
|
31
|
+
|
|
32
|
+
# ``__all__ = [...]`` or ``__all__ = (...)``; the bracket contents are captured
# in group 1 (list form) or group 2 (tuple form).
_ALL_RE = re.compile(
    r"__all__\s*=\s*(?:\[([^\]]*)\]|\(([^)]*)\))",
    re.DOTALL,
)

# ``from .xxx import ...`` at the start of a line.  The import list is either
# a parenthesised group (which may span several lines) or the rest of the line.
_REEXPORT_FROM_RE = re.compile(
    r"^from\s+\.[\w.]*\s+import\s+(\([^)]*\)|[^\n]+)",
    re.MULTILINE,
)

_IMPORT_AS_RE = re.compile(r"(\w+)\s+as\s+(\w+)")

# A quoted identifier inside an ``__all__`` literal.
_QUOTED_NAME_RE = re.compile(r"""['"](\w+)['"]""")


def detect_init_exports(init_content: str) -> list[str]:
    """Parse ``__init__.py`` content and return publicly-exported symbol names.

    An ``__all__`` list or tuple literal takes priority: when present, only
    the names quoted inside it are returned.  Otherwise every name brought
    in by a top-level relative ``from .xxx import ...`` statement counts as
    exported, using the alias for ``Foo as Bar`` imports.  Parenthesised
    multi-line import lists are supported and ``#`` comments are ignored.

    Returns:
        Exported names in first-seen order, without duplicates.
    """
    names: list[str] = []

    # 1) __all__ takes priority
    all_match = _ALL_RE.search(init_content)
    if all_match:
        raw = all_match.group(1) if all_match.group(1) is not None else all_match.group(2)
        for token in _QUOTED_NAME_RE.findall(raw):
            if token not in names:
                names.append(token)
        return names

    # 2) Fall back to ``from .xxx import ...`` re-exports
    for match in _REEXPORT_FROM_RE.finditer(init_content):
        # Drop trailing comments line by line so "Foo  # noqa" is not read as "noqa".
        uncommented = "\n".join(
            line.split("#", 1)[0] for line in match.group(1).splitlines()
        )
        for chunk in uncommented.strip().strip("()").split(","):
            chunk = chunk.strip()
            if not chunk:
                continue
            # handle "Foo as Bar" → the exported name is "Bar"
            alias_m = _IMPORT_AS_RE.search(chunk)
            name = alias_m.group(2) if alias_m else chunk.split()[-1]
            # isidentifier() also filters out ``*`` from star imports.
            if name.isidentifier() and name not in names:
                names.append(name)

    return names
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# ── Build contracts for every module ─────────────────────
|
|
78
|
+
|
|
79
|
+
def build_interface_contracts(facts: "ProjectFacts", project_root: Path) -> None:
    """Populate ``interface_contract`` on every module in *facts*.

    First pass: for each module that has symbols, split them into public and
    internal.  When the module has an ``__init__.py`` the public names are
    whatever ``detect_init_exports`` reports for the first such file; an
    unreadable file is treated as empty.  Modules without an ``__init__.py``
    treat every symbol as public.  Modules without symbols get no contract.

    Second pass: for each module, scan the import lines recorded in
    ``internal_imports`` for whole-word mentions of the dependency's internal
    symbols and record each hit once as an encapsulation violation.

    Args:
        facts: Project facts whose modules are updated in place.
        project_root: Directory that the modules' file paths are relative to.
    """
    # First pass: compute public/internal for each module
    for mod in facts.modules.values():
        all_symbol_names = [s.name for s in mod.symbols]
        if not all_symbol_names:
            continue

        init_files = [f for f in mod.files if Path(f).name == "__init__.py"]
        if init_files:
            init_path = project_root / init_files[0]
            try:
                init_content = init_path.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                # Best-effort: an unreadable __init__.py exports nothing.
                init_content = ""
            public = detect_init_exports(init_content)
        else:
            # For single-file modules (no __init__.py), treat all symbols as public
            public = list(all_symbol_names)

        public_set = set(public)
        internal = [n for n in all_symbol_names if n not in public_set]
        # Non-zero denominator: modules without symbols were skipped above.
        ratio = len(public) / len(all_symbol_names)

        mod.interface_contract = InterfaceContract(
            module=mod.name,
            public_symbols=public,
            internal_symbols=internal,
            api_surface_ratio=round(ratio, 2),
        )

    # Second pass: detect encapsulation violations.
    # Build {module_name: [(internal_name, whole-word pattern), ...]}; names are
    # sorted so violations are reported in a reproducible order.
    internal_patterns: dict[str, list[tuple[str, re.Pattern[str]]]] = {}
    for mod in facts.modules.values():
        contract = mod.interface_contract
        if contract and contract.internal_symbols:
            internal_patterns[mod.name] = [
                (name, re.compile(rf"\b{re.escape(name)}\b"))
                for name in sorted(set(contract.internal_symbols))
            ]

    for mod in facts.modules.values():
        if not mod.interface_contract:
            continue
        violations = mod.interface_contract.encapsulation_violations
        for dep_name, import_lines in mod.internal_imports.items():
            patterns = internal_patterns.get(dep_name)
            if not patterns:
                continue
            for line in import_lines:
                for internal_name, pattern in patterns:
                    # Whole-word match: internal "get" must not flag "get_user".
                    if pattern.search(line):
                        violation = f"{mod.name} uses {dep_name}.{internal_name} (internal)"
                        if violation not in violations:
                            violations.append(violation)
|
|
@@ -53,6 +53,15 @@ class Symbol:
|
|
|
53
53
|
implements: list[str] = field(default_factory=list)
|
|
54
54
|
|
|
55
55
|
|
|
56
|
+
@dataclass
class CallEdge:
    """A function-to-function call relationship."""
    # Scope containing the call.
    caller: str  # "module.Class.method" or "module.function"
    # Text of the called expression; may be rewritten later during resolution.
    callee: str  # target symbol (resolved to module if possible)
    # Where the call appears in the source.
    call_site: str  # file:line
    # Whether the call is awaited (defaults to False).
    is_async: bool = False
|
|
63
|
+
|
|
64
|
+
|
|
56
65
|
@dataclass
|
|
57
66
|
class ModuleInfo:
|
|
58
67
|
"""Aggregated info for a discovered module/package."""
|
|
@@ -65,6 +74,17 @@ class ModuleInfo:
|
|
|
65
74
|
test_details: list[TestInfo] = field(default_factory=list)
|
|
66
75
|
line_count: int = 0
|
|
67
76
|
patterns: dict[str, str] = field(default_factory=dict) # pattern_type -> detail
|
|
77
|
+
call_edges: list[CallEdge] = field(default_factory=list)
|
|
78
|
+
interface_contract: Any = None # InterfaceContract from contracts.py
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass
class FeatureCluster:
    """A group of modules that collaborate on a feature."""
    # Display name of the cluster.
    name: str
    # Names of the member modules.
    modules: list[str] = field(default_factory=list)
    # Heuristic score; higher means stronger evidence for the grouping.
    confidence: float = 0.0
    # Human-readable reasons the modules were grouped together.
    evidence: list[str] = field(default_factory=list)
|
|
68
88
|
|
|
69
89
|
|
|
70
90
|
@dataclass
|
|
@@ -83,6 +103,7 @@ class ProjectFacts:
|
|
|
83
103
|
api_specs: dict[str, Any] = field(default_factory=dict)
|
|
84
104
|
infra_config: dict[str, ConfigInfo] = field(default_factory=dict)
|
|
85
105
|
build_deps: BuildDepsInfo | None = None
|
|
106
|
+
feature_clusters: list[FeatureCluster] = field(default_factory=list)
|
|
86
107
|
|
|
87
108
|
|
|
88
109
|
@dataclass
|
|
@@ -151,6 +172,18 @@ def extract_facts(project_root: Path, language: str | None = None,
|
|
|
151
172
|
# Detect entry points
|
|
152
173
|
_detect_entry_points(facts, project_root, language)
|
|
153
174
|
|
|
175
|
+
# R4.3: Interface contract detection
|
|
176
|
+
from codd.contracts import build_interface_contracts
|
|
177
|
+
build_interface_contracts(facts, project_root)
|
|
178
|
+
|
|
179
|
+
# R4.1: Call graph extraction + resolution
|
|
180
|
+
_extract_call_graphs(facts, project_root, language, exclude_patterns)
|
|
181
|
+
_resolve_call_graph(facts)
|
|
182
|
+
|
|
183
|
+
# R4.2: Feature clustering
|
|
184
|
+
from codd.clustering import build_feature_clusters
|
|
185
|
+
build_feature_clusters(facts)
|
|
186
|
+
|
|
154
187
|
return facts
|
|
155
188
|
|
|
156
189
|
|
|
@@ -817,6 +850,50 @@ def _discover_build_deps(project_root: Path) -> BuildDepsInfo | None:
|
|
|
817
850
|
return extractor.merge(discovered)
|
|
818
851
|
|
|
819
852
|
|
|
853
|
+
# ── R4.1 helpers: call-graph extraction & resolution ──────
|
|
854
|
+
|
|
855
|
+
def _extract_call_graphs(facts: ProjectFacts, project_root: Path,
                         language: str, exclude_patterns: list[str] | None):
    """Gather call edges for each module's files via the language extractor.

    Every file listed on a module is read relative to *project_root* and
    handed to the extractor's ``extract_call_graph``; the resulting edges
    are appended to that module's ``call_edges``.  Files that cannot be read
    are skipped.  Nothing happens when the extractor has no
    ``extract_call_graph`` attribute.  *exclude_patterns* is accepted but
    not consulted here.
    """
    source_extractor = get_extractor(language, "source")
    if hasattr(source_extractor, "extract_call_graph"):
        for module in facts.modules.values():
            for relative_path in module.files:
                absolute_path = project_root / relative_path
                try:
                    source_text = absolute_path.read_text(errors="ignore")
                except Exception:
                    # Unreadable file: skip it and carry on with the rest.
                    continue
                module.call_edges.extend(
                    source_extractor.extract_call_graph(
                        source_text, relative_path, module.symbols
                    )
                )
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def _resolve_call_graph(facts: ProjectFacts):
|
|
874
|
+
"""Resolve callee names to fully-qualified module.symbol references."""
|
|
875
|
+
# Build symbol → module lookup
|
|
876
|
+
symbol_to_module: dict[str, str] = {}
|
|
877
|
+
for mod in facts.modules.values():
|
|
878
|
+
for sym in mod.symbols:
|
|
879
|
+
symbol_to_module[sym.name] = mod.name
|
|
880
|
+
|
|
881
|
+
for mod in facts.modules.values():
|
|
882
|
+
for edge in mod.call_edges:
|
|
883
|
+
callee = edge.callee
|
|
884
|
+
# Strip self. prefix
|
|
885
|
+
if callee.startswith("self."):
|
|
886
|
+
callee = callee[5:]
|
|
887
|
+
# Try to resolve bare name to module.name
|
|
888
|
+
bare = callee.split(".")[-1]
|
|
889
|
+
if bare in symbol_to_module:
|
|
890
|
+
target_mod = symbol_to_module[bare]
|
|
891
|
+
if target_mod != mod.name:
|
|
892
|
+
edge.callee = f"{target_mod}.{bare}"
|
|
893
|
+
else:
|
|
894
|
+
edge.callee = bare
|
|
895
|
+
|
|
896
|
+
|
|
820
897
|
# ═══════════════════════════════════════════════════════════
|
|
821
898
|
# Phase 2: Synth Docs (template-based, no AI)
|
|
822
899
|
# ═══════════════════════════════════════════════════════════
|
|
@@ -28,7 +28,7 @@ except ModuleNotFoundError:
|
|
|
28
28
|
hcl2 = None
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
|
-
from codd.extractor import ModuleInfo, Symbol
|
|
31
|
+
from codd.extractor import CallEdge, ModuleInfo, Symbol
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
_TREE_SITTER_LANGUAGE_PACKAGES = {
|
|
@@ -167,6 +167,9 @@ class LanguageExtractor(Protocol):
|
|
|
167
167
|
def extract_schema(self, content: str, file_path: str | Path) -> SqlSchemaInfo | PrismaSchemaInfo | None:
|
|
168
168
|
"""Return schema information when supported by the extractor."""
|
|
169
169
|
|
|
170
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return call edges found in the given source content.

    Args:
        content: Text of the source file to analyse.
        file_path: Path of the file that *content* was read from.
        symbols: Symbols supplied by the caller for this file's module.
    """
|
|
172
|
+
|
|
170
173
|
|
|
171
174
|
class RegexExtractor:
|
|
172
175
|
"""Adapter for regex-based extraction and schema parsing."""
|
|
@@ -214,6 +217,9 @@ class RegexExtractor:
|
|
|
214
217
|
return _extract_prisma_schema(content, normalized_path)
|
|
215
218
|
return None
|
|
216
219
|
|
|
220
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no call edges; the regex backend cannot build a call graph."""
    return []  # Regex fallback doesn't support call graph
|
|
222
|
+
|
|
217
223
|
|
|
218
224
|
class TreeSitterExtractor:
|
|
219
225
|
"""Tree-sitter backend for Python and TypeScript/JavaScript source files."""
|
|
@@ -288,6 +294,17 @@ class TreeSitterExtractor:
|
|
|
288
294
|
def extract_schema(self, content: str, file_path: str | Path) -> SqlSchemaInfo | PrismaSchemaInfo | None:
|
|
289
295
|
return self._fallback.extract_schema(content, file_path)
|
|
290
296
|
|
|
297
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: "list[Symbol]") -> "list[CallEdge]":
    """Return call edges found in *content*, for Python source only.

    Any other category or language yields an empty list without parsing.
    Extraction is best-effort: if parsing or tree traversal raises, an empty
    list is returned instead of propagating the error.
    """
    # Only Python has a call-graph walker, so skip the parse for everything else.
    if self.category != "source" or self.language != "python":
        return []
    try:
        root = self._parse(content)
        return _extract_python_call_graph(root, content, file_path, symbols)
    except Exception:
        # Deliberately broad: one bad file must not abort the whole extraction.
        return []
|
|
307
|
+
|
|
291
308
|
def _parse(self, content: str):
|
|
292
309
|
return self._parser.parse(content.encode("utf-8", errors="ignore")).root_node
|
|
293
310
|
|
|
@@ -330,6 +347,9 @@ class SqlDdlExtractor:
|
|
|
330
347
|
fallback = self._fallback.extract_schema(content, path)
|
|
331
348
|
return fallback if isinstance(fallback, SqlSchemaInfo) else None
|
|
332
349
|
|
|
350
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no call edges; this extractor does not produce a call graph."""
    return []
|
|
352
|
+
|
|
333
353
|
|
|
334
354
|
class PrismaSchemaExtractor:
|
|
335
355
|
"""Regex extractor for Prisma schema files."""
|
|
@@ -355,6 +375,9 @@ class PrismaSchemaExtractor:
|
|
|
355
375
|
def extract_schema(self, content: str, file_path: str | Path) -> PrismaSchemaInfo | None:
|
|
356
376
|
return _extract_prisma_schema(content, Path(file_path).as_posix())
|
|
357
377
|
|
|
378
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no call edges; this extractor does not produce a call graph."""
    return []
|
|
380
|
+
|
|
358
381
|
|
|
359
382
|
def _build_parser(language: str):
|
|
360
383
|
from tree_sitter import Parser
|
|
@@ -918,6 +941,74 @@ def _detect_typescript_code_patterns(mod: ModuleInfo, root: Any, content: str) -
|
|
|
918
941
|
mod.patterns["api_routes"] = "NestJS controller"
|
|
919
942
|
|
|
920
943
|
|
|
944
|
+
def _extract_python_call_graph(root: Any, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Collect call edges from a tree-sitter parse of Python source.

    Walks every ``call`` node under *root* and keeps those whose target is
    one of *symbols* (by bare name, by full callee text, or as
    ``self.<name>``).  Dunder calls and a fixed list of common builtins are
    skipped.  Each kept call becomes a ``CallEdge`` whose caller is the
    dotted chain of enclosing class/function names (``"<module>"`` at top
    level) and whose call site is ``"<file_path>:<1-based line>"``.
    """
    from codd.extractor import CallEdge

    source_bytes = content.encode("utf-8", errors="ignore")
    known_symbols = {sym.name for sym in symbols}
    ignored_builtins = (
        "print", "len", "range", "enumerate", "zip", "map", "filter",
        "sorted", "reversed", "list", "dict", "set", "tuple", "str",
        "int", "float", "bool", "type", "isinstance", "issubclass",
        "getattr", "setattr", "hasattr", "super", "property",
        "staticmethod", "classmethod", "open", "repr", "id", "vars",
        "dir", "any", "all", "min", "max", "sum", "abs", "round",
        "format", "iter", "next", "hash", "callable",
    )

    def _current_scope(node: Any) -> str:
        """Return the dotted names of the enclosing definitions, outermost first."""
        enclosing: list[str] = []
        ancestor = node.parent
        while ancestor is not None:
            if ancestor.type in ("function_definition", "class_definition"):
                scope_name = _field_text(source_bytes, ancestor, "name")
                if scope_name:
                    # Ancestors are visited innermost-first, so prepend.
                    enclosing.insert(0, scope_name)
            ancestor = ancestor.parent
        return ".".join(enclosing) if enclosing else "<module>"

    found: list[CallEdge] = []
    for node in _iter_named_nodes(root):
        if node.type != "call":
            continue

        target = node.child_by_field_name("function")
        if target is None:
            continue

        callee_text = _node_text(source_bytes, target).strip()
        bare_name = callee_text.rsplit(".", 1)[-1]

        # Skip dunder calls and common builtins.
        if bare_name.startswith("__") and bare_name.endswith("__"):
            continue
        if bare_name in ignored_builtins:
            continue

        # Keep only calls that target one of the supplied symbols.
        targets_known_symbol = (
            bare_name in known_symbols
            or callee_text in known_symbols
            or (callee_text.startswith("self.") and callee_text[5:] in known_symbols)
        )
        if not targets_known_symbol:
            continue

        found.append(CallEdge(
            caller=_current_scope(node),
            callee=callee_text,
            call_site=f"{file_path}:{node.start_point.row + 1}",
            # Flagged async only when the call is the direct child of an await.
            is_async=node.parent is not None and node.parent.type == "await",
        ))

    return found
|
|
1010
|
+
|
|
1011
|
+
|
|
921
1012
|
def _sql_first_object_name(content_bytes: bytes, node: Any) -> str:
|
|
922
1013
|
for child in getattr(node, "named_children", []):
|
|
923
1014
|
if child.type == "object_reference":
|
|
@@ -1774,6 +1865,9 @@ class BuildDepsExtractor:
|
|
|
1774
1865
|
scripts=scripts,
|
|
1775
1866
|
)
|
|
1776
1867
|
|
|
1868
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no call edges; this extractor does not produce a call graph."""
    return []
|
|
1870
|
+
|
|
1777
1871
|
|
|
1778
1872
|
class TestExtractor:
|
|
1779
1873
|
"""Extract test metadata from test files."""
|
|
@@ -1862,6 +1956,9 @@ class TestExtractor:
|
|
|
1862
1956
|
fixtures = re.findall(r"^\s*func\s+(TestMain)\s*\(", content, re.MULTILINE)
|
|
1863
1957
|
return TestInfo(file_path=file_path, test_functions=tests, fixtures=fixtures)
|
|
1864
1958
|
|
|
1959
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no call edges; this extractor does not produce a call graph."""
    return []
|
|
1961
|
+
|
|
1865
1962
|
|
|
1866
1963
|
def get_extractor(language: str, category: str = "source") -> LanguageExtractor:
|
|
1867
1964
|
"""Select the best available extractor for a language/category pair."""
|
|
@@ -158,6 +158,8 @@ def synth_architecture(
|
|
|
158
158
|
],
|
|
159
159
|
violations=violations,
|
|
160
160
|
dependency_lines=dependency_lines,
|
|
161
|
+
feature_clusters=facts.feature_clusters,
|
|
162
|
+
interface_contracts=_interface_contracts_summary(facts),
|
|
161
163
|
schema_rows=_schema_summary_rows(facts),
|
|
162
164
|
api_rows=_api_summary_rows(facts),
|
|
163
165
|
infra_rows=_infra_summary_rows(facts),
|
|
@@ -262,6 +264,8 @@ def _render_module_detail(env: Environment, facts: ProjectFacts, module: ModuleI
|
|
|
262
264
|
related_api_specs=related_api_specs,
|
|
263
265
|
files=sorted(module.files),
|
|
264
266
|
tests=_tests_context(module),
|
|
267
|
+
call_edges=module.call_edges,
|
|
268
|
+
interface_contract=module.interface_contract,
|
|
265
269
|
)
|
|
266
270
|
return content
|
|
267
271
|
|
|
@@ -326,16 +330,43 @@ def _build_frontmatter(
|
|
|
326
330
|
|
|
327
331
|
def _module_depends_on(facts: "ProjectFacts", module: "ModuleInfo") -> list[dict[str, Any]]:
    """Build the ``depends_on`` entries for a module's document.

    Entries are emitted in three passes, and a target node id is listed only
    once, under the first relation that reaches it:

    1. ``imports``    — project modules named in ``module.internal_imports``;
    2. ``calls``      — project modules targeted by ``module.call_edges``;
    3. ``co_feature`` — peers sharing a feature cluster with the module.

    Returns:
        A list of ``{"id", "relation", "semantic"}`` dicts.
    """
    depends_on: list[dict[str, Any]] = []
    seen_ids: set[str] = set()

    def _add(node_id: str, relation: str) -> None:
        """Append a dependency entry unless its node id is already present."""
        if node_id not in seen_ids:
            depends_on.append(
                {"id": node_id, "relation": relation, "semantic": "technical"}
            )
            seen_ids.add(node_id)

    for dependency_name in sorted(module.internal_imports):
        if dependency_name not in facts.modules:
            continue
        _add(_module_node_id(dependency_name), "imports")

    # R4.1: call-graph edges
    call_targets: set[str] = set()
    for edge in module.call_edges:
        # Module names may themselves be dotted, so match the longest dotted
        # prefix of the callee instead of only its first segment.
        parts = edge.callee.split(".")
        for end in range(len(parts), 0, -1):
            candidate = ".".join(parts[:end])
            if candidate in facts.modules:
                if candidate != module.name:
                    call_targets.add(candidate)
                break
    for target in sorted(call_targets):
        _add(_module_node_id(target), "calls")

    # R4.2: co-feature edges
    for cluster in facts.feature_clusters:
        if module.name in cluster.modules:
            for peer in cluster.modules:
                if peer != module.name:
                    _add(_module_node_id(peer), "co_feature")

    return depends_on
|
|
340
371
|
|
|
341
372
|
|
|
@@ -503,6 +534,23 @@ def _all_external_dependencies(facts: ProjectFacts) -> list[str]:
|
|
|
503
534
|
return sorted(dependencies)
|
|
504
535
|
|
|
505
536
|
|
|
537
|
+
def _interface_contracts_summary(facts: ProjectFacts) -> list[dict[str, Any]]:
|
|
538
|
+
"""Build template-friendly interface contract rows."""
|
|
539
|
+
rows: list[dict[str, Any]] = []
|
|
540
|
+
for mod in facts.modules.values():
|
|
541
|
+
ic = mod.interface_contract
|
|
542
|
+
if ic is None:
|
|
543
|
+
continue
|
|
544
|
+
rows.append({
|
|
545
|
+
"module": ic.module,
|
|
546
|
+
"public_count": len(ic.public_symbols),
|
|
547
|
+
"internal_count": len(ic.internal_symbols),
|
|
548
|
+
"ratio": ic.api_surface_ratio,
|
|
549
|
+
"violations": ic.encapsulation_violations,
|
|
550
|
+
})
|
|
551
|
+
return sorted(rows, key=lambda r: r["module"])
|
|
552
|
+
|
|
553
|
+
|
|
506
554
|
def _dependency_lines(facts: ProjectFacts) -> list[str]:
|
|
507
555
|
lines = []
|
|
508
556
|
for module_name, module in sorted(facts.modules.items()):
|
|
@@ -31,6 +31,33 @@
|
|
|
31
31
|
{% endif %}
|
|
32
32
|
{% endfor %}
|
|
33
33
|
|
|
34
|
+
{% if feature_clusters %}
|
|
35
|
+
## Feature Clusters
|
|
36
|
+
|
|
37
|
+
{% for cluster in feature_clusters -%}
|
|
38
|
+
### {{ cluster.name }} (confidence: {{ cluster.confidence }})
|
|
39
|
+
|
|
40
|
+
Modules: {{ cluster.modules | join(", ") }}
|
|
41
|
+
{% if cluster.evidence %}
|
|
42
|
+
Evidence: {{ cluster.evidence | join("; ") }}
|
|
43
|
+
{% endif %}
|
|
44
|
+
{% endfor %}
|
|
45
|
+
{% endif %}
|
|
46
|
+
|
|
47
|
+
{% if interface_contracts %}
|
|
48
|
+
## Interface Contracts Summary
|
|
49
|
+
|
|
50
|
+
| Module | Public | Internal | API Ratio | Violations |
|
|
51
|
+
|--------|--------|----------|-----------|------------|
|
|
52
|
+
{% for row in interface_contracts -%}
|
|
53
|
+
| `{{ row.module }}` | {{ row.public_count }} | {{ row.internal_count }} | {{ row.ratio }} | {{ row.violations | length }} |
|
|
54
|
+
{% endfor %}
|
|
55
|
+
{% for row in interface_contracts if row.violations %}
|
|
56
|
+
|
|
57
|
+
**{{ row.module }}**: {% for v in row.violations %}{{ v }}{% if not loop.last %}; {% endif %}{% endfor %}
|
|
58
|
+
{% endfor %}
|
|
59
|
+
{% endif %}
|
|
60
|
+
|
|
34
61
|
## Layer Violations
|
|
35
62
|
|
|
36
63
|
{% if violations %}
|
|
@@ -64,6 +64,40 @@
|
|
|
64
64
|
{% endfor %}
|
|
65
65
|
{% endif %}
|
|
66
66
|
|
|
67
|
+
{% if interface_contract %}
|
|
68
|
+
## Public API
|
|
69
|
+
|
|
70
|
+
{% for name in interface_contract.public_symbols -%}
|
|
71
|
+
- `{{ name }}`
|
|
72
|
+
{% endfor %}
|
|
73
|
+
{% if interface_contract.internal_symbols %}
|
|
74
|
+
|
|
75
|
+
## Internal API
|
|
76
|
+
|
|
77
|
+
{% for name in interface_contract.internal_symbols -%}
|
|
78
|
+
- `{{ name }}`
|
|
79
|
+
{% endfor %}
|
|
80
|
+
{% endif %}
|
|
81
|
+
{% if interface_contract.encapsulation_violations %}
|
|
82
|
+
|
|
83
|
+
### Encapsulation Violations
|
|
84
|
+
|
|
85
|
+
{% for v in interface_contract.encapsulation_violations -%}
|
|
86
|
+
- {{ v }}
|
|
87
|
+
{% endfor %}
|
|
88
|
+
{% endif %}
|
|
89
|
+
{% endif %}
|
|
90
|
+
|
|
91
|
+
{% if call_edges %}
|
|
92
|
+
## Call Graph
|
|
93
|
+
|
|
94
|
+
| Caller | Callee | Location | Async |
|
|
95
|
+
|--------|--------|----------|-------|
|
|
96
|
+
{% for edge in call_edges -%}
|
|
97
|
+
| `{{ edge.caller }}` | `{{ edge.callee }}` | `{{ edge.call_site }}` | {{ "yes" if edge.is_async else "no" }} |
|
|
98
|
+
{% endfor %}
|
|
99
|
+
{% endif %}
|
|
100
|
+
|
|
67
101
|
{% if internal_dependencies %}
|
|
68
102
|
## Import Dependencies
|
|
69
103
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|