codd-dev 0.3.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codd_dev-0.3.0 → codd_dev-0.5.0}/PKG-INFO +1 -1
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/__init__.py +1 -1
- codd_dev-0.5.0/codd/clustering.py +168 -0
- codd_dev-0.5.0/codd/contracts.py +138 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/extractor.py +97 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/parsing.py +98 -1
- codd_dev-0.5.0/codd/risk.py +100 -0
- codd_dev-0.5.0/codd/schema_refs.py +122 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/synth.py +88 -7
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/extracted/architecture-overview.md.j2 +37 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/extracted/module-detail.md.j2 +67 -0
- codd_dev-0.5.0/codd/traceability.py +67 -0
- codd_dev-0.5.0/codd/wiring.py +146 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/pyproject.toml +1 -1
- {codd_dev-0.3.0 → codd_dev-0.5.0}/.gitignore +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/LICENSE +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/README.md +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/cli.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/config.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/defaults.yaml +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/generator.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/graph.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/hooks.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/implementer.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/planner.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/propagate.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/scanner.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/codd.yaml.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/conventions.yaml.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/data_dependencies.yaml.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/doc_links.yaml.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/extracted/api-contract.md.j2 +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/extracted/schema-design.md.j2 +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/extracted/system-context.md.j2 +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/gitignore.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/templates/overrides.yaml.tmpl +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/validator.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/codd/verifier.py +0 -0
- {codd_dev-0.3.0 → codd_dev-0.5.0}/hooks/pre-commit +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codd-dev
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: CoDD: Coherence-Driven Development — cross-artifact change impact analysis
|
|
5
5
|
Project-URL: Homepage, https://github.com/yohey-w/codd-dev
|
|
6
6
|
Project-URL: Repository, https://github.com/yohey-w/codd-dev
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""R4.2 — Feature clustering for codd extract.
|
|
2
|
+
|
|
3
|
+
Groups modules by functional cohesion using call graph edges,
|
|
4
|
+
naming conventions, and cross-reference density.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from codd.extractor import ProjectFacts
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def build_feature_clusters(facts: ProjectFacts) -> None:
    """Populate ``facts.feature_clusters`` by analysing call edges and naming.

    Two sources of evidence are combined:

    1. Connected components of the undirected module graph built from each
       module's ``call_edges`` (confidence starts at 0.4 and grows with the
       number of linked module pairs and a shared naming prefix).
    2. Modules that were not placed by step 1 but share a naming prefix
       (fixed confidence 0.3).

    The resulting clusters are stored sorted by descending confidence.
    Nothing is written when fewer than two modules exist.
    """
    from codd.extractor import FeatureCluster

    module_names = list(facts.modules.keys())
    if len(module_names) < 2:
        return

    # Step 1: Build a symmetric module adjacency from call edges.
    adj: dict[str, set[str]] = defaultdict(set)
    for mod in facts.modules.values():
        for edge in mod.call_edges:
            # edge.callee may be "module.Class.method" — extract target module
            target_mod = _resolve_callee_module(edge.callee, module_names)
            if target_mod and target_mod != mod.name:
                adj[mod.name].add(target_mod)
                adj[target_mod].add(mod.name)

    # Step 2: Find connected components via call graph
    components = _connected_components(module_names, adj)

    # Step 3: Naming prefix heuristics
    prefix_groups = _group_by_prefix(module_names)

    # Step 4: Combine call-graph components with prefix groups
    clusters: list[FeatureCluster] = []
    seen: set[str] = set()

    # First: call-graph components (higher confidence)
    for comp in components:
        if len(comp) < 2:
            continue
        name = _infer_cluster_name(comp)
        evidence: list[str] = []

        common_prefix = _common_prefix(comp)
        if common_prefix:
            evidence.append(f"shared prefix: {common_prefix}")

        # ``adj`` stores every link in both directions, so walking all
        # members sees each linked pair twice; halve to get the real count.
        edge_count = sum(
            1 for m in comp for n in adj.get(m, ()) if n in comp
        ) // 2
        if edge_count > 0:
            evidence.append(f"{edge_count} cross-call edges")

        confidence = min(1.0, 0.4 + 0.1 * edge_count + (0.2 if common_prefix else 0.0))

        clusters.append(FeatureCluster(
            name=name,
            modules=sorted(comp),
            confidence=round(confidence, 2),
            evidence=evidence,
        ))
        seen.update(comp)

    # Second: prefix-only groups (lower confidence), skipping modules that
    # already belong to a call-graph cluster.
    for prefix, members in prefix_groups.items():
        remaining = [m for m in members if m not in seen]
        if len(remaining) < 2:
            continue
        clusters.append(FeatureCluster(
            name=prefix,
            modules=sorted(remaining),
            confidence=0.3,
            evidence=[f"shared prefix: {prefix}"],
        ))
        seen.update(remaining)

    facts.feature_clusters = sorted(clusters, key=lambda c: -c.confidence)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _resolve_callee_module(callee: str, module_names: list[str]) -> str | None:
|
|
90
|
+
"""Map a callee like 'auth.verify_token' to module name 'auth'."""
|
|
91
|
+
# Try exact match first
|
|
92
|
+
if callee in module_names:
|
|
93
|
+
return callee
|
|
94
|
+
# Try first dotted segment
|
|
95
|
+
parts = callee.split(".")
|
|
96
|
+
for i in range(len(parts), 0, -1):
|
|
97
|
+
candidate = ".".join(parts[:i])
|
|
98
|
+
if candidate in module_names:
|
|
99
|
+
return candidate
|
|
100
|
+
# Try just the first part (top-level module)
|
|
101
|
+
if parts[0] in module_names:
|
|
102
|
+
return parts[0]
|
|
103
|
+
return None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _connected_components(nodes: list[str], adj: dict[str, set[str]]) -> list[set[str]]:
|
|
107
|
+
"""Find connected components in an undirected graph."""
|
|
108
|
+
visited: set[str] = set()
|
|
109
|
+
components: list[set[str]] = []
|
|
110
|
+
|
|
111
|
+
for node in nodes:
|
|
112
|
+
if node in visited:
|
|
113
|
+
continue
|
|
114
|
+
# BFS
|
|
115
|
+
component: set[str] = set()
|
|
116
|
+
queue = [node]
|
|
117
|
+
while queue:
|
|
118
|
+
current = queue.pop(0)
|
|
119
|
+
if current in visited:
|
|
120
|
+
continue
|
|
121
|
+
visited.add(current)
|
|
122
|
+
component.add(current)
|
|
123
|
+
for neighbor in adj.get(current, set()):
|
|
124
|
+
if neighbor not in visited:
|
|
125
|
+
queue.append(neighbor)
|
|
126
|
+
components.append(component)
|
|
127
|
+
|
|
128
|
+
return components
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _group_by_prefix(module_names: list[str]) -> dict[str, list[str]]:
|
|
132
|
+
"""Group modules sharing a common naming prefix (e.g., 'auth_*')."""
|
|
133
|
+
groups: dict[str, list[str]] = defaultdict(list)
|
|
134
|
+
for name in module_names:
|
|
135
|
+
# Split on underscore or dot
|
|
136
|
+
parts = name.replace(".", "_").split("_")
|
|
137
|
+
if len(parts) >= 2:
|
|
138
|
+
prefix = parts[0]
|
|
139
|
+
if len(prefix) >= 2: # Avoid single-char prefixes
|
|
140
|
+
groups[prefix].append(name)
|
|
141
|
+
# Only return groups with 2+ members
|
|
142
|
+
return {k: v for k, v in groups.items() if len(v) >= 2}
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _common_prefix(names: set[str]) -> str:
|
|
146
|
+
"""Find common prefix among module names, if any."""
|
|
147
|
+
if not names:
|
|
148
|
+
return ""
|
|
149
|
+
name_list = sorted(names)
|
|
150
|
+
parts_list = [n.replace(".", "_").split("_") for n in name_list]
|
|
151
|
+
if not parts_list or not parts_list[0]:
|
|
152
|
+
return ""
|
|
153
|
+
prefix_parts: list[str] = []
|
|
154
|
+
for i, part in enumerate(parts_list[0]):
|
|
155
|
+
if all(len(p) > i and p[i] == part for p in parts_list):
|
|
156
|
+
prefix_parts.append(part)
|
|
157
|
+
else:
|
|
158
|
+
break
|
|
159
|
+
return "_".join(prefix_parts) if prefix_parts else ""
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _infer_cluster_name(modules: set[str]) -> str:
|
|
163
|
+
"""Infer a human-readable name for a cluster."""
|
|
164
|
+
prefix = _common_prefix(modules)
|
|
165
|
+
if prefix:
|
|
166
|
+
return prefix
|
|
167
|
+
# Fall back to shortest module name
|
|
168
|
+
return min(modules, key=len)
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""R4.3 — Interface contract detection for codd extract.
|
|
2
|
+
|
|
3
|
+
Distinguishes public API (symbols in __init__.py / __all__) from internal
|
|
4
|
+
implementation details. Detects encapsulation violations where other modules
|
|
5
|
+
reach into internals.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from codd.extractor import ProjectFacts
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class InterfaceContract:
    """Split of one module's symbols into public API and internals."""

    # Name of the module this contract describes.
    module: str
    # Symbol names considered part of the public API.
    public_symbols: list[str] = field(default_factory=list)
    # Symbol names of the module that are not in ``public_symbols``.
    internal_symbols: list[str] = field(default_factory=list)
    # Public symbol count divided by total symbol count (rounded by the builder).
    api_surface_ratio: float = 0.0
    # Human-readable notes about other modules importing internal symbols.
    encapsulation_violations: list[str] = field(default_factory=list)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ── __init__.py / __all__ parsing ────────────────────────
|
|
31
|
+
|
|
32
|
+
# Matches a list-literal ``__all__ = [...]`` assignment (may span lines).
_ALL_RE = re.compile(
    r"__all__\s*=\s*\[([^\]]*)\]",
    re.DOTALL,
)

# Matches relative ``from .xxx import ...`` statements. The import list is
# either a parenthesized group (which may span several lines) or the rest of
# the line.
_REEXPORT_FROM_RE = re.compile(
    r"^from\s+\.[\w.]*\s+import\s+(\([^)]*\)|.+)",
    re.MULTILINE,
)

_IMPORT_AS_RE = re.compile(r"(\w+)\s+as\s+(\w+)")


def detect_init_exports(init_content: str) -> list[str]:
    """Parse ``__init__.py`` content and return publicly-exported symbol names.

    If a list-literal ``__all__`` is present, its quoted names are returned
    and nothing else is considered. Otherwise the names bound by relative
    ``from .xxx import ...`` statements are returned (the alias for
    ``Foo as Bar``). Duplicates are dropped, first occurrence wins.

    Args:
        init_content: Raw text of an ``__init__.py`` file.

    Returns:
        Exported names in order of appearance; empty when none are found.
    """
    names: list[str] = []

    # 1) __all__ takes priority
    m = _ALL_RE.search(init_content)
    if m:
        for token in re.findall(r"""['"](\w+)['"]""", m.group(1)):
            if token not in names:
                names.append(token)
        return names

    # 2) Fall back to ``from .xxx import ...`` re-exports.
    # Drop comments first so a trailing "# noqa: F401" is not mistaken for an
    # imported name, and fold backslash continuations onto one line.
    cleaned = re.sub(r"#[^\n]*", "", init_content).replace("\\\n", " ")
    for m2 in _REEXPORT_FROM_RE.finditer(cleaned):
        import_part = m2.group(1).strip().strip("()")
        for chunk in import_part.split(","):
            chunk = chunk.strip().strip("()").strip()
            if not chunk:
                continue
            # handle "Foo as Bar" → the exported name is "Bar"
            alias_m = _IMPORT_AS_RE.search(chunk)
            name = alias_m.group(2) if alias_m else chunk.split()[-1]
            if name.isidentifier() and name not in names:
                names.append(name)

    return names
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# ── Build contracts for every module ─────────────────────
|
|
78
|
+
|
|
79
|
+
def build_interface_contracts(facts: ProjectFacts, project_root: Path) -> None:
    """Populate ``interface_contract`` on every module in *facts*.

    First pass: for each module that has symbols, decide which symbol names
    are public. When the module's files include an ``__init__.py``, the public
    names come from ``detect_init_exports`` on the first such file; otherwise
    every symbol is treated as public. Modules without symbols are left
    untouched.

    Second pass: for each module, scan the import lines recorded in
    ``internal_imports`` and record a violation whenever a line mentions an
    internal symbol of the imported module as a whole word.

    Args:
        facts: Project facts whose modules are updated in place.
        project_root: Directory that the modules' relative file paths hang off.
    """
    # First pass: compute public/internal for each module
    for mod in facts.modules.values():
        all_symbol_names = [s.name for s in mod.symbols]
        if not all_symbol_names:
            continue

        init_files = [f for f in mod.files if Path(f).name == "__init__.py"]
        if init_files:
            try:
                init_content = (project_root / init_files[0]).read_text(errors="ignore")
            except (OSError, ValueError):
                # Unreadable __init__.py: best effort, treat as exporting nothing.
                init_content = ""
            public = detect_init_exports(init_content)
        else:
            # For single-file modules (no __init__.py), treat all symbols as public
            public = list(all_symbol_names)

        public_set = set(public)
        internal = [n for n in all_symbol_names if n not in public_set]
        # NOTE(review): ``public`` may contain re-exported names that are not
        # in ``mod.symbols``, so this ratio can exceed 1.0 — confirm intent.
        ratio = len(public) / len(all_symbol_names)

        mod.interface_contract = InterfaceContract(
            module=mod.name,
            public_symbols=public,
            internal_symbols=internal,
            api_surface_ratio=round(ratio, 2),
        )

    # Second pass: detect encapsulation violations.
    # One whole-word matcher per internal symbol, kept in declaration order so
    # the reported violations come out in a stable order.
    internal_lookup: dict[str, list] = {}
    for mod in facts.modules.values():
        if mod.interface_contract:
            unique_names = dict.fromkeys(mod.interface_contract.internal_symbols)
            internal_lookup[mod.name] = [
                (name, re.compile(rf"(?<!\w){re.escape(name)}(?!\w)"))
                for name in unique_names
            ]

    for mod in facts.modules.values():
        if not mod.interface_contract:
            continue
        violations = mod.interface_contract.encapsulation_violations
        for dep_name, import_lines in mod.internal_imports.items():
            matchers = internal_lookup.get(dep_name)
            if not matchers:
                continue
            # assumes each entry of import_lines is a str — TODO confirm
            for line in import_lines:
                for internal_name, pattern in matchers:
                    # Whole-word match: "foo" must not fire on "foobar".
                    if not pattern.search(line):
                        continue
                    violation = f"{mod.name} uses {dep_name}.{internal_name} (internal)"
                    if violation not in violations:
                        violations.append(violation)
|
|
@@ -53,6 +53,15 @@ class Symbol:
|
|
|
53
53
|
implements: list[str] = field(default_factory=list)
|
|
54
54
|
|
|
55
55
|
|
|
56
|
+
@dataclass
class CallEdge:
    """One call from a caller scope to a callee symbol."""

    # Calling scope, e.g. "module.Class.method" or "module.function".
    caller: str
    # Called symbol; rewritten to a module-qualified name when resolvable.
    callee: str
    # Location of the call, formatted as "file:line".
    call_site: str
    # True when the call is awaited.
    is_async: bool = False
|
|
63
|
+
|
|
64
|
+
|
|
56
65
|
@dataclass
|
|
57
66
|
class ModuleInfo:
|
|
58
67
|
"""Aggregated info for a discovered module/package."""
|
|
@@ -65,6 +74,20 @@ class ModuleInfo:
|
|
|
65
74
|
test_details: list[TestInfo] = field(default_factory=list)
|
|
66
75
|
line_count: int = 0
|
|
67
76
|
patterns: dict[str, str] = field(default_factory=dict) # pattern_type -> detail
|
|
77
|
+
call_edges: list[CallEdge] = field(default_factory=list)
|
|
78
|
+
interface_contract: Any = None # InterfaceContract from contracts.py
|
|
79
|
+
test_coverage: Any = None # TestCoverage from traceability.py
|
|
80
|
+
schema_refs: list[Any] = field(default_factory=list) # SchemaRef from schema_refs.py
|
|
81
|
+
runtime_wires: list[Any] = field(default_factory=list) # RuntimeWire from wiring.py
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class FeatureCluster:
    """Modules grouped together because they appear to serve one feature."""

    # Display name of the cluster.
    name: str
    # Names of the member modules.
    modules: list[str] = field(default_factory=list)
    # Score for how trustworthy the grouping is (defaults to 0.0).
    confidence: float = 0.0
    # Short human-readable reasons supporting the grouping.
    evidence: list[str] = field(default_factory=list)
|
|
68
91
|
|
|
69
92
|
|
|
70
93
|
@dataclass
|
|
@@ -83,6 +106,8 @@ class ProjectFacts:
|
|
|
83
106
|
api_specs: dict[str, Any] = field(default_factory=dict)
|
|
84
107
|
infra_config: dict[str, ConfigInfo] = field(default_factory=dict)
|
|
85
108
|
build_deps: BuildDepsInfo | None = None
|
|
109
|
+
feature_clusters: list[FeatureCluster] = field(default_factory=list)
|
|
110
|
+
change_risks: list[Any] = field(default_factory=list) # ChangeRisk from risk.py
|
|
86
111
|
|
|
87
112
|
|
|
88
113
|
@dataclass
|
|
@@ -151,6 +176,34 @@ def extract_facts(project_root: Path, language: str | None = None,
|
|
|
151
176
|
# Detect entry points
|
|
152
177
|
_detect_entry_points(facts, project_root, language)
|
|
153
178
|
|
|
179
|
+
# R4.3: Interface contract detection
|
|
180
|
+
from codd.contracts import build_interface_contracts
|
|
181
|
+
build_interface_contracts(facts, project_root)
|
|
182
|
+
|
|
183
|
+
# R4.1: Call graph extraction + resolution
|
|
184
|
+
_extract_call_graphs(facts, project_root, language, exclude_patterns)
|
|
185
|
+
_resolve_call_graph(facts)
|
|
186
|
+
|
|
187
|
+
# R4.2: Feature clustering
|
|
188
|
+
from codd.clustering import build_feature_clusters
|
|
189
|
+
build_feature_clusters(facts)
|
|
190
|
+
|
|
191
|
+
# R5.1: Test traceability
|
|
192
|
+
from codd.traceability import build_test_traceability
|
|
193
|
+
build_test_traceability(facts, project_root)
|
|
194
|
+
|
|
195
|
+
# R5.2: Schema-code dependency
|
|
196
|
+
from codd.schema_refs import build_schema_refs
|
|
197
|
+
build_schema_refs(facts, project_root)
|
|
198
|
+
|
|
199
|
+
# R5.3: Runtime wiring detection
|
|
200
|
+
from codd.wiring import build_runtime_wires
|
|
201
|
+
build_runtime_wires(facts, project_root)
|
|
202
|
+
|
|
203
|
+
# R5.4: Change risk scoring (depends on R4.3, R5.1)
|
|
204
|
+
from codd.risk import build_change_risks
|
|
205
|
+
build_change_risks(facts)
|
|
206
|
+
|
|
154
207
|
return facts
|
|
155
208
|
|
|
156
209
|
|
|
@@ -817,6 +870,50 @@ def _discover_build_deps(project_root: Path) -> BuildDepsInfo | None:
|
|
|
817
870
|
return extractor.merge(discovered)
|
|
818
871
|
|
|
819
872
|
|
|
873
|
+
# ── R4.1 helpers: call-graph extraction & resolution ──────
|
|
874
|
+
|
|
875
|
+
def _extract_call_graphs(facts: ProjectFacts, project_root: Path,
                         language: str, exclude_patterns: list[str] | None):
    """Append call edges to every module by running the source extractor on its files.

    The "source" extractor for *language* is looked up once; if it offers no
    ``extract_call_graph`` method the function does nothing. Files that cannot
    be read are skipped. *exclude_patterns* is accepted but not used here.
    """
    source_extractor = get_extractor(language, "source")
    if not hasattr(source_extractor, "extract_call_graph"):
        return

    for module in facts.modules.values():
        for relative_path in module.files:
            absolute_path = project_root / relative_path
            try:
                text = absolute_path.read_text(errors="ignore")
            except Exception:
                # Best effort: an unreadable file contributes no edges.
                continue
            module.call_edges.extend(
                source_extractor.extract_call_graph(text, relative_path, module.symbols)
            )
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
def _resolve_call_graph(facts: ProjectFacts):
|
|
894
|
+
"""Resolve callee names to fully-qualified module.symbol references."""
|
|
895
|
+
# Build symbol → module lookup
|
|
896
|
+
symbol_to_module: dict[str, str] = {}
|
|
897
|
+
for mod in facts.modules.values():
|
|
898
|
+
for sym in mod.symbols:
|
|
899
|
+
symbol_to_module[sym.name] = mod.name
|
|
900
|
+
|
|
901
|
+
for mod in facts.modules.values():
|
|
902
|
+
for edge in mod.call_edges:
|
|
903
|
+
callee = edge.callee
|
|
904
|
+
# Strip self. prefix
|
|
905
|
+
if callee.startswith("self."):
|
|
906
|
+
callee = callee[5:]
|
|
907
|
+
# Try to resolve bare name to module.name
|
|
908
|
+
bare = callee.split(".")[-1]
|
|
909
|
+
if bare in symbol_to_module:
|
|
910
|
+
target_mod = symbol_to_module[bare]
|
|
911
|
+
if target_mod != mod.name:
|
|
912
|
+
edge.callee = f"{target_mod}.{bare}"
|
|
913
|
+
else:
|
|
914
|
+
edge.callee = bare
|
|
915
|
+
|
|
916
|
+
|
|
820
917
|
# ═══════════════════════════════════════════════════════════
|
|
821
918
|
# Phase 2: Synth Docs (template-based, no AI)
|
|
822
919
|
# ═══════════════════════════════════════════════════════════
|
|
@@ -28,7 +28,7 @@ except ModuleNotFoundError:
|
|
|
28
28
|
hcl2 = None
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
|
-
from codd.extractor import ModuleInfo, Symbol
|
|
31
|
+
from codd.extractor import CallEdge, ModuleInfo, Symbol
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
_TREE_SITTER_LANGUAGE_PACKAGES = {
|
|
@@ -167,6 +167,9 @@ class LanguageExtractor(Protocol):
|
|
|
167
167
|
def extract_schema(self, content: str, file_path: str | Path) -> SqlSchemaInfo | PrismaSchemaInfo | None:
|
|
168
168
|
"""Return schema information when supported by the extractor."""
|
|
169
169
|
|
|
170
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Protocol slot: produce the call edges present in *content*."""
    ...
|
|
172
|
+
|
|
170
173
|
|
|
171
174
|
class RegexExtractor:
|
|
172
175
|
"""Adapter for regex-based extraction and schema parsing."""
|
|
@@ -214,6 +217,9 @@ class RegexExtractor:
|
|
|
214
217
|
return _extract_prisma_schema(content, normalized_path)
|
|
215
218
|
return None
|
|
216
219
|
|
|
220
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no edges: the regex fallback doesn't support call graphs."""
    return []
|
|
222
|
+
|
|
217
223
|
|
|
218
224
|
class TreeSitterExtractor:
|
|
219
225
|
"""Tree-sitter backend for Python and TypeScript/JavaScript source files."""
|
|
@@ -288,6 +294,17 @@ class TreeSitterExtractor:
|
|
|
288
294
|
def extract_schema(self, content: str, file_path: str | Path) -> SqlSchemaInfo | PrismaSchemaInfo | None:
|
|
289
295
|
return self._fallback.extract_schema(content, file_path)
|
|
290
296
|
|
|
297
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return the call edges found in *content*, or ``[]`` when unsupported.

    Only extractors whose ``category`` is ``"source"`` and whose ``language``
    is ``"python"`` yield edges. Any error raised while parsing or walking
    the tree is swallowed and reported as "no edges".
    """
    # Bail out before parsing: other languages have no call-graph walker, so
    # building a syntax tree for them would be wasted work.
    if self.category != "source" or self.language != "python":
        return []
    try:
        return _extract_python_call_graph(self._parse(content), content, file_path, symbols)
    except Exception:
        # Best effort: a parse/walk failure must not abort extraction.
        return []
|
|
307
|
+
|
|
291
308
|
def _parse(self, content: str):
|
|
292
309
|
return self._parser.parse(content.encode("utf-8", errors="ignore")).root_node
|
|
293
310
|
|
|
@@ -330,6 +347,9 @@ class SqlDdlExtractor:
|
|
|
330
347
|
fallback = self._fallback.extract_schema(content, path)
|
|
331
348
|
return fallback if isinstance(fallback, SqlSchemaInfo) else None
|
|
332
349
|
|
|
350
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no edges; this extractor does not produce a call graph."""
    return []
|
|
352
|
+
|
|
333
353
|
|
|
334
354
|
class PrismaSchemaExtractor:
|
|
335
355
|
"""Regex extractor for Prisma schema files."""
|
|
@@ -355,6 +375,9 @@ class PrismaSchemaExtractor:
|
|
|
355
375
|
def extract_schema(self, content: str, file_path: str | Path) -> PrismaSchemaInfo | None:
|
|
356
376
|
return _extract_prisma_schema(content, Path(file_path).as_posix())
|
|
357
377
|
|
|
378
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no edges; this extractor does not produce a call graph."""
    return []
|
|
380
|
+
|
|
358
381
|
|
|
359
382
|
def _build_parser(language: str):
|
|
360
383
|
from tree_sitter import Parser
|
|
@@ -918,6 +941,74 @@ def _detect_typescript_code_patterns(mod: ModuleInfo, root: Any, content: str) -
|
|
|
918
941
|
mod.patterns["api_routes"] = "NestJS controller"
|
|
919
942
|
|
|
920
943
|
|
|
944
|
+
def _extract_python_call_graph(root: Any, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Collect call edges from a tree-sitter Python syntax tree.

    Every ``call`` node under *root* is inspected. Calls whose last name
    segment is a dunder or one of a fixed list of builtin names are ignored,
    as are calls that do not match a name in *symbols* (by last segment, by
    full text, or by the text after a leading ``self.``). Each remaining call
    becomes a ``CallEdge`` whose caller is the dotted chain of enclosing
    function/class names (``"<module>"`` at top level) and whose call site
    is ``"file_path:line"``.
    """
    from codd.extractor import CallEdge

    source_bytes = content.encode("utf-8", errors="ignore")
    known_names = {s.name for s in symbols}
    ignored_builtins = frozenset((
        "print", "len", "range", "enumerate", "zip", "map", "filter",
        "sorted", "reversed", "list", "dict", "set", "tuple", "str",
        "int", "float", "bool", "type", "isinstance", "issubclass",
        "getattr", "setattr", "hasattr", "super", "property",
        "staticmethod", "classmethod", "open", "repr", "id", "vars",
        "dir", "any", "all", "min", "max", "sum", "abs", "round",
        "format", "iter", "next", "hash", "callable",
    ))

    def _enclosing_scope(call_node: Any) -> str:
        """Dotted names of the functions/classes wrapping *call_node*, outermost first."""
        chain: list[str] = []
        ancestor = call_node.parent
        while ancestor is not None:
            if ancestor.type in ("function_definition", "class_definition"):
                scope_name = _field_text(source_bytes, ancestor, "name")
                if scope_name:
                    # Ancestors are visited innermost-first; prepend to flip.
                    chain.insert(0, scope_name)
            ancestor = ancestor.parent
        return ".".join(chain) if chain else "<module>"

    def _targets_known_symbol(full_text: str, last_segment: str) -> bool:
        """True when the call refers to one of the supplied symbols."""
        if last_segment in known_names or full_text in known_names:
            return True
        # Method call on self: compare everything after "self."
        return full_text.startswith("self.") and full_text[5:] in known_names

    found: list[CallEdge] = []
    for node in _iter_named_nodes(root):
        if node.type != "call":
            continue
        target = node.child_by_field_name("function")
        if target is None:
            continue

        callee_text = _node_text(source_bytes, target).strip()
        last_segment = callee_text.rsplit(".", 1)[-1]

        # Skip builtins and dunder calls
        is_dunder = last_segment.startswith("__") and last_segment.endswith("__")
        if is_dunder or last_segment in ignored_builtins:
            continue

        # Only include calls to known symbols (intra-project)
        if not _targets_known_symbol(callee_text, last_segment):
            continue

        parent = node.parent
        found.append(CallEdge(
            caller=_enclosing_scope(node),
            callee=callee_text,
            call_site=f"{file_path}:{node.start_point.row + 1}",
            is_async=parent is not None and parent.type == "await",
        ))

    return found
|
|
1010
|
+
|
|
1011
|
+
|
|
921
1012
|
def _sql_first_object_name(content_bytes: bytes, node: Any) -> str:
|
|
922
1013
|
for child in getattr(node, "named_children", []):
|
|
923
1014
|
if child.type == "object_reference":
|
|
@@ -1774,6 +1865,9 @@ class BuildDepsExtractor:
|
|
|
1774
1865
|
scripts=scripts,
|
|
1775
1866
|
)
|
|
1776
1867
|
|
|
1868
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
    """Return no edges; this extractor does not produce a call graph."""
    return []
|
|
1870
|
+
|
|
1777
1871
|
|
|
1778
1872
|
class TestExtractor:
|
|
1779
1873
|
"""Extract test metadata from test files."""
|
|
@@ -1862,6 +1956,9 @@ class TestExtractor:
|
|
|
1862
1956
|
fixtures = re.findall(r"^\s*func\s+(TestMain)\s*\(", content, re.MULTILINE)
|
|
1863
1957
|
return TestInfo(file_path=file_path, test_functions=tests, fixtures=fixtures)
|
|
1864
1958
|
|
|
1959
|
+
def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
|
|
1960
|
+
return []
|
|
1961
|
+
|
|
1865
1962
|
|
|
1866
1963
|
def get_extractor(language: str, category: str = "source") -> LanguageExtractor:
|
|
1867
1964
|
"""Select the best available extractor for a language/category pair."""
|