PyPI - codd-dev - Versions diffs - 0.3.0__tar.gz → 0.5.0__tar.gz - Mend

codd-dev 0.3.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

{codd_dev-0.3.0 → codd_dev-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codd-dev
-Version: 0.3.0
+Version: 0.5.0
 Summary: CoDD: Coherence-Driven Development — cross-artifact change impact analysis
 Project-URL: Homepage, https://github.com/yohey-w/codd-dev
 Project-URL: Repository, https://github.com/yohey-w/codd-dev

{codd_dev-0.3.0 → codd_dev-0.5.0}/codd/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """CoDD — Coherence-Driven Development."""
-__version__ = "0.2.0a1"
+__version__ = "0.4.0"

codd_dev-0.5.0/codd/clustering.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""R4.2 — Feature clustering for codd extract.
+Groups modules by functional cohesion using call graph edges,
+naming conventions, and cross-reference density.
+"""
+from __future__ import annotations
+from collections import defaultdict
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from codd.extractor import ProjectFacts
+def build_feature_clusters(facts: ProjectFacts) -> None:
+    """Populate ``facts.feature_clusters`` by analysing call edges and naming."""
+    from codd.extractor import FeatureCluster
+    module_names = list(facts.modules.keys())
+    if len(module_names) < 2:
+        return
+    # Step 1: Build adjacency from call edges
+    adj: dict[str, set[str]] = defaultdict(set)
+    for mod in facts.modules.values():
+        for edge in mod.call_edges:
+            # edge.callee may be "module.Class.method" — extract target module
+            target_mod = _resolve_callee_module(edge.callee, module_names)
+            if target_mod and target_mod != mod.name:
+                adj[mod.name].add(target_mod)
+                adj[target_mod].add(mod.name)
+    # Step 2: Find connected components via call graph
+    components = _connected_components(module_names, adj)
+    # Step 3: Merge with naming prefix heuristics
+    prefix_groups = _group_by_prefix(module_names)
+    # Step 4: Combine call-graph components with prefix groups
+    clusters: list[FeatureCluster] = []
+    seen: set[str] = set()
+    # First: call-graph components (higher confidence)
+    for comp in components:
+        if len(comp) < 2:
+            continue
+        name = _infer_cluster_name(comp)
+        evidence: list[str] = []
+        # Check if they share naming prefix
+        common_prefix = _common_prefix(comp)
+        if common_prefix:
+            evidence.append(f"shared prefix: {common_prefix}")
+        # Count call edges between members
+        edge_count = sum(
+            1 for m in comp for n in adj.get(m, set()) if n in comp
+        )
+        if edge_count > 0:
+            evidence.append(f"{edge_count} cross-call edges")
+        confidence = min(1.0, 0.4 + 0.1 * edge_count + (0.2 if common_prefix else 0.0))
+        clusters.append(FeatureCluster(
+            name=name,
+            modules=sorted(comp),
+            confidence=round(confidence, 2),
+            evidence=evidence,
+        ))
+        seen.update(comp)
+    # Second: prefix-only groups (lower confidence)
+    for prefix, members in prefix_groups.items():
+        remaining = [m for m in members if m not in seen]
+        if len(remaining) < 2:
+            continue
+        clusters.append(FeatureCluster(
+            name=prefix,
+            modules=sorted(remaining),
+            confidence=0.3,
+            evidence=[f"shared prefix: {prefix}"],
+        ))
+        seen.update(remaining)
+    facts.feature_clusters = sorted(clusters, key=lambda c: -c.confidence)
+def _resolve_callee_module(callee: str, module_names: list[str]) -> str | None:
+    """Map a callee like 'auth.verify_token' to module name 'auth'."""
+    # Try exact match first
+    if callee in module_names:
+        return callee
+    # Try first dotted segment
+    parts = callee.split(".")
+    for i in range(len(parts), 0, -1):
+        candidate = ".".join(parts[:i])
+        if candidate in module_names:
+            return candidate
+    # Try just the first part (top-level module)
+    if parts[0] in module_names:
+        return parts[0]
+    return None
+def _connected_components(nodes: list[str], adj: dict[str, set[str]]) -> list[set[str]]:
+    """Find connected components in an undirected graph."""
+    visited: set[str] = set()
+    components: list[set[str]] = []
+    for node in nodes:
+        if node in visited:
+            continue
+        # BFS
+        component: set[str] = set()
+        queue = [node]
+        while queue:
+            current = queue.pop(0)
+            if current in visited:
+                continue
+            visited.add(current)
+            component.add(current)
+            for neighbor in adj.get(current, set()):
+                if neighbor not in visited:
+                    queue.append(neighbor)
+        components.append(component)
+    return components
+def _group_by_prefix(module_names: list[str]) -> dict[str, list[str]]:
+    """Group modules sharing a common naming prefix (e.g., 'auth_*')."""
+    groups: dict[str, list[str]] = defaultdict(list)
+    for name in module_names:
+        # Split on underscore or dot
+        parts = name.replace(".", "_").split("_")
+        if len(parts) >= 2:
+            prefix = parts[0]
+            if len(prefix) >= 2:  # Avoid single-char prefixes
+                groups[prefix].append(name)
+    # Only return groups with 2+ members
+    return {k: v for k, v in groups.items() if len(v) >= 2}
+def _common_prefix(names: set[str]) -> str:
+    """Find common prefix among module names, if any."""
+    if not names:
+        return ""
+    name_list = sorted(names)
+    parts_list = [n.replace(".", "_").split("_") for n in name_list]
+    if not parts_list or not parts_list[0]:
+        return ""
+    prefix_parts: list[str] = []
+    for i, part in enumerate(parts_list[0]):
+        if all(len(p) > i and p[i] == part for p in parts_list):
+            prefix_parts.append(part)
+        else:
+            break
+    return "_".join(prefix_parts) if prefix_parts else ""
+def _infer_cluster_name(modules: set[str]) -> str:
+    """Infer a human-readable name for a cluster."""
+    prefix = _common_prefix(modules)
+    if prefix:
+        return prefix
+    # Fall back to shortest module name
+    return min(modules, key=len)

codd_dev-0.5.0/codd/contracts.py ADDED Viewed

@@ -0,0 +1,138 @@
+"""R4.3 — Interface contract detection for codd extract.
+Distinguishes public API (symbols in __init__.py / __all__) from internal
+implementation details.  Detects encapsulation violations where other modules
+reach into internals.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from codd.extractor import ProjectFacts
+@dataclass
+class InterfaceContract:
+    """Public vs internal API surface for a module."""
+    module: str
+    public_symbols: list[str] = field(default_factory=list)
+    internal_symbols: list[str] = field(default_factory=list)
+    api_surface_ratio: float = 0.0
+    encapsulation_violations: list[str] = field(default_factory=list)
+# ── __init__.py / __all__ parsing ────────────────────────
+_ALL_RE = re.compile(
+    r"__all__\s*=\s*\[([^\]]*)\]",
+    re.DOTALL,
+)
+_REEXPORT_FROM_RE = re.compile(
+    r"^from\s+\.[\w.]*\s+import\s+(.+)",
+    re.MULTILINE,
+)
+_IMPORT_AS_RE = re.compile(r"(\w+)\s+as\s+(\w+)")
+def detect_init_exports(init_content: str) -> list[str]:
+    """Parse ``__init__.py`` content and return publicly-exported symbol names."""
+    names: list[str] = []
+    # 1) __all__ takes priority
+    m = _ALL_RE.search(init_content)
+    if m:
+        raw = m.group(1)
+        for token in re.findall(r"""['"](\w+)['"]""", raw):
+            if token not in names:
+                names.append(token)
+        return names
+    # 2) Fall back to ``from .xxx import ...`` re-exports
+    for m2 in _REEXPORT_FROM_RE.finditer(init_content):
+        import_part = m2.group(1).strip().rstrip(")")
+        for chunk in import_part.split(","):
+            chunk = chunk.strip().strip("()")
+            if not chunk:
+                continue
+            # handle "Foo as Bar" → the exported name is "Bar"
+            alias_m = _IMPORT_AS_RE.search(chunk)
+            if alias_m:
+                name = alias_m.group(2)
+            else:
+                name = chunk.split()[-1]
+            if name.isidentifier() and name not in names:
+                names.append(name)
+    return names
+# ── Build contracts for every module ─────────────────────
+def build_interface_contracts(facts: ProjectFacts, project_root: Path) -> None:
+    """Populate ``interface_contract`` on every module in *facts*."""
+    from codd.extractor import _language_extensions  # avoid circular at import time
+    # First pass: compute public/internal for each module
+    for mod in facts.modules.values():
+        init_files = [
+            f for f in mod.files
+            if Path(f).name == "__init__.py"
+        ]
+        all_symbol_names = [s.name for s in mod.symbols]
+        if not all_symbol_names:
+            continue
+        public: list[str] = []
+        if init_files:
+            init_path = project_root / init_files[0]
+            try:
+                init_content = init_path.read_text(errors="ignore")
+            except Exception:
+                init_content = ""
+            public = detect_init_exports(init_content)
+        # For single-file modules (no __init__.py), treat all symbols as public
+        if not init_files:
+            public = list(all_symbol_names)
+        internal = [n for n in all_symbol_names if n not in public]
+        total = len(all_symbol_names)
+        ratio = len(public) / total if total else 0.0
+        mod.interface_contract = InterfaceContract(
+            module=mod.name,
+            public_symbols=public,
+            internal_symbols=internal,
+            api_surface_ratio=round(ratio, 2),
+        )
+    # Second pass: detect encapsulation violations
+    # Build internal-symbol lookup: {module_name: set(internal_names)}
+    internal_lookup: dict[str, set[str]] = {}
+    for mod in facts.modules.values():
+        if mod.interface_contract:
+            internal_lookup[mod.name] = set(mod.interface_contract.internal_symbols)
+    for mod in facts.modules.values():
+        if not mod.interface_contract:
+            continue
+        for dep_name, import_lines in mod.internal_imports.items():
+            if dep_name not in internal_lookup:
+                continue
+            internals = internal_lookup[dep_name]
+            if not internals:
+                continue
+            for line in import_lines:
+                for internal_name in internals:
+                    if internal_name in line:
+                        violation = f"{mod.name} uses {dep_name}.{internal_name} (internal)"
+                        if violation not in mod.interface_contract.encapsulation_violations:
+                            mod.interface_contract.encapsulation_violations.append(violation)

{codd_dev-0.3.0 → codd_dev-0.5.0}/codd/extractor.py RENAMED Viewed

@@ -53,6 +53,15 @@ class Symbol:
     implements: list[str] = field(default_factory=list)
+@dataclass
+class CallEdge:
+    """A function-to-function call relationship."""
+    caller: str          # "module.Class.method" or "module.function"
+    callee: str          # target symbol (resolved to module if possible)
+    call_site: str       # file:line
+    is_async: bool = False
 @dataclass
 class ModuleInfo:
     """Aggregated info for a discovered module/package."""
@@ -65,6 +74,20 @@ class ModuleInfo:
     test_details: list[TestInfo] = field(default_factory=list)
     line_count: int = 0
     patterns: dict[str, str] = field(default_factory=dict)  # pattern_type -> detail
+    call_edges: list[CallEdge] = field(default_factory=list)
+    interface_contract: Any = None  # InterfaceContract from contracts.py
+    test_coverage: Any = None       # TestCoverage from traceability.py
+    schema_refs: list[Any] = field(default_factory=list)    # SchemaRef from schema_refs.py
+    runtime_wires: list[Any] = field(default_factory=list)  # RuntimeWire from wiring.py
+@dataclass
+class FeatureCluster:
+    """A group of modules that collaborate on a feature."""
+    name: str
+    modules: list[str] = field(default_factory=list)
+    confidence: float = 0.0
+    evidence: list[str] = field(default_factory=list)
 @dataclass
@@ -83,6 +106,8 @@ class ProjectFacts:
     api_specs: dict[str, Any] = field(default_factory=dict)
     infra_config: dict[str, ConfigInfo] = field(default_factory=dict)
     build_deps: BuildDepsInfo | None = None
+    feature_clusters: list[FeatureCluster] = field(default_factory=list)
+    change_risks: list[Any] = field(default_factory=list)  # ChangeRisk from risk.py
 @dataclass
@@ -151,6 +176,34 @@ def extract_facts(project_root: Path, language: str | None = None,
     # Detect entry points
     _detect_entry_points(facts, project_root, language)
+    # R4.3: Interface contract detection
+    from codd.contracts import build_interface_contracts
+    build_interface_contracts(facts, project_root)
+    # R4.1: Call graph extraction + resolution
+    _extract_call_graphs(facts, project_root, language, exclude_patterns)
+    _resolve_call_graph(facts)
+    # R4.2: Feature clustering
+    from codd.clustering import build_feature_clusters
+    build_feature_clusters(facts)
+    # R5.1: Test traceability
+    from codd.traceability import build_test_traceability
+    build_test_traceability(facts, project_root)
+    # R5.2: Schema-code dependency
+    from codd.schema_refs import build_schema_refs
+    build_schema_refs(facts, project_root)
+    # R5.3: Runtime wiring detection
+    from codd.wiring import build_runtime_wires
+    build_runtime_wires(facts, project_root)
+    # R5.4: Change risk scoring (depends on R4.3, R5.1)
+    from codd.risk import build_change_risks
+    build_change_risks(facts)
     return facts
@@ -817,6 +870,50 @@ def _discover_build_deps(project_root: Path) -> BuildDepsInfo | None:
     return extractor.merge(discovered)
+# ── R4.1 helpers: call-graph extraction & resolution ──────
+def _extract_call_graphs(facts: ProjectFacts, project_root: Path,
+                         language: str, exclude_patterns: list[str] | None):
+    """Collect call edges for every module using the language extractor."""
+    extractor = get_extractor(language, "source")
+    if not hasattr(extractor, "extract_call_graph"):
+        return
+    for mod in facts.modules.values():
+        for rel_file in mod.files:
+            full = project_root / rel_file
+            try:
+                content = full.read_text(errors="ignore")
+            except Exception:
+                continue
+            edges = extractor.extract_call_graph(content, rel_file, mod.symbols)
+            mod.call_edges.extend(edges)
+def _resolve_call_graph(facts: ProjectFacts):
+    """Resolve callee names to fully-qualified module.symbol references."""
+    # Build symbol → module lookup
+    symbol_to_module: dict[str, str] = {}
+    for mod in facts.modules.values():
+        for sym in mod.symbols:
+            symbol_to_module[sym.name] = mod.name
+    for mod in facts.modules.values():
+        for edge in mod.call_edges:
+            callee = edge.callee
+            # Strip self. prefix
+            if callee.startswith("self."):
+                callee = callee[5:]
+            # Try to resolve bare name to module.name
+            bare = callee.split(".")[-1]
+            if bare in symbol_to_module:
+                target_mod = symbol_to_module[bare]
+                if target_mod != mod.name:
+                    edge.callee = f"{target_mod}.{bare}"
+                else:
+                    edge.callee = bare
 # ═══════════════════════════════════════════════════════════
 # Phase 2: Synth Docs (template-based, no AI)
 # ═══════════════════════════════════════════════════════════

{codd_dev-0.3.0 → codd_dev-0.5.0}/codd/parsing.py RENAMED Viewed

@@ -28,7 +28,7 @@ except ModuleNotFoundError:
     hcl2 = None
 if TYPE_CHECKING:
-    from codd.extractor import ModuleInfo, Symbol
+    from codd.extractor import CallEdge, ModuleInfo, Symbol
 _TREE_SITTER_LANGUAGE_PACKAGES = {
@@ -167,6 +167,9 @@ class LanguageExtractor(Protocol):
     def extract_schema(self, content: str, file_path: str | Path) -> SqlSchemaInfo | PrismaSchemaInfo | None:
         """Return schema information when supported by the extractor."""
+    def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
+        """Return call edges found in the given source content."""
 class RegexExtractor:
     """Adapter for regex-based extraction and schema parsing."""
@@ -214,6 +217,9 @@ class RegexExtractor:
             return _extract_prisma_schema(content, normalized_path)
         return None
+    def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
+        return []  # Regex fallback doesn't support call graph
 class TreeSitterExtractor:
     """Tree-sitter backend for Python and TypeScript/JavaScript source files."""
@@ -288,6 +294,17 @@ class TreeSitterExtractor:
     def extract_schema(self, content: str, file_path: str | Path) -> SqlSchemaInfo | PrismaSchemaInfo | None:
         return self._fallback.extract_schema(content, file_path)
+    def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
+        if self.category != "source":
+            return []
+        try:
+            root = self._parse(content)
+            if self.language == "python":
+                return _extract_python_call_graph(root, content, file_path, symbols)
+        except Exception:
+            return []
+        return []
     def _parse(self, content: str):
         return self._parser.parse(content.encode("utf-8", errors="ignore")).root_node
@@ -330,6 +347,9 @@ class SqlDdlExtractor:
             fallback = self._fallback.extract_schema(content, path)
             return fallback if isinstance(fallback, SqlSchemaInfo) else None
+    def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
+        return []
 class PrismaSchemaExtractor:
     """Regex extractor for Prisma schema files."""
@@ -355,6 +375,9 @@ class PrismaSchemaExtractor:
     def extract_schema(self, content: str, file_path: str | Path) -> PrismaSchemaInfo | None:
         return _extract_prisma_schema(content, Path(file_path).as_posix())
+    def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
+        return []
 def _build_parser(language: str):
     from tree_sitter import Parser
@@ -918,6 +941,74 @@ def _detect_typescript_code_patterns(mod: ModuleInfo, root: Any, content: str) -
         mod.patterns["api_routes"] = "NestJS controller"
+def _extract_python_call_graph(root: Any, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
+    """Extract function call edges from Python AST using tree-sitter."""
+    from codd.extractor import CallEdge
+    content_bytes = content.encode("utf-8", errors="ignore")
+    edges: list[CallEdge] = []
+    symbol_names = {s.name for s in symbols}
+    def _current_scope(node: Any) -> str:
+        """Walk parents to find enclosing function/class scope."""
+        parts: list[str] = []
+        current = node.parent
+        while current is not None:
+            if current.type in ("function_definition", "class_definition"):
+                name = _field_text(content_bytes, current, "name")
+                if name:
+                    parts.append(name)
+            current = current.parent
+        parts.reverse()
+        return ".".join(parts) if parts else "<module>"
+    for node in _iter_named_nodes(root):
+        if node.type != "call":
+            continue
+        func_node = node.child_by_field_name("function")
+        if func_node is None:
+            continue
+        callee_text = _node_text(content_bytes, func_node).strip()
+        # Skip builtins and dunder calls
+        bare_name = callee_text.split(".")[-1] if "." in callee_text else callee_text
+        if bare_name.startswith("__") and bare_name.endswith("__"):
+            continue
+        if bare_name in ("print", "len", "range", "enumerate", "zip", "map", "filter",
+                         "sorted", "reversed", "list", "dict", "set", "tuple", "str",
+                         "int", "float", "bool", "type", "isinstance", "issubclass",
+                         "getattr", "setattr", "hasattr", "super", "property",
+                         "staticmethod", "classmethod", "open", "repr", "id", "vars",
+                         "dir", "any", "all", "min", "max", "sum", "abs", "round",
+                         "format", "iter", "next", "hash", "callable"):
+            continue
+        # Only include calls to known symbols (intra-project)
+        if bare_name not in symbol_names and callee_text not in symbol_names:
+            # Check if it's a method call on self (self.method)
+            if callee_text.startswith("self."):
+                method_name = callee_text[5:]  # strip "self."
+                if method_name not in symbol_names:
+                    continue
+            else:
+                continue
+        caller = _current_scope(node)
+        line_no = node.start_point.row + 1
+        is_async = node.parent is not None and node.parent.type == "await"
+        edges.append(CallEdge(
+            caller=caller,
+            callee=callee_text,
+            call_site=f"{file_path}:{line_no}",
+            is_async=is_async,
+        ))
+    return edges
 def _sql_first_object_name(content_bytes: bytes, node: Any) -> str:
     for child in getattr(node, "named_children", []):
         if child.type == "object_reference":
@@ -1774,6 +1865,9 @@ class BuildDepsExtractor:
             scripts=scripts,
         )
+    def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
+        return []
 class TestExtractor:
     """Extract test metadata from test files."""
@@ -1862,6 +1956,9 @@ class TestExtractor:
         fixtures = re.findall(r"^\s*func\s+(TestMain)\s*\(", content, re.MULTILINE)
         return TestInfo(file_path=file_path, test_functions=tests, fixtures=fixtures)
+    def extract_call_graph(self, content: str, file_path: str, symbols: list[Symbol]) -> list[CallEdge]:
+        return []
 def get_extractor(language: str, category: str = "source") -> LanguageExtractor:
     """Select the best available extractor for a language/category pair."""

codd-dev 0.3.0__tar.gz → 0.5.0__tar.gz

codd-dev 0.3.0__tar.gz → 0.5.0__tar.gz