PyPI - codespine - Versions diffs - 0.4.3__tar.gz → 0.5.1__tar.gz - Mend

codespine 0.4.3tar.gz → 0.5.1tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

{codespine-0.4.3 → codespine-0.5.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 0.4.3
+Version: 0.5.1
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License

{codespine-0.4.3 → codespine-0.5.1}/codespine/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 """CodeSpine package."""
 __all__ = ["__version__"]
-__version__ = "0.4.3"
+__version__ = "0.5.1"

codespine-0.5.1/codespine/analysis/crossmodule.py ADDED Viewed

@@ -0,0 +1,173 @@
+"""Cross-module call edge linker.
+After all modules in a workspace have been individually indexed, each module's
+call resolver only sees methods within that module.  This module fills the gap
+by scanning the graph for cross-project class references (REFERENCES_TYPE and
+IMPLEMENTS edges) and creating CALLS edges between methods where the call is
+plausible.
+Strategy A — Name + arity match  (confidence 0.7)
+    If src_class references dst_class (cross-project) and both have a method
+    with the same name and same parameter count, create a CALLS edge.  This
+    catches delegation, interface-implementation forwarding, and adapter
+    patterns.
+Strategy B — Type-reference fallback  (confidence 0.4)
+    For each *public* method in dst_class that received NO name-match edge,
+    create ONE low-confidence edge from a representative src method (preferring
+    one with zero outgoing calls).  This prevents methods that are genuinely
+    used cross-module from appearing as dead code.
+"""
+from __future__ import annotations
+import logging
+from collections import defaultdict
+LOGGER = logging.getLogger(__name__)
+def _param_count(sig: str) -> int:
+    """Count parameters from a method signature string.
+    Only commas at the top nesting level separate parameters, so commas inside
+    generic type arguments (``Map<String, Integer>``), annotation arguments
+    (``@Size(min = 1, max = 5)``) or other bracketed groups are not counted.
+    Returns 0 when *sig* is empty, has no parenthesised parameter list, or the
+    list between the first "(" and the last ")" is blank.
+    """
+    if not sig or "(" not in sig or ")" not in sig:
+        return 0
+    arg_str = sig[sig.find("(") + 1: sig.rfind(")")]
+    if not arg_str.strip():
+        return 0
+    depth = 0
+    count = 1
+    for ch in arg_str:
+        if ch in "<([{":
+            depth += 1
+        elif ch in ">)]}":
+            # Clamp at zero so an unbalanced closer cannot hide later commas.
+            depth = max(depth - 1, 0)
+        elif ch == "," and depth == 0:
+            count += 1
+    return count
+def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
+    """Create CALLS edges between methods in different projects.
+    Args:
+        store: Graph store.  This function reads through
+            ``store.query_records(query[, params])`` and writes through
+            ``store.add_call(...)`` — presumably (caller id, callee id,
+            confidence, reason tag); confirm against the store class.
+        project_ids: Project ids to consider.  When None, the ids of all
+            ``Project`` nodes are loaded from the store.
+            NOTE(review): the list is only used for the "fewer than two
+            projects" guard; the class-pair queries below are not filtered
+            by it — confirm that is intended.
+    Returns the number of new cross-module call edges created.
+    """
+    if project_ids is None:
+        proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
+        project_ids = [r["id"] for r in proj_recs]
+    if len(project_ids) < 2:
+        LOGGER.info(
+            "Only %d project(s) indexed — skipping cross-module linking.",
+            len(project_ids),
+        )
+        return 0
+    # ── 1. Collect cross-project class pairs ──────────────────────────
+    # A pair qualifies when the two classes live in files whose project_id
+    # differs and they are connected by REFERENCES_TYPE or IMPLEMENTS.
+    ref_pairs = store.query_records(
+        """
+        MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
+        WHERE src.file_id = sf.id AND dst.file_id = df.id
+          AND sf.project_id <> df.project_id
+        RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
+        """
+    )
+    impl_pairs = store.query_records(
+        """
+        MATCH (src:Class)-[:IMPLEMENTS]->(dst:Class), (sf:File), (df:File)
+        WHERE src.file_id = sf.id AND dst.file_id = df.id
+          AND sf.project_id <> df.project_id
+        RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
+        """
+    )
+    # Merge both edge kinds into one de-duplicated set of (src, dst) class ids.
+    all_pairs: set[tuple[str, str]] = set()
+    for p in ref_pairs:
+        all_pairs.add((p["src_cid"], p["dst_cid"]))
+    for p in impl_pairs:
+        all_pairs.add((p["src_cid"], p["dst_cid"]))
+    if not all_pairs:
+        LOGGER.info("No cross-project class references found.")
+        return 0
+    LOGGER.info(
+        "Cross-module: %d cross-project class pair(s) to process.",
+        len(all_pairs),
+    )
+    # ── 2. Process each class pair ────────────────────────────────────
+    new_edges = 0
+    # (src method id, dst method id) pairs already attempted in this run, so
+    # the same edge is never submitted twice across class pairs or strategies.
+    seen: set[tuple[str, str]] = set()
+    # NOTE(review): two method queries (plus one count query per src method
+    # below) are issued for every class pair; all_pairs is a set, so the
+    # processing order is not fixed.
+    for src_cid, dst_cid in all_pairs:
+        src_methods = store.query_records(
+            """MATCH (m:Method) WHERE m.class_id = $cid
+               RETURN m.id as mid, m.name as name, m.signature as sig""",
+            {"cid": src_cid},
+        )
+        dst_methods = store.query_records(
+            """MATCH (m:Method) WHERE m.class_id = $cid
+               RETURN m.id as mid, m.name as name, m.signature as sig,
+                      m.modifiers as modifiers, m.is_constructor as is_ctor""",
+            {"cid": dst_cid},
+        )
+        if not src_methods or not dst_methods:
+            continue
+        # Build name → methods index for src class
+        src_by_name: dict[str, list[dict]] = defaultdict(list)
+        for sm in src_methods:
+            src_by_name[sm["name"]].append(sm)
+        # ── Strategy A: name + arity matching ─────────────────────────
+        # dst methods that have (or already had) a name-match edge; these are
+        # excluded from the Strategy B fallback.
+        matched_dst_mids: set[str] = set()
+        for dm in dst_methods:
+            dm_name = dm["name"]
+            dm_pc = _param_count(dm.get("sig") or "")
+            # .get() avoids inserting empty lists into the defaultdict.
+            candidates = src_by_name.get(dm_name, [])
+            for sm in candidates:
+                sm_pc = _param_count(sm.get("sig") or "")
+                if sm_pc == dm_pc:
+                    pair = (sm["mid"], dm["mid"])
+                    if pair in seen:
+                        matched_dst_mids.add(dm["mid"])
+                        continue
+                    seen.add(pair)
+                    try:
+                        store.add_call(
+                            sm["mid"], dm["mid"], 0.7, "cross_module_name_match",
+                        )
+                        new_edges += 1
+                        matched_dst_mids.add(dm["mid"])
+                    except Exception as exc:
+                        # Best effort: a failed write is logged at debug level
+                        # and the dst method stays eligible for Strategy B.
+                        LOGGER.debug("Name-match edge failed: %s", exc)
+        # ── Strategy B: fallback for unmatched public dst methods ─────
+        # Find a representative caller: prefer src methods with 0 outgoing calls
+        # This runs after Strategy A, so the counts include any edges that
+        # Strategy A has just written (assuming add_call is visible to
+        # query_records immediately — TODO confirm).
+        fallback_src = None
+        for sm in src_methods:
+            out = store.query_records(
+                "MATCH (m:Method {id: $mid})-[:CALLS]->(:Method) RETURN count(*) as n",
+                {"mid": sm["mid"]},
+            )
+            if out and out[0]["n"] == 0:
+                fallback_src = sm
+                break
+        # No src method without outgoing calls: fall back to the first one.
+        if fallback_src is None and src_methods:
+            fallback_src = src_methods[0]
+        if fallback_src:
+            for dm in dst_methods:
+                if dm["mid"] in matched_dst_mids:
+                    continue
+                # Skip constructors and private methods
+                # (anything not marked private — including protected and
+                # package-private — still receives a fallback edge).
+                if dm.get("is_ctor"):
+                    continue
+                mods = dm.get("modifiers") or []
+                mod_strs = {str(m).strip() for m in mods} if mods else set()
+                if "private" in mod_strs:
+                    continue
+                pair = (fallback_src["mid"], dm["mid"])
+                if pair in seen:
+                    continue
+                seen.add(pair)
+                try:
+                    store.add_call(
+                        fallback_src["mid"], dm["mid"], 0.4, "cross_module_type_ref",
+                    )
+                    new_edges += 1
+                except Exception as exc:
+                    LOGGER.debug("Fallback edge failed: %s", exc)
+    LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
+    return new_edges

codespine-0.5.1/codespine/analysis/deadcode.py ADDED Viewed

@@ -0,0 +1,308 @@
+from __future__ import annotations
+from collections import defaultdict
+# ── Annotation sets ──────────────────────────────────────────────────
+# Entry-point annotations — exempt even in strict mode.  These represent
+# actual runtime entry points that the framework calls reflectively.
+# Names are stored bare (no leading "@"); they are compared against method
+# modifier tokens after the "@" has been stripped.
+ENTRY_POINT_ANNOTATIONS = {
+    # JUnit / testing
+    "Test",
+    "ParameterizedTest",
+    "BeforeEach",
+    "AfterEach",
+    "BeforeAll",
+    "AfterAll",
+    # Spring – web entry points
+    "RequestMapping",
+    "GetMapping",
+    "PostMapping",
+    "PutMapping",
+    "DeleteMapping",
+    "PatchMapping",
+    "MessageMapping",
+    # Spring – messaging / async entry points
+    "KafkaListener",
+    "RabbitListener",
+    "JmsListener",
+    "SqsListener",
+    "StreamListener",
+    # Lifecycle / event hooks (container lifecycle callbacks, Spring events
+    # and scheduling)
+    "PostConstruct",
+    "PreDestroy",
+    "EventListener",
+    "TransactionalEventListener",
+    "Scheduled",
+}
+# Broad annotations — exempt only in normal mode.  These indicate the
+# method is *likely* used via DI / serialisation / reflection, but in a
+# strict audit the user may want to verify that manually.
+BROAD_ANNOTATIONS = {
+    # Java standard
+    "Override",
+    # Spring – component model (class-level; methods inside are never "dead")
+    # NOTE(review): the matching in this file inspects each method's own
+    # modifier tokens; whether class-level annotations are propagated onto
+    # method modifiers by the indexer is not visible here — confirm.
+    "Component",
+    "Service",
+    "Repository",
+    "Controller",
+    "RestController",
+    "Configuration",
+    "Bean",
+    "Aspect",
+    # Spring Data / persistence
+    "Query",
+    "Modifying",
+    # Guice DI
+    "Inject",
+    "Provides",
+    "Singleton",
+    "Named",
+    "Qualifier",
+    # Jakarta / javax DI
+    "ApplicationScoped",
+    "RequestScoped",
+    "SessionScoped",
+    "Dependent",
+    # Jackson / serialization
+    "JsonCreator",
+    "JsonProperty",
+    "JsonDeserialize",
+    "JsonSerialize",
+}
+# Full set used in normal mode
+# (strict mode checks ENTRY_POINT_ANNOTATIONS only).
+EXEMPT_ANNOTATIONS = ENTRY_POINT_ANNOTATIONS | BROAD_ANNOTATIONS
+# Method names exempted in normal mode regardless of signature; the match is
+# by name only, so any method called e.g. "equals" is exempt.
+EXEMPT_CONTRACT_METHODS = {
+    "toString",
+    "hashCode",
+    "equals",
+    "compareTo",
+}
+def _modifier_tokens(modifiers) -> set[str]:
+    """Normalise a collection of modifiers into a set of stripped strings.
+    Each element is converted with ``str()`` and stripped of surrounding
+    whitespace.  Falsy input (``None``, an empty list, ...) yields an empty set.
+    """
+    tokens: set[str] = set()
+    for modifier in modifiers or ():
+        tokens.add(str(modifier).strip())
+    return tokens
+def _matched_annotation(mods: set[str], annotation_set: set[str]) -> str | None:
+    """Return the bare name of an annotation in *mods* that is in *annotation_set*, or None.
+    Each token is reduced to its simple annotation name before the lookup:
+    the leading "@" is dropped, any argument list is cut off
+    (``@GetMapping("/x")`` -> ``GetMapping``) and a package qualifier is
+    removed (``@org.junit.jupiter.api.Test`` -> ``Test``).
+    Tokens are examined in sorted order so that, when several annotations
+    match, the same one is reported on every run.
+    """
+    for token in sorted(mods):
+        bare = token.strip().lstrip("@")
+        # Drop annotation arguments, then keep only the simple name.
+        bare = bare.split("(", 1)[0].strip()
+        bare = bare.rsplit(".", 1)[-1]
+        if bare in annotation_set:
+            return bare
+    return None
+def _assign_confidence(candidate: dict, strict: bool) -> str:
+    """Grade how likely a caller-less method is to be genuinely dead.
+    Returns one of "high", "medium" or "low":
+      - strict mode: always "high".
+      - "private" among the modifiers: "high".
+      - otherwise "public" among the modifiers: "low" (may be reached via
+        reflection or from an external JAR).
+      - anything else (protected / package-private): "medium".
+    """
+    if strict:
+        return "high"
+    tokens = _modifier_tokens(candidate.get("modifiers"))
+    # "private" is tested before "public", so it wins if both are present.
+    for visibility, level in (("private", "high"), ("public", "low")):
+        if visibility in tokens:
+            return level
+    return "medium"
+def _exemption_reason(candidate: dict, annotations: set[str], strict: bool) -> str | None:
+    """Return the reason *candidate* is exempt from dead-code reporting, or None."""
+    name = candidate.get("name") or ""
+    # Always exempt test methods and main()
+    if candidate.get("is_test"):
+        return "test_method"
+    if name == "main" and "string[]" in (candidate.get("signature") or "").lower():
+        return "main_method"
+    # Entry-point (strict) or all framework (normal) annotations
+    mods = _modifier_tokens(candidate.get("modifiers"))
+    matched = _matched_annotation(mods, annotations)
+    if matched:
+        return f"annotation:{matched}"
+    # Everything below is a broad exemption that strict mode skips.
+    if strict:
+        return None
+    if candidate.get("is_constructor"):
+        return "constructor"
+    if name in EXEMPT_CONTRACT_METHODS:
+        return f"contract_method:{name}"
+    # Java bean-ish APIs often rely on reflection/serialization.
+    if "public" in mods and name.startswith(("get", "set", "is")):
+        return "bean_accessor"
+    # Reflection-style hooks
+    if name in {"valueOf", "fromString", "builder"}:
+        return f"reflection_hook:{name}"
+    return None
+def detect_dead_code(store, limit: int = 200, project: str | None = None, strict: bool = False) -> list[dict] | None:
+    """Java-aware dead code detection with exemption passes.
+    Parameters:
+      store   – Graph store queried through ``store.query_records(query[, params])``.
+      limit   – Max dead methods to return.  Up to ``limit * 5`` caller-less
+                methods are fetched so that exemptions do not starve the result.
+      project – Scope to a single module (matched against ``File.project_id``).
+      strict  – When True, only exempt main()/@Test methods and explicit
+                entry-point annotations (RequestMapping, KafkaListener, etc.).
+                Skips the broad bean-getter/setter, contract-method,
+                constructor, Override, and DI annotation exemptions.
+    Returns a list of dead method dicts, each with:
+      method_id, name, signature, class_fqcn, file_path, confidence, reason.
+    When at least one caller-less candidate exists, the list ends with one
+    extra sentinel dict whose only key is ``_stats``; it holds the candidate
+    and exemption counts, a breakdown of exemption reasons, and a sample of
+    exempted methods so callers can check the exemption logic.  When the query
+    yields no candidates an empty list (without ``_stats``) is returned.
+    Exemption counts only cover fetched candidates: an overriding method that
+    is not itself a candidate is not counted as exempted.
+    """
+    # ── Fetch methods without incoming CALLS edges ────────────────────
+    project_clause = " AND f.project_id = $proj" if project else ""
+    params: dict = {"limit": int(limit * 5)}
+    if project:
+        params["proj"] = project
+    candidates = store.query_records(
+        f"""
+        MATCH (m:Method), (c:Class), (f:File)
+        WHERE m.class_id = c.id AND c.file_id = f.id{project_clause}
+          AND NOT EXISTS {{ MATCH (:Method)-[:CALLS]->(m) }}
+        RETURN m.id as method_id,
+               m.name as name,
+               m.signature as signature,
+               m.modifiers as modifiers,
+               c.fqcn as class_fqcn,
+               m.is_constructor as is_constructor,
+               m.is_test as is_test,
+               f.path as file_path
+        LIMIT $limit
+        """,
+        params,
+    )
+    if not candidates:
+        return []
+    n_candidates = len(candidates)
+    # First record per method id; also serves as the candidate-id lookup.
+    by_id: dict[str, dict] = {}
+    for c in candidates:
+        by_id.setdefault(c["method_id"], c)
+    # ── Exemption passes ──────────────────────────────────────────────
+    annotations_to_check = ENTRY_POINT_ANNOTATIONS if strict else EXEMPT_ANNOTATIONS
+    # {method_id: reason}; insertion order follows candidate order.
+    exempt: dict[str, str] = {}
+    for c in candidates:
+        mid = c["method_id"]
+        if mid in exempt:
+            continue
+        reason = _exemption_reason(c, annotations_to_check, strict)
+        if reason is not None:
+            exempt[mid] = reason
+    # Exempt methods that DIRECTLY override another method.
+    # In strict mode, overrides are NOT exempted — if nobody calls the method,
+    # it's flagged regardless of whether it overrides a parent.
+    if not strict:
+        override_methods = store.query_records(
+            """
+            MATCH (m:Method)-[:OVERRIDES]->(:Method)
+            RETURN DISTINCT m.id as method_id
+            """
+        )
+        for r in override_methods:
+            mid = r["method_id"]
+            # The query is graph-wide; only record overrides that are actual
+            # candidates so the stats never count methods outside the scan.
+            if mid in by_id and mid not in exempt:
+                exempt[mid] = "method_override"
+    # ── Build dead list ───────────────────────────────────────────────
+    dead = [
+        {
+            "method_id": c["method_id"],
+            "name": c.get("name"),
+            "signature": c.get("signature"),
+            "class_fqcn": c.get("class_fqcn"),
+            "file_path": c.get("file_path"),
+            "confidence": _assign_confidence(c, strict),
+            "reason": "no_incoming_calls_after_exemptions",
+        }
+        for c in candidates
+        if c["method_id"] not in exempt
+    ]
+    result = dead[:limit]
+    # ── Stats with exemption breakdown ────────────────────────────────
+    reason_counts: dict[str, int] = defaultdict(int)
+    for reason in exempt.values():
+        # Group "kind:detail" reasons by kind for readability
+        reason_counts[reason.split(":")[0]] += 1
+    # Sample of exempted methods (up to 10) for user inspection
+    exempted_sample = [
+        {
+            "name": by_id[mid].get("name"),
+            "signature": by_id[mid].get("signature"),
+            "class_fqcn": by_id[mid].get("class_fqcn"),
+            "exemption_reason": reason,
+        }
+        for mid, reason in list(exempt.items())[:10]
+    ]
+    if strict:
+        exemption_note = (
+            "STRICT MODE: Only test methods, main(), and entry-point "
+            "annotations (RequestMapping, KafkaListener, Scheduled, etc.) "
+            "are exempted. Constructors, getters/setters, @Override, DI "
+            "annotations, and contract methods are NOT exempt."
+        )
+    else:
+        exemption_note = (
+            "Exemptions cover: constructors, test methods, main(), "
+            "toString/hashCode/equals/compareTo, public getters/setters, "
+            "methods with DI/framework annotations, and direct method overrides. "
+            "Use strict=True for minimal exemptions."
+        )
+    # "dead_returned" is evaluated before the sentinel is appended, so it
+    # counts dead methods only.
+    result.append({
+        "_stats": {
+            "candidates_with_no_callers": n_candidates,
+            "exempted": len(exempt),
+            "dead_returned": len(result),
+            "mode": "strict" if strict else "normal",
+            "note": exemption_note,
+            "exemptions_breakdown": dict(reason_counts),
+            "exempted_sample": exempted_sample,
+        }
+    })
+    return result

{codespine-0.4.3 → codespine-0.5.1}/codespine/cli.py RENAMED Viewed

@@ -14,6 +14,7 @@ import psutil
 from codespine.analysis.community import detect_communities, symbol_community
 from codespine.analysis.context import build_symbol_context
 from codespine.analysis.coupling import compute_coupling, get_coupling
+from codespine.analysis.crossmodule import link_cross_module_calls
 from codespine.analysis.deadcode import detect_dead_code
 from codespine.analysis.flow import trace_execution_flows
 from codespine.analysis.impact import analyze_impact
@@ -216,6 +217,16 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
         elif parse_state["indexed"] < parse_state["total"]:
             _phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
+    # ── Cross-module call linking ──────────────────────────────────────
+    # When multiple modules/projects are indexed, attempt to resolve call
+    # edges that span module boundaries using import + REFERENCES_TYPE info.
+    if is_multi and len(modules_with_ids) > 1:
+        xmod_pids = [pid for _, pid in modules_with_ids]
+        xmod_edges = link_cross_module_calls(store, project_ids=xmod_pids)
+        _phase("Cross-module linking...", f"{xmod_edges} cross-module call edges")
+    else:
+        _phase("Cross-module linking...", "skipped (single module)")
     communities: list[dict] = []
     flows: list[dict] = []
     dead: list[dict] = []

codespine 0.4.3__tar.gz → 0.5.1__tar.gz

codespine 0.4.3tar.gz → 0.5.1tar.gz