PyPI - codespine - Versions diffs - 0.4.2__tar.gz → 0.5.0__tar.gz - Mend

codespine 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

{codespine-0.4.2 → codespine-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 0.4.2
+Version: 0.5.0
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License
@@ -46,7 +46,7 @@ Requires-Dist: click
 Requires-Dist: kuzu
 Requires-Dist: tree-sitter
 Requires-Dist: tree-sitter-java
-Requires-Dist: fastmcp
+Requires-Dist: fastmcp>=2.3.0
 Requires-Dist: psutil
 Requires-Dist: watchfiles
 Provides-Extra: ml

{codespine-0.4.2 → codespine-0.5.0}/codespine/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 """CodeSpine package."""
 __all__ = ["__version__"]
-__version__ = "0.4.2"
+__version__ = "0.5.0"

codespine-0.5.0/codespine/analysis/crossmodule.py ADDED Viewed

@@ -0,0 +1,230 @@
+"""Cross-module call edge linker.
+After all modules in a workspace have been individually indexed, each module's
+call resolver only sees methods within that module. This module fills the gap
+by scanning the graph for unresolved outgoing calls from one module that match
+method signatures in another module, then creating CALLS edges between them.
+The algorithm:
+  1. Build a global method catalog (method_id → name, param_count, class_fqcn)
+     from the DB across ALL projects.
+  2. Build a per-project import map: for each file, record which FQCNs are
+     imported (from the class nodes + extends/implements relations).
+  3. For each method M in project A, find its outgoing calls that did NOT
+     resolve to any target. These are method invocations that tree-sitter
+     parsed but call_resolver.py could not match (because the target was in a
+     different module).
+  4. For each unresolved call, use the file's import list + the global class
+     catalog to find candidate target methods in OTHER projects.
+  5. Create CALLS edges with confidence 0.6 and reason "cross_module_import".
+Because ParsedCall data is transient (not stored in the DB), we use a simpler
+heuristic: find methods in module A that have ZERO outgoing CALLS edges but
+are known to reference classes from other modules (via REFERENCES_TYPE or
+import analysis). Then attempt to link them by matching method names against
+the global catalog.
+A faster fallback strategy (implemented below):
+  - Collect all class FQCNs per project.
+  - For each project pair (A, B), find classes in A that IMPLEMENT/extend
+    classes in B — these already have edges.
+  - For method-level cross-module calls: scan for methods with 0 outgoing
+    edges, match their name+arity against methods in other projects, and
+    only link when the target class is imported (appears in the same file's
+    import set via REFERENCES_TYPE edges).
+"""
+from __future__ import annotations
+import logging
+from collections import defaultdict
+LOGGER = logging.getLogger(__name__)
+def _param_count(signature: str | None) -> int:
+    """Return the number of parameters in a ``name(arg, ...)`` signature string.
+    Commas nested inside generics, arrays or annotation arguments
+    (``Map<String, Integer>``) are not counted as parameter separators.
+    A missing or malformed signature yields 0.
+    """
+    sig = signature or ""
+    start, end = sig.find("("), sig.rfind(")")
+    if start == -1 or end <= start:
+        return 0
+    args = sig[start + 1:end]
+    if not args.strip():
+        return 0
+    depth = 0
+    count = 1
+    for ch in args:
+        if ch in "<([":
+            depth += 1
+        elif ch in ">)]":
+            depth -= 1
+        elif ch == "," and depth == 0:
+            count += 1
+    return count
+def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
+    """Create CALLS edges between methods in different projects.
+    What is actually linked: for every ``(src:Class)-[:REFERENCES_TYPE]->(dst:Class)``
+    pair whose files belong to two *different* projects (both of which must be
+    in ``project_ids``), every method of ``src`` is linked to every method of
+    ``dst`` whose (name, parameter count) is unique within ``dst``.  Overloads
+    that share name and arity are skipped because the target would be ambiguous.
+    Edges are written with confidence 0.6 and reason ``"cross_module_import"``.
+    NOTE(review): call sites are not stored in the graph, so this is an
+    over-approximation driven purely by class-level type references; no check
+    is made that the source method really invokes the target.
+    Parameters:
+      store       – graph store exposing ``query_records(query, params=None)``
+                    and ``add_call(src_id, dst_id, confidence, reason)``.
+      project_ids – projects to link among; ``None`` means every Project node.
+    Returns the number of new cross-module call edges created.
+    """
+    if project_ids is None:
+        proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
+        project_ids = [r["id"] for r in proj_recs]
+    # De-duplicate so ["a", "a"] is not mistaken for a multi-project workspace.
+    wanted = set(project_ids)
+    if len(wanted) < 2:
+        LOGGER.info("Only %d project(s) indexed — skipping cross-module linking.", len(wanted))
+        return 0
+    # Class pairs whose REFERENCES_TYPE edge crosses a project boundary.
+    xmod_class_pairs = store.query_records(
+        """
+        MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
+        WHERE src.file_id = sf.id AND dst.file_id = df.id
+          AND sf.project_id <> df.project_id
+        RETURN src.id as src_cid, dst.id as dst_cid,
+               sf.project_id as src_pid, df.project_id as dst_pid
+        """
+    )
+    # A class usually takes part in many pairs; fetch its methods only once.
+    methods_by_class: dict[str, list[dict]] = {}
+    def _methods_of(class_id: str) -> list[dict]:
+        if class_id not in methods_by_class:
+            methods_by_class[class_id] = store.query_records(
+                "MATCH (m:Method) WHERE m.class_id = $cid RETURN m.id as mid, m.name as name, m.signature as sig",
+                {"cid": class_id},
+            )
+        return methods_by_class[class_id]
+    # Per destination class: ids of methods that are unambiguous by name+arity.
+    unique_targets_by_class: dict[str, list[str]] = {}
+    def _unique_targets(class_id: str) -> list[str]:
+        if class_id not in unique_targets_by_class:
+            by_name_arity: dict[tuple[str, int], list[str]] = defaultdict(list)
+            for dm in _methods_of(class_id):
+                by_name_arity[(dm["name"], _param_count(dm.get("sig")))].append(dm["mid"])
+            unique_targets_by_class[class_id] = [
+                ids[0] for ids in by_name_arity.values() if len(ids) == 1
+            ]
+        return unique_targets_by_class[class_id]
+    new_edges = 0
+    # Only pairs for which an edge write was attempted are recorded here, so a
+    # pair is never suppressed without having been tried.
+    seen_pairs: set[tuple[str, str]] = set()
+    for pair in xmod_class_pairs:
+        # Honour the caller's scope: both ends must be in the requested set.
+        if pair["src_pid"] not in wanted or pair["dst_pid"] not in wanted:
+            continue
+        targets = _unique_targets(pair["dst_cid"])
+        if not targets:
+            continue
+        for sm in _methods_of(pair["src_cid"]):
+            for dst_mid in targets:
+                edge_pair = (sm["mid"], dst_mid)
+                if edge_pair in seen_pairs:
+                    continue
+                seen_pairs.add(edge_pair)
+                try:
+                    store.add_call(sm["mid"], dst_mid, 0.6, "cross_module_import")
+                except Exception as exc:
+                    # Best effort: one failed edge must not abort the whole pass.
+                    LOGGER.debug("Cross-module edge failed: %s", exc)
+                else:
+                    new_edges += 1
+    LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
+    return new_edges

codespine-0.5.0/codespine/analysis/deadcode.py ADDED Viewed

@@ -0,0 +1,248 @@
+from __future__ import annotations
+# Simple annotation names (without the leading "@").  detect_dead_code()
+# exempts a candidate method when one of its modifier tokens, after stripping
+# a leading "@", is a member of this set — in both normal and strict mode.
+# NOTE(review): the visible check inspects only the method's own modifier
+# tokens; whether class-level annotations (e.g. Component, Service) ever show
+# up in those tokens is not visible here — confirm against the indexer.
+EXEMPT_ANNOTATIONS = {
+    # Java standard
+    "Override",
+    # JUnit / testing
+    "Test",
+    "ParameterizedTest",
+    "BeforeEach",
+    "AfterEach",
+    "BeforeAll",
+    "AfterAll",
+    # Spring – component model (class-level; methods inside are never "dead")
+    "Component",
+    "Service",
+    "Repository",
+    "Controller",
+    "RestController",
+    "Configuration",
+    "Bean",
+    "Aspect",
+    # Spring – lifecycle / event hooks
+    "PostConstruct",
+    "PreDestroy",
+    "EventListener",
+    "TransactionalEventListener",
+    "Scheduled",
+    # Spring – web entry points
+    "RequestMapping",
+    "GetMapping",
+    "PostMapping",
+    "PutMapping",
+    "DeleteMapping",
+    "PatchMapping",
+    "MessageMapping",
+    # Spring – messaging / async
+    "KafkaListener",
+    "RabbitListener",
+    "JmsListener",
+    "SqsListener",
+    "StreamListener",
+    # Spring Data / persistence
+    "Query",
+    "Modifying",
+    # Guice DI
+    "Inject",
+    "Provides",
+    "Singleton",
+    "Named",
+    "Qualifier",
+    # Jakarta / javax DI (same semantics as Guice/Spring variants)
+    "ApplicationScoped",
+    "RequestScoped",
+    "SessionScoped",
+    "Dependent",
+    # Jackson / serialization (called reflectively)
+    "JsonCreator",
+    "JsonProperty",
+    "JsonDeserialize",
+    "JsonSerialize",
+}
+# Method names that detect_dead_code() exempts by name alone when running in
+# normal (non-strict) mode; in strict mode this set is not consulted.
+EXEMPT_CONTRACT_METHODS = {
+    "toString",
+    "hashCode",
+    "equals",
+    "compareTo",
+}
+def _modifier_tokens(modifiers) -> set[str]:
+    """Normalise a method's stored modifiers into a set of stripped tokens.
+    Accepts ``None``/empty (→ empty set), an iterable of tokens, or a single
+    whitespace-separated string.  A bare string is split on whitespace rather
+    than iterated character by character, and blank tokens are dropped.
+    """
+    if not modifiers:
+        return set()
+    if isinstance(modifiers, str):
+        # Iterating a str would yield single characters, so "private" could
+        # never be found in the result.
+        modifiers = modifiers.split()
+    return {token for token in (str(m).strip() for m in modifiers) if token}
+def _assign_confidence(candidate: dict, strict: bool) -> str:
+    """Grade how likely a caller-less method is to be genuinely dead.
+    Returns one of ``"high"``, ``"medium"`` or ``"low"``:
+      - strict mode: always ``"high"``.
+      - ``private`` among the modifiers: ``"high"``.
+      - otherwise ``public`` among the modifiers: ``"low"`` (may be reached
+        via reflection or from an external JAR).
+      - anything else (protected / package-private): ``"medium"``.
+    """
+    if not strict:
+        tokens = _modifier_tokens(candidate.get("modifiers"))
+        # Order matters: "private" wins over "public" if both are present.
+        for keyword, level in (("private", "high"), ("public", "low")):
+            if keyword in tokens:
+                return level
+        return "medium"
+    return "high"
+def _annotation_simple_name(token: str) -> str:
+    """Reduce a token such as ``@a.b.GetMapping("/x")`` to ``GetMapping``."""
+    return token.lstrip("@").split("(", 1)[0].rsplit(".", 1)[-1].strip()
+def detect_dead_code(store, limit: int = 200, project: str | None = None, strict: bool = False) -> list[dict]:
+    """Java-aware dead code detection with exemption passes.
+    Parameters:
+      limit   – Max dead-method entries to return.  Up to ``limit * 5``
+                caller-less candidates are fetched before exemptions are
+                applied, so the stats describe that sample, not the whole graph.
+      project – Scope to a single module (matched against ``File.project_id``).
+      strict  – When True, only exempt main()/test methods and explicit
+                entry-point annotations. Skips the broad bean-getter/setter,
+                contract-method, constructor and override exemptions.
+    Returns:
+      ``[]`` when no method without incoming CALLS edges is found.  Otherwise
+      a list of dead method dicts, each with
+        method_id, name, signature, class_fqcn, file_path, confidence, reason
+      followed by ONE trailing sentinel dict ``{"_stats": {...}}`` holding
+        candidates_with_no_callers, exempted, dead_returned, mode, note.
+      Callers should skip entries that have a "_stats" key when iterating
+      over method results.
+    """
+    where = ["m.class_id = c.id", "c.file_id = f.id"]
+    params: dict = {"limit": int(limit * 5)}
+    if project:
+        where.append("f.project_id = $proj")
+        params["proj"] = project
+    where.append("NOT EXISTS { MATCH (:Method)-[:CALLS]->(m) }")
+    query = (
+        "MATCH (m:Method), (c:Class), (f:File) "
+        "WHERE " + " AND ".join(where) + " "
+        "RETURN m.id as method_id, "
+        "m.name as name, "
+        "m.signature as signature, "
+        "m.modifiers as modifiers, "
+        "c.fqcn as class_fqcn, "
+        "m.is_constructor as is_constructor, "
+        "m.is_test as is_test, "
+        "f.path as file_path "
+        "LIMIT $limit"
+    )
+    candidates = store.query_records(query, params)
+    if not candidates:
+        return []
+    n_candidates = len(candidates)
+    candidate_ids = {c["method_id"] for c in candidates}
+    exempt: set[str] = set()
+    for c in candidates:
+        sig = (c.get("signature") or "").lower()
+        name = c.get("name") or ""
+        mods = _modifier_tokens(c.get("modifiers"))
+        # Minimal exemptions (apply in both normal and strict mode):
+        # test methods, main(String[]), and explicit entry-point annotations.
+        # Annotation tokens are reduced to their simple name so that
+        # "@GetMapping("/x")" or "@org.junit.Test" still match.
+        is_exempt = bool(
+            c.get("is_test")
+            or (name == "main" and "string[]" in sig)
+            or any(_annotation_simple_name(m) in EXEMPT_ANNOTATIONS for m in mods)
+        )
+        # Broad exemptions (only in normal mode, skipped in strict mode).
+        if not is_exempt and not strict:
+            is_exempt = bool(
+                c.get("is_constructor")
+                or name in EXEMPT_CONTRACT_METHODS
+                # Java bean-ish APIs often rely on reflection/serialization.
+                or ("public" in mods and name.startswith(("get", "set", "is")))
+                # Reflection-style hooks
+                or name in {"valueOf", "fromString", "builder"}
+            )
+        if is_exempt:
+            exempt.add(c["method_id"])
+    # Exempt methods that DIRECTLY override another method.  The class-level
+    # IMPLEMENTS relation is intentionally not used: it would exempt every
+    # method of every class implementing any interface.  In strict mode
+    # overrides are NOT exempted.
+    if not strict:
+        override_methods = store.query_records(
+            """
+            MATCH (m:Method)-[:OVERRIDES]->(:Method)
+            RETURN DISTINCT m.id as method_id
+            """
+        )
+        # The query is graph-wide; keep only ids that are actual candidates so
+        # the "exempted" statistic can never exceed the candidate count.
+        exempt.update(
+            r["method_id"] for r in override_methods if r["method_id"] in candidate_ids
+        )
+    dead = [
+        {
+            "method_id": c["method_id"],
+            "name": c.get("name"),
+            "signature": c.get("signature"),
+            "class_fqcn": c.get("class_fqcn"),
+            "file_path": c.get("file_path"),
+            "confidence": _assign_confidence(c, strict),
+            "reason": "no_incoming_calls_after_exemptions",
+        }
+        for c in candidates
+        if c["method_id"] not in exempt
+    ]
+    result = dead[:limit]
+    if strict:
+        exemption_note = (
+            "STRICT MODE: Only test methods, main(), and explicit entry-point "
+            "annotations are exempted. Constructors, getters/setters, "
+            "contract methods, and overrides are NOT exempt."
+        )
+    else:
+        exemption_note = (
+            "Exemptions cover: constructors, test methods, main(), "
+            "toString/hashCode/equals/compareTo, public getters/setters, "
+            "methods with DI/framework annotations, and direct method overrides. "
+            "Use strict=True for minimal exemptions."
+        )
+    # Stats travel as a trailing sentinel entry so the MCP layer can surface
+    # them without changing the return type.  "dead_returned" is measured
+    # before the sentinel itself is appended.
+    result.append({
+        "_stats": {
+            "candidates_with_no_callers": n_candidates,
+            "exempted": len(exempt),
+            "dead_returned": len(result),
+            "mode": "strict" if strict else "normal",
+            "note": exemption_note,
+        }
+    })
+    return result

{codespine-0.4.2 → codespine-0.5.0}/codespine/cli.py RENAMED Viewed

@@ -14,6 +14,7 @@ import psutil
 from codespine.analysis.community import detect_communities, symbol_community
 from codespine.analysis.context import build_symbol_context
 from codespine.analysis.coupling import compute_coupling, get_coupling
+from codespine.analysis.crossmodule import link_cross_module_calls
 from codespine.analysis.deadcode import detect_dead_code
 from codespine.analysis.flow import trace_execution_flows
 from codespine.analysis.impact import analyze_impact
@@ -216,6 +217,16 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
         elif parse_state["indexed"] < parse_state["total"]:
             _phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
+    # ── Cross-module call linking ──────────────────────────────────────
+    # When multiple modules/projects are indexed, attempt to resolve call
+    # edges that span module boundaries using import + REFERENCES_TYPE info.
+    if is_multi and len(modules_with_ids) > 1:
+        xmod_pids = [pid for _, pid in modules_with_ids]
+        xmod_edges = link_cross_module_calls(store, project_ids=xmod_pids)
+        _phase("Cross-module linking...", f"{xmod_edges} cross-module call edges")
+    else:
+        _phase("Cross-module linking...", "skipped (single module)")
     communities: list[dict] = []
     flows: list[dict] = []
     dead: list[dict] = []

codespine 0.4.2__tar.gz → 0.5.0__tar.gz

codespine 0.4.2__tar.gz → 0.5.0__tar.gz