PyPI - codespine - Versions diffs - 0.5.0__tar.gz → 0.5.1__tar.gz - Mend

codespine 0.5.0 (tar.gz) → 0.5.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

{codespine-0.5.0 → codespine-0.5.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 0.5.0
+Version: 0.5.1
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License

{codespine-0.5.0 → codespine-0.5.1}/codespine/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 """CodeSpine package."""
 __all__ = ["__version__"]
-__version__ = "0.5.0"
+__version__ = "0.5.1"

codespine-0.5.1/codespine/analysis/crossmodule.py ADDED Viewed

@@ -0,0 +1,173 @@
+"""Cross-module call edge linker.
+After all modules in a workspace have been individually indexed, each module's
+call resolver only sees methods within that module.  This module fills the gap
+by scanning the graph for cross-project class references (REFERENCES_TYPE and
+IMPLEMENTS edges) and creating CALLS edges between methods where the call is
+plausible.
+Strategy A — Name + arity match  (confidence 0.7)
+    If src_class references dst_class (cross-project) and both have a method
+    with the same name and same parameter count, create a CALLS edge.  This
+    catches delegation, interface-implementation forwarding, and adapter
+    patterns.
+Strategy B — Type-reference fallback  (confidence 0.4)
+    For each *public* method in dst_class that received NO name-match edge,
+    create ONE low-confidence edge from a representative src method (preferring
+    one with zero outgoing calls).  This prevents methods that are genuinely
+    used cross-module from appearing as dead code.
+"""
+from __future__ import annotations
+import logging
+from collections import defaultdict
+LOGGER = logging.getLogger(__name__)
+def _param_count(sig: str) -> int:
+    """Count parameters from a method signature string."""
+    if not sig or "(" not in sig or ")" not in sig:
+        return 0
+    arg_str = sig[sig.find("(") + 1: sig.rfind(")")]
+    return 0 if not arg_str.strip() else arg_str.count(",") + 1
+def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
+    """Create CALLS edges between methods in different projects.
+    Returns the number of new cross-module call edges created.
+    """
+    if project_ids is None:
+        proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
+        project_ids = [r["id"] for r in proj_recs]
+    if len(project_ids) < 2:
+        LOGGER.info(
+            "Only %d project(s) indexed — skipping cross-module linking.",
+            len(project_ids),
+        )
+        return 0
+    # ── 1. Collect cross-project class pairs ──────────────────────────
+    ref_pairs = store.query_records(
+        """
+        MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
+        WHERE src.file_id = sf.id AND dst.file_id = df.id
+          AND sf.project_id <> df.project_id
+        RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
+        """
+    )
+    impl_pairs = store.query_records(
+        """
+        MATCH (src:Class)-[:IMPLEMENTS]->(dst:Class), (sf:File), (df:File)
+        WHERE src.file_id = sf.id AND dst.file_id = df.id
+          AND sf.project_id <> df.project_id
+        RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
+        """
+    )
+    all_pairs: set[tuple[str, str]] = set()
+    for p in ref_pairs:
+        all_pairs.add((p["src_cid"], p["dst_cid"]))
+    for p in impl_pairs:
+        all_pairs.add((p["src_cid"], p["dst_cid"]))
+    if not all_pairs:
+        LOGGER.info("No cross-project class references found.")
+        return 0
+    LOGGER.info(
+        "Cross-module: %d cross-project class pair(s) to process.",
+        len(all_pairs),
+    )
+    # ── 2. Process each class pair ────────────────────────────────────
+    new_edges = 0
+    seen: set[tuple[str, str]] = set()
+    for src_cid, dst_cid in all_pairs:
+        src_methods = store.query_records(
+            """MATCH (m:Method) WHERE m.class_id = $cid
+               RETURN m.id as mid, m.name as name, m.signature as sig""",
+            {"cid": src_cid},
+        )
+        dst_methods = store.query_records(
+            """MATCH (m:Method) WHERE m.class_id = $cid
+               RETURN m.id as mid, m.name as name, m.signature as sig,
+                      m.modifiers as modifiers, m.is_constructor as is_ctor""",
+            {"cid": dst_cid},
+        )
+        if not src_methods or not dst_methods:
+            continue
+        # Build name → methods index for src class
+        src_by_name: dict[str, list[dict]] = defaultdict(list)
+        for sm in src_methods:
+            src_by_name[sm["name"]].append(sm)
+        # ── Strategy A: name + arity matching ─────────────────────────
+        matched_dst_mids: set[str] = set()
+        for dm in dst_methods:
+            dm_name = dm["name"]
+            dm_pc = _param_count(dm.get("sig") or "")
+            candidates = src_by_name.get(dm_name, [])
+            for sm in candidates:
+                sm_pc = _param_count(sm.get("sig") or "")
+                if sm_pc == dm_pc:
+                    pair = (sm["mid"], dm["mid"])
+                    if pair in seen:
+                        matched_dst_mids.add(dm["mid"])
+                        continue
+                    seen.add(pair)
+                    try:
+                        store.add_call(
+                            sm["mid"], dm["mid"], 0.7, "cross_module_name_match",
+                        )
+                        new_edges += 1
+                        matched_dst_mids.add(dm["mid"])
+                    except Exception as exc:
+                        LOGGER.debug("Name-match edge failed: %s", exc)
+        # ── Strategy B: fallback for unmatched public dst methods ─────
+        # Find a representative caller: prefer src methods with 0 outgoing calls
+        fallback_src = None
+        for sm in src_methods:
+            out = store.query_records(
+                "MATCH (m:Method {id: $mid})-[:CALLS]->(:Method) RETURN count(*) as n",
+                {"mid": sm["mid"]},
+            )
+            if out and out[0]["n"] == 0:
+                fallback_src = sm
+                break
+        if fallback_src is None and src_methods:
+            fallback_src = src_methods[0]
+        if fallback_src:
+            for dm in dst_methods:
+                if dm["mid"] in matched_dst_mids:
+                    continue
+                # Skip constructors and private methods
+                if dm.get("is_ctor"):
+                    continue
+                mods = dm.get("modifiers") or []
+                mod_strs = {str(m).strip() for m in mods} if mods else set()
+                if "private" in mod_strs:
+                    continue
+                pair = (fallback_src["mid"], dm["mid"])
+                if pair in seen:
+                    continue
+                seen.add(pair)
+                try:
+                    store.add_call(
+                        fallback_src["mid"], dm["mid"], 0.4, "cross_module_type_ref",
+                    )
+                    new_edges += 1
+                except Exception as exc:
+                    LOGGER.debug("Fallback edge failed: %s", exc)
+    LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
+    return new_edges

{codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/deadcode.py RENAMED Viewed

@@ -1,8 +1,11 @@
 from __future__ import annotations
-EXEMPT_ANNOTATIONS = {
-    # Java standard
-    "Override",
+from collections import defaultdict
+# ── Annotation sets ──────────────────────────────────────────────────
+# Entry-point annotations — exempt even in strict mode.  These represent
+# actual runtime entry points that the framework calls reflectively.
+ENTRY_POINT_ANNOTATIONS = {
     # JUnit / testing
     "Test",
     "ParameterizedTest",
@@ -10,21 +13,6 @@ EXEMPT_ANNOTATIONS = {
     "AfterEach",
     "BeforeAll",
     "AfterAll",
-    # Spring – component model (class-level; methods inside are never "dead")
-    "Component",
-    "Service",
-    "Repository",
-    "Controller",
-    "RestController",
-    "Configuration",
-    "Bean",
-    "Aspect",
-    # Spring – lifecycle / event hooks
-    "PostConstruct",
-    "PreDestroy",
-    "EventListener",
-    "TransactionalEventListener",
-    "Scheduled",
     # Spring – web entry points
     "RequestMapping",
     "GetMapping",
@@ -33,12 +21,35 @@ EXEMPT_ANNOTATIONS = {
     "DeleteMapping",
     "PatchMapping",
     "MessageMapping",
-    # Spring – messaging / async
+    # Spring – messaging / async entry points
     "KafkaListener",
     "RabbitListener",
     "JmsListener",
     "SqsListener",
     "StreamListener",
+    # Spring – lifecycle / event hooks
+    "PostConstruct",
+    "PreDestroy",
+    "EventListener",
+    "TransactionalEventListener",
+    "Scheduled",
+}
+# Broad annotations — exempt only in normal mode.  These indicate the
+# method is *likely* used via DI / serialisation / reflection, but in a
+# strict audit the user may want to verify that manually.
+BROAD_ANNOTATIONS = {
+    # Java standard
+    "Override",
+    # Spring – component model (class-level; methods inside are never "dead")
+    "Component",
+    "Service",
+    "Repository",
+    "Controller",
+    "RestController",
+    "Configuration",
+    "Bean",
+    "Aspect",
     # Spring Data / persistence
     "Query",
     "Modifying",
@@ -48,18 +59,21 @@ EXEMPT_ANNOTATIONS = {
     "Singleton",
     "Named",
     "Qualifier",
-    # Jakarta / javax DI (same semantics as Guice/Spring variants)
+    # Jakarta / javax DI
     "ApplicationScoped",
     "RequestScoped",
     "SessionScoped",
     "Dependent",
-    # Jackson / serialization (called reflectively)
+    # Jackson / serialization
     "JsonCreator",
     "JsonProperty",
     "JsonDeserialize",
     "JsonSerialize",
 }
+# Full set used in normal mode
+EXEMPT_ANNOTATIONS = ENTRY_POINT_ANNOTATIONS | BROAD_ANNOTATIONS
 EXEMPT_CONTRACT_METHODS = {
     "toString",
     "hashCode",
@@ -74,6 +88,15 @@ def _modifier_tokens(modifiers) -> set[str]:
     return {str(m).strip() for m in modifiers}
+def _matched_annotation(mods: set[str], annotation_set: set[str]) -> str | None:
+    """Return the first annotation in *mods* that appears in *annotation_set*, or None."""
+    for m in mods:
+        bare = m.lstrip("@")
+        if bare in annotation_set:
+            return bare
+    return None
 def _assign_confidence(candidate: dict, strict: bool) -> str:
     """Assign a confidence level (high / medium / low) to each dead method.
@@ -101,16 +124,17 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
       limit   – Max results to return.
       project – Scope to a single module.
       strict  – When True, only exempt main()/@Test methods and explicit
-                entry-point annotations. Skips the broad bean-getter/setter,
-                contract-method, and constructor exemptions.
+                entry-point annotations (RequestMapping, KafkaListener, etc.).
+                Skips the broad bean-getter/setter, contract-method,
+                constructor, Override, and DI annotation exemptions.
     Returns a list of dead method dicts, each with:
       method_id, name, signature, class_fqcn, file_path, reason, confidence.
     The return value is augmented with a ``_stats`` entry (a sentinel dict
-    with key ``_stats``) containing pre/post-exemption counts so callers can
-    show users that the exemption logic is actually working:
-      candidates_with_no_callers, exempted, dead_returned
+    with key ``_stats``) containing pre/post-exemption counts, a breakdown
+    of exemption reasons, and a sample of exempted methods so callers can
+    validate that the exemption logic is working correctly.
     """
     if project:
         candidates = store.query_records(
@@ -153,43 +177,56 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
         return []
     n_candidates = len(candidates)
-    exempt: set[str] = set()
-    # Minimal exemptions (apply in both normal and strict mode)
+    # Track exemptions as {method_id: reason} instead of a plain set
+    exempt: dict[str, str] = {}
+    # Choose annotation set based on mode
+    annotations_to_check = ENTRY_POINT_ANNOTATIONS if strict else EXEMPT_ANNOTATIONS
+    # ── Exemption passes ──────────────────────────────────────────────
     for c in candidates:
+        mid = c["method_id"]
+        if mid in exempt:
+            continue
         sig = (c.get("signature") or "").lower()
         name = c.get("name") or ""
         mods = _modifier_tokens(c.get("modifiers"))
         # Always exempt test methods and main()
         if c.get("is_test"):
-            exempt.add(c["method_id"])
+            exempt[mid] = "test_method"
+            continue
         if name == "main" and "string[]" in sig:
-            exempt.add(c["method_id"])
+            exempt[mid] = "main_method"
+            continue
-        # Always exempt explicit entry-point annotations (@Test, @RequestMapping, etc.)
-        if any(m.lstrip("@") in EXEMPT_ANNOTATIONS for m in mods):
-            exempt.add(c["method_id"])
+        # Exempt methods with entry-point (strict) or all framework (normal) annotations
+        matched = _matched_annotation(mods, annotations_to_check)
+        if matched:
+            exempt[mid] = f"annotation:{matched}"
+            continue
-        # Broad exemptions (only in normal mode, skipped in strict mode)
+        # ── Broad exemptions (only in normal mode) ────────────────────
         if not strict:
             if c.get("is_constructor"):
-                exempt.add(c["method_id"])
+                exempt[mid] = "constructor"
+                continue
             if name in EXEMPT_CONTRACT_METHODS:
-                exempt.add(c["method_id"])
+                exempt[mid] = f"contract_method:{name}"
+                continue
             # Java bean-ish APIs often rely on reflection/serialization.
-            if "public" in mods and (name.startswith("get") or name.startswith("set") or name.startswith("is")):
-                exempt.add(c["method_id"])
+            if "public" in mods and (
+                name.startswith("get") or name.startswith("set") or name.startswith("is")
+            ):
+                exempt[mid] = "bean_accessor"
+                continue
             # Reflection-style hooks
             if name in {"valueOf", "fromString", "builder"}:
-                exempt.add(c["method_id"])
-    # Exempt methods that DIRECTLY override another method (precise: only the
-    # specific overriding method is exempted, not the entire implementing class).
-    # NOTE: we intentionally do NOT use the class-level IMPLEMENTS relation here
-    # because that would exempt ALL methods of every class that implements ANY
-    # interface — in a typical Spring project that wipes out almost everything
-    # and produces 0 dead code results.
+                exempt[mid] = f"reflection_hook:{name}"
+                continue
+    # Exempt methods that DIRECTLY override another method.
     # In strict mode, overrides are NOT exempted — if nobody calls the method,
     # it's flagged regardless of whether it overrides a parent.
     if not strict:
@@ -199,8 +236,12 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
             RETURN DISTINCT m.id as method_id
             """
         )
-        exempt.update(r["method_id"] for r in override_methods)
+        for r in override_methods:
+            mid = r["method_id"]
+            if mid not in exempt:
+                exempt[mid] = "method_override"
+    # ── Build dead list ───────────────────────────────────────────────
     dead = []
     for c in candidates:
         if c["method_id"] in exempt:
@@ -219,14 +260,31 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
     result = dead[:limit]
-    # Append stats as a sentinel entry so the MCP layer can surface them
-    # without changing the return type.  Callers should strip entries that
-    # have a "_stats" key when iterating over method results.
+    # ── Stats with exemption breakdown ────────────────────────────────
+    reason_counts: dict[str, int] = defaultdict(int)
+    for reason in exempt.values():
+        # Group annotation reasons by prefix for readability
+        key = reason.split(":")[0] if ":" in reason else reason
+        reason_counts[key] += 1
+    # Sample of exempted methods (up to 10) for user inspection
+    exempted_sample = []
+    for mid, reason in list(exempt.items())[:10]:
+        candidate = next((c for c in candidates if c["method_id"] == mid), None)
+        if candidate:
+            exempted_sample.append({
+                "name": candidate.get("name"),
+                "signature": candidate.get("signature"),
+                "class_fqcn": candidate.get("class_fqcn"),
+                "exemption_reason": reason,
+            })
     if strict:
         exemption_note = (
-            "STRICT MODE: Only test methods, main(), and explicit entry-point "
-            "annotations are exempted. Constructors, getters/setters, "
-            "contract methods, and overrides are NOT exempt."
+            "STRICT MODE: Only test methods, main(), and entry-point "
+            "annotations (RequestMapping, KafkaListener, Scheduled, etc.) "
+            "are exempted. Constructors, getters/setters, @Override, DI "
+            "annotations, and contract methods are NOT exempt."
         )
     else:
         exemption_note = (
@@ -242,6 +300,8 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
             "dead_returned": len(result),
             "mode": "strict" if strict else "normal",
             "note": exemption_note,
+            "exemptions_breakdown": dict(reason_counts),
+            "exempted_sample": exempted_sample,
         }
     })

{codespine-0.5.0 → codespine-0.5.1}/codespine/mcp/server.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import json as _json_mod
 import subprocess
 import sys
 import tempfile
@@ -18,6 +19,16 @@ from codespine.diff.branch_diff import compare_branches as compare_branches_anal
 from codespine.search.hybrid import hybrid_search
+def _json(data: dict) -> str:
+    """Serialize response dict to a JSON string.
+    FastMCP double-serialises dict return values on many transports (SSE,
+    stdio) producing duplicate JSON payloads that waste ~50 K tokens/session.
+    Returning a pre-serialised string guarantees a single TextContent block.
+    """
+    return _json_mod.dumps(data, separators=(",", ":"))
 def _git_available(path: str) -> bool:
     """Return True if path is inside a git repository."""
     try:
@@ -44,14 +55,27 @@ def _resolve_repo_path(store, project: str | None, repo_path_provider) -> str:
     return repo_path_provider()
-def _no_symbols_response(note: str = "No symbols indexed. Run 'codespine analyse <path>' first.") -> dict:
-    return {"available": False, "note": note}
+def _no_symbols_response(note: str = "No symbols indexed. Run 'codespine analyse <path>' first.") -> str:
+    return _json({"available": False, "note": note})
-def _staleness_meta(store, response: dict, project: str | None = None) -> dict:
-    """Inject index staleness metadata into every tool response.
+def _parse_indexed_at(raw) -> int:
+    """Robustly parse an indexed_at value that may be str, int, float, or None."""
+    if raw is None:
+        return 0
+    try:
+        val = int(float(str(raw)))
+        # Sanity check: must look like a Unix timestamp (> year 2000)
+        return val if val > 946684800 else 0
+    except (ValueError, TypeError):
+        return 0
+def _staleness_meta(store, response: dict, project: str | None = None) -> str:
+    """Inject index staleness metadata into every tool response and serialise.
     Adds ``index_age_seconds`` and ``stale_warning`` when the index is old.
+    Returns a JSON string (not a dict) to avoid FastMCP double-serialisation.
     """
     try:
         if project:
@@ -64,10 +88,11 @@ def _staleness_meta(store, response: dict, project: str | None = None) -> dict:
                 "MATCH (p:Project) RETURN p.indexed_at as ts ORDER BY p.indexed_at ASC LIMIT 1"
             )
         if recs:
-            ts = int(recs[0].get("ts") or 0)
+            ts = _parse_indexed_at(recs[0].get("ts"))
             if ts:
                 age = int(time.time()) - ts
                 response["index_age_seconds"] = age
+                response["indexed_at_epoch"] = ts
                 if age > 3600:
                     response["stale_warning"] = (
                         f"Index is {age // 3600}h {(age % 3600) // 60}m old. "
@@ -75,11 +100,40 @@ def _staleness_meta(store, response: dict, project: str | None = None) -> dict:
                     )
     except Exception:
         pass
-    return response
+    return _json(response)
 def build_mcp_server(store, repo_path_provider):
-    mcp = FastMCP("codespine")
+    _raw_mcp = FastMCP("codespine")
+    # ── Anti-duplicate-JSON wrapper ────────────────────────────────────
+    # FastMCP double-serialises dict return values on many transports,
+    # producing duplicate JSON payloads that waste ~50 K tokens/session.
+    # We intercept tool registration so every tool's dict return is
+    # pre-serialised to a JSON string (single TextContent block).
+    import functools as _functools
+    class _JsonMCP:
+        """Thin proxy that wraps tool functions to return JSON strings."""
+        def __getattr__(self, name):
+            return getattr(_raw_mcp, name)
+        def tool(self, *args, **kwargs):
+            original_decorator = _raw_mcp.tool(*args, **kwargs)
+            def wrapper(fn):
+                @_functools.wraps(fn)
+                def json_fn(*a, **kw):
+                    result = fn(*a, **kw)
+                    if isinstance(result, dict):
+                        return _json(result)
+                    return result
+                return original_decorator(json_fn)
+            return wrapper
+        def run(self):
+            return _raw_mcp.run()
+    mcp = _JsonMCP()
     # Background job state (per-server-instance, persists across tool calls)
     _watch: dict = {"proc": None, "path": None, "started_at": None, "interval": 30}
@@ -92,7 +146,7 @@ def build_mcp_server(store, repo_path_provider):
     @mcp.tool()
     def ping():
         """Verify the MCP server is alive. Call this first to confirm connectivity."""
-        return {"status": "ok", "version": __version__}
+        return _json({"status": "ok", "version": __version__})
     @mcp.tool()
     def get_capabilities():
@@ -1243,21 +1297,41 @@ def build_mcp_server(store, repo_path_provider):
         proj_path = proj_recs[0]["path"]
-        # Run incremental index via subprocess to avoid read-only DB constraint
+        # Run incremental index via subprocess to avoid read-only DB constraint.
+        # Use Popen + communicate() with a timeout so that a hang never crashes
+        # the MCP server process — the subprocess is killed gracefully instead.
         cmd = [
             sys.executable, "-m", "codespine.cli",
             "analyse", proj_path,
             "--incremental", "--no-embed", "--allow-running",
         ]
         t0 = time.time()
-        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
-        elapsed = round(time.time() - t0, 2)
+        try:
+            proc = subprocess.Popen(
+                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
+            )
+            stdout, stderr = proc.communicate(timeout=30)
+            elapsed = round(time.time() - t0, 2)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            proc.communicate()  # reap zombie
+            elapsed = round(time.time() - t0, 2)
+            return {
+                "available": False,
+                "note": f"Re-index timed out after {elapsed}s. The project may be too large for single-file re-index. Use analyse_project() instead.",
+            }
+        except Exception as exc:
+            elapsed = round(time.time() - t0, 2)
+            return {
+                "available": False,
+                "note": f"Re-index error: {exc}",
+            }
         if proc.returncode != 0:
             return {
                 "available": False,
                 "note": f"Re-index failed (code {proc.returncode})",
-                "error": proc.stderr.strip() or proc.stdout.strip(),
+                "error": (stderr or stdout or "").strip()[:500],
             }
         return {
@@ -1278,4 +1352,4 @@ def build_mcp_server(store, repo_path_provider):
         records = store.query_records(query)
         return {"available": True, "records": records, "count": len(records)}
-    return mcp
+    return _raw_mcp

{codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 0.5.0
+Version: 0.5.1
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License

{codespine-0.5.0 → codespine-0.5.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "codespine"
-version = "0.5.0"
+version = "0.5.1"
 description = "Local Java code intelligence indexer backed by a graph database"
 readme = "README.md"
 requires-python = ">=3.10"

codespine-0.5.0/codespine/analysis/crossmodule.py DELETED Viewed

@@ -1,230 +0,0 @@
-"""Cross-module call edge linker.
-After all modules in a workspace have been individually indexed, each module's
-call resolver only sees methods within that module. This module fills the gap
-by scanning the graph for unresolved outgoing calls from one module that match
-method signatures in another module, then creating CALLS edges between them.
-The algorithm:
-  1. Build a global method catalog (method_id → name, param_count, class_fqcn)
-     from the DB across ALL projects.
-  2. Build a per-project import map: for each file, record which FQCNs are
-     imported (from the class nodes + extends/implements relations).
-  3. For each method M in project A, find its outgoing calls that did NOT
-     resolve to any target. These are method invocations that tree-sitter
-     parsed but call_resolver.py could not match (because the target was in a
-     different module).
-  4. For each unresolved call, use the file's import list + the global class
-     catalog to find candidate target methods in OTHER projects.
-  5. Create CALLS edges with confidence 0.6 and reason "cross_module_import".
-Because ParsedCall data is transient (not stored in the DB), we use a simpler
-heuristic: find methods in module A that have ZERO outgoing CALLS edges but
-are known to reference classes from other modules (via REFERENCES_TYPE or
-import analysis). Then attempt to link them by matching method names against
-the global catalog.
-A faster fallback strategy (implemented below):
-  - Collect all class FQCNs per project.
-  - For each project pair (A, B), find classes in A that IMPLEMENT/extend
-    classes in B — these already have edges.
-  - For method-level cross-module calls: scan for methods with 0 outgoing
-    edges, match their name+arity against methods in other projects, and
-    only link when the target class is imported (appears in the same file's
-    import set via REFERENCES_TYPE edges).
-"""
-from __future__ import annotations
-import logging
-from collections import defaultdict
-LOGGER = logging.getLogger(__name__)
-def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
-    """Create CALLS edges between methods in different projects.
-    Returns the number of new cross-module call edges created.
-    """
-    if project_ids is None:
-        proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
-        project_ids = [r["id"] for r in proj_recs]
-    if len(project_ids) < 2:
-        LOGGER.info("Only %d project(s) indexed — skipping cross-module linking.", len(project_ids))
-        return 0
-    # ── 1. Global method catalog ────────────────────────────────────────
-    all_methods = store.query_records(
-        """
-        MATCH (m:Method), (c:Class), (f:File)
-        WHERE m.class_id = c.id AND c.file_id = f.id
-        RETURN m.id as mid, m.name as name, m.signature as sig,
-               c.fqcn as class_fqcn, c.name as class_name,
-               f.project_id as project_id
-        """
-    )
-    # Index: (method_name, param_count) → list of (method_id, class_fqcn, project_id)
-    name_arity_index: dict[tuple[str, int], list[dict]] = defaultdict(list)
-    for m in all_methods:
-        sig = m.get("sig") or ""
-        arg_str = sig[sig.find("(") + 1: sig.rfind(")")] if "(" in sig and ")" in sig else ""
-        pc = 0 if not arg_str.strip() else arg_str.count(",") + 1
-        name_arity_index[(m["name"], pc)].append({
-            "mid": m["mid"],
-            "class_fqcn": m.get("class_fqcn", ""),
-            "class_name": m.get("class_name", ""),
-            "project_id": m.get("project_id", ""),
-        })
-    # ── 2. Class FQCN → project mapping ─────────────────────────────────
-    all_classes = store.query_records(
-        """
-        MATCH (c:Class), (f:File)
-        WHERE c.file_id = f.id
-        RETURN c.fqcn as fqcn, c.name as name, f.project_id as project_id
-        """
-    )
-    fqcn_to_project: dict[str, str] = {}
-    class_name_to_fqcns: dict[str, list[str]] = defaultdict(list)
-    for c in all_classes:
-        fqcn_to_project[c["fqcn"]] = c["project_id"]
-        class_name_to_fqcns[c["name"]].append(c["fqcn"])
-    # ── 3. Find methods with 0 outgoing calls (potential unresolved) ────
-    # We only look at methods that have NO outgoing CALLS edges — these are
-    # the ones whose invocations could not be resolved within their own module.
-    zero_out = store.query_records(
-        """
-        MATCH (m:Method), (c:Class), (f:File)
-        WHERE m.class_id = c.id AND c.file_id = f.id
-          AND NOT EXISTS { MATCH (m)-[:CALLS]->(:Method) }
-        RETURN m.id as mid, m.name as name, m.signature as sig,
-               c.fqcn as class_fqcn, c.id as class_id,
-               f.project_id as project_id, f.id as file_id
-        """
-    )
-    # ── 4. Build per-file import set from REFERENCES_TYPE edges ─────────
-    # A class referencing another class implies the source file imports it.
-    refs = store.query_records(
-        """
-        MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class)
-        RETURN src.file_id as file_id, dst.fqcn as target_fqcn, dst.name as target_name
-        """
-    )
-    file_imports: dict[str, set[str]] = defaultdict(set)
-    for r in refs:
-        file_imports[r["file_id"]].add(r.get("target_fqcn", ""))
-        file_imports[r["file_id"]].add(r.get("target_name", ""))
-    # Also gather IMPLEMENTS edges for broader coverage
-    impl_refs = store.query_records(
-        """
-        MATCH (src:Class)-[:IMPLEMENTS]->(dst:Class)
-        RETURN src.file_id as file_id, dst.fqcn as target_fqcn, dst.name as target_name
-        """
-    )
-    for r in impl_refs:
-        file_imports[r["file_id"]].add(r.get("target_fqcn", ""))
-        file_imports[r["file_id"]].add(r.get("target_name", ""))
-    # ── 5. Attempt cross-module resolution ──────────────────────────────
-    new_edges = 0
-    seen_pairs: set[tuple[str, str]] = set()
-    for m in zero_out:
-        sig = m.get("sig") or ""
-        # We cannot know which methods THIS method calls without re-parsing.
-        # Heuristic: skip this method if it has no imports from other projects.
-        fid = m.get("file_id", "")
-        src_pid = m.get("project_id", "")
-        imported_fqcns = file_imports.get(fid, set())
-        # Find classes from OTHER projects that this file references
-        cross_project_classes = set()
-        for fqcn in imported_fqcns:
-            target_pid = fqcn_to_project.get(fqcn, "")
-            if target_pid and target_pid != src_pid:
-                cross_project_classes.add(fqcn)
-        if not cross_project_classes:
-            continue
-        # For each cross-project class, find its methods and see if any
-        # match common call patterns. We use name + arity matching.
-        # Since we don't have the actual calls, we create edges from this
-        # method to methods in the target classes that share a name.
-        # This is conservative: we only link if there's exactly 1 candidate.
-        for target_fqcn in cross_project_classes:
-            target_pid = fqcn_to_project.get(target_fqcn, "")
-            for (mname, pc), candidates in name_arity_index.items():
-                matching = [
-                    c for c in candidates
-                    if c["class_fqcn"] == target_fqcn and c["project_id"] == target_pid
-                ]
-                if len(matching) == 1:
-                    src_mid = m["mid"]
-                    dst_mid = matching[0]["mid"]
-                    pair = (src_mid, dst_mid)
-                    if pair in seen_pairs:
-                        continue
-                    # Only link if the method has an outgoing reference that
-                    # plausibly invokes this target (name substring match in sig)
-                    # This avoids noise from linking random unrelated methods
-                    seen_pairs.add(pair)
-    # For a more targeted approach: use REFERENCES_TYPE at CLASS level to
-    # create cross-module CALLS at METHOD level where signatures match.
-    xmod_class_pairs = store.query_records(
-        """
-        MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
-        WHERE src.file_id = sf.id AND dst.file_id = df.id
-          AND sf.project_id <> df.project_id
-        RETURN src.id as src_cid, dst.id as dst_cid,
-               sf.project_id as src_pid, df.project_id as dst_pid
-        """
-    )
-    for pair in xmod_class_pairs:
-        src_methods = store.query_records(
-            "MATCH (m:Method) WHERE m.class_id = $cid RETURN m.id as mid, m.name as name, m.signature as sig",
-            {"cid": pair["src_cid"]},
-        )
-        dst_methods = store.query_records(
-            "MATCH (m:Method) WHERE m.class_id = $cid RETURN m.id as mid, m.name as name, m.signature as sig",
-            {"cid": pair["dst_cid"]},
-        )
-        # Build name+arity index for destination class
-        dst_by_name_arity: dict[tuple[str, int], list[str]] = defaultdict(list)
-        for dm in dst_methods:
-            dsig = dm.get("sig") or ""
-            darg = dsig[dsig.find("(") + 1: dsig.rfind(")")] if "(" in dsig and ")" in dsig else ""
-            dpc = 0 if not darg.strip() else darg.count(",") + 1
-            dst_by_name_arity[(dm["name"], dpc)].append(dm["mid"])
-        for sm in src_methods:
-            ssig = sm.get("sig") or ""
-            sarg = ssig[ssig.find("(") + 1: ssig.rfind(")")] if "(" in ssig and ")" in ssig else ""
-            spc = 0 if not sarg.strip() else sarg.count(",") + 1
-            # Check if any destination method name appears as a substring
-            # in the source method's signature (crude but low false-positive)
-            for (dname, dpc), dst_ids in dst_by_name_arity.items():
-                if len(dst_ids) != 1:
-                    continue
-                dst_mid = dst_ids[0]
-                edge_pair = (sm["mid"], dst_mid)
-                if edge_pair in seen_pairs:
-                    continue
-                seen_pairs.add(edge_pair)
-                try:
-                    store.add_call(sm["mid"], dst_mid, 0.6, "cross_module_import")
-                    new_edges += 1
-                except Exception as exc:
-                    LOGGER.debug("Cross-module edge failed: %s", exc)
-    LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
-    return new_edges

{codespine-0.5.0 → codespine-0.5.1}/LICENSE RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/README.md RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/community.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/context.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/coupling.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/flow.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/impact.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/cli.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/config.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/db/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/db/schema.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/db/store.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/diff/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/diff/branch_diff.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/call_resolver.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/engine.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/java_parser.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/symbol_builder.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/mcp/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/noise/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/noise/blocklist.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/search/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/search/bm25.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/search/fuzzy.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/search/hybrid.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/search/rrf.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/search/vector.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/watch/__init__.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine/watch/watcher.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/entry_points.txt RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/requires.txt RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/top_level.txt RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/gindex.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/setup.cfg RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/tests/test_branch_diff_normalize.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/tests/test_call_resolver.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/tests/test_index_and_hybrid.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/tests/test_java_parser.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/tests/test_multimodule_index.py RENAMED Viewed

File without changes

{codespine-0.5.0 → codespine-0.5.1}/tests/test_search_ranking.py RENAMED Viewed

File without changes

codespine 0.5.0__tar.gz → 0.5.1__tar.gz

codespine 0.5.0tar.gz → 0.5.1tar.gz