PyPI - codespine - Versions diffs - 0.1.1__py3-none-any.whl - Mend

codespine 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

codespine/__init__.py +4 -0
codespine/analysis/__init__.py +1 -0
codespine/analysis/community.py +75 -0
codespine/analysis/context.py +24 -0
codespine/analysis/coupling.py +119 -0
codespine/analysis/deadcode.py +107 -0
codespine/analysis/flow.py +77 -0
codespine/analysis/impact.py +90 -0
codespine/cli.py +424 -0
codespine/config.py +22 -0
codespine/db/__init__.py +1 -0
codespine/db/schema.py +82 -0
codespine/db/store.py +313 -0
codespine/diff/__init__.py +1 -0
codespine/diff/branch_diff.py +163 -0
codespine/indexer/__init__.py +1 -0
codespine/indexer/call_resolver.py +137 -0
codespine/indexer/engine.py +305 -0
codespine/indexer/java_parser.py +350 -0
codespine/indexer/symbol_builder.py +32 -0
codespine/mcp/__init__.py +1 -0
codespine/mcp/server.py +67 -0
codespine/noise/__init__.py +1 -0
codespine/noise/blocklist.py +37 -0
codespine/search/__init__.py +1 -0
codespine/search/bm25.py +52 -0
codespine/search/fuzzy.py +36 -0
codespine/search/hybrid.py +80 -0
codespine/search/rrf.py +9 -0
codespine/search/vector.py +113 -0
codespine/watch/__init__.py +1 -0
codespine/watch/watcher.py +38 -0
codespine-0.1.1.dist-info/METADATA +336 -0
codespine-0.1.1.dist-info/RECORD +39 -0
codespine-0.1.1.dist-info/WHEEL +5 -0
codespine-0.1.1.dist-info/entry_points.txt +3 -0
codespine-0.1.1.dist-info/licenses/LICENSE +21 -0
codespine-0.1.1.dist-info/top_level.txt +2 -0
gindex.py +10 -0

codespine/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+"""CodeSpine package."""
+# Keep in sync with the distribution version: this wheel is codespine 0.1.1.
+__all__ = ["__version__"]
+__version__ = "0.1.1"

codespine/analysis/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Analysis layer."""

codespine/analysis/community.py ADDED Viewed

@@ -0,0 +1,75 @@
+from __future__ import annotations
+from collections import defaultdict
+def detect_communities(store) -> list[dict]:
+    """Group symbols into communities, persist them, and return a summary.
+
+    The primary path runs Leiden partitioning (``igraph`` + ``leidenalg``) over
+    the undirected method call graph. If that path raises (for example because
+    the packages cannot be imported), symbols are grouped by a prefix of their
+    ``fqname`` instead.
+
+    Every community is written through ``store.set_community``. Returns one
+    dict per community (``community_id``, ``label``, ``cohesion``, ``size``),
+    largest first, or an empty list when the store holds no symbols.
+    """
+    symbols = store.query_records("MATCH (s:Symbol) RETURN s.id as id, s.fqname as fqname")
+    edges = store.query_records(
+        """
+        MATCH (a:Method)-[:CALLS]->(b:Method)
+        RETURN a.id as src, b.id as dst
+        """
+    )
+    if not symbols:
+        return []
+    ids = [s["id"] for s in symbols]
+    index_of = {sid: i for i, sid in enumerate(ids)}
+    membership: dict[str, int] = {}
+    try:
+        import igraph as ig
+        import leidenalg
+        g = ig.Graph(directed=False)
+        g.add_vertices(len(ids))
+        # Only edges whose endpoints are both known symbols can be indexed.
+        graph_edges = [
+            (index_of[e["src"]], index_of[e["dst"]])
+            for e in edges
+            if e["src"] in index_of and e["dst"] in index_of
+        ]
+        if graph_edges:
+            g.add_edges(graph_edges)
+        part = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)
+        for idx, cid in enumerate(part.membership):
+            membership[ids[idx]] = int(cid)
+    except Exception:
+        # Fallback: group by package prefix from fqname. Drop any partial
+        # Leiden result first so the two numbering schemes never mix.
+        membership.clear()
+        prefix_of: dict[str, str] = {}
+        for s in symbols:
+            fq = s.get("fqname") or ""
+            prefix_of[s["id"]] = fq.rsplit(".", 2)[0] if "." in fq else fq
+        # Number the prefixes in sorted order. The built-in hash() of a str is
+        # salted per process, so hash-derived ids differed between runs and the
+        # modulo could merge unrelated prefixes into one community.
+        ordinal = {prefix: i for i, prefix in enumerate(sorted(set(prefix_of.values())))}
+        for sid, prefix in prefix_of.items():
+            membership[sid] = ordinal[prefix]
+    grouped: dict[int, list[str]] = defaultdict(list)
+    for sid, cid in membership.items():
+        grouped[cid].append(sid)
+    communities: list[dict] = []
+    for cid, symbol_ids in grouped.items():
+        cohesion = 1.0 / max(len(symbol_ids), 1)
+        label = f"community_{cid}"
+        store.set_community(str(cid), label, cohesion, symbol_ids)
+        communities.append(
+            {
+                "community_id": str(cid),
+                "label": label,
+                "cohesion": cohesion,
+                "size": len(symbol_ids),
+            }
+        )
+    communities.sort(key=lambda c: c["size"], reverse=True)
+    return communities
+def symbol_community(store, symbol_query: str) -> dict:
+    """Look up the communities of symbols matching ``symbol_query``.
+
+    A symbol matches on its exact id, or case-insensitively on ``fqname`` or
+    ``name``. At most 20 rows are requested. The result echoes the query and
+    carries the rows under ``matches``.
+    """
+    cypher = """
+        MATCH (s:Symbol)-[:IN_COMMUNITY]->(c:Community)
+        WHERE s.id = $q OR lower(s.fqname) = lower($q) OR lower(s.name) = lower($q)
+        RETURN s.id as symbol_id, s.fqname as fqname, c.id as community_id, c.label as label, c.cohesion as cohesion
+        LIMIT 20
+        """
+    matches = store.query_records(cypher, {"q": symbol_query})
+    return {"query": symbol_query, "matches": matches}

codespine/analysis/context.py ADDED Viewed

@@ -0,0 +1,24 @@
+from __future__ import annotations
+from codespine.analysis.community import symbol_community
+from codespine.analysis.flow import trace_execution_flows
+from codespine.analysis.impact import analyze_impact
+from codespine.search.hybrid import hybrid_search
+def build_symbol_context(store, query: str, max_depth: int = 3) -> dict:
+    """Bundle search hits, impact, community and flow data for ``query``.
+
+    The first hybrid-search hit (if any) is reported as ``focus``. Flow tracing
+    is given two more levels of depth than impact analysis.
+    """
+    candidates = hybrid_search(store, query, k=10)
+    context: dict = {"query": query}
+    context["focus"] = candidates[0] if candidates else None
+    context["search_candidates"] = candidates
+    context["impact"] = analyze_impact(store, query, max_depth=max_depth)
+    context["community"] = symbol_community(store, query)
+    context["flows"] = trace_execution_flows(store, entry_symbol=query, max_depth=max_depth + 2)
+    return context

codespine/analysis/coupling.py ADDED Viewed

@@ -0,0 +1,119 @@
+from __future__ import annotations
+import itertools
+import os
+import subprocess
+from collections import Counter, defaultdict
+from codespine.config import SETTINGS
+from codespine.indexer.symbol_builder import file_id
+def _git_changed_file_sets(repo_path: str, months: int) -> list[set[str]]:
+    """Return, per commit, the set of paths it touched in the last ``months`` months.
+
+    Runs ``git log --name-only`` in ``repo_path`` with a sentinel line in place
+    of each commit header, then splits the output on that sentinel. Commits
+    that list no paths are dropped. Returns an empty list when git exits
+    non-zero or cannot be started at all.
+    """
+    cmd = [
+        "git",
+        "-C",
+        repo_path,
+        "log",
+        "--name-only",
+        "--pretty=format:__COMMIT__",
+        f"--since={months}.months",
+    ]
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, check=False)
+    except OSError:
+        # git executable missing or not runnable: treat it like any other
+        # failed git call instead of crashing the caller.
+        return []
+    if proc.returncode != 0:
+        return []
+    changesets: list[set[str]] = []
+    current: set[str] = set()
+    for line in proc.stdout.splitlines():
+        line = line.strip()
+        if line == "__COMMIT__":
+            # Sentinel: close the previous commit's file set, if it had files.
+            if current:
+                changesets.append(current)
+            current = set()
+            continue
+        if line:
+            current.add(line)
+    # The last commit has no trailing sentinel.
+    if current:
+        changesets.append(current)
+    return changesets
+def compute_coupling(
+    store,
+    repo_path: str,
+    project_id: str,
+    months: int = SETTINGS.default_coupling_months,
+    min_strength: float = SETTINGS.default_min_coupling_strength,
+    min_cochanges: int = SETTINGS.default_min_cochanges,
+) -> list[dict]:
+    """Derive file-to-file change coupling from git history and store it.
+
+    For every pair of paths that appear in the same commit, strength is the
+    number of shared commits divided by the larger of the two paths' own
+    commit counts. Pairs below ``min_strength`` or ``min_cochanges`` are
+    skipped; the rest are written via ``store.upsert_coupling`` and returned,
+    strongest first (ties broken by co-change count).
+    """
+    history = _git_changed_file_sets(repo_path, months)
+    if not history:
+        return []
+    touches = Counter()
+    pair_hits: Counter[tuple[str, str]] = Counter()
+    for commit_files in history:
+        touches.update(commit_files)
+        # Sorting makes (a, b) canonical so each pair is counted under one key.
+        pair_hits.update(itertools.combinations(sorted(commit_files), 2))
+    coupled = []
+    for (first, second), together in pair_hits.items():
+        busiest = max(touches[first], touches[second])
+        strength = together / max(busiest, 1)
+        if strength < min_strength or together < min_cochanges:
+            continue
+        first_id = file_id(project_id, first)
+        second_id = file_id(project_id, second)
+        store.upsert_coupling(first_id, second_id, strength, together, months)
+        coupled.append(
+            {
+                "file_a": first,
+                "file_b": second,
+                "strength": strength,
+                "cochanges": together,
+            }
+        )
+    coupled.sort(key=lambda r: (r["strength"], r["cochanges"]), reverse=True)
+    return coupled
+def get_coupling(store, symbol: str | None = None, months: int = 6, min_strength: float = 0.3, min_cochanges: int = 3) -> dict:
+    """Read stored co-change couplings, optionally restricted to one symbol.
+
+    With ``symbol`` set, returns up to 200 couplings of files linked by
+    ``DECLARES`` to a matching symbol (exact id, or case-insensitive
+    ``fqname``/``name``); that query does not filter on ``months``. Without
+    ``symbol``, returns up to 500 couplings whose stored ``months`` equals the
+    argument. Both paths apply ``min_strength`` and ``min_cochanges``.
+    """
+    if symbol:
+        # The symbol alternatives are parenthesized: AND binds tighter than OR,
+        # so without them the thresholds applied only to the name match.
+        recs = store.query_records(
+            """
+            MATCH (s:Symbol)-[:DECLARES]-(f:File)-[r:CO_CHANGED_WITH]-(f2:File)
+            WHERE (s.id = $q OR lower(s.fqname) = lower($q) OR lower(s.name) = lower($q))
+              AND r.strength >= $min_strength AND r.cochanges >= $min_cochanges
+            RETURN f.path as file, f2.path as coupled_file, r.strength as strength, r.cochanges as cochanges
+            ORDER BY strength DESC, cochanges DESC
+            LIMIT 200
+            """,
+            {
+                "q": symbol,
+                "min_strength": min_strength,
+                "min_cochanges": min_cochanges,
+            },
+        )
+        return {"symbol": symbol, "couplings": recs}
+    recs = store.query_records(
+        """
+        MATCH (f:File)-[r:CO_CHANGED_WITH]-(f2:File)
+        WHERE r.months = $months AND r.strength >= $min_strength AND r.cochanges >= $min_cochanges
+        RETURN f.path as file, f2.path as coupled_file, r.strength as strength, r.cochanges as cochanges
+        ORDER BY strength DESC, cochanges DESC
+        LIMIT 500
+        """,
+        {
+            "months": months,
+            "min_strength": min_strength,
+            "min_cochanges": min_cochanges,
+        },
+    )
+    return {"symbol": None, "couplings": recs}

codespine/analysis/deadcode.py ADDED Viewed

@@ -0,0 +1,107 @@
+from __future__ import annotations
+# Annotation names, written without the leading "@", that exempt a method
+# from dead-code reporting.
+EXEMPT_ANNOTATIONS = {
+    "Bean", "EventListener", "Inject", "JsonCreator", "KafkaListener", "Override",
+    "ParameterizedTest", "PostConstruct", "PreDestroy", "Scheduled", "Test",
+}
+# Method names that are exempt no matter whether anything calls them.
+EXEMPT_CONTRACT_METHODS = {"compareTo", "equals", "hashCode", "toString"}
+def _modifier_tokens(modifiers) -> set[str]:
+    """Return the modifiers as a set of whitespace-trimmed strings (empty if none)."""
+    tokens: set[str] = set()
+    for modifier in modifiers or ():
+        tokens.add(str(modifier).strip())
+    return tokens
+def _annotation_simple_name(token: str) -> str:
+    """Reduce a modifier token to a bare annotation name.
+
+    Strips the leading ``@``, any argument list and any package qualifier, so
+    ``@Scheduled(fixedRate = 5000)`` and ``@org.junit.Test`` become
+    ``Scheduled`` and ``Test``. Plain tokens such as ``public`` pass through.
+    """
+    head = token.lstrip("@").split("(", 1)[0].strip()
+    return head.rsplit(".", 1)[-1]
+def detect_dead_code(store, limit: int = 200) -> list[dict]:
+    """Java-aware dead code detection with exemption passes.
+
+    Fetches up to ``limit * 3`` methods that have no incoming ``CALLS`` edge,
+    drops those exempted by name, flags, modifiers/annotations, ``OVERRIDES``
+    edges, or membership in a class that has an ``IMPLEMENTS`` edge, and
+    returns at most ``limit`` of the remainder. Each result carries
+    ``method_id``, ``name``, ``signature`` and a fixed ``reason``.
+    """
+    candidates = store.query_records(
+        """
+        MATCH (m:Method), (c:Class)
+        WHERE m.class_id = c.id
+          AND NOT EXISTS { MATCH (:Method)-[:CALLS]->(m) }
+        RETURN m.id as method_id,
+               m.name as name,
+               m.signature as signature,
+               m.modifiers as modifiers,
+               c.fqcn as class_fqcn,
+               m.is_constructor as is_constructor,
+               m.is_test as is_test
+        LIMIT $limit
+        """,
+        {"limit": int(limit * 3)},
+    )
+    if not candidates:
+        return []
+    exempt: set[str] = set()
+    for c in candidates:
+        sig = (c.get("signature") or "").lower()
+        name = c.get("name") or ""
+        mods = _modifier_tokens(c.get("modifiers"))
+        is_exempt = (
+            # Constructors, test methods, and Java main entrypoints.
+            c.get("is_constructor")
+            or c.get("is_test")
+            or (name == "main" and "string[]" in sig)
+            or name in EXEMPT_CONTRACT_METHODS
+            # Compare bare annotation names so tokens with arguments or a
+            # package qualifier are still recognized.
+            or any(_annotation_simple_name(m) in EXEMPT_ANNOTATIONS for m in mods)
+            # Java bean-ish APIs often rely on reflection/serialization.
+            or ("public" in mods and name.startswith(("get", "set", "is")))
+            # Reflection-style hooks
+            or name in {"valueOf", "fromString", "builder"}
+        )
+        if is_exempt:
+            exempt.add(c["method_id"])
+    # Exempt override/interface contract methods if relation exists.
+    override_methods = store.query_records(
+        """
+        MATCH (m:Method)-[:OVERRIDES]->(:Method)
+        RETURN DISTINCT m.id as method_id
+        """
+    )
+    interface_methods = store.query_records(
+        """
+        MATCH (c:Class)-[:IMPLEMENTS]->(:Class), (m:Method)
+        WHERE m.class_id = c.id
+        RETURN DISTINCT m.id as method_id
+        """
+    )
+    exempt.update(r["method_id"] for r in override_methods)
+    exempt.update(r["method_id"] for r in interface_methods)
+    dead = [
+        {
+            "method_id": c["method_id"],
+            "name": c.get("name"),
+            "signature": c.get("signature"),
+            "reason": "no_incoming_calls_after_exemptions",
+        }
+        for c in candidates
+        if c["method_id"] not in exempt
+    ]
+    return dead[:limit]

codespine/analysis/flow.py ADDED Viewed

@@ -0,0 +1,77 @@
+from __future__ import annotations
+from collections import defaultdict, deque
+def _entry_methods(store) -> list[str]:
+    """Pick the method ids that flow tracing starts from.
+
+    Prefers methods named ``main`` or flagged ``is_test``. When the store has
+    none, falls back to the first ten methods ordered by name.
+    """
+    preferred = [
+        row["id"]
+        for row in store.query_records(
+            """
+        MATCH (m:Method)
+        WHERE m.name = 'main' OR m.is_test = true
+        RETURN m.id as id
+        """
+        )
+    ]
+    if preferred:
+        return preferred
+    # Nothing looks like an entry point: take an arbitrary but stable sample.
+    sample = store.query_records(
+        """
+        MATCH (m:Method)
+        WITH m ORDER BY m.name LIMIT 10
+        RETURN m.id as id
+        """
+    )
+    return [row["id"] for row in sample]
+def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int = 6) -> list[dict]:
+    """Walk the call graph breadth-first from each entry method.
+
+    With ``entry_symbol`` set, entries are up to ten methods matching it (exact
+    id, case-insensitive name, or case-insensitive substring of the signature);
+    otherwise ``_entry_methods`` chooses them. Each flow lists the reached
+    methods with their depth, in visit order, and is labelled
+    ``cross_community`` when it has more than 12 nodes, else ``intra_community``.
+    """
+    call_edges = store.query_records(
+        """
+        MATCH (a:Method)-[:CALLS]->(b:Method)
+        RETURN a.id as src, b.id as dst
+        """
+    )
+    callees: dict[str, list[str]] = {}
+    for call in call_edges:
+        callees.setdefault(call["src"], []).append(call["dst"])
+    if entry_symbol:
+        start = store.query_records(
+            """
+            MATCH (m:Method)
+            WHERE m.id = $q OR lower(m.name) = lower($q) OR lower(m.signature) CONTAINS lower($q)
+            RETURN m.id as id
+            LIMIT 10
+            """,
+            {"q": entry_symbol},
+        )
+        entries = [row["id"] for row in start]
+    else:
+        entries = _entry_methods(store)
+    flows = []
+    for entry in entries:
+        seen = {entry}
+        # `reached` doubles as the BFS queue: `cursor` marks the next node to expand.
+        reached = [(entry, 0)]
+        cursor = 0
+        while cursor < len(reached):
+            node, depth = reached[cursor]
+            cursor += 1
+            if depth >= max_depth:
+                continue
+            for callee in callees.get(node, []):
+                if callee not in seen:
+                    seen.add(callee)
+                    reached.append((callee, depth + 1))
+        kind = "cross_community" if len(reached) > 12 else "intra_community"
+        flows.append(
+            {
+                "entry": entry,
+                "kind": kind,
+                "nodes": [{"symbol": n, "depth": d} for n, d in reached],
+            }
+        )
+    return flows

codespine/analysis/impact.py ADDED Viewed

@@ -0,0 +1,90 @@
+from __future__ import annotations
+from collections import defaultdict, deque
+def _resolve_symbol_ids(store, symbol_query: str) -> list[str]:
+    """Return the ids of up to 50 symbols matching ``symbol_query``.
+
+    A symbol matches on its exact id, case-insensitively on ``name`` or
+    ``fqname``, or when ``fqname`` contains the query (case-insensitive).
+    """
+    cypher = """
+        MATCH (s:Symbol)
+        WHERE s.id = $q OR lower(s.name) = lower($q) OR lower(s.fqname) = lower($q) OR lower(s.fqname) CONTAINS lower($q)
+        RETURN s.id as id
+        LIMIT 50
+        """
+    symbol_ids: list[str] = []
+    for row in store.query_records(cypher, {"q": symbol_query}):
+        symbol_ids.append(row["id"])
+    return symbol_ids
+def analyze_impact(store, symbol_query: str, max_depth: int = 4) -> dict:
+    """Find the methods that transitively call the symbol(s) matching ``symbol_query``.
+
+    Matching symbols are mapped to methods, then the ``CALLS`` graph is walked
+    backwards breadth-first for up to ``max_depth`` hops. Callers are bucketed
+    by distance into ``"1"``, ``"2"`` and ``"3+"``. Each entry records the
+    caller id, its depth, the edge type, the edge confidence and the path of
+    method ids leading to it.
+
+    The result always has the keys ``target``, ``targets_resolved``,
+    ``depth_groups`` and ``summary``, including when nothing is resolved.
+    """
+    depth_groups: dict[str, list[dict]] = {"1": [], "2": [], "3+": []}
+    def _report(resolved: list[str]) -> dict:
+        # Single place that shapes the result, so early exits match the full one.
+        return {
+            "target": symbol_query,
+            "targets_resolved": resolved,
+            "depth_groups": depth_groups,
+            "summary": {
+                "direct": len(depth_groups["1"]),
+                "indirect": len(depth_groups["2"]),
+                "transitive": len(depth_groups["3+"]),
+            },
+        }
+    target_symbol_ids = _resolve_symbol_ids(store, symbol_query)
+    if not target_symbol_ids:
+        return _report([])
+    # Method symbols are tied to Method nodes by signature containment.
+    symbol_to_method = {
+        r["sid"]: r["mid"]
+        for r in store.query_records(
+            """
+            MATCH (s:Symbol),(m:Method)
+            WHERE s.kind = 'method' AND s.fqname CONTAINS m.signature
+            RETURN s.id as sid, m.id as mid
+            """
+        )
+    }
+    # Several symbols can map to one method; keep the first occurrence only.
+    target_method_ids = list(
+        dict.fromkeys(symbol_to_method[sid] for sid in target_symbol_ids if sid in symbol_to_method)
+    )
+    if not target_method_ids:
+        return _report([])
+    edges = store.query_records(
+        """
+        MATCH (a:Method)-[r:CALLS]->(b:Method)
+        RETURN a.id as src, b.id as dst, 'CALLS' as edge_type,
+               coalesce(r.confidence, 0.5) as confidence,
+               coalesce(r.reason, 'unknown') as reason
+        """
+    )
+    # Index edges by callee so the walk goes from a method to its callers.
+    reverse_adj: dict[str, list[dict]] = defaultdict(list)
+    for edge in edges:
+        reverse_adj[edge["dst"]].append(edge)
+    visited: set[str] = set(target_method_ids)
+    queue = deque([(mid, 0, [mid]) for mid in target_method_ids])
+    while queue:
+        node, depth, path = queue.popleft()
+        if depth >= max_depth:
+            continue
+        for edge in reverse_adj.get(node, []):
+            src = edge["src"]
+            if src in visited:
+                continue
+            visited.add(src)
+            next_depth = depth + 1
+            next_path = path + [src]
+            bucket = str(next_depth) if next_depth < 3 else "3+"
+            depth_groups[bucket].append(
+                {
+                    "symbol": src,
+                    "depth": next_depth,
+                    "edge_type": edge["edge_type"],
+                    "confidence": float(edge["confidence"]),
+                    "path": next_path,
+                }
+            )
+            queue.append((src, next_depth, next_path))
+    return _report(target_method_ids)