PyPI - codespine - Versions diffs - 0.3.0__tar.gz → 0.4.1__tar.gz - Mend

codespine 0.3.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

{codespine-0.3.0 → codespine-0.4.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 0.3.0
+Version: 0.4.1
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License

{codespine-0.3.0 → codespine-0.4.1}/codespine/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 """CodeSpine package."""
 __all__ = ["__version__"]
-__version__ = "0.3.0"
+__version__ = "0.4.1"

{codespine-0.3.0 → codespine-0.4.1}/codespine/analysis/deadcode.py RENAMED Viewed

@@ -1,17 +1,63 @@
 from __future__ import annotations
 EXEMPT_ANNOTATIONS = {
+    # Java standard
     "Override",
+    # JUnit / testing
     "Test",
     "ParameterizedTest",
+    "BeforeEach",
+    "AfterEach",
+    "BeforeAll",
+    "AfterAll",
+    # Spring – component model (class-level; methods inside are never "dead")
+    "Component",
+    "Service",
+    "Repository",
+    "Controller",
+    "RestController",
+    "Configuration",
     "Bean",
+    "Aspect",
+    # Spring – lifecycle / event hooks
     "PostConstruct",
     "PreDestroy",
+    "EventListener",
+    "TransactionalEventListener",
     "Scheduled",
+    # Spring – web entry points
+    "RequestMapping",
+    "GetMapping",
+    "PostMapping",
+    "PutMapping",
+    "DeleteMapping",
+    "PatchMapping",
+    "MessageMapping",
+    # Spring – messaging / async
     "KafkaListener",
-    "EventListener",
-    "JsonCreator",
+    "RabbitListener",
+    "JmsListener",
+    "SqsListener",
+    "StreamListener",
+    # Spring Data / persistence
+    "Query",
+    "Modifying",
+    # Guice DI
     "Inject",
+    "Provides",
+    "Singleton",
+    "Named",
+    "Qualifier",
+    # Jakarta / javax DI (same semantics as Guice/Spring variants)
+    "ApplicationScoped",
+    "RequestScoped",
+    "SessionScoped",
+    "Dependent",
+    # Jackson / serialization (called reflectively)
+    "JsonCreator",
+    "JsonProperty",
+    "JsonDeserialize",
+    "JsonSerialize",
 }
 EXEMPT_CONTRACT_METHODS = {

{codespine-0.3.0 → codespine-0.4.1}/codespine/config.py RENAMED Viewed

@@ -7,7 +7,7 @@ class Settings:
     db_path: str = os.path.expanduser("~/.codespine_db")
     pid_file: str = os.path.expanduser("~/.codespine.pid")
     log_file: str = os.path.expanduser("~/.codespine.log")
-    embedding_cache_db: str = os.path.expanduser("~/.codespine_embedding_cache.sqlite3")
+    embedding_cache_path: str = os.path.expanduser("~/.codespine_embedding_cache.json")
     index_meta_dir: str = os.path.expanduser("~/.codespine_index_meta")
     embedding_model: str = "BAAI/bge-small-en-v1.5"
     vector_dim: int = 384

{codespine-0.3.0 → codespine-0.4.1}/codespine/db/schema.py RENAMED Viewed

@@ -10,7 +10,7 @@ NODE_TABLES: list[tuple[str, str]] = [
     ("SchemaMeta", "CREATE NODE TABLE SchemaMeta(key STRING, value STRING, PRIMARY KEY (key))"),
     (
         "Project",
-        "CREATE NODE TABLE Project(id STRING, path STRING, language STRING, PRIMARY KEY (id))",
+        "CREATE NODE TABLE Project(id STRING, path STRING, language STRING, indexed_at STRING, PRIMARY KEY (id))",
     ),
     (
         "File",
@@ -76,7 +76,10 @@ def ensure_schema(conn) -> None:
     _safe_execute(conn, "CALL CREATE_FTS_INDEX('method_fts', 'Method', ['name', 'signature'])")
     _safe_execute(conn, "CALL CREATE_FTS_INDEX('class_fts', 'Class', ['name', 'fqcn'])")
+    # Best-effort migration: add indexed_at column to existing Project tables.
+    _safe_execute(conn, "ALTER TABLE Project ADD indexed_at STRING DEFAULT ''")
     _safe_execute(
         conn,
-        "MERGE (s:SchemaMeta {key: 'schema_version'}) SET s.value = '2'",
+        "MERGE (s:SchemaMeta {key: 'schema_version'}) SET s.value = '3'",
     )

{codespine-0.3.0 → codespine-0.4.1}/codespine/db/store.py RENAMED Viewed

@@ -5,6 +5,7 @@ import json
 import logging
 import os
 import threading
+import time
 from contextlib import contextmanager
 from dataclasses import dataclass
 from typing import Any
@@ -69,7 +70,12 @@ class GraphStore:
                 self.execute("COMMIT")
         except Exception:
             if tx_started:
-                self.execute("ROLLBACK")
+                try:
+                    self.execute("ROLLBACK")
+                except Exception:
+                    # Kuzu may have already rolled back (e.g. on OOM), making a
+                    # second ROLLBACK raise "No active transaction". Swallow it.
+                    pass
             raise
     def clear_project(self, project_id: str) -> None:
@@ -108,8 +114,8 @@ class GraphStore:
     def upsert_project(self, project_id: str, path: str) -> None:
         self.execute(
-            "MERGE (p:Project {id: $id}) SET p.path = $path, p.language = 'java'",
-            {"id": project_id, "path": path},
+            "MERGE (p:Project {id: $id}) SET p.path = $path, p.language = 'java', p.indexed_at = $ts",
+            {"id": project_id, "path": path, "ts": str(int(time.time()))},
         )
     def project_file_hashes(self, project_id: str) -> dict[str, dict[str, str]]:
@@ -297,14 +303,18 @@ class GraphStore:
             "MERGE (c:Community {id: $id}) SET c.label = $label, c.cohesion = $cohesion",
             {"id": community_id, "label": label, "cohesion": cohesion},
         )
-        # Batch all symbol→community edges in one transaction to prevent buffer pool exhaustion
-        # on large projects (53 K+ symbols would OOM without a single commit boundary).
-        with self.transaction():
-            for sid in symbol_ids:
-                self.execute(
-                    "MATCH (s:Symbol {id: $sid}), (c:Community {id: $cid}) MERGE (s)-[:IN_COMMUNITY]->(c)",
-                    {"sid": sid, "cid": community_id},
-                )
+        # Commit in batches of 50 to keep Kuzu's buffer pool from OOMing on large
+        # communities. A single transaction over thousands of MERGE statements exhausts
+        # the 256 MB buffer pool before it can page out.
+        _BATCH = 50
+        for i in range(0, len(symbol_ids), _BATCH):
+            batch = symbol_ids[i : i + _BATCH]
+            with self.transaction():
+                for sid in batch:
+                    self.execute(
+                        "MATCH (s:Symbol {id: $sid}), (c:Community {id: $cid}) MERGE (s)-[:IN_COMMUNITY]->(c)",
+                        {"sid": sid, "cid": community_id},
+                    )
     def set_flow(self, flow_id: str, entry_symbol_id: str, kind: str, symbols_at_depth: list[tuple[str, int]]) -> None:
         self.execute(

{codespine-0.3.0 → codespine-0.4.1}/codespine/indexer/engine.py RENAMED Viewed

@@ -167,6 +167,10 @@ class JavaIndexer:
             to_reindex = current_files
             deleted_file_ids = []
             meta_cache = {}
+            # Wipe the embedding cache on a full re-index so stale embeddings
+            # (including those from the old SQLite format) are not carried over.
+            from codespine.search.vector import _CACHE as _embed_cache
+            _embed_cache.clear()
         else:
             to_reindex, deleted_file_ids, meta_cache = self._plan_incremental(
                 project_id,

{codespine-0.3.0 → codespine-0.4.1}/codespine/mcp/server.py RENAMED Viewed

@@ -71,7 +71,16 @@ def build_mcp_server(store, repo_path_provider):
         Call this before other tools so you know what's ready without trial-and-error.
         Features marked false may need 'codespine analyse --deep' or optional dependencies.
         """
-        projects = store.query_records("MATCH (p:Project) RETURN p.id as id, p.path as path")
+        try:
+            projects = store.query_records(
+                "MATCH (p:Project) RETURN p.id as id, p.path as path, p.indexed_at as indexed_at"
+            )
+        except Exception:
+            # Old DB schema (pre-0.4.0) doesn't have indexed_at column yet.
+            # Falls back gracefully; column is added next time 'analyse' runs.
+            projects = store.query_records(
+                "MATCH (p:Project) RETURN p.id as id, p.path as path"
+            )
         sym_q = store.query_records("MATCH (s:Symbol) RETURN count(s) as count")
         comm_q = store.query_records("MATCH (c:Community) RETURN count(c) as count")
         flow_q = store.query_records("MATCH (f:Flow) RETURN count(f) as count")
@@ -97,7 +106,20 @@ def build_mcp_server(store, repo_path_provider):
         watch_running = _watch["proc"] is not None and _watch["proc"].poll() is None
         analyse_running = _analyse["proc"] is not None and _analyse["proc"].poll() is None
+        now = int(time.time())
+        stale_projects = []
+        for p in projects:
+            ts = int(p.get("indexed_at") or 0)
+            if ts and (now - ts) > 3600 and not watch_running:
+                age_h = (now - ts) // 3600
+                stale_projects.append(f"{p['id']} ({age_h}h old)")
         notes: dict[str, str] = {}
+        if stale_projects:
+            notes["stale_index"] = (
+                f"Index is stale for: {', '.join(stale_projects)}. "
+                "Run analyse_project() or start_watch() to refresh."
+            )
         if not n_comm:
             notes["community_detection"] = "Run 'codespine analyse --deep' to enable"
         if not n_flows:
@@ -156,9 +178,17 @@ def build_mcp_server(store, repo_path_provider):
     @mcp.tool()
     def list_projects():
         """List all indexed projects with their symbol and file counts."""
-        projects = store.query_records("MATCH (p:Project) RETURN p.id as id, p.path as path")
+        try:
+            projects = store.query_records(
+                "MATCH (p:Project) RETURN p.id as id, p.path as path, p.indexed_at as indexed_at"
+            )
+        except Exception:
+            projects = store.query_records(
+                "MATCH (p:Project) RETURN p.id as id, p.path as path"
+            )
         if not projects:
             return {"available": False, "note": "No projects indexed yet. Run 'codespine analyse <path>'."}
+        now = int(time.time())
         result = []
         for p in projects:
             sym = store.query_records(
@@ -173,14 +203,22 @@ def build_mcp_server(store, repo_path_provider):
                 "MATCH (f:File) WHERE f.project_id = $pid RETURN count(f) as count",
                 {"pid": p["id"]},
             )
-            result.append(
-                {
-                    "project_id": p["id"],
-                    "path": p["path"],
-                    "symbol_count": sym[0]["count"] if sym else 0,
-                    "file_count": files[0]["count"] if files else 0,
-                }
-            )
+            indexed_at_ts = int(p.get("indexed_at") or 0)
+            age_s = now - indexed_at_ts if indexed_at_ts else None
+            entry: dict = {
+                "project_id": p["id"],
+                "path": p["path"],
+                "symbol_count": sym[0]["count"] if sym else 0,
+                "file_count": files[0]["count"] if files else 0,
+                "indexed_at_epoch": indexed_at_ts or None,
+                "index_age_seconds": age_s,
+            }
+            if age_s is not None and age_s > 3600:
+                entry["stale_warning"] = (
+                    f"Index is {age_s // 3600}h {(age_s % 3600) // 60}m old. "
+                    "Run analyse_project() or start_watch() to refresh."
+                )
+            result.append(entry)
         return {"available": True, "projects": result}
     # ------------------------------------------------------------------
@@ -371,7 +409,10 @@ def build_mcp_server(store, repo_path_provider):
         """
         name_lower = name.lower()
         project_clause = "AND f.project_id = $proj" if project else ""
-        params: dict = {"name": name, "namel": name_lower, "lim": limit}
+        # Note: only $namel and $lim are referenced in the queries below.
+        # Do NOT add extra keys here — some Kuzu versions raise "Parameter not found"
+        # when the params dict contains keys absent from the query string.
+        params: dict = {"namel": name_lower, "lim": limit}
         if project:
             params["proj"] = project
@@ -591,15 +632,40 @@ def build_mcp_server(store, repo_path_provider):
         if not os.path.isdir(abs_path):
             return {"available": False, "note": f"Path does not exist or is not a directory: {abs_path}"}
+        import tempfile as _tempfile
+        watch_err_file = _tempfile.NamedTemporaryFile(
+            mode="w", suffix=".log", prefix="codespine_watch_", delete=False
+        )
+        watch_err_path = watch_err_file.name
+        watch_err_file.close()
         proc = subprocess.Popen(
             [
                 sys.executable, "-m", "codespine.cli",
                 "watch", "--path", abs_path,
                 "--global-interval", str(global_interval),
             ],
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
+            stdout=open(watch_err_path, "w", encoding="utf-8"),
+            stderr=subprocess.STDOUT,
         )
+        # Brief health check — if the process dies within 1 s it crashed at startup.
+        time.sleep(1)
+        if proc.poll() is not None:
+            try:
+                with open(watch_err_path, "r", encoding="utf-8", errors="replace") as fh:
+                    err_tail = fh.read().strip().splitlines()[-10:]
+            except Exception:
+                err_tail = []
+            return {
+                "available": False,
+                "note": (
+                    f"Watch mode process exited immediately (code {proc.returncode}). "
+                    "Check that the path is valid and watchfiles is installed."
+                ),
+                "error_tail": err_tail,
+            }
         _watch["proc"] = proc
         _watch["path"] = abs_path
         _watch["started_at"] = time.time()

{codespine-0.3.0 → codespine-0.4.1}/codespine/search/bm25.py RENAMED Viewed

@@ -5,10 +5,26 @@ import re
 from collections import Counter
 TOKEN_RE = re.compile(r"[A-Za-z0-9_]+")
+_CAMEL_SPLIT_RE = re.compile(r"(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|(?<=\D)(?=\d)|(?<=\d)(?=\D)")
 def tokenize(text: str) -> list[str]:
-    return [t.lower() for t in TOKEN_RE.findall(text or "")]
+    """Tokenize text, splitting on camelCase and underscores in addition to whitespace.
+    'SolicitPanFetchActionCompletionEvent' → ['solicit', 'pan', 'fetch', 'action', 'completion', 'event']
+    'get_symbol_context' → ['get', 'symbol', 'context']
+    """
+    raw_tokens = TOKEN_RE.findall(text or "")
+    out: list[str] = []
+    for tok in raw_tokens:
+        # Split underscore-joined segments first, then camelCase within each
+        for segment in tok.split("_"):
+            if not segment:
+                continue
+            for part in _CAMEL_SPLIT_RE.split(segment):
+                if part:
+                    out.append(part.lower())
+    return out
 def rank_bm25(query: str, docs: list[tuple[str, str]], k1: float = 1.2, b: float = 0.75) -> list[tuple[str, float]]:

{codespine-0.3.0 → codespine-0.4.1}/codespine/search/hybrid.py RENAMED Viewed

@@ -1,18 +1,22 @@
 from __future__ import annotations
-from codespine.config import SETTINGS
 from codespine.search.bm25 import rank_bm25
 from codespine.search.fuzzy import rank_fuzzy
 from codespine.search.rrf import reciprocal_rank_fusion
 from codespine.search.vector import rank_semantic
+_LOW_CONFIDENCE_THRESHOLD = 0.05
 def hybrid_search(store, query: str, k: int = 20, project: str | None = None) -> list[dict]:
     project_clause = "AND f.project_id = $proj" if project else ""
-    params: dict = {"lim": SETTINGS.semantic_candidate_pool}
+    params: dict = {}
     if project:
         params["proj"] = project
+    # No LIMIT — load all symbols for the scoped project so that exact class names
+    # are never missing from the candidate pool (previously capped at 2000 which
+    # caused exact matches on 4000+ file projects to be silently dropped).
     recs = store.query_records(
         f"""
         MATCH (s:Symbol), (f:File)
@@ -24,7 +28,6 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
                s.embedding as embedding,
                f.path as file_path,
                f.is_test as is_test
-        LIMIT $lim
         """,
         params,
     )
@@ -32,6 +35,8 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
     if not recs:
         return []
+    query_lower = query.lower().strip()
     lexical_docs = [(r["id"], f"{r.get('name', '')} {r.get('fqname', '')}") for r in recs]
     fuzzy_docs = [(r["id"], r.get("name", "")) for r in recs]
     vector_docs = [(r["id"], r.get("embedding")) for r in recs]
@@ -40,11 +45,11 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
     fuzzy_rank = rank_fuzzy(query, fuzzy_docs)
     semantic_rank = rank_semantic(query, vector_docs)
-    fused = reciprocal_rank_fusion([bm25_rank, semantic_rank, fuzzy_rank], k=SETTINGS.rrf_k)
+    fused = reciprocal_rank_fusion([bm25_rank, semantic_rank, fuzzy_rank])
     rec_by_id = {r["id"]: r for r in recs}
     results = []
-    for doc_id, score in fused[: max(k * 3, k)]:
+    for doc_id, score in fused:
         rec = rec_by_id.get(doc_id)
         if not rec:
             continue
@@ -55,7 +60,12 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
         if rec.get("kind") in {"method", "class"}:
             multiplier *= 1.2
-        final_score = score * multiplier
+        # Exact name match: guarantee this symbol ranks first regardless of RRF score.
+        name_lower = (rec.get("name") or "").lower()
+        fqname_lower = (rec.get("fqname") or "").lower()
+        if name_lower == query_lower or fqname_lower == query_lower:
+            multiplier *= 5.0
         results.append(
             {
                 "id": doc_id,
@@ -63,14 +73,15 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
                 "name": rec.get("name"),
                 "fqname": rec.get("fqname"),
                 "file_path": rec.get("file_path"),
-                "score": final_score,
+                "score": score * multiplier,
             }
         )
     results.sort(key=lambda x: x["score"], reverse=True)
+    top_k = results[:k]
     # Attach architectural context in same response.
-    for item in results[:k]:
+    for item in top_k:
         ctx = store.query_records(
             """
             MATCH (s:Symbol {id: $sid})-[:IN_COMMUNITY]->(c:Community)
@@ -83,4 +94,15 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
         )
         item["context"] = ctx
-    return results[:k]
+    # Warn when all scores are near zero — the results are likely noise.
+    if top_k and top_k[0]["score"] < _LOW_CONFIDENCE_THRESHOLD:
+        for item in top_k:
+            item["low_confidence"] = True
+        top_k.append({
+            "note": (
+                "Low confidence results — all scores below threshold. "
+                "If searching for an exact class or method name, use find_symbol instead."
+            )
+        })
+    return top_k

codespine-0.4.1/codespine/search/vector.py ADDED Viewed

@@ -0,0 +1,155 @@
+from __future__ import annotations
+import hashlib
+import json
+import math
+import os
+import threading
+from functools import lru_cache
+from codespine.config import SETTINGS
+def _hash_vector(text: str, dim: int) -> list[float]:
+    """Deterministic fallback embedding when sentence-transformers is unavailable."""
+    vec = [0.0] * dim
+    if not text:
+        return vec
+    tokens = text.lower().split()
+    for token in tokens:
+        digest = hashlib.sha1(token.encode("utf-8")).digest()
+        idx = int.from_bytes(digest[:2], "big") % dim
+        sign = 1.0 if digest[2] % 2 == 0 else -1.0
+        vec[idx] += sign
+    norm = math.sqrt(sum(v * v for v in vec)) or 1.0
+    return [v / norm for v in vec]
+@lru_cache(maxsize=1)
+def _load_model():
+    try:
+        from sentence_transformers import SentenceTransformer
+        return SentenceTransformer(SETTINGS.embedding_model)
+    except Exception:
+        return None
+class _EmbeddingCache:
+    """Thread-safe in-memory embedding cache backed by a JSON file.
+    Replaces the previous SQLite-based cache which caused threading issues
+    (database is locked / created in wrong thread) under MCP server concurrency.
+    """
+    def __init__(self, path: str) -> None:
+        self._path = path
+        self._lock = threading.Lock()
+        self._data: dict[str, str] | None = None  # loaded lazily
+    def _ensure_loaded(self) -> None:
+        """Load cache from disk. Must be called with _lock held."""
+        if self._data is not None:
+            return
+        # Delete the old SQLite cache file left by versions < 0.4.0.
+        old_sqlite = self._path.replace(".json", ".sqlite3")
+        if os.path.isfile(old_sqlite):
+            try:
+                os.remove(old_sqlite)
+            except OSError:
+                pass
+        if os.path.isfile(self._path):
+            try:
+                with open(self._path, "r", encoding="utf-8") as f:
+                    loaded = json.load(f)
+                if isinstance(loaded, dict):
+                    self._data = loaded
+                    return
+            except Exception:
+                pass
+        self._data = {}
+    def clear(self) -> None:
+        """Wipe the in-memory cache and delete the backing file."""
+        with self._lock:
+            self._data = {}
+            try:
+                os.remove(self._path)
+            except OSError:
+                pass
+    def _flush(self) -> None:
+        """Persist cache to disk atomically. Must be called with _lock held."""
+        try:
+            dir_path = os.path.dirname(self._path)
+            if dir_path:
+                os.makedirs(dir_path, exist_ok=True)
+            tmp = self._path + ".tmp"
+            with open(tmp, "w", encoding="utf-8") as f:
+                json.dump(self._data, f, separators=(",", ":"))
+            os.replace(tmp, self._path)
+        except Exception:
+            pass
+    def get(self, key: str) -> list[float] | None:
+        with self._lock:
+            self._ensure_loaded()
+            raw = self._data.get(key)  # type: ignore[union-attr]
+        if raw is None:
+            return None
+        try:
+            return [float(x) for x in json.loads(raw)]
+        except Exception:
+            return None
+    def set(self, key: str, vec: list[float]) -> None:
+        with self._lock:
+            self._ensure_loaded()
+            self._data[key] = json.dumps(vec)  # type: ignore[index]
+            self._flush()
+_CACHE = _EmbeddingCache(SETTINGS.embedding_cache_path)
+def _cache_key(text: str, dim: int) -> str:
+    return hashlib.sha1(f"{SETTINGS.embedding_model}|{dim}|{text}".encode("utf-8")).hexdigest()
+def embed_text(text: str, dim: int | None = None) -> list[float]:
+    dim = dim or SETTINGS.vector_dim
+    key = _cache_key(text or "", dim)
+    cached = _CACHE.get(key)
+    if cached is not None:
+        return cached
+    model = _load_model()
+    if model is None:
+        vec = _hash_vector(text, dim)
+    else:
+        vec = [float(x) for x in model.encode([text or ""], normalize_embeddings=True)[0]]
+    _CACHE.set(key, vec)
+    return vec
+def cosine_similarity(vec_a: list[float], vec_b: list[float]) -> float:
+    if not vec_a or not vec_b:
+        return 0.0
+    n = min(len(vec_a), len(vec_b))
+    dot = sum(vec_a[i] * vec_b[i] for i in range(n))
+    na = math.sqrt(sum(vec_a[i] * vec_a[i] for i in range(n))) or 1.0
+    nb = math.sqrt(sum(vec_b[i] * vec_b[i] for i in range(n))) or 1.0
+    return dot / (na * nb)
+def rank_semantic(query: str, docs: list[tuple[str, list[float] | None]]) -> list[tuple[str, float]]:
+    qv = embed_text(query)
+    ranked: list[tuple[str, float]] = []
+    for doc_id, emb in docs:
+        if emb is None:
+            continue
+        ranked.append((doc_id, cosine_similarity(qv, emb)))
+    ranked.sort(key=lambda x: x[1], reverse=True)
+    return ranked

{codespine-0.3.0 → codespine-0.4.1}/codespine.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codespine
-Version: 0.3.0
+Version: 0.4.1
 Summary: Local Java code intelligence indexer backed by a graph database
 Author: CodeSpine contributors
 License: MIT License

{codespine-0.3.0 → codespine-0.4.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "codespine"
-version = "0.3.0"
+version = "0.4.1"
 description = "Local Java code intelligence indexer backed by a graph database"
 readme = "README.md"
 requires-python = ">=3.10"

codespine-0.3.0/codespine/search/vector.py DELETED Viewed

@@ -1,122 +0,0 @@
-from __future__ import annotations
-import hashlib
-import math
-import sqlite3
-from functools import lru_cache
-from codespine.config import SETTINGS
-def _hash_vector(text: str, dim: int) -> list[float]:
-    """Deterministic fallback embedding when sentence-transformers is unavailable."""
-    vec = [0.0] * dim
-    if not text:
-        return vec
-    tokens = text.lower().split()
-    for token in tokens:
-        digest = hashlib.sha1(token.encode("utf-8")).digest()
-        idx = int.from_bytes(digest[:2], "big") % dim
-        sign = 1.0 if digest[2] % 2 == 0 else -1.0
-        vec[idx] += sign
-    norm = math.sqrt(sum(v * v for v in vec)) or 1.0
-    return [v / norm for v in vec]
-@lru_cache(maxsize=1)
-def _load_model():
-    try:
-        from sentence_transformers import SentenceTransformer
-        return SentenceTransformer(SETTINGS.embedding_model)
-    except Exception:
-        return None
-@lru_cache(maxsize=1)
-def _embedding_cache_conn():
-    path = SETTINGS.embedding_cache_db
-    try:
-        os_dir = path.rsplit("/", 1)[0] if "/" in path else ""
-        if os_dir:
-            import os
-            os.makedirs(os_dir, exist_ok=True)
-        conn = sqlite3.connect(path, check_same_thread=False)
-    except Exception:
-        conn = sqlite3.connect("/tmp/.codespine_embedding_cache.sqlite3", check_same_thread=False)
-    conn.execute(
-        """
-        CREATE TABLE IF NOT EXISTS embedding_cache (
-            cache_key TEXT PRIMARY KEY,
-            dim INTEGER NOT NULL,
-            vector_json TEXT NOT NULL
-        )
-        """
-    )
-    return conn
-def _cache_key(text: str, dim: int) -> str:
-    return hashlib.sha1(f"{SETTINGS.embedding_model}|{dim}|{text}".encode("utf-8")).hexdigest()
-def _get_cached_embedding(text: str, dim: int) -> list[float] | None:
-    key = _cache_key(text, dim)
-    conn = _embedding_cache_conn()
-    row = conn.execute("SELECT vector_json FROM embedding_cache WHERE cache_key = ? AND dim = ?", (key, dim)).fetchone()
-    if not row:
-        return None
-    import json
-    return [float(x) for x in json.loads(row[0])]
-def _set_cached_embedding(text: str, dim: int, vec: list[float]) -> None:
-    key = _cache_key(text, dim)
-    conn = _embedding_cache_conn()
-    import json
-    conn.execute(
-        "INSERT OR REPLACE INTO embedding_cache(cache_key, dim, vector_json) VALUES (?, ?, ?)",
-        (key, dim, json.dumps(vec)),
-    )
-    conn.commit()
-def embed_text(text: str, dim: int | None = None) -> list[float]:
-    dim = dim or SETTINGS.vector_dim
-    cached = _get_cached_embedding(text or "", dim)
-    if cached is not None:
-        return cached
-    model = _load_model()
-    if model is None:
-        vec = _hash_vector(text, dim)
-        _set_cached_embedding(text or "", dim, vec)
-        return vec
-    vec = [float(x) for x in model.encode([text or ""], normalize_embeddings=True)[0]]
-    _set_cached_embedding(text or "", dim, vec)
-    return vec
-def cosine_similarity(vec_a: list[float], vec_b: list[float]) -> float:
-    if not vec_a or not vec_b:
-        return 0.0
-    n = min(len(vec_a), len(vec_b))
-    dot = sum(vec_a[i] * vec_b[i] for i in range(n))
-    na = math.sqrt(sum(vec_a[i] * vec_a[i] for i in range(n))) or 1.0
-    nb = math.sqrt(sum(vec_b[i] * vec_b[i] for i in range(n))) or 1.0
-    return dot / (na * nb)
-def rank_semantic(query: str, docs: list[tuple[str, list[float] | None]]) -> list[tuple[str, float]]:
-    qv = embed_text(query)
-    ranked: list[tuple[str, float]] = []
-    for doc_id, emb in docs:
-        if emb is None:
-            continue
-        ranked.append((doc_id, cosine_similarity(qv, emb)))
-    ranked.sort(key=lambda x: x[1], reverse=True)
-    return ranked

{codespine-0.3.0 → codespine-0.4.1}/LICENSE RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/README.md RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/analysis/__init__.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/analysis/community.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/analysis/context.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/analysis/coupling.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/analysis/flow.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/analysis/impact.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/cli.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/db/__init__.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/diff/__init__.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/diff/branch_diff.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/indexer/__init__.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/indexer/call_resolver.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/indexer/java_parser.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/indexer/symbol_builder.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/mcp/__init__.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/noise/__init__.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/noise/blocklist.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/search/__init__.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/search/fuzzy.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/search/rrf.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/watch/__init__.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine/watch/watcher.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine.egg-info/entry_points.txt RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine.egg-info/requires.txt RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/codespine.egg-info/top_level.txt RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/gindex.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/setup.cfg RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/tests/test_branch_diff_normalize.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/tests/test_call_resolver.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/tests/test_index_and_hybrid.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/tests/test_java_parser.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/tests/test_multimodule_index.py RENAMED Viewed

File without changes

{codespine-0.3.0 → codespine-0.4.1}/tests/test_search_ranking.py RENAMED Viewed

File without changes

codespine 0.3.0__tar.gz → 0.4.1__tar.gz

codespine 0.3.0__tar.gz → 0.4.1__tar.gz