codespine 0.9.7__tar.gz → 0.9.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.9.7 → codespine-0.9.9}/PKG-INFO +4 -1
- {codespine-0.9.7 → codespine-0.9.9}/codespine/__init__.py +1 -1
- codespine-0.9.9/codespine/cache/__init__.py +4 -0
- codespine-0.9.9/codespine/cache/result_cache.py +167 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/cli.py +39 -3
- {codespine-0.9.7 → codespine-0.9.9}/codespine/config.py +5 -1
- codespine-0.9.9/codespine/db/duckdb_store.py +882 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/db/store.py +1 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/mcp/server.py +230 -5
- {codespine-0.9.7 → codespine-0.9.9}/codespine/search/vector.py +21 -4
- {codespine-0.9.7 → codespine-0.9.9}/codespine/sharding/store.py +37 -17
- {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/PKG-INFO +4 -1
- {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/SOURCES.txt +5 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/requires.txt +4 -0
- {codespine-0.9.7 → codespine-0.9.9}/pyproject.toml +6 -2
- codespine-0.9.9/tests/test_duckdb_store.py +401 -0
- codespine-0.9.9/tests/test_result_cache.py +179 -0
- {codespine-0.9.7 → codespine-0.9.9}/LICENSE +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/README.md +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/__init__.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/community.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/context.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/coupling.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/crossmodule.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/flow.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/analysis/impact.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/db/__init__.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/db/schema.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/diff/__init__.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/guide.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/__init__.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/di_resolver.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/engine.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/mcp/__init__.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/noise/__init__.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/noise/blocklist.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/overlay/__init__.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/overlay/git_state.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/overlay/merge.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/overlay/store.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/search/__init__.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/search/bm25.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/search/fuzzy.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/search/hybrid.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/search/rrf.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/sharding/__init__.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/sharding/router.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/watch/__init__.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/watch/git_hook.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine/watch/watcher.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/gindex.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/setup.cfg +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_call_resolver.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_community_detection.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_deadcode.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_java_parser.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_multimodule_index.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_overlay.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_search_ranking.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_sharding.py +0 -0
- {codespine-0.9.7 → codespine-0.9.9}/tests/test_store_recovery.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codespine
|
|
3
|
-
Version: 0.9.7
|
|
3
|
+
Version: 0.9.9
|
|
4
4
|
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
5
|
Author: CodeSpine contributors
|
|
6
6
|
License: MIT License
|
|
@@ -55,11 +55,14 @@ Requires-Dist: numpy; extra == "ml"
|
|
|
55
55
|
Provides-Extra: community
|
|
56
56
|
Requires-Dist: igraph; extra == "community"
|
|
57
57
|
Requires-Dist: leidenalg; extra == "community"
|
|
58
|
+
Provides-Extra: duckdb
|
|
59
|
+
Requires-Dist: duckdb>=0.10.0; extra == "duckdb"
|
|
58
60
|
Provides-Extra: full
|
|
59
61
|
Requires-Dist: sentence-transformers; extra == "full"
|
|
60
62
|
Requires-Dist: numpy; extra == "full"
|
|
61
63
|
Requires-Dist: igraph; extra == "full"
|
|
62
64
|
Requires-Dist: leidenalg; extra == "full"
|
|
65
|
+
Requires-Dist: duckdb>=0.10.0; extra == "full"
|
|
63
66
|
Dynamic: license-file
|
|
64
67
|
|
|
65
68
|
# CodeSpine
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""LRU result cache for CodeSpine MCP tools.
|
|
2
|
+
|
|
3
|
+
Avoids recomputing expensive analyses (impact BFS, dead-code scan, community
|
|
4
|
+
lookup) when the same arguments are passed and the underlying index hasn't
|
|
5
|
+
changed since the last call.
|
|
6
|
+
|
|
7
|
+
Cache key: ``(tool_name, args_hash, snapshot_mtime_rounded)``
|
|
8
|
+
- ``tool_name`` — the MCP tool that produced the result
|
|
9
|
+
- ``args_hash`` — SHA-1 of the JSON-serialised arguments (sorted keys)
|
|
10
|
+
- ``snapshot_mtime_rounded`` — read-replica mtime rounded to 1 s, so a new
|
|
11
|
+
snapshot invalidates all cached results for the affected store
|
|
12
|
+
|
|
13
|
+
TTL: entries are evicted after ``ttl_s`` seconds (default 300 s / 5 min) even
|
|
14
|
+
if the cache isn't full, preventing stale results across long sessions.
|
|
15
|
+
|
|
16
|
+
Usage
|
|
17
|
+
-----
|
|
18
|
+
from codespine.cache.result_cache import ResultCache
|
|
19
|
+
|
|
20
|
+
_cache = ResultCache(maxsize=256, ttl_s=300.0)
|
|
21
|
+
|
|
22
|
+
key = _cache.make_key("get_impact", {"symbol": "Foo", "project": "myapp"}, mtime)
|
|
23
|
+
cached = _cache.get(key)
|
|
24
|
+
if cached is not None:
|
|
25
|
+
return cached
|
|
26
|
+
result = expensive_computation(...)
|
|
27
|
+
_cache.put(key, result)
|
|
28
|
+
return result
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
import hashlib
|
|
34
|
+
import json
|
|
35
|
+
import threading
|
|
36
|
+
import time
|
|
37
|
+
from collections import OrderedDict
|
|
38
|
+
from typing import Any
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ResultCache:
|
|
42
|
+
"""Thread-safe LRU cache for pre-serialised JSON tool results.
|
|
43
|
+
|
|
44
|
+
Parameters
|
|
45
|
+
----------
|
|
46
|
+
maxsize:
|
|
47
|
+
Maximum number of entries to keep. Oldest entry is evicted when
|
|
48
|
+
the cache is full (LRU eviction).
|
|
49
|
+
ttl_s:
|
|
50
|
+
Time-to-live in seconds. Entries older than this are treated as
|
|
51
|
+
missing even if they're still in the cache.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
def __init__(self, maxsize: int = 256, ttl_s: float = 300.0) -> None:
|
|
55
|
+
self._maxsize = maxsize
|
|
56
|
+
self._ttl = ttl_s
|
|
57
|
+
# OrderedDict preserves insertion order: oldest → newest
|
|
58
|
+
self._cache: OrderedDict[tuple, tuple[str, float]] = OrderedDict()
|
|
59
|
+
self._lock = threading.Lock()
|
|
60
|
+
self._hits = 0
|
|
61
|
+
self._misses = 0
|
|
62
|
+
|
|
63
|
+
# ------------------------------------------------------------------
|
|
64
|
+
# Key construction
|
|
65
|
+
# ------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
@staticmethod
|
|
68
|
+
def make_key(
|
|
69
|
+
tool_name: str,
|
|
70
|
+
args: dict[str, Any],
|
|
71
|
+
snapshot_mtime: float,
|
|
72
|
+
) -> tuple:
|
|
73
|
+
"""Build a cache key from tool name, arguments, and index timestamp.
|
|
74
|
+
|
|
75
|
+
Parameters
|
|
76
|
+
----------
|
|
77
|
+
tool_name:
|
|
78
|
+
Name of the MCP tool (e.g. ``"get_impact"``).
|
|
79
|
+
args:
|
|
80
|
+
Tool arguments dict (``None`` values included so missing optional
|
|
81
|
+
args don't collide with explicitly-set ones).
|
|
82
|
+
snapshot_mtime:
|
|
83
|
+
Last-modified time of the read-replica sentinel file, rounded to
|
|
84
|
+
1-second precision. A new snapshot invalidates old entries.
|
|
85
|
+
"""
|
|
86
|
+
try:
|
|
87
|
+
args_bytes = json.dumps(args, sort_keys=True, default=str).encode()
|
|
88
|
+
except Exception:
|
|
89
|
+
args_bytes = str(args).encode()
|
|
90
|
+
args_hash = hashlib.sha1(args_bytes).hexdigest()[:16]
|
|
91
|
+
return (tool_name, args_hash, round(snapshot_mtime, 0))
|
|
92
|
+
|
|
93
|
+
# ------------------------------------------------------------------
|
|
94
|
+
# Cache operations
|
|
95
|
+
# ------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
def get(self, key: tuple) -> str | None:
|
|
98
|
+
"""Return the cached value for *key*, or ``None`` if missing/expired."""
|
|
99
|
+
with self._lock:
|
|
100
|
+
if key not in self._cache:
|
|
101
|
+
self._misses += 1
|
|
102
|
+
return None
|
|
103
|
+
value, inserted_at = self._cache[key]
|
|
104
|
+
if time.monotonic() - inserted_at > self._ttl:
|
|
105
|
+
del self._cache[key]
|
|
106
|
+
self._misses += 1
|
|
107
|
+
return None
|
|
108
|
+
# Promote to most-recently-used position.
|
|
109
|
+
self._cache.move_to_end(key)
|
|
110
|
+
self._hits += 1
|
|
111
|
+
return value
|
|
112
|
+
|
|
113
|
+
def put(self, key: tuple, value: str) -> None:
|
|
114
|
+
"""Store *value* under *key*. Evicts LRU entry if cache is full."""
|
|
115
|
+
with self._lock:
|
|
116
|
+
if key in self._cache:
|
|
117
|
+
self._cache.move_to_end(key)
|
|
118
|
+
self._cache[key] = (value, time.monotonic())
|
|
119
|
+
# Evict oldest entries until we're within maxsize.
|
|
120
|
+
while len(self._cache) > self._maxsize:
|
|
121
|
+
self._cache.popitem(last=False)
|
|
122
|
+
|
|
123
|
+
def invalidate(self) -> int:
|
|
124
|
+
"""Clear the entire cache. Call after any index mutation.
|
|
125
|
+
|
|
126
|
+
Returns the number of entries evicted.
|
|
127
|
+
"""
|
|
128
|
+
with self._lock:
|
|
129
|
+
n = len(self._cache)
|
|
130
|
+
self._cache.clear()
|
|
131
|
+
return n
|
|
132
|
+
|
|
133
|
+
def invalidate_tool(self, tool_name: str) -> int:
|
|
134
|
+
"""Evict all entries for a specific tool.
|
|
135
|
+
|
|
136
|
+
Returns the number of entries removed.
|
|
137
|
+
"""
|
|
138
|
+
with self._lock:
|
|
139
|
+
keys_to_remove = [k for k in self._cache if k[0] == tool_name]
|
|
140
|
+
for k in keys_to_remove:
|
|
141
|
+
del self._cache[k]
|
|
142
|
+
return len(keys_to_remove)
|
|
143
|
+
|
|
144
|
+
# ------------------------------------------------------------------
|
|
145
|
+
# Stats / introspection
|
|
146
|
+
# ------------------------------------------------------------------
|
|
147
|
+
|
|
148
|
+
def stats(self) -> dict[str, Any]:
|
|
149
|
+
"""Return cache statistics (size, hit/miss counts, hit rate)."""
|
|
150
|
+
with self._lock:
|
|
151
|
+
total = self._hits + self._misses
|
|
152
|
+
return {
|
|
153
|
+
"size": len(self._cache),
|
|
154
|
+
"maxsize": self._maxsize,
|
|
155
|
+
"ttl_s": self._ttl,
|
|
156
|
+
"hits": self._hits,
|
|
157
|
+
"misses": self._misses,
|
|
158
|
+
"hit_rate": round(self._hits / total, 3) if total else 0.0,
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
def __repr__(self) -> str: # pragma: no cover
|
|
162
|
+
s = self.stats()
|
|
163
|
+
return (
|
|
164
|
+
f"ResultCache(size={s['size']}/{s['maxsize']}, "
|
|
165
|
+
f"hits={s['hits']}, misses={s['misses']}, "
|
|
166
|
+
f"hit_rate={s['hit_rate']:.1%})"
|
|
167
|
+
)
|
|
@@ -293,7 +293,13 @@ def main() -> None:
|
|
|
293
293
|
@main.command()
|
|
294
294
|
@click.argument("path", type=click.Path(exists=True))
|
|
295
295
|
@click.option("--full/--incremental", default=False, show_default=True)
|
|
296
|
-
@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
|
|
296
|
+
@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses (auto-on for repos ≤3 k files).")
|
|
297
|
+
@click.option(
|
|
298
|
+
"--incremental-deep",
|
|
299
|
+
is_flag=True,
|
|
300
|
+
default=False,
|
|
301
|
+
help="Force deep analysis even during incremental re-index. Useful after large refactors.",
|
|
302
|
+
)
|
|
297
303
|
@click.option(
|
|
298
304
|
"--embed/--no-embed",
|
|
299
305
|
default=True,
|
|
@@ -301,7 +307,7 @@ def main() -> None:
|
|
|
301
307
|
help="Generate vector embeddings. Uses sentence-transformers if installed (pip install codespine[ml]), otherwise falls back to hash-based vectors.",
|
|
302
308
|
)
|
|
303
309
|
@click.option("--allow-running", is_flag=True, hidden=True, help="Skip MCP running check (used by MCP analyse_project tool).")
|
|
304
|
-
def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool) -> None:
|
|
310
|
+
def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bool, allow_running: bool) -> None:
|
|
305
311
|
"""Index a local Java project (auto-detects workspace / Maven / Gradle layout).
|
|
306
312
|
|
|
307
313
|
Embeddings are generated by default. If sentence-transformers is installed
|
|
@@ -459,7 +465,7 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
459
465
|
dead: list[dict] = []
|
|
460
466
|
coupling_pairs: list[dict] = []
|
|
461
467
|
|
|
462
|
-
should_run_deep = deep or total_files_found <=
|
|
468
|
+
should_run_deep = deep or incremental_deep or total_files_found <= 3000
|
|
463
469
|
if should_run_deep:
|
|
464
470
|
comm_label = "Detecting communities..."
|
|
465
471
|
_live_phase(comm_label, "running")
|
|
@@ -1138,6 +1144,36 @@ def stop() -> None:
|
|
|
1138
1144
|
os.remove(SETTINGS.pid_file)
|
|
1139
1145
|
|
|
1140
1146
|
|
|
1147
|
+
@main.command("install-model")
|
|
1148
|
+
def install_model() -> None:
|
|
1149
|
+
"""Download and cache the sentence-transformers embedding model.
|
|
1150
|
+
|
|
1151
|
+
Requires 'pip install codespine[ml]'. The model is downloaded once and
|
|
1152
|
+
cached locally; subsequent analyse runs use the cache without network access.
|
|
1153
|
+
"""
|
|
1154
|
+
try:
|
|
1155
|
+
from sentence_transformers import SentenceTransformer # noqa: F401
|
|
1156
|
+
except ImportError:
|
|
1157
|
+
click.secho(
|
|
1158
|
+
"sentence-transformers is not installed.\n"
|
|
1159
|
+
"Run: pip install codespine[ml]",
|
|
1160
|
+
fg="red",
|
|
1161
|
+
)
|
|
1162
|
+
return
|
|
1163
|
+
|
|
1164
|
+
model_name = SETTINGS.embedding_model
|
|
1165
|
+
click.secho(f"Downloading model '{model_name}' …", fg="cyan")
|
|
1166
|
+
try:
|
|
1167
|
+
from sentence_transformers import SentenceTransformer
|
|
1168
|
+
|
|
1169
|
+
model = SentenceTransformer(model_name)
|
|
1170
|
+
# Run a tiny inference to confirm the model is usable.
|
|
1171
|
+
_ = model.encode(["hello world"])
|
|
1172
|
+
click.secho(f"✓ Model '{model_name}' ready. Semantic search is now enabled.", fg="green")
|
|
1173
|
+
except Exception as exc:
|
|
1174
|
+
click.secho(f"✗ Failed to load model: {exc}", fg="red")
|
|
1175
|
+
|
|
1176
|
+
|
|
1141
1177
|
@main.command("run-mcp", hidden=True)
|
|
1142
1178
|
def run_mcp() -> None:
|
|
1143
1179
|
"""Run MCP server in stdio mode."""
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from dataclasses import dataclass
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
@dataclass(frozen=True)
|
|
@@ -15,6 +15,10 @@ class Settings:
|
|
|
15
15
|
num_shards: int = 4
|
|
16
16
|
shards_dir: str = os.path.expanduser("~/.codespine/shards")
|
|
17
17
|
|
|
18
|
+
# Storage backend: "kuzu" (default, property-graph) or "duckdb" (relational).
|
|
19
|
+
# Override at runtime via CODESPINE_BACKEND env var before starting the process.
|
|
20
|
+
backend: str = field(default_factory=lambda: os.environ.get("CODESPINE_BACKEND", "kuzu"))
|
|
21
|
+
|
|
18
22
|
pid_file: str = os.path.expanduser("~/.codespine.pid")
|
|
19
23
|
log_file: str = os.path.expanduser("~/.codespine.log")
|
|
20
24
|
embedding_cache_path: str = os.path.expanduser("~/.codespine_embedding_cache.json")
|