codespine 0.4.0__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.4.0 → codespine-0.4.2}/PKG-INFO +1 -1
- {codespine-0.4.0 → codespine-0.4.2}/codespine/__init__.py +1 -1
- {codespine-0.4.0 → codespine-0.4.2}/codespine/analysis/flow.py +22 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/analysis/impact.py +58 -1
- {codespine-0.4.0 → codespine-0.4.2}/codespine/indexer/engine.py +4 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/mcp/server.py +23 -8
- {codespine-0.4.0 → codespine-0.4.2}/codespine/search/hybrid.py +17 -4
- {codespine-0.4.0 → codespine-0.4.2}/codespine/search/vector.py +16 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.4.0 → codespine-0.4.2}/pyproject.toml +1 -1
- {codespine-0.4.0 → codespine-0.4.2}/LICENSE +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/README.md +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/analysis/__init__.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/analysis/community.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/analysis/context.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/analysis/coupling.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/cli.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/config.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/db/__init__.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/db/schema.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/db/store.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/diff/__init__.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/indexer/__init__.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/mcp/__init__.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/noise/__init__.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/noise/blocklist.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/search/__init__.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/search/bm25.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/search/fuzzy.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/search/rrf.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/watch/__init__.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine/watch/watcher.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/gindex.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/setup.cfg +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/tests/test_call_resolver.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/tests/test_java_parser.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/tests/test_multimodule_index.py +0 -0
- {codespine-0.4.0 → codespine-0.4.2}/tests/test_search_ranking.py +0 -0
|
@@ -2,6 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from collections import defaultdict, deque
|
|
4
4
|
|
|
5
|
+
from codespine.analysis.impact import _resolve_method_metadata
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
def _entry_methods(store, project: str | None = None) -> list[str]:
|
|
7
9
|
if project:
|
|
@@ -108,4 +110,24 @@ def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int
|
|
|
108
110
|
}
|
|
109
111
|
)
|
|
110
112
|
|
|
113
|
+
# ------------------------------------------------------------------ #
|
|
114
|
+
# Enrich every node with human-readable metadata so AI agents don't
|
|
115
|
+
# need a second round-trip to resolve raw method ID hashes.
|
|
116
|
+
# Collect all unique IDs across all flows, resolve in one bulk query.
|
|
117
|
+
# ------------------------------------------------------------------ #
|
|
118
|
+
all_ids = list({node["symbol"] for flow in flows for node in flow["nodes"]})
|
|
119
|
+
meta = _resolve_method_metadata(store, all_ids)
|
|
120
|
+
|
|
121
|
+
for flow in flows:
|
|
122
|
+
entry_m = meta.get(flow["entry"], {})
|
|
123
|
+
flow["entry_name"] = entry_m.get("name")
|
|
124
|
+
flow["entry_fqname"] = entry_m.get("fqname")
|
|
125
|
+
flow["entry_file_path"] = entry_m.get("file_path")
|
|
126
|
+
for node in flow["nodes"]:
|
|
127
|
+
m = meta.get(node["symbol"], {})
|
|
128
|
+
node["name"] = m.get("name")
|
|
129
|
+
node["fqname"] = m.get("fqname")
|
|
130
|
+
node["file_path"] = m.get("file_path")
|
|
131
|
+
node["project_id"] = m.get("project_id")
|
|
132
|
+
|
|
111
133
|
return flows
|
|
@@ -21,6 +21,27 @@ def _resolve_symbol_ids(store, symbol_query: str, project: str | None = None) ->
|
|
|
21
21
|
return [r["id"] for r in recs]
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
def _resolve_method_metadata(store, method_ids: list[str]) -> dict[str, dict]:
|
|
25
|
+
"""Bulk-resolve method IDs to human-readable metadata in a single query.
|
|
26
|
+
|
|
27
|
+
Returns a dict keyed by method ID with fields:
|
|
28
|
+
name, fqname (= m.signature), class_fqcn, file_path, project_id.
|
|
29
|
+
Any ID not found in the graph is silently omitted.
|
|
30
|
+
"""
|
|
31
|
+
if not method_ids:
|
|
32
|
+
return {}
|
|
33
|
+
recs = store.query_records(
|
|
34
|
+
"""
|
|
35
|
+
MATCH (m:Method), (c:Class), (f:File)
|
|
36
|
+
WHERE m.id IN $ids AND m.class_id = c.id AND c.file_id = f.id
|
|
37
|
+
RETURN m.id as id, m.name as name, m.signature as fqname,
|
|
38
|
+
c.fqcn as class_fqcn, f.path as file_path, f.project_id as project_id
|
|
39
|
+
""",
|
|
40
|
+
{"ids": method_ids},
|
|
41
|
+
)
|
|
42
|
+
return {r["id"]: r for r in recs}
|
|
43
|
+
|
|
44
|
+
|
|
24
45
|
def analyze_impact(store, symbol_query: str, max_depth: int = 4, project: str | None = None) -> dict:
|
|
25
46
|
target_symbol_ids = _resolve_symbol_ids(store, symbol_query, project=project)
|
|
26
47
|
if not target_symbol_ids:
|
|
@@ -85,9 +106,45 @@ def analyze_impact(store, symbol_query: str, max_depth: int = 4, project: str |
|
|
|
85
106
|
depth_groups["3+"].append(item)
|
|
86
107
|
queue.append((src, next_depth, path + [src]))
|
|
87
108
|
|
|
109
|
+
# ------------------------------------------------------------------ #
|
|
110
|
+
# Enrich every caller entry with human-readable metadata so AI agents
|
|
111
|
+
# don't need a second round-trip to resolve raw ID hashes.
|
|
112
|
+
# A single bulk query resolves all collected method IDs at once.
|
|
113
|
+
# ------------------------------------------------------------------ #
|
|
114
|
+
all_caller_ids = [item["symbol"] for items in depth_groups.values() for item in items]
|
|
115
|
+
meta = _resolve_method_metadata(store, all_caller_ids)
|
|
116
|
+
|
|
117
|
+
for items in depth_groups.values():
|
|
118
|
+
for item in items:
|
|
119
|
+
m = meta.get(item["symbol"], {})
|
|
120
|
+
item["name"] = m.get("name")
|
|
121
|
+
item["fqname"] = m.get("fqname")
|
|
122
|
+
item["file_path"] = m.get("file_path")
|
|
123
|
+
item["project_id"] = m.get("project_id")
|
|
124
|
+
item["class_fqcn"] = m.get("class_fqcn")
|
|
125
|
+
# Convert the call-path from a list of raw IDs to human-readable names
|
|
126
|
+
# so an agent can read the chain without additional lookups.
|
|
127
|
+
item["path"] = [
|
|
128
|
+
meta.get(pid, {}).get("name") or pid
|
|
129
|
+
for pid in item["path"]
|
|
130
|
+
]
|
|
131
|
+
|
|
132
|
+
# Also enrich the targets_resolved list for context
|
|
133
|
+
target_meta = _resolve_method_metadata(store, target_method_ids)
|
|
134
|
+
resolved_targets = [
|
|
135
|
+
{
|
|
136
|
+
"id": mid,
|
|
137
|
+
"name": target_meta.get(mid, {}).get("name"),
|
|
138
|
+
"fqname": target_meta.get(mid, {}).get("fqname"),
|
|
139
|
+
"file_path": target_meta.get(mid, {}).get("file_path"),
|
|
140
|
+
"class_fqcn": target_meta.get(mid, {}).get("class_fqcn"),
|
|
141
|
+
}
|
|
142
|
+
for mid in target_method_ids
|
|
143
|
+
]
|
|
144
|
+
|
|
88
145
|
return {
|
|
89
146
|
"target": symbol_query,
|
|
90
|
-
"targets_resolved":
|
|
147
|
+
"targets_resolved": resolved_targets,
|
|
91
148
|
"depth_groups": depth_groups,
|
|
92
149
|
"summary": {
|
|
93
150
|
"direct": len(depth_groups["1"]),
|
|
@@ -167,6 +167,10 @@ class JavaIndexer:
|
|
|
167
167
|
to_reindex = current_files
|
|
168
168
|
deleted_file_ids = []
|
|
169
169
|
meta_cache = {}
|
|
170
|
+
# Wipe the embedding cache on a full re-index so stale embeddings
|
|
171
|
+
# (including those from the old SQLite format) are not carried over.
|
|
172
|
+
from codespine.search.vector import _CACHE as _embed_cache
|
|
173
|
+
_embed_cache.clear()
|
|
170
174
|
else:
|
|
171
175
|
to_reindex, deleted_file_ids, meta_cache = self._plan_incremental(
|
|
172
176
|
project_id,
|
|
@@ -71,9 +71,16 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
71
71
|
Call this before other tools so you know what's ready without trial-and-error.
|
|
72
72
|
Features marked false may need 'codespine analyse --deep' or optional dependencies.
|
|
73
73
|
"""
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
try:
|
|
75
|
+
projects = store.query_records(
|
|
76
|
+
"MATCH (p:Project) RETURN p.id as id, p.path as path, p.indexed_at as indexed_at"
|
|
77
|
+
)
|
|
78
|
+
except Exception:
|
|
79
|
+
# Old DB schema (pre-0.4.0) doesn't have indexed_at column yet.
|
|
80
|
+
# Falls back gracefully; column is added next time 'analyse' runs.
|
|
81
|
+
projects = store.query_records(
|
|
82
|
+
"MATCH (p:Project) RETURN p.id as id, p.path as path"
|
|
83
|
+
)
|
|
77
84
|
sym_q = store.query_records("MATCH (s:Symbol) RETURN count(s) as count")
|
|
78
85
|
comm_q = store.query_records("MATCH (c:Community) RETURN count(c) as count")
|
|
79
86
|
flow_q = store.query_records("MATCH (f:Flow) RETURN count(f) as count")
|
|
@@ -171,9 +178,14 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
171
178
|
@mcp.tool()
|
|
172
179
|
def list_projects():
|
|
173
180
|
"""List all indexed projects with their symbol and file counts."""
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
181
|
+
try:
|
|
182
|
+
projects = store.query_records(
|
|
183
|
+
"MATCH (p:Project) RETURN p.id as id, p.path as path, p.indexed_at as indexed_at"
|
|
184
|
+
)
|
|
185
|
+
except Exception:
|
|
186
|
+
projects = store.query_records(
|
|
187
|
+
"MATCH (p:Project) RETURN p.id as id, p.path as path"
|
|
188
|
+
)
|
|
177
189
|
if not projects:
|
|
178
190
|
return {"available": False, "note": "No projects indexed yet. Run 'codespine analyse <path>'."}
|
|
179
191
|
now = int(time.time())
|
|
@@ -261,7 +273,10 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
261
273
|
@mcp.tool()
|
|
262
274
|
def get_symbol_community(symbol: str):
|
|
263
275
|
"""Return the architectural community cluster a symbol belongs to."""
|
|
264
|
-
detect_communities(
|
|
276
|
+
# NOTE: do NOT call detect_communities() here — the MCP server opens the
|
|
277
|
+
# graph DB read-only, so any write attempt raises "Cannot execute write
|
|
278
|
+
# operations in a read-only database!". Communities are computed once
|
|
279
|
+
# during 'codespine analyse --deep' and persisted; we just read them.
|
|
265
280
|
result = symbol_community(store, symbol)
|
|
266
281
|
if not result.get("matches"):
|
|
267
282
|
return {"available": False, "note": "No community data yet. Run 'codespine analyse --deep'."}
|
|
@@ -495,7 +510,7 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
495
510
|
MATCH (c:Class), (f:File)
|
|
496
511
|
WHERE c.file_id = f.id {project_clause}
|
|
497
512
|
RETURN c.package as package, f.project_id as project_id, count(c) as class_count
|
|
498
|
-
ORDER BY
|
|
513
|
+
ORDER BY project_id, package
|
|
499
514
|
LIMIT $lim
|
|
500
515
|
""",
|
|
501
516
|
params,
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from codespine.search.bm25 import rank_bm25
|
|
4
4
|
from codespine.search.fuzzy import rank_fuzzy
|
|
5
5
|
from codespine.search.rrf import reciprocal_rank_fusion
|
|
6
|
-
from codespine.search.vector import rank_semantic
|
|
6
|
+
from codespine.search.vector import _load_model, rank_semantic
|
|
7
7
|
|
|
8
8
|
_LOW_CONFIDENCE_THRESHOLD = 0.05
|
|
9
9
|
|
|
@@ -95,14 +95,27 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
|
|
|
95
95
|
item["context"] = ctx
|
|
96
96
|
|
|
97
97
|
# Warn when all scores are near zero — the results are likely noise.
|
|
98
|
+
# The threshold 0.05 is calibrated for embedding mode. Without sentence-
|
|
99
|
+
# transformers the hash-fallback vector and BM25/fuzzy signals produce lower
|
|
100
|
+
# RRF scores, so the warning fires on nearly every query. Make the note
|
|
101
|
+
# context-aware so the agent understands whether this is a calibration issue
|
|
102
|
+
# or a genuine low-relevance result.
|
|
98
103
|
if top_k and top_k[0]["score"] < _LOW_CONFIDENCE_THRESHOLD:
|
|
104
|
+
has_model = _load_model() is not None
|
|
99
105
|
for item in top_k:
|
|
100
106
|
item["low_confidence"] = True
|
|
101
|
-
|
|
102
|
-
|
|
107
|
+
if has_model:
|
|
108
|
+
note = (
|
|
103
109
|
"Low confidence results — all scores below threshold. "
|
|
104
110
|
"If searching for an exact class or method name, use find_symbol instead."
|
|
105
111
|
)
|
|
106
|
-
|
|
112
|
+
else:
|
|
113
|
+
note = (
|
|
114
|
+
"Low confidence results — scores are lower in BM25/fuzzy-only mode "
|
|
115
|
+
"(no embedding model detected). "
|
|
116
|
+
"This is expected without 'codespine[ml]' installed; results may still be correct. "
|
|
117
|
+
"For exact name matches, use find_symbol instead."
|
|
118
|
+
)
|
|
119
|
+
top_k.append({"note": note})
|
|
107
120
|
|
|
108
121
|
return top_k
|
|
@@ -51,6 +51,13 @@ class _EmbeddingCache:
|
|
|
51
51
|
"""Load cache from disk. Must be called with _lock held."""
|
|
52
52
|
if self._data is not None:
|
|
53
53
|
return
|
|
54
|
+
# Delete the old SQLite cache file left by versions < 0.4.0.
|
|
55
|
+
old_sqlite = self._path.replace(".json", ".sqlite3")
|
|
56
|
+
if os.path.isfile(old_sqlite):
|
|
57
|
+
try:
|
|
58
|
+
os.remove(old_sqlite)
|
|
59
|
+
except OSError:
|
|
60
|
+
pass
|
|
54
61
|
if os.path.isfile(self._path):
|
|
55
62
|
try:
|
|
56
63
|
with open(self._path, "r", encoding="utf-8") as f:
|
|
@@ -62,6 +69,15 @@ class _EmbeddingCache:
|
|
|
62
69
|
pass
|
|
63
70
|
self._data = {}
|
|
64
71
|
|
|
72
|
+
def clear(self) -> None:
|
|
73
|
+
"""Wipe the in-memory cache and delete the backing file."""
|
|
74
|
+
with self._lock:
|
|
75
|
+
self._data = {}
|
|
76
|
+
try:
|
|
77
|
+
os.remove(self._path)
|
|
78
|
+
except OSError:
|
|
79
|
+
pass
|
|
80
|
+
|
|
65
81
|
def _flush(self) -> None:
|
|
66
82
|
"""Persist cache to disk atomically. Must be called with _lock held."""
|
|
67
83
|
try:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|