codespine 0.1.8__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.1.8 → codespine-0.1.9}/PKG-INFO +1 -1
- {codespine-0.1.8 → codespine-0.1.9}/codespine/__init__.py +1 -1
- {codespine-0.1.8 → codespine-0.1.9}/codespine/cli.py +4 -3
- {codespine-0.1.8 → codespine-0.1.9}/codespine/config.py +1 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/indexer/engine.py +132 -20
- {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.1.8 → codespine-0.1.9}/pyproject.toml +1 -1
- {codespine-0.1.8 → codespine-0.1.9}/tests/test_index_and_hybrid.py +13 -0
- {codespine-0.1.8 → codespine-0.1.9}/LICENSE +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/README.md +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/community.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/context.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/coupling.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/flow.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/impact.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/db/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/db/schema.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/db/store.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/diff/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/indexer/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/mcp/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/mcp/server.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/noise/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/noise/blocklist.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/search/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/search/bm25.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/search/fuzzy.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/search/hybrid.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/search/rrf.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/search/vector.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/watch/__init__.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine/watch/watcher.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/gindex.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/setup.cfg +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/tests/test_call_resolver.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/tests/test_java_parser.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/tests/test_multimodule_index.py +0 -0
- {codespine-0.1.8 → codespine-0.1.9}/tests/test_search_ranking.py +0 -0
|
@@ -77,7 +77,7 @@ def main() -> None:
|
|
|
77
77
|
|
|
78
78
|
@main.command()
|
|
79
79
|
@click.argument("path", type=click.Path(exists=True))
|
|
80
|
-
@click.option("--full/--incremental", default=
|
|
80
|
+
@click.option("--full/--incremental", default=False, show_default=True)
|
|
81
81
|
@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
|
|
82
82
|
def analyse(path: str, full: bool, deep: bool) -> None:
|
|
83
83
|
"""Index a local Java project."""
|
|
@@ -89,7 +89,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
|
|
|
89
89
|
abs_path = os.path.abspath(path)
|
|
90
90
|
store = GraphStore(read_only=False)
|
|
91
91
|
indexer = JavaIndexer(store)
|
|
92
|
-
parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0}
|
|
92
|
+
parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0, "printed_zero": False}
|
|
93
93
|
call_state = {"shown": False, "count": 0, "last_ts": 0.0}
|
|
94
94
|
|
|
95
95
|
def _progress(event: str, payload: dict) -> None:
|
|
@@ -105,6 +105,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
|
|
|
105
105
|
_phase("Index mode...", f"{mode} ({to_index} files to index, {deleted} deleted)")
|
|
106
106
|
if to_index == 0:
|
|
107
107
|
_phase("Parsing code...", "0/0")
|
|
108
|
+
parse_state["printed_zero"] = True
|
|
108
109
|
return
|
|
109
110
|
if event == "parse_progress":
|
|
110
111
|
indexed = int(payload.get("indexed", 0))
|
|
@@ -149,7 +150,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
|
|
|
149
150
|
result = indexer.index_project(abs_path, full=full, progress=_progress)
|
|
150
151
|
if parse_state["shown"]:
|
|
151
152
|
click.echo()
|
|
152
|
-
if parse_state["total"] == 0:
|
|
153
|
+
if parse_state["total"] == 0 and not parse_state["printed_zero"]:
|
|
153
154
|
_phase("Parsing code...", "0/0")
|
|
154
155
|
elif parse_state["indexed"] < parse_state["total"]:
|
|
155
156
|
_phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
|
|
@@ -8,6 +8,7 @@ class Settings:
|
|
|
8
8
|
pid_file: str = os.path.expanduser("~/.codespine.pid")
|
|
9
9
|
log_file: str = os.path.expanduser("~/.codespine.log")
|
|
10
10
|
embedding_cache_db: str = os.path.expanduser("~/.codespine_embedding_cache.sqlite3")
|
|
11
|
+
index_meta_dir: str = os.path.expanduser("~/.codespine_index_meta")
|
|
11
12
|
embedding_model: str = "BAAI/bge-small-en-v1.5"
|
|
12
13
|
vector_dim: int = 384
|
|
13
14
|
rrf_k: int = 60
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
import os
|
|
4
5
|
from dataclasses import dataclass
|
|
5
6
|
from typing import Callable
|
|
6
7
|
|
|
8
|
+
from codespine.config import SETTINGS
|
|
7
9
|
from codespine.indexer.call_resolver import resolve_calls
|
|
8
10
|
from codespine.indexer.java_parser import parse_java_source
|
|
9
11
|
from codespine.indexer.symbol_builder import class_id, digest_bytes, file_id, method_id, symbol_id
|
|
@@ -37,21 +39,24 @@ class JavaIndexer:
|
|
|
37
39
|
current_files = self._collect_java_files(root_path)
|
|
38
40
|
self._emit(progress, "scan_done", files_found=len(current_files))
|
|
39
41
|
db_files = self.store.project_file_hashes(project_id) if not full else {}
|
|
40
|
-
|
|
42
|
+
meta_cache = self._load_file_meta_cache(project_id)
|
|
43
|
+
current_file_ids = {
|
|
44
|
+
file_id(project_id, os.path.relpath(fp, root_path))
|
|
45
|
+
for fp in current_files
|
|
46
|
+
}
|
|
41
47
|
|
|
42
48
|
if full:
|
|
43
49
|
to_reindex = current_files
|
|
44
50
|
deleted_file_ids = []
|
|
51
|
+
meta_cache = {}
|
|
45
52
|
else:
|
|
46
|
-
to_reindex =
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if old != digest:
|
|
54
|
-
to_reindex.append(file_path)
|
|
53
|
+
to_reindex, deleted_file_ids, meta_cache = self._plan_incremental(
|
|
54
|
+
project_id,
|
|
55
|
+
root_path,
|
|
56
|
+
current_files,
|
|
57
|
+
db_files,
|
|
58
|
+
meta_cache,
|
|
59
|
+
)
|
|
55
60
|
self._emit(
|
|
56
61
|
progress,
|
|
57
62
|
"plan_done",
|
|
@@ -59,6 +64,21 @@ class JavaIndexer:
|
|
|
59
64
|
deleted_files=len(deleted_file_ids),
|
|
60
65
|
mode="full" if full else "incremental",
|
|
61
66
|
)
|
|
67
|
+
if not full and not to_reindex and not deleted_file_ids:
|
|
68
|
+
self._prune_meta_cache(meta_cache, current_file_ids)
|
|
69
|
+
self._save_file_meta_cache(project_id, meta_cache)
|
|
70
|
+
self._emit(progress, "resolve_calls_done", calls_resolved=0)
|
|
71
|
+
self._emit(progress, "resolve_types_done", type_relationships=0)
|
|
72
|
+
return IndexResult(
|
|
73
|
+
project_id=project_id,
|
|
74
|
+
files_found=len(current_files),
|
|
75
|
+
files_indexed=0,
|
|
76
|
+
classes_indexed=0,
|
|
77
|
+
methods_indexed=0,
|
|
78
|
+
calls_resolved=0,
|
|
79
|
+
type_relationships=0,
|
|
80
|
+
embeddings_generated=0,
|
|
81
|
+
)
|
|
62
82
|
|
|
63
83
|
files_indexed = 0
|
|
64
84
|
classes_indexed = 0
|
|
@@ -92,10 +112,12 @@ class JavaIndexer:
|
|
|
92
112
|
|
|
93
113
|
parsed = parse_java_source(source)
|
|
94
114
|
f_id = file_id(project_id, rel_path)
|
|
115
|
+
file_digest = digest_bytes(source)
|
|
95
116
|
if not full:
|
|
96
117
|
# Drop old symbols/methods/classes for changed files before reinserting.
|
|
97
118
|
self.store.clear_file(f_id)
|
|
98
|
-
self.store.upsert_file(f_id, file_path, project_id, is_test,
|
|
119
|
+
self.store.upsert_file(f_id, file_path, project_id, is_test, file_digest)
|
|
120
|
+
self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source))
|
|
99
121
|
|
|
100
122
|
for cls in parsed.classes:
|
|
101
123
|
c_id = class_id(cls.fqcn, scope)
|
|
@@ -199,6 +221,9 @@ class JavaIndexer:
|
|
|
199
221
|
)
|
|
200
222
|
self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
|
|
201
223
|
|
|
224
|
+
self._prune_meta_cache(meta_cache, current_file_ids)
|
|
225
|
+
self._save_file_meta_cache(project_id, meta_cache)
|
|
226
|
+
|
|
202
227
|
return IndexResult(
|
|
203
228
|
project_id=project_id,
|
|
204
229
|
files_found=len(current_files),
|
|
@@ -224,15 +249,52 @@ class JavaIndexer:
|
|
|
224
249
|
out.append(os.path.join(root, filename))
|
|
225
250
|
return out
|
|
226
251
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
252
|
+
def _plan_incremental(
|
|
253
|
+
self,
|
|
254
|
+
project_id: str,
|
|
255
|
+
root_path: str,
|
|
256
|
+
files: list[str],
|
|
257
|
+
db_files: dict[str, dict[str, str]],
|
|
258
|
+
meta_cache: dict[str, dict],
|
|
259
|
+
) -> tuple[list[str], list[str], dict[str, dict]]:
|
|
260
|
+
current_ids = {
|
|
261
|
+
file_id(project_id, os.path.relpath(fp, root_path))
|
|
262
|
+
for fp in files
|
|
263
|
+
}
|
|
264
|
+
deleted_file_ids = [fid for fid in db_files if fid not in current_ids]
|
|
265
|
+
to_reindex: list[str] = []
|
|
266
|
+
|
|
267
|
+
for file_path in files:
|
|
268
|
+
rel_path = os.path.relpath(file_path, root_path)
|
|
269
|
+
fid = file_id(project_id, rel_path)
|
|
270
|
+
old_hash = db_files.get(fid, {}).get("hash")
|
|
271
|
+
try:
|
|
272
|
+
st = os.stat(file_path)
|
|
273
|
+
except OSError:
|
|
274
|
+
continue
|
|
275
|
+
mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
|
|
276
|
+
size = int(st.st_size)
|
|
277
|
+
cached = meta_cache.get(fid, {})
|
|
278
|
+
|
|
279
|
+
if (
|
|
280
|
+
cached
|
|
281
|
+
and int(cached.get("mtime_ns", -1)) == mtime_ns
|
|
282
|
+
and int(cached.get("size", -1)) == size
|
|
283
|
+
and cached.get("hash")
|
|
284
|
+
and cached.get("hash") == old_hash
|
|
285
|
+
):
|
|
286
|
+
continue
|
|
287
|
+
|
|
288
|
+
with open(file_path, "rb") as f:
|
|
289
|
+
digest = digest_bytes(f.read())
|
|
290
|
+
meta_cache[fid] = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
|
|
291
|
+
if old_hash != digest:
|
|
292
|
+
to_reindex.append(file_path)
|
|
293
|
+
|
|
294
|
+
for fid in deleted_file_ids:
|
|
295
|
+
meta_cache.pop(fid, None)
|
|
296
|
+
|
|
297
|
+
return to_reindex, deleted_file_ids, meta_cache
|
|
236
298
|
|
|
237
299
|
def _existing_method_catalog(self, project_id: str) -> dict[str, dict]:
|
|
238
300
|
recs = self.store.query_records(
|
|
@@ -370,6 +432,56 @@ class JavaIndexer:
|
|
|
370
432
|
rel_count += 1
|
|
371
433
|
return rel_count
|
|
372
434
|
|
|
435
|
+
@staticmethod
|
|
436
|
+
def _meta_cache_path(project_id: str) -> str:
    """Return the path of the JSON metadata cache for *project_id*.

    The cache directory is created if needed. When ``SETTINGS.index_meta_dir``
    cannot be created, a ``.codespine_index_meta`` directory under the
    platform's temporary directory is used instead.

    Raises:
        OSError: If the fallback directory cannot be created either.
    """
    # Local import keeps this block self-contained; the module does not
    # otherwise need tempfile.
    import tempfile

    base = SETTINGS.index_meta_dir
    try:
        os.makedirs(base, exist_ok=True)
    except OSError:
        # gettempdir() honours TMPDIR/TEMP/TMP and resolves on every platform,
        # unlike a hard-coded "/tmp".
        # NOTE(review): this is a predictable name in a shared directory;
        # acceptable for a rebuildable cache, but confirm for multi-user hosts.
        base = os.path.join(tempfile.gettempdir(), ".codespine_index_meta")
        os.makedirs(base, exist_ok=True)
    return os.path.join(base, f"{project_id}.json")
|
|
444
|
+
|
|
445
|
+
def _load_file_meta_cache(self, project_id: str) -> dict[str, dict]:
|
|
446
|
+
path = self._meta_cache_path(project_id)
|
|
447
|
+
if not os.path.exists(path):
|
|
448
|
+
return {}
|
|
449
|
+
try:
|
|
450
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
451
|
+
data = json.load(f)
|
|
452
|
+
if isinstance(data, dict):
|
|
453
|
+
return data
|
|
454
|
+
except (OSError, ValueError, TypeError):
|
|
455
|
+
return {}
|
|
456
|
+
return {}
|
|
457
|
+
|
|
458
|
+
def _save_file_meta_cache(self, project_id: str, data: dict[str, dict]) -> None:
|
|
459
|
+
path = self._meta_cache_path(project_id)
|
|
460
|
+
tmp_path = f"{path}.tmp"
|
|
461
|
+
try:
|
|
462
|
+
with open(tmp_path, "w", encoding="utf-8") as f:
|
|
463
|
+
json.dump(data, f, separators=(",", ":"))
|
|
464
|
+
os.replace(tmp_path, path)
|
|
465
|
+
except OSError:
|
|
466
|
+
return
|
|
467
|
+
|
|
468
|
+
@staticmethod
|
|
469
|
+
def _update_meta_cache_entry(meta_cache: dict[str, dict], fid: str, file_path: str, digest: str, size_hint: int) -> None:
|
|
470
|
+
try:
|
|
471
|
+
st = os.stat(file_path)
|
|
472
|
+
mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
|
|
473
|
+
size = int(st.st_size)
|
|
474
|
+
except OSError:
|
|
475
|
+
mtime_ns = -1
|
|
476
|
+
size = size_hint
|
|
477
|
+
meta_cache[fid] = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
|
|
478
|
+
|
|
479
|
+
@staticmethod
|
|
480
|
+
def _prune_meta_cache(meta_cache: dict[str, dict], current_file_ids: set[str]) -> None:
|
|
481
|
+
for fid in list(meta_cache.keys()):
|
|
482
|
+
if fid not in current_file_ids:
|
|
483
|
+
del meta_cache[fid]
|
|
484
|
+
|
|
373
485
|
@staticmethod
|
|
374
486
|
def _emit(progress: Callable[[str, dict], None] | None, event: str, **payload: object) -> None:
|
|
375
487
|
if progress is None:
|
|
@@ -19,3 +19,16 @@ def test_index_and_hybrid_search():
|
|
|
19
19
|
results = hybrid_search(store, "process payment", k=5)
|
|
20
20
|
assert results
|
|
21
21
|
assert any("processPayment" in (r.get("fqname") or "") for r in results)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_incremental_no_change_reindexes_zero_files():
    """An incremental pass straight after a full index must index no files."""
    project_dir = str(Path(__file__).parent / "fixtures" / "java_simple")
    graph = GraphStore(read_only=False)
    java_indexer = JavaIndexer(graph)

    # Full index first, then an incremental run over the untouched fixture.
    full_run = java_indexer.index_project(project_dir, full=True)
    incremental_run = java_indexer.index_project(project_dir, full=False)

    assert full_run.files_found >= 2
    assert incremental_run.files_found == full_run.files_found
    assert incremental_run.files_indexed == 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|