codespine 0.1.7__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.1.7 → codespine-0.1.9}/PKG-INFO +1 -1
- {codespine-0.1.7 → codespine-0.1.9}/codespine/__init__.py +1 -1
- {codespine-0.1.7 → codespine-0.1.9}/codespine/cli.py +4 -3
- {codespine-0.1.7 → codespine-0.1.9}/codespine/config.py +1 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/indexer/call_resolver.py +13 -6
- {codespine-0.1.7 → codespine-0.1.9}/codespine/indexer/engine.py +210 -50
- codespine-0.1.9/codespine/indexer/symbol_builder.py +35 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/SOURCES.txt +1 -0
- {codespine-0.1.7 → codespine-0.1.9}/pyproject.toml +1 -1
- codespine-0.1.9/tests/test_call_resolver.py +43 -0
- {codespine-0.1.7 → codespine-0.1.9}/tests/test_index_and_hybrid.py +13 -0
- codespine-0.1.9/tests/test_multimodule_index.py +55 -0
- codespine-0.1.7/codespine/indexer/symbol_builder.py +0 -32
- codespine-0.1.7/tests/test_call_resolver.py +0 -30
- {codespine-0.1.7 → codespine-0.1.9}/LICENSE +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/README.md +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/__init__.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/community.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/context.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/coupling.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/flow.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/impact.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/db/__init__.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/db/schema.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/db/store.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/diff/__init__.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/indexer/__init__.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/mcp/__init__.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/mcp/server.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/noise/__init__.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/noise/blocklist.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/search/__init__.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/search/bm25.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/search/fuzzy.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/search/hybrid.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/search/rrf.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/search/vector.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/watch/__init__.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine/watch/watcher.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/gindex.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/setup.cfg +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/tests/test_java_parser.py +0 -0
- {codespine-0.1.7 → codespine-0.1.9}/tests/test_search_ranking.py +0 -0
|
@@ -77,7 +77,7 @@ def main() -> None:
|
|
|
77
77
|
|
|
78
78
|
@main.command()
|
|
79
79
|
@click.argument("path", type=click.Path(exists=True))
|
|
80
|
-
@click.option("--full/--incremental", default=
|
|
80
|
+
@click.option("--full/--incremental", default=False, show_default=True)
|
|
81
81
|
@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
|
|
82
82
|
def analyse(path: str, full: bool, deep: bool) -> None:
|
|
83
83
|
"""Index a local Java project."""
|
|
@@ -89,7 +89,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
|
|
|
89
89
|
abs_path = os.path.abspath(path)
|
|
90
90
|
store = GraphStore(read_only=False)
|
|
91
91
|
indexer = JavaIndexer(store)
|
|
92
|
-
parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0}
|
|
92
|
+
parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0, "printed_zero": False}
|
|
93
93
|
call_state = {"shown": False, "count": 0, "last_ts": 0.0}
|
|
94
94
|
|
|
95
95
|
def _progress(event: str, payload: dict) -> None:
|
|
@@ -105,6 +105,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
|
|
|
105
105
|
_phase("Index mode...", f"{mode} ({to_index} files to index, {deleted} deleted)")
|
|
106
106
|
if to_index == 0:
|
|
107
107
|
_phase("Parsing code...", "0/0")
|
|
108
|
+
parse_state["printed_zero"] = True
|
|
108
109
|
return
|
|
109
110
|
if event == "parse_progress":
|
|
110
111
|
indexed = int(payload.get("indexed", 0))
|
|
@@ -149,7 +150,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
|
|
|
149
150
|
result = indexer.index_project(abs_path, full=full, progress=_progress)
|
|
150
151
|
if parse_state["shown"]:
|
|
151
152
|
click.echo()
|
|
152
|
-
if parse_state["total"] == 0:
|
|
153
|
+
if parse_state["total"] == 0 and not parse_state["printed_zero"]:
|
|
153
154
|
_phase("Parsing code...", "0/0")
|
|
154
155
|
elif parse_state["indexed"] < parse_state["total"]:
|
|
155
156
|
_phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
|
|
@@ -8,6 +8,7 @@ class Settings:
|
|
|
8
8
|
pid_file: str = os.path.expanduser("~/.codespine.pid")
|
|
9
9
|
log_file: str = os.path.expanduser("~/.codespine.log")
|
|
10
10
|
embedding_cache_db: str = os.path.expanduser("~/.codespine_embedding_cache.sqlite3")
|
|
11
|
+
index_meta_dir: str = os.path.expanduser("~/.codespine_index_meta")
|
|
11
12
|
embedding_model: str = "BAAI/bge-small-en-v1.5"
|
|
12
13
|
vector_dim: int = 384
|
|
13
14
|
rrf_k: int = 60
|
|
@@ -62,16 +62,23 @@ def resolve_calls(
|
|
|
62
62
|
Yields tuples: (source_method_id, target_method_id, confidence, reason)
|
|
63
63
|
"""
|
|
64
64
|
name_arity_to_method_ids: dict[tuple[str, int], list[str]] = defaultdict(list)
|
|
65
|
-
|
|
65
|
+
class_method_index_by_id: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
|
|
66
|
+
class_method_index_by_fqcn: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
|
|
66
67
|
for method_id, meta in method_catalog.items():
|
|
67
68
|
key = (meta["name"], int(meta["param_count"]))
|
|
68
69
|
name_arity_to_method_ids[key].append(method_id)
|
|
69
|
-
|
|
70
|
+
class_id = meta.get("class_id", "")
|
|
71
|
+
class_fqcn = meta.get("class_fqcn", "")
|
|
72
|
+
if class_id:
|
|
73
|
+
class_method_index_by_id[class_id][key].append(method_id)
|
|
74
|
+
if class_fqcn:
|
|
75
|
+
class_method_index_by_fqcn[class_fqcn][key].append(method_id)
|
|
70
76
|
|
|
71
77
|
for source_id, call_sites in calls.items():
|
|
72
78
|
src_meta = method_catalog.get(source_id, {})
|
|
73
79
|
src_ctx = method_context.get(source_id, {})
|
|
74
|
-
|
|
80
|
+
src_class_id = src_meta.get("class_id", "") or src_ctx.get("class_id", "")
|
|
81
|
+
src_class_fqcn = src_meta.get("class_fqcn", "")
|
|
75
82
|
local_types = src_ctx.get("local_types", {}) or {}
|
|
76
83
|
field_types = src_ctx.get("field_types", {}) or {}
|
|
77
84
|
|
|
@@ -90,7 +97,7 @@ def resolve_calls(
|
|
|
90
97
|
receiver_type = None
|
|
91
98
|
receiver_is_this = False
|
|
92
99
|
if receiver == "this":
|
|
93
|
-
receiver_type =
|
|
100
|
+
receiver_type = src_class_fqcn
|
|
94
101
|
receiver_is_this = True
|
|
95
102
|
elif receiver in local_types:
|
|
96
103
|
receiver_type = local_types[receiver]
|
|
@@ -102,14 +109,14 @@ def resolve_calls(
|
|
|
102
109
|
receiver_fqcn_candidates = _resolve_type_candidates(receiver_type, src_ctx, class_catalog)
|
|
103
110
|
|
|
104
111
|
for fqcn in receiver_fqcn_candidates:
|
|
105
|
-
targets.extend(
|
|
112
|
+
targets.extend(class_method_index_by_fqcn.get(fqcn, {}).get(key, []))
|
|
106
113
|
|
|
107
114
|
if targets:
|
|
108
115
|
confidence = 1.0 if receiver_is_this else 0.8
|
|
109
116
|
reason = "receiver_this_exact" if receiver_is_this else "receiver_method_match"
|
|
110
117
|
|
|
111
118
|
if not targets:
|
|
112
|
-
in_class =
|
|
119
|
+
in_class = class_method_index_by_id.get(src_class_id, {}).get(key, [])
|
|
113
120
|
if in_class:
|
|
114
121
|
targets = in_class
|
|
115
122
|
confidence = 1.0
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
import os
|
|
4
5
|
from dataclasses import dataclass
|
|
5
6
|
from typing import Callable
|
|
6
7
|
|
|
8
|
+
from codespine.config import SETTINGS
|
|
7
9
|
from codespine.indexer.call_resolver import resolve_calls
|
|
8
10
|
from codespine.indexer.java_parser import parse_java_source
|
|
9
11
|
from codespine.indexer.symbol_builder import class_id, digest_bytes, file_id, method_id, symbol_id
|
|
@@ -37,21 +39,24 @@ class JavaIndexer:
|
|
|
37
39
|
current_files = self._collect_java_files(root_path)
|
|
38
40
|
self._emit(progress, "scan_done", files_found=len(current_files))
|
|
39
41
|
db_files = self.store.project_file_hashes(project_id) if not full else {}
|
|
40
|
-
|
|
42
|
+
meta_cache = self._load_file_meta_cache(project_id)
|
|
43
|
+
current_file_ids = {
|
|
44
|
+
file_id(project_id, os.path.relpath(fp, root_path))
|
|
45
|
+
for fp in current_files
|
|
46
|
+
}
|
|
41
47
|
|
|
42
48
|
if full:
|
|
43
49
|
to_reindex = current_files
|
|
44
50
|
deleted_file_ids = []
|
|
51
|
+
meta_cache = {}
|
|
45
52
|
else:
|
|
46
|
-
to_reindex =
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if old != digest:
|
|
54
|
-
to_reindex.append(file_path)
|
|
53
|
+
to_reindex, deleted_file_ids, meta_cache = self._plan_incremental(
|
|
54
|
+
project_id,
|
|
55
|
+
root_path,
|
|
56
|
+
current_files,
|
|
57
|
+
db_files,
|
|
58
|
+
meta_cache,
|
|
59
|
+
)
|
|
55
60
|
self._emit(
|
|
56
61
|
progress,
|
|
57
62
|
"plan_done",
|
|
@@ -59,6 +64,21 @@ class JavaIndexer:
|
|
|
59
64
|
deleted_files=len(deleted_file_ids),
|
|
60
65
|
mode="full" if full else "incremental",
|
|
61
66
|
)
|
|
67
|
+
if not full and not to_reindex and not deleted_file_ids:
|
|
68
|
+
self._prune_meta_cache(meta_cache, current_file_ids)
|
|
69
|
+
self._save_file_meta_cache(project_id, meta_cache)
|
|
70
|
+
self._emit(progress, "resolve_calls_done", calls_resolved=0)
|
|
71
|
+
self._emit(progress, "resolve_types_done", type_relationships=0)
|
|
72
|
+
return IndexResult(
|
|
73
|
+
project_id=project_id,
|
|
74
|
+
files_found=len(current_files),
|
|
75
|
+
files_indexed=0,
|
|
76
|
+
classes_indexed=0,
|
|
77
|
+
methods_indexed=0,
|
|
78
|
+
calls_resolved=0,
|
|
79
|
+
type_relationships=0,
|
|
80
|
+
embeddings_generated=0,
|
|
81
|
+
)
|
|
62
82
|
|
|
63
83
|
files_indexed = 0
|
|
64
84
|
classes_indexed = 0
|
|
@@ -70,6 +90,7 @@ class JavaIndexer:
|
|
|
70
90
|
method_calls: dict[str, list] = {}
|
|
71
91
|
method_context: dict[str, dict] = {}
|
|
72
92
|
class_catalog: dict[str, list[str]] = self._existing_class_catalog(project_id) if not full else {}
|
|
93
|
+
fqcn_to_class_ids: dict[str, list[str]] = self._existing_class_ids_by_fqcn(project_id) if not full else {}
|
|
73
94
|
class_meta: dict[str, dict] = {}
|
|
74
95
|
class_methods: dict[str, dict[str, str]] = self._existing_class_methods(project_id) if not full else {}
|
|
75
96
|
|
|
@@ -84,32 +105,40 @@ class JavaIndexer:
|
|
|
84
105
|
for file_path in to_reindex:
|
|
85
106
|
rel_path = os.path.relpath(file_path, root_path)
|
|
86
107
|
is_test = "src/test/java" in file_path.replace("\\", "/")
|
|
108
|
+
scope = self._scope_from_rel_path(rel_path)
|
|
87
109
|
|
|
88
110
|
with open(file_path, "rb") as f:
|
|
89
111
|
source = f.read()
|
|
90
112
|
|
|
91
113
|
parsed = parse_java_source(source)
|
|
92
114
|
f_id = file_id(project_id, rel_path)
|
|
115
|
+
file_digest = digest_bytes(source)
|
|
93
116
|
if not full:
|
|
94
117
|
# Drop old symbols/methods/classes for changed files before reinserting.
|
|
95
118
|
self.store.clear_file(f_id)
|
|
96
|
-
self.store.upsert_file(f_id, file_path, project_id, is_test,
|
|
119
|
+
self.store.upsert_file(f_id, file_path, project_id, is_test, file_digest)
|
|
120
|
+
self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source))
|
|
97
121
|
|
|
98
122
|
for cls in parsed.classes:
|
|
99
|
-
c_id = class_id(cls.fqcn)
|
|
123
|
+
c_id = class_id(cls.fqcn, scope)
|
|
100
124
|
self.store.upsert_class(c_id, cls.fqcn, cls.name, cls.package, f_id)
|
|
101
125
|
class_catalog.setdefault(cls.name, [])
|
|
102
126
|
if cls.fqcn not in class_catalog[cls.name]:
|
|
103
127
|
class_catalog[cls.name].append(cls.fqcn)
|
|
104
|
-
|
|
128
|
+
fqcn_to_class_ids.setdefault(cls.fqcn, [])
|
|
129
|
+
if c_id not in fqcn_to_class_ids[cls.fqcn]:
|
|
130
|
+
fqcn_to_class_ids[cls.fqcn].append(c_id)
|
|
131
|
+
class_meta[c_id] = {
|
|
132
|
+
"fqcn": cls.fqcn,
|
|
105
133
|
"package": parsed.package,
|
|
106
134
|
"imports": parsed.imports,
|
|
107
135
|
"extends": cls.extends,
|
|
108
136
|
"interfaces": cls.interfaces,
|
|
137
|
+
"scope": scope,
|
|
109
138
|
}
|
|
110
|
-
class_methods.setdefault(
|
|
139
|
+
class_methods.setdefault(c_id, {})
|
|
111
140
|
|
|
112
|
-
cls_symbol_id = symbol_id("class", cls.fqcn)
|
|
141
|
+
cls_symbol_id = symbol_id("class", cls.fqcn, scope)
|
|
113
142
|
self.store.upsert_symbol(
|
|
114
143
|
symbol_id=cls_symbol_id,
|
|
115
144
|
kind="class",
|
|
@@ -123,7 +152,7 @@ class JavaIndexer:
|
|
|
123
152
|
classes_indexed += 1
|
|
124
153
|
|
|
125
154
|
for method in cls.methods:
|
|
126
|
-
m_id = method_id(cls.fqcn, method.signature)
|
|
155
|
+
m_id = method_id(cls.fqcn, method.signature, scope)
|
|
127
156
|
self.store.upsert_method(
|
|
128
157
|
method_id=m_id,
|
|
129
158
|
class_id=c_id,
|
|
@@ -136,7 +165,7 @@ class JavaIndexer:
|
|
|
136
165
|
)
|
|
137
166
|
|
|
138
167
|
fqname = f"{cls.fqcn}#{method.signature}"
|
|
139
|
-
m_symbol_id = symbol_id("method", fqname)
|
|
168
|
+
m_symbol_id = symbol_id("method", fqname, scope)
|
|
140
169
|
self.store.upsert_symbol(
|
|
141
170
|
symbol_id=m_symbol_id,
|
|
142
171
|
kind="method",
|
|
@@ -154,16 +183,18 @@ class JavaIndexer:
|
|
|
154
183
|
"name": method.name,
|
|
155
184
|
"param_count": len(method.parameter_types),
|
|
156
185
|
"class_fqcn": cls.fqcn,
|
|
186
|
+
"class_id": c_id,
|
|
157
187
|
}
|
|
158
188
|
method_calls[m_id] = method.calls
|
|
159
189
|
method_context[m_id] = {
|
|
190
|
+
"class_id": c_id,
|
|
160
191
|
"class_fqcn": cls.fqcn,
|
|
161
192
|
"local_types": method.local_types,
|
|
162
193
|
"field_types": cls.field_types,
|
|
163
194
|
"imports": parsed.imports,
|
|
164
195
|
"package": parsed.package,
|
|
165
196
|
}
|
|
166
|
-
class_methods[
|
|
197
|
+
class_methods[c_id][method.signature] = m_id
|
|
167
198
|
files_indexed += 1
|
|
168
199
|
self._emit(
|
|
169
200
|
progress,
|
|
@@ -182,9 +213,17 @@ class JavaIndexer:
|
|
|
182
213
|
self._emit(progress, "resolve_calls_done", calls_resolved=calls_resolved)
|
|
183
214
|
|
|
184
215
|
self._emit(progress, "resolve_types_start")
|
|
185
|
-
type_relationships += self._build_inheritance_edges(
|
|
216
|
+
type_relationships += self._build_inheritance_edges(
|
|
217
|
+
class_meta,
|
|
218
|
+
class_catalog,
|
|
219
|
+
class_methods,
|
|
220
|
+
fqcn_to_class_ids,
|
|
221
|
+
)
|
|
186
222
|
self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
|
|
187
223
|
|
|
224
|
+
self._prune_meta_cache(meta_cache, current_file_ids)
|
|
225
|
+
self._save_file_meta_cache(project_id, meta_cache)
|
|
226
|
+
|
|
188
227
|
return IndexResult(
|
|
189
228
|
project_id=project_id,
|
|
190
229
|
files_found=len(current_files),
|
|
@@ -210,22 +249,59 @@ class JavaIndexer:
|
|
|
210
249
|
out.append(os.path.join(root, filename))
|
|
211
250
|
return out
|
|
212
251
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
252
|
+
def _plan_incremental(
|
|
253
|
+
self,
|
|
254
|
+
project_id: str,
|
|
255
|
+
root_path: str,
|
|
256
|
+
files: list[str],
|
|
257
|
+
db_files: dict[str, dict[str, str]],
|
|
258
|
+
meta_cache: dict[str, dict],
|
|
259
|
+
) -> tuple[list[str], list[str], dict[str, dict]]:
|
|
260
|
+
current_ids = {
|
|
261
|
+
file_id(project_id, os.path.relpath(fp, root_path))
|
|
262
|
+
for fp in files
|
|
263
|
+
}
|
|
264
|
+
deleted_file_ids = [fid for fid in db_files if fid not in current_ids]
|
|
265
|
+
to_reindex: list[str] = []
|
|
266
|
+
|
|
267
|
+
for file_path in files:
|
|
268
|
+
rel_path = os.path.relpath(file_path, root_path)
|
|
269
|
+
fid = file_id(project_id, rel_path)
|
|
270
|
+
old_hash = db_files.get(fid, {}).get("hash")
|
|
271
|
+
try:
|
|
272
|
+
st = os.stat(file_path)
|
|
273
|
+
except OSError:
|
|
274
|
+
continue
|
|
275
|
+
mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
|
|
276
|
+
size = int(st.st_size)
|
|
277
|
+
cached = meta_cache.get(fid, {})
|
|
278
|
+
|
|
279
|
+
if (
|
|
280
|
+
cached
|
|
281
|
+
and int(cached.get("mtime_ns", -1)) == mtime_ns
|
|
282
|
+
and int(cached.get("size", -1)) == size
|
|
283
|
+
and cached.get("hash")
|
|
284
|
+
and cached.get("hash") == old_hash
|
|
285
|
+
):
|
|
286
|
+
continue
|
|
287
|
+
|
|
288
|
+
with open(file_path, "rb") as f:
|
|
289
|
+
digest = digest_bytes(f.read())
|
|
290
|
+
meta_cache[fid] = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
|
|
291
|
+
if old_hash != digest:
|
|
292
|
+
to_reindex.append(file_path)
|
|
293
|
+
|
|
294
|
+
for fid in deleted_file_ids:
|
|
295
|
+
meta_cache.pop(fid, None)
|
|
296
|
+
|
|
297
|
+
return to_reindex, deleted_file_ids, meta_cache
|
|
222
298
|
|
|
223
299
|
def _existing_method_catalog(self, project_id: str) -> dict[str, dict]:
|
|
224
300
|
recs = self.store.query_records(
|
|
225
301
|
"""
|
|
226
302
|
MATCH (m:Method), (c:Class), (f:File)
|
|
227
303
|
WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
|
|
228
|
-
RETURN m.id as method_id, m.name as name, m.signature as signature, c.fqcn as class_fqcn
|
|
304
|
+
RETURN m.id as method_id, m.name as name, m.signature as signature, c.fqcn as class_fqcn, c.id as class_id
|
|
229
305
|
""",
|
|
230
306
|
{"pid": project_id},
|
|
231
307
|
)
|
|
@@ -239,9 +315,30 @@ class JavaIndexer:
|
|
|
239
315
|
"name": r.get("name", ""),
|
|
240
316
|
"param_count": param_count,
|
|
241
317
|
"class_fqcn": r.get("class_fqcn", ""),
|
|
318
|
+
"class_id": r.get("class_id", ""),
|
|
242
319
|
}
|
|
243
320
|
return out
|
|
244
321
|
|
|
322
|
+
def _existing_class_ids_by_fqcn(self, project_id: str) -> dict[str, list[str]]:
|
|
323
|
+
recs = self.store.query_records(
|
|
324
|
+
"""
|
|
325
|
+
MATCH (c:Class), (f:File)
|
|
326
|
+
WHERE c.file_id = f.id AND f.project_id = $pid
|
|
327
|
+
RETURN c.fqcn as fqcn, c.id as class_id
|
|
328
|
+
""",
|
|
329
|
+
{"pid": project_id},
|
|
330
|
+
)
|
|
331
|
+
out: dict[str, list[str]] = {}
|
|
332
|
+
for r in recs:
|
|
333
|
+
fqcn = r.get("fqcn", "")
|
|
334
|
+
cid = r.get("class_id", "")
|
|
335
|
+
if not fqcn or not cid:
|
|
336
|
+
continue
|
|
337
|
+
out.setdefault(fqcn, [])
|
|
338
|
+
if cid not in out[fqcn]:
|
|
339
|
+
out[fqcn].append(cid)
|
|
340
|
+
return out
|
|
341
|
+
|
|
245
342
|
def _existing_class_catalog(self, project_id: str) -> dict[str, list[str]]:
|
|
246
343
|
recs = self.store.query_records(
|
|
247
344
|
"""
|
|
@@ -263,14 +360,17 @@ class JavaIndexer:
|
|
|
263
360
|
"""
|
|
264
361
|
MATCH (m:Method), (c:Class), (f:File)
|
|
265
362
|
WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
|
|
266
|
-
RETURN c.
|
|
363
|
+
RETURN c.id as class_id, m.signature as signature, m.id as method_id
|
|
267
364
|
""",
|
|
268
365
|
{"pid": project_id},
|
|
269
366
|
)
|
|
270
367
|
out: dict[str, dict[str, str]] = {}
|
|
271
368
|
for r in recs:
|
|
272
|
-
|
|
273
|
-
|
|
369
|
+
class_key = r.get("class_id")
|
|
370
|
+
if not class_key:
|
|
371
|
+
continue
|
|
372
|
+
out.setdefault(class_key, {})
|
|
373
|
+
out[class_key][r["signature"]] = r["method_id"]
|
|
274
374
|
return out
|
|
275
375
|
|
|
276
376
|
@staticmethod
|
|
@@ -302,38 +402,98 @@ class JavaIndexer:
|
|
|
302
402
|
class_meta: dict[str, dict],
|
|
303
403
|
class_catalog: dict[str, list[str]],
|
|
304
404
|
class_methods: dict[str, dict[str, str]],
|
|
405
|
+
fqcn_to_class_ids: dict[str, list[str]],
|
|
305
406
|
) -> int:
|
|
306
407
|
rel_count = 0
|
|
307
|
-
for
|
|
308
|
-
src_id = class_id(fqcn)
|
|
408
|
+
for src_id, meta in class_meta.items():
|
|
309
409
|
ctx = {"package": meta.get("package", ""), "imports": meta.get("imports", [])}
|
|
310
410
|
|
|
311
411
|
parent_candidates = self._resolve_type_candidates(meta.get("extends"), ctx, class_catalog)
|
|
312
412
|
for parent_fqcn in parent_candidates:
|
|
313
|
-
dst_id
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
413
|
+
for dst_id in fqcn_to_class_ids.get(parent_fqcn, []):
|
|
414
|
+
self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 0.8)
|
|
415
|
+
rel_count += 1
|
|
416
|
+
for sig, method_id in class_methods.get(src_id, {}).items():
|
|
417
|
+
parent_method = class_methods.get(dst_id, {}).get(sig)
|
|
418
|
+
if parent_method:
|
|
419
|
+
self.store.add_reference("OVERRIDES", "Method", method_id, "Method", parent_method, 1.0)
|
|
420
|
+
rel_count += 1
|
|
321
421
|
|
|
322
422
|
for iface in meta.get("interfaces", []):
|
|
323
423
|
iface_candidates = self._resolve_type_candidates(iface, ctx, class_catalog)
|
|
324
424
|
for iface_fqcn in iface_candidates:
|
|
325
|
-
dst_id
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
425
|
+
for dst_id in fqcn_to_class_ids.get(iface_fqcn, []):
|
|
426
|
+
self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 1.0)
|
|
427
|
+
rel_count += 1
|
|
428
|
+
for sig, method_id in class_methods.get(src_id, {}).items():
|
|
429
|
+
iface_method = class_methods.get(dst_id, {}).get(sig)
|
|
430
|
+
if iface_method:
|
|
431
|
+
self.store.add_reference("OVERRIDES", "Method", method_id, "Method", iface_method, 1.0)
|
|
432
|
+
rel_count += 1
|
|
333
433
|
return rel_count
|
|
334
434
|
|
|
435
|
+
@staticmethod
|
|
436
|
+
def _meta_cache_path(project_id: str) -> str:
|
|
437
|
+
base = SETTINGS.index_meta_dir
|
|
438
|
+
try:
|
|
439
|
+
os.makedirs(base, exist_ok=True)
|
|
440
|
+
except OSError:
|
|
441
|
+
base = "/tmp/.codespine_index_meta"
|
|
442
|
+
os.makedirs(base, exist_ok=True)
|
|
443
|
+
return os.path.join(base, f"{project_id}.json")
|
|
444
|
+
|
|
445
|
+
def _load_file_meta_cache(self, project_id: str) -> dict[str, dict]:
|
|
446
|
+
path = self._meta_cache_path(project_id)
|
|
447
|
+
if not os.path.exists(path):
|
|
448
|
+
return {}
|
|
449
|
+
try:
|
|
450
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
451
|
+
data = json.load(f)
|
|
452
|
+
if isinstance(data, dict):
|
|
453
|
+
return data
|
|
454
|
+
except (OSError, ValueError, TypeError):
|
|
455
|
+
return {}
|
|
456
|
+
return {}
|
|
457
|
+
|
|
458
|
+
def _save_file_meta_cache(self, project_id: str, data: dict[str, dict]) -> None:
|
|
459
|
+
path = self._meta_cache_path(project_id)
|
|
460
|
+
tmp_path = f"{path}.tmp"
|
|
461
|
+
try:
|
|
462
|
+
with open(tmp_path, "w", encoding="utf-8") as f:
|
|
463
|
+
json.dump(data, f, separators=(",", ":"))
|
|
464
|
+
os.replace(tmp_path, path)
|
|
465
|
+
except OSError:
|
|
466
|
+
return
|
|
467
|
+
|
|
468
|
+
@staticmethod
|
|
469
|
+
def _update_meta_cache_entry(meta_cache: dict[str, dict], fid: str, file_path: str, digest: str, size_hint: int) -> None:
|
|
470
|
+
try:
|
|
471
|
+
st = os.stat(file_path)
|
|
472
|
+
mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
|
|
473
|
+
size = int(st.st_size)
|
|
474
|
+
except OSError:
|
|
475
|
+
mtime_ns = -1
|
|
476
|
+
size = size_hint
|
|
477
|
+
meta_cache[fid] = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
|
|
478
|
+
|
|
479
|
+
@staticmethod
|
|
480
|
+
def _prune_meta_cache(meta_cache: dict[str, dict], current_file_ids: set[str]) -> None:
|
|
481
|
+
for fid in list(meta_cache.keys()):
|
|
482
|
+
if fid not in current_file_ids:
|
|
483
|
+
del meta_cache[fid]
|
|
484
|
+
|
|
335
485
|
@staticmethod
|
|
336
486
|
def _emit(progress: Callable[[str, dict], None] | None, event: str, **payload: object) -> None:
|
|
337
487
|
if progress is None:
|
|
338
488
|
return
|
|
339
489
|
progress(event, payload)
|
|
490
|
+
|
|
491
|
+
@staticmethod
|
|
492
|
+
def _scope_from_rel_path(rel_path: str) -> str:
|
|
493
|
+
normalized = rel_path.replace("\\", "/")
|
|
494
|
+
if "/java/" in normalized:
|
|
495
|
+
return normalized.split("/java/", 1)[0]
|
|
496
|
+
if "/src/" in normalized:
|
|
497
|
+
return normalized.split("/src/", 1)[0]
|
|
498
|
+
scope = os.path.dirname(normalized).strip()
|
|
499
|
+
return scope or "."
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
|
|
8
|
+
class SymbolRef:
|
|
9
|
+
symbol_id: str
|
|
10
|
+
method_id: str
|
|
11
|
+
class_id: str
|
|
12
|
+
file_id: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def digest_bytes(payload: bytes) -> str:
|
|
16
|
+
return hashlib.sha1(payload).hexdigest()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def file_id(project_id: str, rel_path: str) -> str:
|
|
20
|
+
return hashlib.sha1(f"{project_id}:{rel_path}".encode("utf-8")).hexdigest()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def class_id(fqcn: str, scope: str | None = None) -> str:
|
|
24
|
+
key = f"{scope}::{fqcn}" if scope else fqcn
|
|
25
|
+
return hashlib.sha1(key.encode("utf-8")).hexdigest()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def method_id(fqcn: str, signature: str, scope: str | None = None) -> str:
|
|
29
|
+
key = f"{scope}::{fqcn}#{signature}" if scope else f"{fqcn}#{signature}"
|
|
30
|
+
return hashlib.sha1(key.encode("utf-8")).hexdigest()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def symbol_id(kind: str, fqname: str, scope: str | None = None) -> str:
|
|
34
|
+
key = f"{kind}:{scope}:{fqname}" if scope else f"{kind}:{fqname}"
|
|
35
|
+
return hashlib.sha1(key.encode("utf-8")).hexdigest()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from types import SimpleNamespace
|
|
2
|
+
|
|
3
|
+
from codespine.indexer.call_resolver import resolve_calls
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_resolver_prefers_receiver_type_and_arity():
|
|
7
|
+
method_catalog = {
|
|
8
|
+
"src": {
|
|
9
|
+
"name": "entry",
|
|
10
|
+
"param_count": 0,
|
|
11
|
+
"class_id": "c_service",
|
|
12
|
+
"class_fqcn": "com.example.Service",
|
|
13
|
+
"signature": "entry()",
|
|
14
|
+
},
|
|
15
|
+
"m1": {"name": "run", "param_count": 0, "class_id": "c_service", "class_fqcn": "com.example.Service", "signature": "run()"},
|
|
16
|
+
"m2": {
|
|
17
|
+
"name": "run",
|
|
18
|
+
"param_count": 1,
|
|
19
|
+
"class_id": "c_service",
|
|
20
|
+
"class_fqcn": "com.example.Service",
|
|
21
|
+
"signature": "run(String)",
|
|
22
|
+
},
|
|
23
|
+
"m3": {"name": "save", "param_count": 0, "class_id": "c_repo", "class_fqcn": "com.example.Repo", "signature": "save()"},
|
|
24
|
+
}
|
|
25
|
+
calls = {
|
|
26
|
+
"src": [
|
|
27
|
+
SimpleNamespace(name="run", receiver="this", arg_count=0),
|
|
28
|
+
SimpleNamespace(name="save", receiver="repo", arg_count=0),
|
|
29
|
+
]
|
|
30
|
+
}
|
|
31
|
+
method_context = {
|
|
32
|
+
"src": {
|
|
33
|
+
"class_id": "c_service",
|
|
34
|
+
"class_fqcn": "com.example.Service",
|
|
35
|
+
"local_types": {"repo": "Repo"},
|
|
36
|
+
"field_types": {},
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
class_catalog = {"Service": ["com.example.Service"], "Repo": ["com.example.Repo"]}
|
|
40
|
+
|
|
41
|
+
out = list(resolve_calls(method_catalog, calls, method_context, class_catalog))
|
|
42
|
+
assert ("src", "m1", 1.0, "receiver_this_exact") in out
|
|
43
|
+
assert ("src", "m3", 0.8, "receiver_method_match") in out
|
|
@@ -19,3 +19,16 @@ def test_index_and_hybrid_search():
|
|
|
19
19
|
results = hybrid_search(store, "process payment", k=5)
|
|
20
20
|
assert results
|
|
21
21
|
assert any("processPayment" in (r.get("fqname") or "") for r in results)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_incremental_no_change_reindexes_zero_files():
|
|
25
|
+
fixture = Path(__file__).parent / "fixtures" / "java_simple"
|
|
26
|
+
store = GraphStore(read_only=False)
|
|
27
|
+
indexer = JavaIndexer(store)
|
|
28
|
+
|
|
29
|
+
first = indexer.index_project(str(fixture), full=True)
|
|
30
|
+
second = indexer.index_project(str(fixture), full=False)
|
|
31
|
+
|
|
32
|
+
assert first.files_found >= 2
|
|
33
|
+
assert second.files_found == first.files_found
|
|
34
|
+
assert second.files_indexed == 0
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
pytest.importorskip("kuzu")
|
|
6
|
+
pytest.importorskip("tree_sitter_java")
|
|
7
|
+
|
|
8
|
+
from codespine.db.store import GraphStore
|
|
9
|
+
from codespine.indexer.engine import JavaIndexer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _write_java(path: Path, content: str) -> None:
|
|
13
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
14
|
+
path.write_text(content, encoding="utf-8")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_multimodule_duplicate_fqcn_is_indexed_without_collision(tmp_path: Path):
|
|
18
|
+
_write_java(
|
|
19
|
+
tmp_path / "module-a" / "src" / "main" / "java" / "com" / "example" / "App.java",
|
|
20
|
+
"""
|
|
21
|
+
package com.example;
|
|
22
|
+
public class App { public void fromA() {} }
|
|
23
|
+
""",
|
|
24
|
+
)
|
|
25
|
+
_write_java(
|
|
26
|
+
tmp_path / "module-b" / "src" / "main" / "java" / "com" / "example" / "App.java",
|
|
27
|
+
"""
|
|
28
|
+
package com.example;
|
|
29
|
+
public class App { public void fromB() {} }
|
|
30
|
+
""",
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
store = GraphStore(read_only=False)
|
|
34
|
+
result = JavaIndexer(store).index_project(str(tmp_path), full=True)
|
|
35
|
+
|
|
36
|
+
classes = store.query_records(
|
|
37
|
+
"""
|
|
38
|
+
MATCH (c:Class), (f:File)
|
|
39
|
+
WHERE c.file_id = f.id AND f.project_id = $pid AND c.fqcn = $fqcn
|
|
40
|
+
RETURN c.id as id, f.path as path
|
|
41
|
+
""",
|
|
42
|
+
{"pid": result.project_id, "fqcn": "com.example.App"},
|
|
43
|
+
)
|
|
44
|
+
methods = store.query_records(
|
|
45
|
+
"""
|
|
46
|
+
MATCH (m:Method), (c:Class), (f:File)
|
|
47
|
+
WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
|
|
48
|
+
RETURN m.name as name
|
|
49
|
+
""",
|
|
50
|
+
{"pid": result.project_id},
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
assert len(classes) == 2
|
|
54
|
+
assert len({c["id"] for c in classes}) == 2
|
|
55
|
+
assert {"fromA", "fromB"}.issubset({m["name"] for m in methods})
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import hashlib
|
|
4
|
-
from dataclasses import dataclass
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
@dataclass
|
|
8
|
-
class SymbolRef:
|
|
9
|
-
symbol_id: str
|
|
10
|
-
method_id: str
|
|
11
|
-
class_id: str
|
|
12
|
-
file_id: str
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def digest_bytes(payload: bytes) -> str:
|
|
16
|
-
return hashlib.sha1(payload).hexdigest()
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def file_id(project_id: str, rel_path: str) -> str:
|
|
20
|
-
return hashlib.sha1(f"{project_id}:{rel_path}".encode("utf-8")).hexdigest()
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def class_id(fqcn: str) -> str:
|
|
24
|
-
return hashlib.sha1(fqcn.encode("utf-8")).hexdigest()
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def method_id(fqcn: str, signature: str) -> str:
|
|
28
|
-
return hashlib.sha1(f"{fqcn}#{signature}".encode("utf-8")).hexdigest()
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def symbol_id(kind: str, fqname: str) -> str:
|
|
32
|
-
return hashlib.sha1(f"{kind}:{fqname}".encode("utf-8")).hexdigest()
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
from types import SimpleNamespace
|
|
2
|
-
|
|
3
|
-
from codespine.indexer.call_resolver import resolve_calls
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def test_resolver_prefers_receiver_type_and_arity():
|
|
7
|
-
method_catalog = {
|
|
8
|
-
"src": {"name": "entry", "param_count": 0, "class_fqcn": "com.example.Service", "signature": "entry()"},
|
|
9
|
-
"m1": {"name": "run", "param_count": 0, "class_fqcn": "com.example.Service", "signature": "run()"},
|
|
10
|
-
"m2": {"name": "run", "param_count": 1, "class_fqcn": "com.example.Service", "signature": "run(String)"},
|
|
11
|
-
"m3": {"name": "save", "param_count": 0, "class_fqcn": "com.example.Repo", "signature": "save()"},
|
|
12
|
-
}
|
|
13
|
-
calls = {
|
|
14
|
-
"src": [
|
|
15
|
-
SimpleNamespace(name="run", receiver="this", arg_count=0),
|
|
16
|
-
SimpleNamespace(name="save", receiver="repo", arg_count=0),
|
|
17
|
-
]
|
|
18
|
-
}
|
|
19
|
-
method_context = {
|
|
20
|
-
"src": {
|
|
21
|
-
"class_fqcn": "com.example.Service",
|
|
22
|
-
"local_types": {"repo": "Repo"},
|
|
23
|
-
"field_types": {},
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
class_catalog = {"Service": ["com.example.Service"], "Repo": ["com.example.Repo"]}
|
|
27
|
-
|
|
28
|
-
out = resolve_calls(method_catalog, calls, method_context, class_catalog)
|
|
29
|
-
assert ("src", "m1", 1.0, "receiver_this_exact") in out
|
|
30
|
-
assert ("src", "m3", 0.8, "receiver_method_match") in out
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|