codespine 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codespine/__init__.py +4 -0
- codespine/analysis/__init__.py +1 -0
- codespine/analysis/community.py +75 -0
- codespine/analysis/context.py +24 -0
- codespine/analysis/coupling.py +119 -0
- codespine/analysis/deadcode.py +107 -0
- codespine/analysis/flow.py +77 -0
- codespine/analysis/impact.py +90 -0
- codespine/cli.py +424 -0
- codespine/config.py +22 -0
- codespine/db/__init__.py +1 -0
- codespine/db/schema.py +82 -0
- codespine/db/store.py +313 -0
- codespine/diff/__init__.py +1 -0
- codespine/diff/branch_diff.py +163 -0
- codespine/indexer/__init__.py +1 -0
- codespine/indexer/call_resolver.py +137 -0
- codespine/indexer/engine.py +305 -0
- codespine/indexer/java_parser.py +350 -0
- codespine/indexer/symbol_builder.py +32 -0
- codespine/mcp/__init__.py +1 -0
- codespine/mcp/server.py +67 -0
- codespine/noise/__init__.py +1 -0
- codespine/noise/blocklist.py +37 -0
- codespine/search/__init__.py +1 -0
- codespine/search/bm25.py +52 -0
- codespine/search/fuzzy.py +36 -0
- codespine/search/hybrid.py +80 -0
- codespine/search/rrf.py +9 -0
- codespine/search/vector.py +113 -0
- codespine/watch/__init__.py +1 -0
- codespine/watch/watcher.py +38 -0
- codespine-0.1.1.dist-info/METADATA +336 -0
- codespine-0.1.1.dist-info/RECORD +39 -0
- codespine-0.1.1.dist-info/WHEEL +5 -0
- codespine-0.1.1.dist-info/entry_points.txt +3 -0
- codespine-0.1.1.dist-info/licenses/LICENSE +21 -0
- codespine-0.1.1.dist-info/top_level.txt +2 -0
- gindex.py +10 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from codespine.indexer.call_resolver import resolve_calls
|
|
7
|
+
from codespine.indexer.java_parser import parse_java_source
|
|
8
|
+
from codespine.indexer.symbol_builder import class_id, digest_bytes, file_id, method_id, symbol_id
|
|
9
|
+
from codespine.search.vector import embed_text
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class IndexResult:
    """Summary counters produced by one `JavaIndexer.index_project` run."""

    # Project key; derived from the basename of the indexed root directory.
    project_id: str
    # Number of .java files discovered under the root (whether re-indexed or not).
    files_found: int
    # Files actually (re)parsed and written to the store this run.
    files_indexed: int
    classes_indexed: int
    methods_indexed: int
    # Call edges emitted by the call resolver.
    calls_resolved: int
    # IMPLEMENTS/OVERRIDES edges created from extends/implements clauses.
    type_relationships: int
    # One embedding is generated per indexed class and per indexed method.
    embeddings_generated: int
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class JavaIndexer:
    """Indexes a Java source tree into the backing store.

    Walks `src` directories, parses each .java file with tree-sitter,
    writes file/class/method/symbol rows plus call and inheritance edges,
    and generates one embedding per class/method symbol.
    """

    def __init__(self, store):
        # `store` must provide upsert_*/clear_*/add_*/query_records and a
        # transaction() context manager (see the calls below).
        self.store = store

    def index_project(self, root_path: str, full: bool = True) -> IndexResult:
        """Index (or incrementally re-index) the Java project at *root_path*.

        With full=True the project is cleared and every file re-indexed.
        With full=False only files whose content hash changed are re-indexed,
        and files that disappeared from disk are removed from the store.
        Returns counters describing the work performed.
        """
        root_path = os.path.abspath(root_path)
        # Project identity is the directory basename — two different paths
        # with the same basename would collide. NOTE(review): confirm this
        # is acceptable for multi-checkout setups.
        project_id = os.path.basename(root_path)
        current_files = self._collect_java_files(root_path)
        current_hashes = self._hash_files(project_id, root_path, current_files)
        db_files = self.store.project_file_hashes(project_id) if not full else {}

        if full:
            to_reindex = current_files
            deleted_file_ids = []
        else:
            to_reindex = []
            # Files present in the store but no longer on disk.
            deleted_file_ids = [fid for fid in db_files if fid not in current_hashes]
            for file_path in current_files:
                rel_path = os.path.relpath(file_path, root_path)
                fid = file_id(project_id, rel_path)
                digest = current_hashes[fid]
                old = db_files.get(fid, {}).get("hash")
                # New files (old is None) and changed files both land here.
                if old != digest:
                    to_reindex.append(file_path)

        files_indexed = 0
        classes_indexed = 0
        methods_indexed = 0
        calls_resolved = 0
        type_relationships = 0

        # On incremental runs, seed the catalogs from the store so the call
        # resolver can still target methods/classes in unchanged files.
        method_catalog: dict[str, dict] = self._existing_method_catalog(project_id) if not full else {}
        method_calls: dict[str, list] = {}
        method_context: dict[str, dict] = {}
        class_catalog: dict[str, list[str]] = self._existing_class_catalog(project_id) if not full else {}
        class_meta: dict[str, dict] = {}
        class_methods: dict[str, dict[str, str]] = self._existing_class_methods(project_id) if not full else {}

        # All mutations happen inside one transaction so a failed run leaves
        # the store unchanged.
        with self.store.transaction():
            self.store.upsert_project(project_id, root_path)
            if full:
                self.store.clear_project(project_id)
            else:
                for fid in deleted_file_ids:
                    self.store.clear_file(fid)

            for file_path in to_reindex:
                rel_path = os.path.relpath(file_path, root_path)
                # Maven/Gradle convention; backslashes normalized for Windows.
                is_test = "src/test/java" in file_path.replace("\\", "/")

                with open(file_path, "rb") as f:
                    source = f.read()

                parsed = parse_java_source(source)
                f_id = file_id(project_id, rel_path)
                if not full:
                    # Drop old symbols/methods/classes for changed files before reinserting.
                    self.store.clear_file(f_id)
                self.store.upsert_file(f_id, file_path, project_id, is_test, digest_bytes(source))

                for cls in parsed.classes:
                    c_id = class_id(cls.fqcn)
                    self.store.upsert_class(c_id, cls.fqcn, cls.name, cls.package, f_id)
                    # class_catalog maps simple name -> all FQCNs sharing it.
                    class_catalog.setdefault(cls.name, [])
                    if cls.fqcn not in class_catalog[cls.name]:
                        class_catalog[cls.name].append(cls.fqcn)
                    class_meta[cls.fqcn] = {
                        "package": parsed.package,
                        "imports": parsed.imports,
                        "extends": cls.extends,
                        "interfaces": cls.interfaces,
                    }
                    class_methods.setdefault(cls.fqcn, {})

                    cls_symbol_id = symbol_id("class", cls.fqcn)
                    self.store.upsert_symbol(
                        symbol_id=cls_symbol_id,
                        kind="class",
                        name=cls.name,
                        fqname=cls.fqcn,
                        file_id=f_id,
                        line=cls.line,
                        col=cls.col,
                        embedding=embed_text(f"class {cls.fqcn}"),
                    )
                    classes_indexed += 1

                    for method in cls.methods:
                        m_id = method_id(cls.fqcn, method.signature)
                        self.store.upsert_method(
                            method_id=m_id,
                            class_id=c_id,
                            name=method.name,
                            signature=method.signature,
                            return_type=method.return_type,
                            # Annotations are folded into the modifiers list
                            # with an "@" prefix to distinguish them.
                            modifiers=method.modifiers + [f"@{a}" for a in method.annotations],
                            is_constructor=(method.name == cls.name),
                            is_test=is_test,
                        )

                        fqname = f"{cls.fqcn}#{method.signature}"
                        m_symbol_id = symbol_id("method", fqname)
                        self.store.upsert_symbol(
                            symbol_id=m_symbol_id,
                            kind="method",
                            name=method.name,
                            fqname=fqname,
                            file_id=f_id,
                            line=method.line,
                            col=method.col,
                            embedding=embed_text(f"method {fqname} returns {method.return_type}"),
                        )
                        methods_indexed += 1

                        method_catalog[m_id] = {
                            "signature": method.signature,
                            "name": method.name,
                            "param_count": len(method.parameter_types),
                            "class_fqcn": cls.fqcn,
                        }
                        method_calls[m_id] = method.calls
                        # Context the resolver needs to bind call receivers.
                        method_context[m_id] = {
                            "class_fqcn": cls.fqcn,
                            "local_types": method.local_types,
                            "field_types": cls.field_types,
                            "imports": parsed.imports,
                            "package": parsed.package,
                        }
                        class_methods[cls.fqcn][method.signature] = m_id
                files_indexed += 1

            # Resolve calls only after every file is parsed so cross-file
            # targets are known.
            for src, dst, confidence, reason in resolve_calls(method_catalog, method_calls, method_context, class_catalog):
                self.store.add_call(src, dst, confidence, reason)
                calls_resolved += 1

            type_relationships += self._build_inheritance_edges(class_meta, class_catalog, class_methods)

        return IndexResult(
            project_id=project_id,
            files_found=len(current_files),
            files_indexed=files_indexed,
            classes_indexed=classes_indexed,
            methods_indexed=methods_indexed,
            calls_resolved=calls_resolved,
            type_relationships=type_relationships,
            embeddings_generated=classes_indexed + methods_indexed,
        )

    @staticmethod
    def _collect_java_files(root_path: str) -> list[str]:
        """Collect .java files under directories whose path contains "src".

        NOTE(review): both the "src" test and the skip test are plain
        substring matches on the whole path, so e.g. a segment like
        "checkout" or "about" is skipped because it contains "out" —
        confirm whether per-segment matching was intended.
        """
        out: list[str] = []
        for root, _, files in os.walk(root_path):
            if "src" not in root:
                continue
            if any(skip in root for skip in ["target", "build", "out", ".git"]):
                continue
            for filename in files:
                if filename.endswith(".java"):
                    out.append(os.path.join(root, filename))
        return out

    @staticmethod
    def _hash_files(project_id: str, root_path: str, files: list[str]) -> dict[str, str]:
        """Return {file_id: content-digest} for every path in *files*."""
        hashes: dict[str, str] = {}
        for fp in files:
            rel = os.path.relpath(fp, root_path)
            fid = file_id(project_id, rel)
            with open(fp, "rb") as f:
                hashes[fid] = digest_bytes(f.read())
        return hashes

    def _existing_method_catalog(self, project_id: str) -> dict[str, dict]:
        """Load already-indexed methods from the store (incremental runs).

        param_count is recovered by counting top-level commas in the stored
        signature. NOTE(review): this overcounts parameters whose generic
        types contain commas (e.g. "m(Map<String,Integer>)") — confirm how
        signatures are stored.
        """
        recs = self.store.query_records(
            """
            MATCH (m:Method), (c:Class), (f:File)
            WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
            RETURN m.id as method_id, m.name as name, m.signature as signature, c.fqcn as class_fqcn
            """,
            {"pid": project_id},
        )
        out: dict[str, dict] = {}
        for r in recs:
            sig = r.get("signature") or ""
            arg_str = sig[sig.find("(") + 1 : sig.rfind(")")] if "(" in sig and ")" in sig else ""
            param_count = 0 if not arg_str else arg_str.count(",") + 1
            out[r["method_id"]] = {
                "signature": sig,
                "name": r.get("name", ""),
                "param_count": param_count,
                "class_fqcn": r.get("class_fqcn", ""),
            }
        return out

    def _existing_class_catalog(self, project_id: str) -> dict[str, list[str]]:
        """Load {simple name: [FQCNs]} for the project from the store."""
        recs = self.store.query_records(
            """
            MATCH (c:Class), (f:File)
            WHERE c.file_id = f.id AND f.project_id = $pid
            RETURN c.name as name, c.fqcn as fqcn
            """,
            {"pid": project_id},
        )
        out: dict[str, list[str]] = {}
        for r in recs:
            out.setdefault(r["name"], [])
            if r["fqcn"] not in out[r["name"]]:
                out[r["name"]].append(r["fqcn"])
        return out

    def _existing_class_methods(self, project_id: str) -> dict[str, dict[str, str]]:
        """Load {class FQCN: {method signature: method id}} from the store."""
        recs = self.store.query_records(
            """
            MATCH (m:Method), (c:Class), (f:File)
            WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
            RETURN c.fqcn as fqcn, m.signature as signature, m.id as method_id
            """,
            {"pid": project_id},
        )
        out: dict[str, dict[str, str]] = {}
        for r in recs:
            out.setdefault(r["fqcn"], {})
            out[r["fqcn"]][r["signature"]] = r["method_id"]
        return out

    @staticmethod
    def _resolve_type_candidates(type_name: str | None, context: dict, class_catalog: dict[str, list[str]]) -> list[str]:
        """Expand a (possibly simple) type name into candidate FQCNs.

        Candidates in priority order: the name itself when already dotted,
        imports ending in the simple name, a same-package guess, then any
        indexed class sharing the simple name. Deduplicated, order kept.
        """
        if not type_name:
            return []
        raw = type_name.strip()
        simple = raw.split(".")[-1]
        candidates: list[str] = []
        if "." in raw:
            candidates.append(raw)
        for imp in context.get("imports", []) or []:
            if imp.endswith(f".{simple}"):
                candidates.append(imp)
        pkg = context.get("package", "")
        if pkg:
            candidates.append(f"{pkg}.{simple}")
        candidates.extend(class_catalog.get(simple, []))
        uniq: list[str] = []
        seen = set()
        for c in candidates:
            if c and c not in seen:
                uniq.append(c)
                seen.add(c)
        return uniq

    def _build_inheritance_edges(
        self,
        class_meta: dict[str, dict],
        class_catalog: dict[str, list[str]],
        class_methods: dict[str, dict[str, str]],
    ) -> int:
        """Write IMPLEMENTS and OVERRIDES edges; return how many were added.

        Each candidate FQCN for a parent/interface produces its own edge, so
        ambiguous simple names yield multiple speculative edges.
        NOTE(review): superclass edges use the "IMPLEMENTS" label with
        confidence 0.8 rather than a distinct "EXTENDS" label — confirm this
        is intentional in the schema.
        """
        rel_count = 0
        for fqcn, meta in class_meta.items():
            src_id = class_id(fqcn)
            ctx = {"package": meta.get("package", ""), "imports": meta.get("imports", [])}

            parent_candidates = self._resolve_type_candidates(meta.get("extends"), ctx, class_catalog)
            for parent_fqcn in parent_candidates:
                dst_id = class_id(parent_fqcn)
                self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 0.8)
                rel_count += 1
                # A method with an identical signature in the parent is an override.
                for sig, method_id in class_methods.get(fqcn, {}).items():
                    parent_method = class_methods.get(parent_fqcn, {}).get(sig)
                    if parent_method:
                        self.store.add_reference("OVERRIDES", "Method", method_id, "Method", parent_method, 1.0)
                        rel_count += 1

            for iface in meta.get("interfaces", []):
                iface_candidates = self._resolve_type_candidates(iface, ctx, class_catalog)
                for iface_fqcn in iface_candidates:
                    dst_id = class_id(iface_fqcn)
                    self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 1.0)
                    rel_count += 1
                    for sig, method_id in class_methods.get(fqcn, {}).items():
                        iface_method = class_methods.get(iface_fqcn, {}).get(sig)
                        if iface_method:
                            self.store.add_reference("OVERRIDES", "Method", method_id, "Method", iface_method, 1.0)
                            rel_count += 1
        return rel_count
|
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
|
|
7
|
+
import tree_sitter_java as tsjava
|
|
8
|
+
from tree_sitter import Language, Parser, Query
|
|
9
|
+
|
|
10
|
+
# Module-level singletons: the compiled Java grammar and one shared parser.
# NOTE(review): a single shared Parser assumes single-threaded parsing —
# confirm if this module is ever driven from the file watcher concurrently.
JAVA_LANGUAGE = Language(tsjava.language())
PARSER = Parser(JAVA_LANGUAGE)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class ParsedMethod:
    """One method or constructor extracted from a Java source file."""

    name: str
    # "name(T1,T2)" — built from the raw declared parameter type texts.
    signature: str
    # Declared return type text; for constructors this is the class name.
    return_type: str
    modifiers: list[str]
    # Annotation texts with the leading "@" stripped.
    annotations: list[str]
    parameter_types: list[str]
    # 1-based start position of the declaration.
    line: int
    col: int
    # SHA-1 of the normalized (comment/whitespace-stripped) declaration text.
    body_hash: str
    # Invocations found inside this method's body.
    calls: list["ParsedCall"] = field(default_factory=list)
    # Local variable name -> declared type text (consumed by the call resolver).
    local_types: dict[str, str] = field(default_factory=dict)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class ParsedCall:
    """A single method-invocation site inside a method body."""

    # Simple name of the invoked method.
    name: str
    # Receiver expression text, or None for implicit (this/static) calls.
    receiver: str | None
    # Comma-based count of the argument list (see _arg_count) — approximate.
    arg_count: int
    # 1-based invocation position.
    line: int
    col: int
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ParsedClass:
    """One class declaration (top-level or nested) from a Java file."""

    name: str
    package: str
    # "<package>.<name>". NOTE(review): nested classes also get
    # "<package>.<SimpleName>", not "Outer.Inner" — confirm intended.
    fqcn: str
    # 1-based declaration position.
    line: int
    col: int
    modifiers: list[str] = field(default_factory=list)
    # Annotation texts with the leading "@" stripped.
    annotations: list[str] = field(default_factory=list)
    # Raw implements-clause type texts (simple or scoped names).
    interfaces: list[str] = field(default_factory=list)
    # Raw superclass type text, if any.
    extends: str | None = None
    # Field name -> declared type text.
    field_types: dict[str, str] = field(default_factory=dict)
    # SHA-1 of the normalized class declaration text.
    body_hash: str = ""
    methods: list[ParsedMethod] = field(default_factory=list)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class ParsedFile:
    """Parse result for a single .java source file."""

    # Declared package name, or "" when there is no package declaration.
    package: str
    # Scoped identifiers of the file's import declarations, as written.
    imports: list[str]
    classes: list[ParsedClass]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _text(node) -> str:
|
|
62
|
+
return node.text.decode("utf-8")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _hash_node(node) -> str:
    # SHA-1 of the node's normalized source text (comments and whitespace
    # stripped by _normalize_java_bytes); stored as body_hash on parsed
    # classes/methods so cosmetic edits hash identically.
    return hashlib.sha1(_normalize_java_bytes(node.text).encode("utf-8")).hexdigest()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _normalize_java_bytes(source: bytes) -> str:
|
|
70
|
+
text = source.decode("utf-8", errors="ignore")
|
|
71
|
+
text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)
|
|
72
|
+
text = re.sub(r"//.*?$", "", text, flags=re.MULTILINE)
|
|
73
|
+
text = re.sub(r"\s+", " ", text).strip()
|
|
74
|
+
return text
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _node_type_name(node) -> str:
    """Best-effort type name for a tree-sitter type node.

    Returns the node's own text when it is already an identifier-like node;
    otherwise recurses depth-first into named children and returns the first
    identifier found (e.g. the base name of a generic type). Falls back to
    the node's stripped raw text. None yields "".
    """
    if node is None:
        return ""
    if node.type in {"type_identifier", "identifier", "scoped_identifier"}:
        return _text(node)
    for child in node.named_children:
        name = _node_type_name(child)
        if name:
            return name
    return _text(node).strip()
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _extract_modifiers_and_annotations(node) -> tuple[list[str], list[str]]:
    """Split a declaration's `modifiers` child into keywords and annotations.

    Returns (modifiers, annotations); annotation texts have their leading
    "@" removed. Declarations without a `modifiers` child yield two empty
    lists.
    """
    modifiers: list[str] = []
    annotations: list[str] = []
    for child in node.children:
        if child.type != "modifiers":
            continue
        for m in child.named_children:
            m_text = _text(m).strip()
            if not m_text:
                continue
            # In the Java grammar, annotations live inside the modifiers node.
            if m.type == "annotation" or m_text.startswith("@"):
                annotations.append(m_text.lstrip("@"))
            else:
                modifiers.append(m_text)
    return modifiers, annotations
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _arg_count(args_text: str) -> int:
|
|
107
|
+
args = args_text.strip()
|
|
108
|
+
if not args.startswith("(") or not args.endswith(")"):
|
|
109
|
+
return 0
|
|
110
|
+
inner = args[1:-1].strip()
|
|
111
|
+
if not inner:
|
|
112
|
+
return 0
|
|
113
|
+
return inner.count(",") + 1
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _extract_local_types(method_node) -> dict[str, str]:
    """Map local-variable names to their declared type text within a method.

    NOTE(review): relies on `Query.captures` returning (node, tag) pairs in
    source order (the pre-0.22 py-tree-sitter API) so each @name pairs with
    the most recently seen @type — verify against the pinned library version.
    """
    q = Query(
        JAVA_LANGUAGE,
        """
        (local_variable_declaration
          type: (_) @type
          declarator: (variable_declarator name: (identifier) @name))
        """,
    )
    captures = q.captures(method_node)
    locals_map: dict[str, str] = {}
    current_type = None
    for node, tag in captures:
        if tag == "type":
            current_type = _node_type_name(node)
        elif tag == "name" and current_type:
            # Redeclarations of the same name overwrite earlier entries.
            locals_map[_text(node)] = current_type
    return locals_map
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _extract_field_types(class_node) -> dict[str, str]:
    """Map field names to their declared type text within a class body.

    NOTE(review): like _extract_local_types, this pairs each @name with the
    most recently seen @type and assumes source-ordered capture pairs from
    the pre-0.22 py-tree-sitter `Query.captures` API — verify the version.
    """
    q = Query(
        JAVA_LANGUAGE,
        """
        (field_declaration
          type: (_) @type
          declarator: (variable_declarator name: (identifier) @name))
        """,
    )
    captures = q.captures(class_node)
    field_map: dict[str, str] = {}
    current_type = None
    for node, tag in captures:
        if tag == "type":
            current_type = _node_type_name(node)
        elif tag == "name" and current_type:
            field_map[_text(node)] = current_type
    return field_map
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _extract_parameter_types(params_node) -> list[str]:
    """Return the declared type text of each formal (or varargs) parameter.

    Accepts the `formal_parameters` node of a method/constructor; None
    (no parameters field) yields an empty list.
    """
    if params_node is None:
        return []
    q = Query(
        JAVA_LANGUAGE,
        """
        [
          (formal_parameter type: (_) @ptype)
          (spread_parameter type: (_) @ptype)
        ]
        """,
    )
    return [_node_type_name(node) for node, tag in q.captures(params_node) if tag == "ptype"]
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _extract_inheritance(class_node) -> tuple[str | None, list[str]]:
    """Return (superclass name or None, implemented interface names).

    Names are raw type texts (simple or scoped); FQCN resolution happens
    later in the indexer. Tries the `superclass`/`interfaces` fields first,
    then falls back to scanning named children for grammar variants that do
    not expose interfaces as a field.
    """
    extends_name = None
    interfaces: list[str] = []

    super_node = class_node.child_by_field_name("superclass")
    if super_node is not None:
        extends_name = _node_type_name(super_node)

    iface_node = class_node.child_by_field_name("interfaces")
    if iface_node is not None:
        type_query = Query(
            JAVA_LANGUAGE,
            """
            [
              (type_identifier) @t
              (scoped_type_identifier) @t
              (generic_type) @t
              (scoped_identifier) @t
            ]
            """,
        )
        interfaces = [_node_type_name(n) for n, tag in type_query.captures(iface_node) if tag == "t"]

    # Fallback for grammar variants where interfaces are not exposed as a field.
    if not interfaces:
        for child in class_node.named_children:
            if child.type in {"super_interfaces", "type_list"}:
                type_query = Query(
                    JAVA_LANGUAGE,
                    """
                    [
                      (type_identifier) @t
                      (scoped_type_identifier) @t
                      (generic_type) @t
                      (scoped_identifier) @t
                    ]
                    """,
                )
                interfaces.extend([_node_type_name(n) for n, tag in type_query.captures(child) if tag == "t"])

    return extends_name, interfaces
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def parse_java_source(source: bytes) -> ParsedFile:
    """Parse Java source bytes into a ParsedFile of classes, methods, calls.

    One tree-sitter pass: package and imports first, then every
    class_declaration, its methods/constructors, and the method invocations
    inside each body.
    NOTE(review): the class query matches nested classes too; their FQCN is
    computed as "<package>.<SimpleName>", and running the method query on an
    outer class node also captures nested-class methods, so those methods
    can be attributed to both classes — confirm whether this is intended.
    """
    tree = PARSER.parse(source)
    root = tree.root_node

    pkg_query = Query(JAVA_LANGUAGE, "(package_declaration (scoped_identifier) @pkg)")
    import_query = Query(JAVA_LANGUAGE, "(import_declaration (scoped_identifier) @imp)")
    cls_query = Query(
        JAVA_LANGUAGE,
        """
        (class_declaration
          name: (identifier) @class_name
          body: (class_body) @class_body) @class_decl
        """,
    )

    package_name = ""
    imports: list[str] = []

    # Only the first package capture is used (a file has at most one).
    for node, tag in pkg_query.captures(root):
        if tag == "pkg":
            package_name = _text(node)
            break

    for node, tag in import_query.captures(root):
        if tag == "imp":
            imports.append(_text(node))

    classes: list[ParsedClass] = []
    method_query = Query(
        JAVA_LANGUAGE,
        """
        (method_declaration
          type: (_) @return_type
          name: (identifier) @method_name
          parameters: (formal_parameters) @params
          body: (block) @body) @method_decl
        """,
    )
    ctor_query = Query(
        JAVA_LANGUAGE,
        """
        (constructor_declaration
          name: (identifier) @method_name
          parameters: (formal_parameters) @params
          body: (constructor_body) @body) @method_decl
        """,
    )
    call_query = Query(
        JAVA_LANGUAGE,
        """
        (method_invocation
          name: (identifier) @call_name
          arguments: (argument_list) @call_args) @call_inv
        """,
    )

    for node, tag in cls_query.captures(root):
        # The query also captures @class_name/@class_body; only the whole
        # declaration node is processed here.
        if tag != "class_decl":
            continue

        cls_name_node = node.child_by_field_name("name")
        if cls_name_node is None:
            continue
        cls_name = _text(cls_name_node)
        fqcn = f"{package_name}.{cls_name}" if package_name else cls_name
        cls_modifiers, cls_annotations = _extract_modifiers_and_annotations(node)
        extends_name, interface_names = _extract_inheritance(node)
        parsed_class = ParsedClass(
            name=cls_name,
            package=package_name,
            fqcn=fqcn,
            # tree-sitter positions are 0-based; stored 1-based.
            line=node.start_point[0] + 1,
            col=node.start_point[1] + 1,
            modifiers=cls_modifiers,
            annotations=cls_annotations,
            extends=extends_name,
            interfaces=interface_names,
            field_types=_extract_field_types(node),
            body_hash=_hash_node(node),
        )

        # Methods require a body (abstract/interface methods are skipped by
        # the query); constructors are folded into the same list.
        method_nodes = [n for n, t in method_query.captures(node) if t == "method_decl"]
        method_nodes.extend([n for n, t in ctor_query.captures(node) if t == "method_decl"])

        for m_node in method_nodes:
            m_name_node = m_node.child_by_field_name("name")
            m_type_node = m_node.child_by_field_name("type")
            m_params_node = m_node.child_by_field_name("parameters")
            if m_name_node is None:
                continue

            method_name = _text(m_name_node)
            # Constructors have no `type` field; use the class name instead.
            return_type = _text(m_type_node) if m_type_node else cls_name
            param_types = _extract_parameter_types(m_params_node)
            signature = f"{method_name}({','.join(param_types)})"
            modifiers, annotations = _extract_modifiers_and_annotations(m_node)
            parsed_method = ParsedMethod(
                name=method_name,
                signature=signature,
                return_type=return_type,
                modifiers=modifiers,
                annotations=annotations,
                parameter_types=param_types,
                line=m_node.start_point[0] + 1,
                col=m_node.start_point[1] + 1,
                body_hash=_hash_node(m_node),
                local_types=_extract_local_types(m_node),
            )

            body_node = m_node.child_by_field_name("body")
            if body_node is not None:
                # Regroup the flat capture stream by invocation node so the
                # name and argument-list captures of one call stay together.
                grouped: dict[object, dict[str, str]] = {}
                for c_node, c_tag in call_query.captures(body_node):
                    # Name/args captures hang directly off the invocation node.
                    inv_node = c_node if c_tag == "call_inv" else c_node.parent
                    grouped.setdefault(inv_node, {})[c_tag] = _text(c_node)
                for inv_node, capture_map in grouped.items():
                    name_text = capture_map.get("call_name")
                    if not name_text:
                        continue
                    receiver_node = inv_node.child_by_field_name("object")
                    receiver = _text(receiver_node) if receiver_node is not None else None
                    args = capture_map.get("call_args", "()")
                    parsed_method.calls.append(
                        ParsedCall(
                            name=name_text,
                            receiver=receiver,
                            arg_count=_arg_count(args),
                            line=inv_node.start_point[0] + 1,
                            col=inv_node.start_point[1] + 1,
                        )
                    )

            parsed_class.methods.append(parsed_method)

        classes.append(parsed_class)

    return ParsedFile(package=package_name, imports=imports, classes=classes)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class SymbolRef:
    """Bundle of the ids that locate one symbol across the store's tables.

    NOTE(review): not referenced by the other modules visible here — appears
    to be a convenience container for callers.
    """

    # All four ids are hex SHA-1 strings produced by the helpers below.
    symbol_id: str
    method_id: str
    class_id: str
    file_id: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def digest_bytes(payload: bytes) -> str:
    """Return the hex-encoded SHA-1 digest of *payload*."""
    hasher = hashlib.sha1()
    hasher.update(payload)
    return hasher.hexdigest()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def file_id(project_id: str, rel_path: str) -> str:
    """Stable file id: SHA-1 of "<project_id>:<rel_path>"."""
    key = f"{project_id}:{rel_path}".encode("utf-8")
    return hashlib.sha1(key).hexdigest()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def class_id(fqcn: str) -> str:
    """Stable class id: SHA-1 of the fully-qualified class name."""
    digest = hashlib.sha1(fqcn.encode("utf-8"))
    return digest.hexdigest()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def method_id(fqcn: str, signature: str) -> str:
    """Stable method id: SHA-1 of "<fqcn>#<signature>"."""
    key = "#".join((fqcn, signature))
    return hashlib.sha1(key.encode("utf-8")).hexdigest()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def symbol_id(kind: str, fqname: str) -> str:
    """Stable symbol id: SHA-1 of "<kind>:<fqname>"."""
    key = ":".join((kind, fqname))
    return hashlib.sha1(key.encode("utf-8")).hexdigest()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""MCP server layer."""
|