codespine 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codespine/__init__.py +4 -0
- codespine/analysis/__init__.py +1 -0
- codespine/analysis/community.py +75 -0
- codespine/analysis/context.py +24 -0
- codespine/analysis/coupling.py +119 -0
- codespine/analysis/deadcode.py +107 -0
- codespine/analysis/flow.py +77 -0
- codespine/analysis/impact.py +90 -0
- codespine/cli.py +424 -0
- codespine/config.py +22 -0
- codespine/db/__init__.py +1 -0
- codespine/db/schema.py +82 -0
- codespine/db/store.py +313 -0
- codespine/diff/__init__.py +1 -0
- codespine/diff/branch_diff.py +163 -0
- codespine/indexer/__init__.py +1 -0
- codespine/indexer/call_resolver.py +137 -0
- codespine/indexer/engine.py +305 -0
- codespine/indexer/java_parser.py +350 -0
- codespine/indexer/symbol_builder.py +32 -0
- codespine/mcp/__init__.py +1 -0
- codespine/mcp/server.py +67 -0
- codespine/noise/__init__.py +1 -0
- codespine/noise/blocklist.py +37 -0
- codespine/search/__init__.py +1 -0
- codespine/search/bm25.py +52 -0
- codespine/search/fuzzy.py +36 -0
- codespine/search/hybrid.py +80 -0
- codespine/search/rrf.py +9 -0
- codespine/search/vector.py +113 -0
- codespine/watch/__init__.py +1 -0
- codespine/watch/watcher.py +38 -0
- codespine-0.1.1.dist-info/METADATA +336 -0
- codespine-0.1.1.dist-info/RECORD +39 -0
- codespine-0.1.1.dist-info/WHEEL +5 -0
- codespine-0.1.1.dist-info/entry_points.txt +3 -0
- codespine-0.1.1.dist-info/licenses/LICENSE +21 -0
- codespine-0.1.1.dist-info/top_level.txt +2 -0
- gindex.py +10 -0
codespine/db/store.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import kuzu
|
|
11
|
+
|
|
12
|
+
from codespine.config import SETTINGS
|
|
13
|
+
from codespine.db.schema import ensure_schema
|
|
14
|
+
|
|
15
|
+
LOGGER = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class GraphStore:
    """Persistence facade over the Kuzu graph database holding the code graph.

    Owns the database handle and connection, and exposes upsert/query helpers
    for the Project/File/Class/Method/Symbol node types and the relationships
    between them (HAS_METHOD, DECLARES, CALLS, IN_COMMUNITY, IN_FLOW,
    CO_CHANGED_WITH, ...).
    """

    # When True, schema DDL is skipped on startup (only writers run it).
    read_only: bool = False

    def __post_init__(self) -> None:
        """Open the database at SETTINGS.db_path and ensure the schema exists for writers."""
        # 1 GiB buffer pool — NOTE(review): presumably sized for large indexes;
        # confirm against typical deployment memory.
        self.db = kuzu.Database(SETTINGS.db_path, buffer_pool_size=1024**3)
        self.conn = kuzu.Connection(self.db)
        if not self.read_only:
            ensure_schema(self.conn)

    @staticmethod
    def stable_id(*parts: str) -> str:
        """Return a deterministic hex id derived from joining *parts* with '::'.

        sha1 is used for stable identity only, not for security.
        """
        raw = "::".join(parts)
        return hashlib.sha1(raw.encode("utf-8")).hexdigest()

    def execute(self, query: str, params: dict[str, Any] | None = None):
        """Run a Cypher query with optional parameters; returns the raw kuzu result."""
        return self.conn.execute(query, params or {})

    @contextmanager
    def transaction(self):
        """Wrap the body in BEGIN/COMMIT, rolling back on any exception.

        If BEGIN itself fails (e.g. a transaction is already active or the
        backend rejects it), the body still runs without transactional
        guarantees — a deliberate best-effort fallback.
        """
        tx_started = True
        try:
            self.execute("BEGIN TRANSACTION")
        except Exception:
            # Fall back to auto-commit mode; do not fail the caller's work.
            tx_started = False
        try:
            yield
            if tx_started:
                self.execute("COMMIT")
        except Exception:
            if tx_started:
                self.execute("ROLLBACK")
            raise

    def clear_project(self, project_id: str) -> None:
        """Delete every Symbol/Method/Class/File node belonging to a project.

        The Project node itself is preserved. Deletion proceeds leaf-first
        (Symbol, Method, Class, then File) so the join filters still have
        their parent nodes available.
        """
        # Keep project node and rebuild attached graph artifacts.
        self.execute(
            """
            MATCH (s:Symbol), (f:File)
            WHERE s.file_id = f.id AND f.project_id = $pid
            DETACH DELETE s
            """,
            {"pid": project_id},
        )
        self.execute(
            """
            MATCH (m:Method), (c:Class), (f:File)
            WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
            DETACH DELETE m
            """,
            {"pid": project_id},
        )
        self.execute(
            """
            MATCH (c:Class), (f:File)
            WHERE c.file_id = f.id AND f.project_id = $pid
            DETACH DELETE c
            """,
            {"pid": project_id},
        )
        self.execute(
            """
            MATCH (f:File) WHERE f.project_id = $pid
            DETACH DELETE f
            """,
            {"pid": project_id},
        )

    def upsert_project(self, project_id: str, path: str) -> None:
        """Create or update the Project node; language is hard-coded to 'java'."""
        self.execute(
            "MERGE (p:Project {id: $id}) SET p.path = $path, p.language = 'java'",
            {"id": project_id, "path": path},
        )

    def project_file_hashes(self, project_id: str) -> dict[str, dict[str, str]]:
        """Return {file_id: {"path": ..., "hash": ...}} for every file in the project.

        Used by the incremental indexer to detect changed files.
        """
        recs = self.query_records(
            """
            MATCH (f:File)
            WHERE f.project_id = $pid
            RETURN f.id as id, f.path as path, f.hash as hash
            """,
            {"pid": project_id},
        )
        return {r["id"]: {"path": r.get("path", ""), "hash": r.get("hash", "")} for r in recs}

    def clear_file(self, file_id: str) -> None:
        """Delete one file and its attached symbols/methods/classes (leaf-first)."""
        self.execute(
            """
            MATCH (s:Symbol) WHERE s.file_id = $fid
            DETACH DELETE s
            """,
            {"fid": file_id},
        )
        self.execute(
            """
            MATCH (m:Method), (c:Class)
            WHERE m.class_id = c.id AND c.file_id = $fid
            DETACH DELETE m
            """,
            {"fid": file_id},
        )
        self.execute(
            """
            MATCH (c:Class) WHERE c.file_id = $fid
            DETACH DELETE c
            """,
            {"fid": file_id},
        )
        self.execute(
            """
            MATCH (f:File {id: $fid})
            DETACH DELETE f
            """,
            {"fid": file_id},
        )

    def list_methods(self) -> list[dict[str, Any]]:
        """Return every indexed method joined with its declaring class's FQCN."""
        return self.query_records(
            """
            MATCH (m:Method), (c:Class)
            WHERE m.class_id = c.id
            RETURN m.id as method_id, m.name as name, m.signature as signature, c.fqcn as class_fqcn
            """
        )

    def upsert_file(self, file_id: str, path: str, project_id: str, is_test: bool, digest: str) -> None:
        """Create or update a File node; *digest* is the content hash used for change detection."""
        self.execute(
            """
            MERGE (f:File {id: $id})
            SET f.path = $path, f.project_id = $project_id, f.is_test = $is_test, f.hash = $hash
            """,
            {
                "id": file_id,
                "path": path,
                "project_id": project_id,
                "is_test": is_test,
                "hash": digest,
            },
        )

    def upsert_class(self, class_id: str, fqcn: str, name: str, package: str, file_id: str) -> None:
        """Create or update a Class node linked (by file_id property) to its file."""
        self.execute(
            """
            MERGE (c:Class {id: $id})
            SET c.fqcn = $fqcn, c.name = $name, c.package = $package, c.file_id = $file_id
            """,
            {
                "id": class_id,
                "fqcn": fqcn,
                "name": name,
                "package": package,
                "file_id": file_id,
            },
        )

    def upsert_method(
        self,
        method_id: str,
        class_id: str,
        name: str,
        signature: str,
        return_type: str,
        modifiers: list[str],
        is_constructor: bool,
        is_test: bool,
    ) -> None:
        """Create or update a Method node and its HAS_METHOD edge from the class."""
        self.execute(
            """
            MERGE (m:Method {id: $id})
            SET m.class_id = $class_id,
                m.name = $name,
                m.signature = $signature,
                m.return_type = $return_type,
                m.modifiers = $modifiers,
                m.is_constructor = $is_constructor,
                m.is_test = $is_test
            """,
            {
                "id": method_id,
                "class_id": class_id,
                "name": name,
                "signature": signature,
                "return_type": return_type,
                "modifiers": modifiers,
                "is_constructor": is_constructor,
                "is_test": is_test,
            },
        )
        self.execute(
            "MATCH (c:Class {id: $cid}), (m:Method {id: $mid}) MERGE (c)-[:HAS_METHOD]->(m)",
            {"cid": class_id, "mid": method_id},
        )

    def upsert_symbol(
        self,
        symbol_id: str,
        kind: str,
        name: str,
        fqname: str,
        file_id: str,
        line: int,
        col: int,
        embedding: list[float] | None,
    ) -> None:
        """Create or update a search Symbol node (optionally with an embedding)
        and its DECLARES edge from the owning file.
        """
        self.execute(
            """
            MERGE (s:Symbol {id: $id})
            SET s.kind = $kind,
                s.name = $name,
                s.fqname = $fqname,
                s.file_id = $file_id,
                s.line = $line,
                s.col = $col,
                s.embedding = $embedding
            """,
            {
                "id": symbol_id,
                "kind": kind,
                "name": name,
                "fqname": fqname,
                "file_id": file_id,
                "line": line,
                "col": col,
                "embedding": embedding,
            },
        )
        self.execute(
            "MATCH (f:File {id: $fid}), (s:Symbol {id: $sid}) MERGE (f)-[:DECLARES]->(s)",
            {"fid": file_id, "sid": symbol_id},
        )

    def add_call(self, source_id: str, target_id: str, confidence: float, reason: str) -> None:
        """Record a CALLS edge between two methods with resolver confidence metadata.

        No-op if either endpoint id does not exist (MATCH finds nothing).
        """
        self.execute(
            """
            MATCH (source:Method {id: $source_id}), (target:Method {id: $target_id})
            MERGE (source)-[:CALLS {confidence: $confidence, reason: $reason}]->(target)
            """,
            {
                "source_id": source_id,
                "target_id": target_id,
                "confidence": confidence,
                "reason": reason,
            },
        )

    def add_reference(self, rel: str, src_label: str, src_id: str, dst_label: str, dst_id: str, confidence: float) -> None:
        """Record a typed reference edge between two nodes.

        The relationship name is interpolated into the query, so it is
        whitelisted first to keep the query safe; unknown names are ignored.
        """
        if rel not in {"REFERENCES_TYPE", "IMPLEMENTS", "OVERRIDES"}:
            return
        query = (
            f"MATCH (s:{src_label} {{id: $src_id}}), (d:{dst_label} {{id: $dst_id}}) "
            f"MERGE (s)-[:{rel} {{confidence: $confidence}}]->(d)"
        )
        self.execute(query, {"src_id": src_id, "dst_id": dst_id, "confidence": confidence})

    def set_community(self, community_id: str, label: str, cohesion: float, symbol_ids: list[str]) -> None:
        """Create or update a Community node and attach the given symbols to it."""
        self.execute(
            "MERGE (c:Community {id: $id}) SET c.label = $label, c.cohesion = $cohesion",
            {"id": community_id, "label": label, "cohesion": cohesion},
        )
        # One MERGE per symbol; fine for community sizes produced by analysis.
        for sid in symbol_ids:
            self.execute(
                "MATCH (s:Symbol {id: $sid}), (c:Community {id: $cid}) MERGE (s)-[:IN_COMMUNITY]->(c)",
                {"sid": sid, "cid": community_id},
            )

    def set_flow(self, flow_id: str, entry_symbol_id: str, kind: str, symbols_at_depth: list[tuple[str, int]]) -> None:
        """Create or update a Flow node and link member symbols with their call depth."""
        self.execute(
            "MERGE (f:Flow {id: $id}) SET f.entry_symbol_id = $entry, f.kind = $kind",
            {"id": flow_id, "entry": entry_symbol_id, "kind": kind},
        )
        for sid, depth in symbols_at_depth:
            self.execute(
                "MATCH (s:Symbol {id: $sid}), (f:Flow {id: $fid}) MERGE (s)-[:IN_FLOW {depth: $depth}]->(f)",
                {"sid": sid, "fid": flow_id, "depth": int(depth)},
            )

    def upsert_coupling(self, file_a: str, file_b: str, strength: float, cochanges: int, months: int) -> None:
        """Record a directed CO_CHANGED_WITH edge from git co-change analysis."""
        self.execute(
            """
            MATCH (a:File {id: $a}), (b:File {id: $b})
            MERGE (a)-[:CO_CHANGED_WITH {strength: $strength, cochanges: $cochanges, months: $months}]->(b)
            """,
            {
                "a": file_a,
                "b": file_b,
                "strength": strength,
                "cochanges": int(cochanges),
                "months": int(months),
            },
        )

    def query_records(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
        """Run a query and return rows as plain dicts.

        Goes through a pandas DataFrame and a JSON round-trip so values are
        plain JSON-serializable Python types rather than backend objects.
        """
        frame = self.execute(query, params or {}).get_as_df()
        if frame.empty:
            return []
        return json.loads(frame.to_json(orient="records"))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Branch diff layer."""
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
import tempfile
|
|
10
|
+
|
|
11
|
+
import tree_sitter_java as tsjava
|
|
12
|
+
from tree_sitter import Language, Parser, Query
|
|
13
|
+
|
|
14
|
+
from codespine.indexer.java_parser import parse_java_source
|
|
15
|
+
|
|
16
|
+
JAVA_LANGUAGE = Language(tsjava.language())
|
|
17
|
+
PARSER = Parser(JAVA_LANGUAGE)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _text(node) -> str:
|
|
21
|
+
return node.text.decode("utf-8")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _hash_text(text: str) -> str:
    """Return the sha1 hex digest of the normalized form of *text*.

    Normalization strips comments/whitespace first so the hash tracks
    semantic edits rather than formatting.
    """
    normalized = _normalize_java_snippet(text)
    digest = hashlib.sha1(normalized.encode("utf-8"))
    return digest.hexdigest()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _normalize_java_snippet(text: str) -> str:
|
|
29
|
+
"""Normalize formatting/comments so branch diff emphasizes semantic edits."""
|
|
30
|
+
text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)
|
|
31
|
+
text = re.sub(r"//.*?$", "", text, flags=re.MULTILINE)
|
|
32
|
+
text = re.sub(r"\s+", " ", text).strip()
|
|
33
|
+
return text
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _method_hashes(source: bytes) -> dict[str, dict]:
    """Map every method/constructor in a Java buffer to its normalized hash.

    Keys are "name(params)" signatures; values carry the normalized body
    hash and the 1-based start/end line of the declaration.

    NOTE(review): iterating Query.captures as (node, tag) pairs matches the
    pre-0.22 py-tree-sitter API — confirm against the pinned version.
    """
    tree = PARSER.parse(source)
    root = tree.root_node
    method_query = Query(
        JAVA_LANGUAGE,
        """
        [
          (method_declaration
            name: (identifier) @name
            parameters: (formal_parameters) @params) @decl
          (constructor_declaration
            name: (identifier) @name
            parameters: (formal_parameters) @params) @decl
        ]
        """,
    )
    methods: dict[str, dict] = {}
    grouped: dict[object, dict[str, str]] = {}
    for node, tag in method_query.captures(root):
        # Group captures under their declaration node so the name/params/decl
        # for one method end up in the same record ("name" and "params" are
        # children of the "decl" node).
        key_node = node if tag == "decl" else node.parent
        grouped.setdefault(key_node, {})[tag] = _text(node)

    for node, capture in grouped.items():
        name = capture.get("name")
        params = capture.get("params", "()")
        if not name:
            # Defensive: skip declarations whose name capture is missing.
            continue
        signature = f"{name}{params}"
        methods[signature] = {
            "hash": _hash_text(_text(node)),
            # tree-sitter points are 0-based; convert to 1-based lines.
            "line_start": node.start_point[0] + 1,
            "line_end": node.end_point[0] + 1,
        }
    return methods
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _class_hashes(source: bytes) -> dict[str, str]:
    """Map each class name in a Java buffer to the normalized hash of its body.

    NOTE(review): same pre-0.22 py-tree-sitter captures API assumption as
    _method_hashes — confirm against the pinned version.
    """
    tree = PARSER.parse(source)
    root = tree.root_node
    class_query = Query(
        JAVA_LANGUAGE,
        """
        (class_declaration
          name: (identifier) @name) @decl
        """,
    )
    grouped: dict[object, dict[str, str]] = {}
    for node, tag in class_query.captures(root):
        # Attach the "name" capture to its parent class_declaration node.
        key_node = node if tag == "decl" else node.parent
        grouped.setdefault(key_node, {})[tag] = _text(node)
    out: dict[str, str] = {}
    for node, capture in grouped.items():
        name = capture.get("name")
        if name:
            out[name] = _hash_text(_text(node))
    return out
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _symbol_manifest(repo_path: str) -> dict[str, dict]:
    """Walk a checkout and build a symbol manifest for every Java file.

    Keys are "class:<fqcn>" and "method:<fqcn>#<signature>"; values hold
    kind, relative file path, normalized hash, and line information, which
    compare_branches later diffs between two refs.
    """
    manifest: dict[str, dict] = {}
    for root, _, files in os.walk(repo_path):
        # Skip VCS metadata and common build-output directories.
        # NOTE(review): substring match also skips any path merely containing
        # these fragments (e.g. ".github", "outline") — confirm intended.
        if any(skip in root for skip in [".git", "target", "build", "out"]):
            continue
        for f in files:
            if not f.endswith(".java"):
                continue
            path = os.path.join(root, f)
            rel = os.path.relpath(path, repo_path)
            with open(path, "rb") as fp:
                source = fp.read()
            parsed = parse_java_source(source)
            method_hashes = _method_hashes(source)
            class_hashes = _class_hashes(source)
            for cls in parsed.classes:
                cls_key = f"class:{cls.fqcn}"
                manifest[cls_key] = {
                    "kind": "Class",
                    "file": rel,
                    "name": cls.fqcn,
                    # Prefer the tree-sitter hash; fall back to the parser's
                    # own body hash when the class wasn't captured by name.
                    "hash": class_hashes.get(cls.name, cls.body_hash),
                    "line_start": cls.line,
                }
                for m in cls.methods:
                    m_key = f"method:{cls.fqcn}#{m.signature}"
                    # Try to match by "name(paramTypes)" first, then by the
                    # parser's signature string; empty dict if neither hits.
                    mh = method_hashes.get(f"{m.name}({','.join(m.parameter_types)})") or method_hashes.get(m.signature) or {}
                    manifest[m_key] = {
                        "kind": "Method",
                        "file": rel,
                        "name": m.signature,
                        "class": cls.fqcn,
                        "hash": m.body_hash or mh.get("hash"),
                        "line_start": mh.get("line_start", m.line),
                        "line_end": mh.get("line_end", m.line),
                    }
    return manifest
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def compare_branches(repo_path: str, base_ref: str, head_ref: str) -> dict:
    """Diff the Java symbol manifests of two git refs of *repo_path*.

    Materializes each ref in a detached git worktree under a temp dir,
    builds a symbol manifest for both, and returns added / removed /
    modified symbol metadata. Worktrees and the temp dir are always torn
    down, even on failure.
    """
    temp_dir = tempfile.mkdtemp(prefix="codespine-diff-")
    base_dir = os.path.join(temp_dir, "base")
    head_dir = os.path.join(temp_dir, "head")

    try:
        # check=True: a bad ref or dirty worktree state should fail loudly here.
        subprocess.run(["git", "-C", repo_path, "worktree", "add", "--detach", base_dir, base_ref], check=True, capture_output=True)
        subprocess.run(["git", "-C", repo_path, "worktree", "add", "--detach", head_dir, head_ref], check=True, capture_output=True)

        base_manifest = _symbol_manifest(base_dir)
        head_manifest = _symbol_manifest(head_dir)

        added = sorted(set(head_manifest) - set(base_manifest))
        removed = sorted(set(base_manifest) - set(head_manifest))

        modified = []
        for key in sorted(set(base_manifest) & set(head_manifest)):
            # Canonical JSON comparison detects any metadata change
            # (hash, file move, line shift), not just body edits.
            if json.dumps(base_manifest[key], sort_keys=True) != json.dumps(head_manifest[key], sort_keys=True):
                modified.append(key)

        return {
            "base": base_ref,
            "head": head_ref,
            "added": [head_manifest[k] for k in added],
            "removed": [base_manifest[k] for k in removed],
            "modified": [head_manifest[k] for k in modified],
        }
    finally:
        # check=False: best-effort cleanup must not mask the original error.
        subprocess.run(["git", "-C", repo_path, "worktree", "remove", "--force", base_dir], check=False, capture_output=True)
        subprocess.run(["git", "-C", repo_path, "worktree", "remove", "--force", head_dir], check=False, capture_output=True)
        shutil.rmtree(temp_dir, ignore_errors=True)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Indexing layer."""
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
|
|
5
|
+
from codespine.noise.blocklist import NOISE_METHOD_NAMES
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _simple_type_name(type_name: str | None) -> str:
|
|
9
|
+
if not type_name:
|
|
10
|
+
return ""
|
|
11
|
+
base = type_name.strip().replace("[]", "")
|
|
12
|
+
return base.split(".")[-1]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _resolve_type_candidates(type_name: str | None, context: dict, class_catalog: dict[str, list[str]]) -> list[str]:
    """Best-effort resolution of a type reference to candidate FQCNs.

    Candidates are gathered in priority order: the raw name when it is
    already qualified, matching imports, a same-package guess, and finally
    any indexed classes sharing the simple name. The result is de-duplicated
    preserving first-seen order, with empty entries dropped.
    """
    if not type_name:
        return []
    raw = type_name.strip()
    simple = _simple_type_name(raw)

    candidates: list[str] = []
    if "." in raw:
        # Already looks fully qualified — trust it first.
        candidates.append(raw)
    suffix = f".{simple}"
    candidates.extend(imp for imp in (context.get("imports", []) or []) if imp.endswith(suffix))
    package = context.get("package", "")
    if package:
        # Unqualified names may live in the declaring file's own package.
        candidates.append(f"{package}.{simple}")
    candidates.extend(class_catalog.get(simple, []))

    # dict.fromkeys keeps insertion order while removing duplicates.
    return [c for c in dict.fromkeys(candidates) if c]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def resolve_calls(
    method_catalog: dict[str, dict],
    calls: dict[str, list],
    method_context: dict[str, dict],
    class_catalog: dict[str, list[str]],
) -> list[tuple[str, str, float, str]]:
    """Resolve call names to known method ids.

    Returns tuples: (source_method_id, target_method_id, confidence, reason)

    Resolution tiers, in order, with decreasing confidence:
      1. receiver-based (this / local var / field / raw type name)  -> 1.0 or 0.8
      2. same-class method with matching name and arity             -> 1.0
      3. global name+arity fallback, preferring same-package hits   -> 1.0 if unique, else 0.5
    Ambiguous matches fan out one edge per candidate target.
    """
    # Indexes: (method name, arity) -> candidate method ids, globally and per class.
    name_arity_to_method_ids: dict[tuple[str, int], list[str]] = defaultdict(list)
    class_method_index: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
    for method_id, meta in method_catalog.items():
        key = (meta["name"], int(meta["param_count"]))
        name_arity_to_method_ids[key].append(method_id)
        class_method_index[meta["class_fqcn"]][key].append(method_id)

    edges: list[tuple[str, str, float, str]] = []
    for source_id, call_sites in calls.items():
        src_meta = method_catalog.get(source_id, {})
        src_ctx = method_context.get(source_id, {})
        src_class = src_meta.get("class_fqcn", "")
        local_types = src_ctx.get("local_types", {}) or {}
        field_types = src_ctx.get("field_types", {}) or {}

        for call in call_sites:
            call_name = call.name
            # Drop high-noise names (getters, Object methods, ...) up front.
            if call_name in NOISE_METHOD_NAMES:
                continue

            key = (call_name, int(call.arg_count))
            targets: list[str] = []
            confidence = 0.5
            reason = "fuzzy_name_ambiguous"

            # Tier 1: resolve through the receiver expression when present.
            receiver = (call.receiver or "").strip() if getattr(call, "receiver", None) else ""
            if receiver:
                receiver_type = None
                receiver_is_this = False
                if receiver == "this":
                    receiver_type = src_class
                    receiver_is_this = True
                elif receiver in local_types:
                    receiver_type = local_types[receiver]
                elif receiver in field_types:
                    receiver_type = field_types[receiver]
                else:
                    # Treat the receiver text itself as a type name
                    # (covers static calls like Foo.bar()).
                    receiver_type = receiver

                receiver_fqcn_candidates = _resolve_type_candidates(receiver_type, src_ctx, class_catalog)

                for fqcn in receiver_fqcn_candidates:
                    targets.extend(class_method_index.get(fqcn, {}).get(key, []))

                if targets:
                    confidence = 1.0 if receiver_is_this else 0.8
                    reason = "receiver_this_exact" if receiver_is_this else "receiver_method_match"

            # Tier 2: unqualified call to a method of the same class.
            if not targets:
                in_class = class_method_index.get(src_class, {}).get(key, [])
                if in_class:
                    targets = in_class
                    confidence = 1.0
                    reason = "intra_class_exact"

            # Tier 3: global name+arity fallback.
            if not targets:
                # Prefer same-package candidates before global fallback.
                src_pkg = src_ctx.get("package", "")
                same_pkg = []
                for mid in name_arity_to_method_ids.get(key, []):
                    fqcn = method_catalog.get(mid, {}).get("class_fqcn", "")
                    if src_pkg and fqcn.startswith(f"{src_pkg}."):
                        same_pkg.append(mid)
                targets = same_pkg or name_arity_to_method_ids.get(key, [])
                if len(targets) == 1:
                    confidence = 1.0
                    reason = "exact_name_arity_unique"
                elif len(targets) > 1:
                    confidence = 0.5
                    reason = "fuzzy_name_arity_ambiguous"

            if not targets:
                continue
            for target_id in targets:
                edges.append((source_id, target_id, confidence, reason))

    return edges
|