codespine 0.1.5__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.1.5 → codespine-0.1.8}/PKG-INFO +1 -1
- {codespine-0.1.5 → codespine-0.1.8}/codespine/__init__.py +1 -1
- {codespine-0.1.5 → codespine-0.1.8}/codespine/cli.py +56 -21
- {codespine-0.1.5 → codespine-0.1.8}/codespine/indexer/call_resolver.py +23 -13
- {codespine-0.1.5 → codespine-0.1.8}/codespine/indexer/engine.py +80 -30
- codespine-0.1.8/codespine/indexer/symbol_builder.py +35 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/SOURCES.txt +1 -0
- {codespine-0.1.5 → codespine-0.1.8}/pyproject.toml +1 -1
- codespine-0.1.8/tests/test_call_resolver.py +43 -0
- codespine-0.1.8/tests/test_multimodule_index.py +55 -0
- codespine-0.1.5/codespine/indexer/symbol_builder.py +0 -32
- codespine-0.1.5/tests/test_call_resolver.py +0 -30
- {codespine-0.1.5 → codespine-0.1.8}/LICENSE +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/README.md +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/community.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/context.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/coupling.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/flow.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/impact.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/config.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/db/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/db/schema.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/db/store.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/diff/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/indexer/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/mcp/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/mcp/server.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/noise/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/noise/blocklist.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/search/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/search/bm25.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/search/fuzzy.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/search/hybrid.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/search/rrf.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/search/vector.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/watch/__init__.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine/watch/watcher.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/gindex.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/setup.cfg +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/tests/test_java_parser.py +0 -0
- {codespine-0.1.5 → codespine-0.1.8}/tests/test_search_ranking.py +0 -0
|
@@ -78,7 +78,8 @@ def main() -> None:
|
|
|
78
78
|
@main.command()
|
|
79
79
|
@click.argument("path", type=click.Path(exists=True))
|
|
80
80
|
@click.option("--full/--incremental", default=True, show_default=True)
|
|
81
|
-
|
|
81
|
+
@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
|
|
82
|
+
def analyse(path: str, full: bool, deep: bool) -> None:
|
|
82
83
|
"""Index a local Java project."""
|
|
83
84
|
if _is_running():
|
|
84
85
|
click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
|
|
@@ -89,6 +90,7 @@ def analyse(path: str, full: bool) -> None:
|
|
|
89
90
|
store = GraphStore(read_only=False)
|
|
90
91
|
indexer = JavaIndexer(store)
|
|
91
92
|
parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0}
|
|
93
|
+
call_state = {"shown": False, "count": 0, "last_ts": 0.0}
|
|
92
94
|
|
|
93
95
|
def _progress(event: str, payload: dict) -> None:
|
|
94
96
|
now = time.perf_counter()
|
|
@@ -119,6 +121,29 @@ def analyse(path: str, full: bool) -> None:
|
|
|
119
121
|
if event == "resolve_calls_start" and parse_state["shown"]:
|
|
120
122
|
click.echo()
|
|
121
123
|
parse_state["shown"] = False
|
|
124
|
+
_phase("Tracing calls...", "running")
|
|
125
|
+
return
|
|
126
|
+
if event == "resolve_calls_start":
|
|
127
|
+
_phase("Tracing calls...", "running")
|
|
128
|
+
return
|
|
129
|
+
if event == "resolve_calls_progress":
|
|
130
|
+
call_state["count"] = int(payload.get("calls_resolved", 0))
|
|
131
|
+
if (now - call_state["last_ts"]) >= 0.25:
|
|
132
|
+
click.echo(f"\rTracing calls... {call_state['count']} resolved", nl=False)
|
|
133
|
+
call_state["shown"] = True
|
|
134
|
+
call_state["last_ts"] = now
|
|
135
|
+
return
|
|
136
|
+
if event == "resolve_calls_done":
|
|
137
|
+
if call_state["shown"]:
|
|
138
|
+
click.echo()
|
|
139
|
+
call_state["shown"] = False
|
|
140
|
+
_phase("Tracing calls...", f"{int(payload.get('calls_resolved', 0))} calls resolved")
|
|
141
|
+
return
|
|
142
|
+
if event == "resolve_types_start":
|
|
143
|
+
_phase("Analyzing types...", "running")
|
|
144
|
+
return
|
|
145
|
+
if event == "resolve_types_done":
|
|
146
|
+
_phase("Analyzing types...", f"{int(payload.get('type_relationships', 0))} type relationships")
|
|
122
147
|
return
|
|
123
148
|
|
|
124
149
|
result = indexer.index_project(abs_path, full=full, progress=_progress)
|
|
@@ -128,27 +153,37 @@ def analyse(path: str, full: bool) -> None:
|
|
|
128
153
|
_phase("Parsing code...", "0/0")
|
|
129
154
|
elif parse_state["indexed"] < parse_state["total"]:
|
|
130
155
|
_phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
|
|
131
|
-
_phase("Tracing calls...", f"{result.calls_resolved} calls resolved")
|
|
132
|
-
_phase("Analyzing types...", f"{result.type_relationships} type relationships")
|
|
133
|
-
|
|
134
|
-
communities = detect_communities(store)
|
|
135
|
-
_phase("Detecting communities...", f"{len(communities)} clusters found")
|
|
136
156
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
store
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
157
|
+
communities: list[dict] = []
|
|
158
|
+
flows: list[dict] = []
|
|
159
|
+
dead: list[dict] = []
|
|
160
|
+
coupling_pairs: list[dict] = []
|
|
161
|
+
|
|
162
|
+
should_run_deep = deep or result.files_found <= 1200
|
|
163
|
+
if should_run_deep:
|
|
164
|
+
communities = detect_communities(store)
|
|
165
|
+
_phase("Detecting communities...", f"{len(communities)} clusters found")
|
|
166
|
+
|
|
167
|
+
flows = trace_execution_flows(store)
|
|
168
|
+
_phase("Detecting execution flows...", f"{len(flows)} processes found")
|
|
169
|
+
|
|
170
|
+
dead = detect_dead_code(store, limit=500)
|
|
171
|
+
_phase("Finding dead code...", f"{len(dead)} unreachable symbols")
|
|
172
|
+
|
|
173
|
+
coupling_pairs = compute_coupling(
|
|
174
|
+
store,
|
|
175
|
+
abs_path,
|
|
176
|
+
result.project_id,
|
|
177
|
+
months=SETTINGS.default_coupling_months,
|
|
178
|
+
min_strength=SETTINGS.default_min_coupling_strength,
|
|
179
|
+
min_cochanges=SETTINGS.default_min_cochanges,
|
|
180
|
+
)
|
|
181
|
+
_phase("Analyzing git history...", f"{len(coupling_pairs)} coupled file pairs")
|
|
182
|
+
else:
|
|
183
|
+
_phase("Detecting communities...", "skipped (large repo; rerun with --deep)")
|
|
184
|
+
_phase("Detecting execution flows...", "skipped (large repo; rerun with --deep)")
|
|
185
|
+
_phase("Finding dead code...", "skipped (large repo; rerun with --deep)")
|
|
186
|
+
_phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
|
|
152
187
|
|
|
153
188
|
vector_count = store.query_records(
|
|
154
189
|
"""
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from collections import defaultdict
|
|
4
|
+
from typing import Iterator
|
|
4
5
|
|
|
5
6
|
from codespine.noise.blocklist import NOISE_METHOD_NAMES
|
|
6
7
|
|
|
8
|
+
MAX_FUZZY_TARGETS = 12
|
|
9
|
+
|
|
7
10
|
|
|
8
11
|
def _simple_type_name(type_name: str | None) -> str:
|
|
9
12
|
if not type_name:
|
|
@@ -53,23 +56,29 @@ def resolve_calls(
|
|
|
53
56
|
calls: dict[str, list],
|
|
54
57
|
method_context: dict[str, dict],
|
|
55
58
|
class_catalog: dict[str, list[str]],
|
|
56
|
-
) ->
|
|
59
|
+
) -> Iterator[tuple[str, str, float, str]]:
|
|
57
60
|
"""Resolve call names to known method ids.
|
|
58
61
|
|
|
59
|
-
|
|
62
|
+
Yields tuples: (source_method_id, target_method_id, confidence, reason)
|
|
60
63
|
"""
|
|
61
64
|
name_arity_to_method_ids: dict[tuple[str, int], list[str]] = defaultdict(list)
|
|
62
|
-
|
|
65
|
+
class_method_index_by_id: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
|
|
66
|
+
class_method_index_by_fqcn: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
|
|
63
67
|
for method_id, meta in method_catalog.items():
|
|
64
68
|
key = (meta["name"], int(meta["param_count"]))
|
|
65
69
|
name_arity_to_method_ids[key].append(method_id)
|
|
66
|
-
|
|
70
|
+
class_id = meta.get("class_id", "")
|
|
71
|
+
class_fqcn = meta.get("class_fqcn", "")
|
|
72
|
+
if class_id:
|
|
73
|
+
class_method_index_by_id[class_id][key].append(method_id)
|
|
74
|
+
if class_fqcn:
|
|
75
|
+
class_method_index_by_fqcn[class_fqcn][key].append(method_id)
|
|
67
76
|
|
|
68
|
-
edges: list[tuple[str, str, float, str]] = []
|
|
69
77
|
for source_id, call_sites in calls.items():
|
|
70
78
|
src_meta = method_catalog.get(source_id, {})
|
|
71
79
|
src_ctx = method_context.get(source_id, {})
|
|
72
|
-
|
|
80
|
+
src_class_id = src_meta.get("class_id", "") or src_ctx.get("class_id", "")
|
|
81
|
+
src_class_fqcn = src_meta.get("class_fqcn", "")
|
|
73
82
|
local_types = src_ctx.get("local_types", {}) or {}
|
|
74
83
|
field_types = src_ctx.get("field_types", {}) or {}
|
|
75
84
|
|
|
@@ -88,7 +97,7 @@ def resolve_calls(
|
|
|
88
97
|
receiver_type = None
|
|
89
98
|
receiver_is_this = False
|
|
90
99
|
if receiver == "this":
|
|
91
|
-
receiver_type =
|
|
100
|
+
receiver_type = src_class_fqcn
|
|
92
101
|
receiver_is_this = True
|
|
93
102
|
elif receiver in local_types:
|
|
94
103
|
receiver_type = local_types[receiver]
|
|
@@ -100,14 +109,14 @@ def resolve_calls(
|
|
|
100
109
|
receiver_fqcn_candidates = _resolve_type_candidates(receiver_type, src_ctx, class_catalog)
|
|
101
110
|
|
|
102
111
|
for fqcn in receiver_fqcn_candidates:
|
|
103
|
-
targets.extend(
|
|
112
|
+
targets.extend(class_method_index_by_fqcn.get(fqcn, {}).get(key, []))
|
|
104
113
|
|
|
105
114
|
if targets:
|
|
106
115
|
confidence = 1.0 if receiver_is_this else 0.8
|
|
107
116
|
reason = "receiver_this_exact" if receiver_is_this else "receiver_method_match"
|
|
108
117
|
|
|
109
118
|
if not targets:
|
|
110
|
-
in_class =
|
|
119
|
+
in_class = class_method_index_by_id.get(src_class_id, {}).get(key, [])
|
|
111
120
|
if in_class:
|
|
112
121
|
targets = in_class
|
|
113
122
|
confidence = 1.0
|
|
@@ -126,12 +135,13 @@ def resolve_calls(
|
|
|
126
135
|
confidence = 1.0
|
|
127
136
|
reason = "exact_name_arity_unique"
|
|
128
137
|
elif len(targets) > 1:
|
|
138
|
+
if len(targets) > MAX_FUZZY_TARGETS:
|
|
139
|
+
# Avoid exploding low-confidence edges in large repos.
|
|
140
|
+
continue
|
|
129
141
|
confidence = 0.5
|
|
130
142
|
reason = "fuzzy_name_arity_ambiguous"
|
|
131
143
|
|
|
132
144
|
if not targets:
|
|
133
145
|
continue
|
|
134
|
-
for target_id in targets:
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
return edges
|
|
146
|
+
for target_id in set(targets):
|
|
147
|
+
yield source_id, target_id, confidence, reason
|
|
@@ -70,6 +70,7 @@ class JavaIndexer:
|
|
|
70
70
|
method_calls: dict[str, list] = {}
|
|
71
71
|
method_context: dict[str, dict] = {}
|
|
72
72
|
class_catalog: dict[str, list[str]] = self._existing_class_catalog(project_id) if not full else {}
|
|
73
|
+
fqcn_to_class_ids: dict[str, list[str]] = self._existing_class_ids_by_fqcn(project_id) if not full else {}
|
|
73
74
|
class_meta: dict[str, dict] = {}
|
|
74
75
|
class_methods: dict[str, dict[str, str]] = self._existing_class_methods(project_id) if not full else {}
|
|
75
76
|
|
|
@@ -84,6 +85,7 @@ class JavaIndexer:
|
|
|
84
85
|
for file_path in to_reindex:
|
|
85
86
|
rel_path = os.path.relpath(file_path, root_path)
|
|
86
87
|
is_test = "src/test/java" in file_path.replace("\\", "/")
|
|
88
|
+
scope = self._scope_from_rel_path(rel_path)
|
|
87
89
|
|
|
88
90
|
with open(file_path, "rb") as f:
|
|
89
91
|
source = f.read()
|
|
@@ -96,20 +98,25 @@ class JavaIndexer:
|
|
|
96
98
|
self.store.upsert_file(f_id, file_path, project_id, is_test, digest_bytes(source))
|
|
97
99
|
|
|
98
100
|
for cls in parsed.classes:
|
|
99
|
-
c_id = class_id(cls.fqcn)
|
|
101
|
+
c_id = class_id(cls.fqcn, scope)
|
|
100
102
|
self.store.upsert_class(c_id, cls.fqcn, cls.name, cls.package, f_id)
|
|
101
103
|
class_catalog.setdefault(cls.name, [])
|
|
102
104
|
if cls.fqcn not in class_catalog[cls.name]:
|
|
103
105
|
class_catalog[cls.name].append(cls.fqcn)
|
|
104
|
-
|
|
106
|
+
fqcn_to_class_ids.setdefault(cls.fqcn, [])
|
|
107
|
+
if c_id not in fqcn_to_class_ids[cls.fqcn]:
|
|
108
|
+
fqcn_to_class_ids[cls.fqcn].append(c_id)
|
|
109
|
+
class_meta[c_id] = {
|
|
110
|
+
"fqcn": cls.fqcn,
|
|
105
111
|
"package": parsed.package,
|
|
106
112
|
"imports": parsed.imports,
|
|
107
113
|
"extends": cls.extends,
|
|
108
114
|
"interfaces": cls.interfaces,
|
|
115
|
+
"scope": scope,
|
|
109
116
|
}
|
|
110
|
-
class_methods.setdefault(
|
|
117
|
+
class_methods.setdefault(c_id, {})
|
|
111
118
|
|
|
112
|
-
cls_symbol_id = symbol_id("class", cls.fqcn)
|
|
119
|
+
cls_symbol_id = symbol_id("class", cls.fqcn, scope)
|
|
113
120
|
self.store.upsert_symbol(
|
|
114
121
|
symbol_id=cls_symbol_id,
|
|
115
122
|
kind="class",
|
|
@@ -123,7 +130,7 @@ class JavaIndexer:
|
|
|
123
130
|
classes_indexed += 1
|
|
124
131
|
|
|
125
132
|
for method in cls.methods:
|
|
126
|
-
m_id = method_id(cls.fqcn, method.signature)
|
|
133
|
+
m_id = method_id(cls.fqcn, method.signature, scope)
|
|
127
134
|
self.store.upsert_method(
|
|
128
135
|
method_id=m_id,
|
|
129
136
|
class_id=c_id,
|
|
@@ -136,7 +143,7 @@ class JavaIndexer:
|
|
|
136
143
|
)
|
|
137
144
|
|
|
138
145
|
fqname = f"{cls.fqcn}#{method.signature}"
|
|
139
|
-
m_symbol_id = symbol_id("method", fqname)
|
|
146
|
+
m_symbol_id = symbol_id("method", fqname, scope)
|
|
140
147
|
self.store.upsert_symbol(
|
|
141
148
|
symbol_id=m_symbol_id,
|
|
142
149
|
kind="method",
|
|
@@ -154,16 +161,18 @@ class JavaIndexer:
|
|
|
154
161
|
"name": method.name,
|
|
155
162
|
"param_count": len(method.parameter_types),
|
|
156
163
|
"class_fqcn": cls.fqcn,
|
|
164
|
+
"class_id": c_id,
|
|
157
165
|
}
|
|
158
166
|
method_calls[m_id] = method.calls
|
|
159
167
|
method_context[m_id] = {
|
|
168
|
+
"class_id": c_id,
|
|
160
169
|
"class_fqcn": cls.fqcn,
|
|
161
170
|
"local_types": method.local_types,
|
|
162
171
|
"field_types": cls.field_types,
|
|
163
172
|
"imports": parsed.imports,
|
|
164
173
|
"package": parsed.package,
|
|
165
174
|
}
|
|
166
|
-
class_methods[
|
|
175
|
+
class_methods[c_id][method.signature] = m_id
|
|
167
176
|
files_indexed += 1
|
|
168
177
|
self._emit(
|
|
169
178
|
progress,
|
|
@@ -177,10 +186,17 @@ class JavaIndexer:
|
|
|
177
186
|
for src, dst, confidence, reason in resolve_calls(method_catalog, method_calls, method_context, class_catalog):
|
|
178
187
|
self.store.add_call(src, dst, confidence, reason)
|
|
179
188
|
calls_resolved += 1
|
|
189
|
+
if calls_resolved % 2000 == 0:
|
|
190
|
+
self._emit(progress, "resolve_calls_progress", calls_resolved=calls_resolved)
|
|
180
191
|
self._emit(progress, "resolve_calls_done", calls_resolved=calls_resolved)
|
|
181
192
|
|
|
182
193
|
self._emit(progress, "resolve_types_start")
|
|
183
|
-
type_relationships += self._build_inheritance_edges(
|
|
194
|
+
type_relationships += self._build_inheritance_edges(
|
|
195
|
+
class_meta,
|
|
196
|
+
class_catalog,
|
|
197
|
+
class_methods,
|
|
198
|
+
fqcn_to_class_ids,
|
|
199
|
+
)
|
|
184
200
|
self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
|
|
185
201
|
|
|
186
202
|
return IndexResult(
|
|
@@ -223,7 +239,7 @@ class JavaIndexer:
|
|
|
223
239
|
"""
|
|
224
240
|
MATCH (m:Method), (c:Class), (f:File)
|
|
225
241
|
WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
|
|
226
|
-
RETURN m.id as method_id, m.name as name, m.signature as signature, c.fqcn as class_fqcn
|
|
242
|
+
RETURN m.id as method_id, m.name as name, m.signature as signature, c.fqcn as class_fqcn, c.id as class_id
|
|
227
243
|
""",
|
|
228
244
|
{"pid": project_id},
|
|
229
245
|
)
|
|
@@ -237,9 +253,30 @@ class JavaIndexer:
|
|
|
237
253
|
"name": r.get("name", ""),
|
|
238
254
|
"param_count": param_count,
|
|
239
255
|
"class_fqcn": r.get("class_fqcn", ""),
|
|
256
|
+
"class_id": r.get("class_id", ""),
|
|
240
257
|
}
|
|
241
258
|
return out
|
|
242
259
|
|
|
260
|
+
def _existing_class_ids_by_fqcn(self, project_id: str) -> dict[str, list[str]]:
|
|
261
|
+
recs = self.store.query_records(
|
|
262
|
+
"""
|
|
263
|
+
MATCH (c:Class), (f:File)
|
|
264
|
+
WHERE c.file_id = f.id AND f.project_id = $pid
|
|
265
|
+
RETURN c.fqcn as fqcn, c.id as class_id
|
|
266
|
+
""",
|
|
267
|
+
{"pid": project_id},
|
|
268
|
+
)
|
|
269
|
+
out: dict[str, list[str]] = {}
|
|
270
|
+
for r in recs:
|
|
271
|
+
fqcn = r.get("fqcn", "")
|
|
272
|
+
cid = r.get("class_id", "")
|
|
273
|
+
if not fqcn or not cid:
|
|
274
|
+
continue
|
|
275
|
+
out.setdefault(fqcn, [])
|
|
276
|
+
if cid not in out[fqcn]:
|
|
277
|
+
out[fqcn].append(cid)
|
|
278
|
+
return out
|
|
279
|
+
|
|
243
280
|
def _existing_class_catalog(self, project_id: str) -> dict[str, list[str]]:
|
|
244
281
|
recs = self.store.query_records(
|
|
245
282
|
"""
|
|
@@ -261,14 +298,17 @@ class JavaIndexer:
|
|
|
261
298
|
"""
|
|
262
299
|
MATCH (m:Method), (c:Class), (f:File)
|
|
263
300
|
WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
|
|
264
|
-
RETURN c.
|
|
301
|
+
RETURN c.id as class_id, m.signature as signature, m.id as method_id
|
|
265
302
|
""",
|
|
266
303
|
{"pid": project_id},
|
|
267
304
|
)
|
|
268
305
|
out: dict[str, dict[str, str]] = {}
|
|
269
306
|
for r in recs:
|
|
270
|
-
|
|
271
|
-
|
|
307
|
+
class_key = r.get("class_id")
|
|
308
|
+
if not class_key:
|
|
309
|
+
continue
|
|
310
|
+
out.setdefault(class_key, {})
|
|
311
|
+
out[class_key][r["signature"]] = r["method_id"]
|
|
272
312
|
return out
|
|
273
313
|
|
|
274
314
|
@staticmethod
|
|
@@ -300,34 +340,34 @@ class JavaIndexer:
|
|
|
300
340
|
class_meta: dict[str, dict],
|
|
301
341
|
class_catalog: dict[str, list[str]],
|
|
302
342
|
class_methods: dict[str, dict[str, str]],
|
|
343
|
+
fqcn_to_class_ids: dict[str, list[str]],
|
|
303
344
|
) -> int:
|
|
304
345
|
rel_count = 0
|
|
305
|
-
for
|
|
306
|
-
src_id = class_id(fqcn)
|
|
346
|
+
for src_id, meta in class_meta.items():
|
|
307
347
|
ctx = {"package": meta.get("package", ""), "imports": meta.get("imports", [])}
|
|
308
348
|
|
|
309
349
|
parent_candidates = self._resolve_type_candidates(meta.get("extends"), ctx, class_catalog)
|
|
310
350
|
for parent_fqcn in parent_candidates:
|
|
311
|
-
dst_id
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
351
|
+
for dst_id in fqcn_to_class_ids.get(parent_fqcn, []):
|
|
352
|
+
self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 0.8)
|
|
353
|
+
rel_count += 1
|
|
354
|
+
for sig, method_id in class_methods.get(src_id, {}).items():
|
|
355
|
+
parent_method = class_methods.get(dst_id, {}).get(sig)
|
|
356
|
+
if parent_method:
|
|
357
|
+
self.store.add_reference("OVERRIDES", "Method", method_id, "Method", parent_method, 1.0)
|
|
358
|
+
rel_count += 1
|
|
319
359
|
|
|
320
360
|
for iface in meta.get("interfaces", []):
|
|
321
361
|
iface_candidates = self._resolve_type_candidates(iface, ctx, class_catalog)
|
|
322
362
|
for iface_fqcn in iface_candidates:
|
|
323
|
-
dst_id
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
363
|
+
for dst_id in fqcn_to_class_ids.get(iface_fqcn, []):
|
|
364
|
+
self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 1.0)
|
|
365
|
+
rel_count += 1
|
|
366
|
+
for sig, method_id in class_methods.get(src_id, {}).items():
|
|
367
|
+
iface_method = class_methods.get(dst_id, {}).get(sig)
|
|
368
|
+
if iface_method:
|
|
369
|
+
self.store.add_reference("OVERRIDES", "Method", method_id, "Method", iface_method, 1.0)
|
|
370
|
+
rel_count += 1
|
|
331
371
|
return rel_count
|
|
332
372
|
|
|
333
373
|
@staticmethod
|
|
@@ -335,3 +375,13 @@ class JavaIndexer:
|
|
|
335
375
|
if progress is None:
|
|
336
376
|
return
|
|
337
377
|
progress(event, payload)
|
|
378
|
+
|
|
379
|
+
@staticmethod
|
|
380
|
+
def _scope_from_rel_path(rel_path: str) -> str:
|
|
381
|
+
normalized = rel_path.replace("\\", "/")
|
|
382
|
+
if "/java/" in normalized:
|
|
383
|
+
return normalized.split("/java/", 1)[0]
|
|
384
|
+
if "/src/" in normalized:
|
|
385
|
+
return normalized.split("/src/", 1)[0]
|
|
386
|
+
scope = os.path.dirname(normalized).strip()
|
|
387
|
+
return scope or "."
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
|
|
8
|
+
class SymbolRef:
|
|
9
|
+
symbol_id: str
|
|
10
|
+
method_id: str
|
|
11
|
+
class_id: str
|
|
12
|
+
file_id: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def digest_bytes(payload: bytes) -> str:
|
|
16
|
+
return hashlib.sha1(payload).hexdigest()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def file_id(project_id: str, rel_path: str) -> str:
|
|
20
|
+
return hashlib.sha1(f"{project_id}:{rel_path}".encode("utf-8")).hexdigest()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def class_id(fqcn: str, scope: str | None = None) -> str:
|
|
24
|
+
key = f"{scope}::{fqcn}" if scope else fqcn
|
|
25
|
+
return hashlib.sha1(key.encode("utf-8")).hexdigest()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def method_id(fqcn: str, signature: str, scope: str | None = None) -> str:
|
|
29
|
+
key = f"{scope}::{fqcn}#{signature}" if scope else f"{fqcn}#{signature}"
|
|
30
|
+
return hashlib.sha1(key.encode("utf-8")).hexdigest()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def symbol_id(kind: str, fqname: str, scope: str | None = None) -> str:
|
|
34
|
+
key = f"{kind}:{scope}:{fqname}" if scope else f"{kind}:{fqname}"
|
|
35
|
+
return hashlib.sha1(key.encode("utf-8")).hexdigest()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from types import SimpleNamespace
|
|
2
|
+
|
|
3
|
+
from codespine.indexer.call_resolver import resolve_calls
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_resolver_prefers_receiver_type_and_arity():
|
|
7
|
+
method_catalog = {
|
|
8
|
+
"src": {
|
|
9
|
+
"name": "entry",
|
|
10
|
+
"param_count": 0,
|
|
11
|
+
"class_id": "c_service",
|
|
12
|
+
"class_fqcn": "com.example.Service",
|
|
13
|
+
"signature": "entry()",
|
|
14
|
+
},
|
|
15
|
+
"m1": {"name": "run", "param_count": 0, "class_id": "c_service", "class_fqcn": "com.example.Service", "signature": "run()"},
|
|
16
|
+
"m2": {
|
|
17
|
+
"name": "run",
|
|
18
|
+
"param_count": 1,
|
|
19
|
+
"class_id": "c_service",
|
|
20
|
+
"class_fqcn": "com.example.Service",
|
|
21
|
+
"signature": "run(String)",
|
|
22
|
+
},
|
|
23
|
+
"m3": {"name": "save", "param_count": 0, "class_id": "c_repo", "class_fqcn": "com.example.Repo", "signature": "save()"},
|
|
24
|
+
}
|
|
25
|
+
calls = {
|
|
26
|
+
"src": [
|
|
27
|
+
SimpleNamespace(name="run", receiver="this", arg_count=0),
|
|
28
|
+
SimpleNamespace(name="save", receiver="repo", arg_count=0),
|
|
29
|
+
]
|
|
30
|
+
}
|
|
31
|
+
method_context = {
|
|
32
|
+
"src": {
|
|
33
|
+
"class_id": "c_service",
|
|
34
|
+
"class_fqcn": "com.example.Service",
|
|
35
|
+
"local_types": {"repo": "Repo"},
|
|
36
|
+
"field_types": {},
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
class_catalog = {"Service": ["com.example.Service"], "Repo": ["com.example.Repo"]}
|
|
40
|
+
|
|
41
|
+
out = list(resolve_calls(method_catalog, calls, method_context, class_catalog))
|
|
42
|
+
assert ("src", "m1", 1.0, "receiver_this_exact") in out
|
|
43
|
+
assert ("src", "m3", 0.8, "receiver_method_match") in out
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
pytest.importorskip("kuzu")
|
|
6
|
+
pytest.importorskip("tree_sitter_java")
|
|
7
|
+
|
|
8
|
+
from codespine.db.store import GraphStore
|
|
9
|
+
from codespine.indexer.engine import JavaIndexer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _write_java(path: Path, content: str) -> None:
|
|
13
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
14
|
+
path.write_text(content, encoding="utf-8")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_multimodule_duplicate_fqcn_is_indexed_without_collision(tmp_path: Path):
|
|
18
|
+
_write_java(
|
|
19
|
+
tmp_path / "module-a" / "src" / "main" / "java" / "com" / "example" / "App.java",
|
|
20
|
+
"""
|
|
21
|
+
package com.example;
|
|
22
|
+
public class App { public void fromA() {} }
|
|
23
|
+
""",
|
|
24
|
+
)
|
|
25
|
+
_write_java(
|
|
26
|
+
tmp_path / "module-b" / "src" / "main" / "java" / "com" / "example" / "App.java",
|
|
27
|
+
"""
|
|
28
|
+
package com.example;
|
|
29
|
+
public class App { public void fromB() {} }
|
|
30
|
+
""",
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
store = GraphStore(read_only=False)
|
|
34
|
+
result = JavaIndexer(store).index_project(str(tmp_path), full=True)
|
|
35
|
+
|
|
36
|
+
classes = store.query_records(
|
|
37
|
+
"""
|
|
38
|
+
MATCH (c:Class), (f:File)
|
|
39
|
+
WHERE c.file_id = f.id AND f.project_id = $pid AND c.fqcn = $fqcn
|
|
40
|
+
RETURN c.id as id, f.path as path
|
|
41
|
+
""",
|
|
42
|
+
{"pid": result.project_id, "fqcn": "com.example.App"},
|
|
43
|
+
)
|
|
44
|
+
methods = store.query_records(
|
|
45
|
+
"""
|
|
46
|
+
MATCH (m:Method), (c:Class), (f:File)
|
|
47
|
+
WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
|
|
48
|
+
RETURN m.name as name
|
|
49
|
+
""",
|
|
50
|
+
{"pid": result.project_id},
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
assert len(classes) == 2
|
|
54
|
+
assert len({c["id"] for c in classes}) == 2
|
|
55
|
+
assert {"fromA", "fromB"}.issubset({m["name"] for m in methods})
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import hashlib
|
|
4
|
-
from dataclasses import dataclass
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
@dataclass
|
|
8
|
-
class SymbolRef:
|
|
9
|
-
symbol_id: str
|
|
10
|
-
method_id: str
|
|
11
|
-
class_id: str
|
|
12
|
-
file_id: str
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def digest_bytes(payload: bytes) -> str:
|
|
16
|
-
return hashlib.sha1(payload).hexdigest()
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def file_id(project_id: str, rel_path: str) -> str:
|
|
20
|
-
return hashlib.sha1(f"{project_id}:{rel_path}".encode("utf-8")).hexdigest()
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def class_id(fqcn: str) -> str:
|
|
24
|
-
return hashlib.sha1(fqcn.encode("utf-8")).hexdigest()
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def method_id(fqcn: str, signature: str) -> str:
|
|
28
|
-
return hashlib.sha1(f"{fqcn}#{signature}".encode("utf-8")).hexdigest()
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def symbol_id(kind: str, fqname: str) -> str:
|
|
32
|
-
return hashlib.sha1(f"{kind}:{fqname}".encode("utf-8")).hexdigest()
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
from types import SimpleNamespace
|
|
2
|
-
|
|
3
|
-
from codespine.indexer.call_resolver import resolve_calls
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def test_resolver_prefers_receiver_type_and_arity():
|
|
7
|
-
method_catalog = {
|
|
8
|
-
"src": {"name": "entry", "param_count": 0, "class_fqcn": "com.example.Service", "signature": "entry()"},
|
|
9
|
-
"m1": {"name": "run", "param_count": 0, "class_fqcn": "com.example.Service", "signature": "run()"},
|
|
10
|
-
"m2": {"name": "run", "param_count": 1, "class_fqcn": "com.example.Service", "signature": "run(String)"},
|
|
11
|
-
"m3": {"name": "save", "param_count": 0, "class_fqcn": "com.example.Repo", "signature": "save()"},
|
|
12
|
-
}
|
|
13
|
-
calls = {
|
|
14
|
-
"src": [
|
|
15
|
-
SimpleNamespace(name="run", receiver="this", arg_count=0),
|
|
16
|
-
SimpleNamespace(name="save", receiver="repo", arg_count=0),
|
|
17
|
-
]
|
|
18
|
-
}
|
|
19
|
-
method_context = {
|
|
20
|
-
"src": {
|
|
21
|
-
"class_fqcn": "com.example.Service",
|
|
22
|
-
"local_types": {"repo": "Repo"},
|
|
23
|
-
"field_types": {},
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
class_catalog = {"Service": ["com.example.Service"], "Repo": ["com.example.Repo"]}
|
|
27
|
-
|
|
28
|
-
out = resolve_calls(method_catalog, calls, method_context, class_catalog)
|
|
29
|
-
assert ("src", "m1", 1.0, "receiver_this_exact") in out
|
|
30
|
-
assert ("src", "m3", 0.8, "receiver_method_match") in out
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|