java-codebase-rag 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ast_java.py +2813 -0
- brownfield_events.py +58 -0
- build_ast_graph.py +3081 -0
- chunk_heuristics.py +62 -0
- graph_enrich.py +1681 -0
- index_common.py +10 -0
- java_codebase_rag/__init__.py +1 -0
- java_codebase_rag/cli.py +761 -0
- java_codebase_rag/cli_progress.py +52 -0
- java_codebase_rag/config.py +327 -0
- java_codebase_rag/pipeline.py +189 -0
- java_codebase_rag-0.1.0.dist-info/METADATA +818 -0
- java_codebase_rag-0.1.0.dist-info/RECORD +27 -0
- java_codebase_rag-0.1.0.dist-info/WHEEL +5 -0
- java_codebase_rag-0.1.0.dist-info/entry_points.txt +3 -0
- java_codebase_rag-0.1.0.dist-info/licenses/LICENSE +21 -0
- java_codebase_rag-0.1.0.dist-info/top_level.txt +17 -0
- java_index_flow_lancedb.py +398 -0
- java_index_v1_common.py +33 -0
- java_ontology.py +446 -0
- kuzu_queries.py +1989 -0
- mcp_hints.py +748 -0
- mcp_v2.py +1957 -0
- path_filtering.py +472 -0
- pr_analysis.py +534 -0
- search_lancedb.py +1075 -0
- server.py +578 -0
build_ast_graph.py
ADDED
|
@@ -0,0 +1,3081 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Four-pass AST-derived Knowledge Base builder (Kuzu).
|
|
3
|
+
|
|
4
|
+
Walks a Java source tree with `tree_sitter_java`, writes a deterministic graph of:
|
|
5
|
+
Symbol nodes: package, file, class, interface, enum, record, annotation, method, constructor
|
|
6
|
+
Route nodes: declaration-site routes (Spring MVC/WebFlux, Feign, Kafka, …)
|
|
7
|
+
Rel tables: EXTENDS, IMPLEMENTS, INJECTS, DECLARES, OVERRIDES, CALLS, EXPOSES
|
|
8
|
+
|
|
9
|
+
Pass 1 builds every node and in-memory resolution indexes.
|
|
10
|
+
Pass 2 resolves each extends/implements/injection target using Java's lookup order
|
|
11
|
+
(same file → explicit import → same package → wildcard import → java.lang → phantom).
|
|
12
|
+
Pass 3 resolves static call sites into confidence-scored CALLS edges and DECLARES.
|
|
13
|
+
Pass 4 emits Route rows plus Symbol→Route EXPOSES edges from literal annotation metadata.
|
|
14
|
+
|
|
15
|
+
Usage:
|
|
16
|
+
build_ast_graph.py --source-root <repo> [--kuzu-path <path>] [--verbose]
|
|
17
|
+
|
|
18
|
+
Default Kuzu database path resolution order:
|
|
19
|
+
--kuzu-path CLI arg (path passed to kuzu.Database(...))
|
|
20
|
+
JAVA_CODEBASE_RAG_INDEX_DIR/code_graph.kuzu (if set and local)
|
|
21
|
+
./.java-codebase-rag/code_graph.kuzu under cwd
|
|
22
|
+
|
|
23
|
+
The Kuzu DB is dropped and rebuilt on every run (Phase 1 is a full rebuild).
|
|
24
|
+
"""
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import hashlib
|
|
29
|
+
import json
|
|
30
|
+
import logging
|
|
31
|
+
import os
|
|
32
|
+
import re
|
|
33
|
+
import sys
|
|
34
|
+
import threading
|
|
35
|
+
import time
|
|
36
|
+
from collections import defaultdict
|
|
37
|
+
from dataclasses import asdict, dataclass, field, replace
|
|
38
|
+
from pathlib import Path
|
|
39
|
+
|
|
40
|
+
import kuzu
|
|
41
|
+
|
|
42
|
+
from ast_java import (
|
|
43
|
+
ONTOLOGY_VERSION,
|
|
44
|
+
CallSite,
|
|
45
|
+
JavaFileAst,
|
|
46
|
+
MethodDecl,
|
|
47
|
+
OutgoingCallDecl,
|
|
48
|
+
TypeDecl,
|
|
49
|
+
injection_annotation_names,
|
|
50
|
+
lombok_required_args_annotations,
|
|
51
|
+
parse_java,
|
|
52
|
+
)
|
|
53
|
+
from graph_enrich import (
|
|
54
|
+
_load_config_cross_service_resolution,
|
|
55
|
+
collect_annotation_meta_chain,
|
|
56
|
+
load_brownfield_overrides,
|
|
57
|
+
microservice_for_path,
|
|
58
|
+
module_for_path,
|
|
59
|
+
phantom_id,
|
|
60
|
+
resolve_async_producer_for_method,
|
|
61
|
+
resolve_http_client_for_method,
|
|
62
|
+
resolve_role_and_capabilities,
|
|
63
|
+
resolve_routes_for_method,
|
|
64
|
+
symbol_id,
|
|
65
|
+
)
|
|
66
|
+
from path_filtering import LayeredIgnore, iter_java_source_files
|
|
67
|
+
from java_ontology import VALID_CLIENT_KINDS, VALID_HTTP_CALL_MATCHES, VALID_PRODUCER_KINDS
|
|
68
|
+
|
|
69
|
+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Held while writing a verbose progress line to stderr (see `_verbose_stderr_line`).
_VERBOSE_STDERR_LOCK = threading.Lock()

# Banner lines announcing the start of each stage in verbose mode.
_PASS1_START = "[pass1] starting · parsing Java files under source root"
# NOTE(review): this banner mentions DECLARES, while `pass2_edges` reports
# EXTENDS / IMPLEMENTS / INJECTS counts — confirm the intended wording.
_PASS2_START = "[pass2] starting · emitting EXTENDS / IMPLEMENTS / DECLARES rows"
_PASS3_START = "[pass3] starting · call resolution (outgoing calls per site)"
_PASS4_START = "[pass4] starting · route and EXPOSES extraction"
_PASS5_START = "[pass5] starting · imperative HTTP_CALLS / ASYNC_CALLS edges"
_PASS6_START = "[pass6] starting · cross-service call-edge matching"
_WRITE_START = "[write] starting · writing Kuzu graph to disk"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _verbose_stderr_line(content: str) -> None:
    """Print ``content`` as one flushed line on stderr while holding the module lock."""
    _VERBOSE_STDERR_LOCK.acquire()
    try:
        print(content, file=sys.stderr, flush=True)
    finally:
        _VERBOSE_STDERR_LOCK.release()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class _VerbosePassHeartbeats:
    """Context manager that prints a periodic progress line while a pass runs.

    With ``verbose`` true, entering the context starts a daemon thread that
    writes ``<tag> running … <N>s elapsed`` to stderr every 5 seconds until the
    context exits. With ``verbose`` false, entering and exiting do nothing.
    """

    def __init__(self, tag: str, *, verbose: bool) -> None:
        self._tag = tag
        self._verbose = verbose
        # Both remain ``None`` until a heartbeat thread is actually started.
        self._thr: threading.Thread | None = None
        self._stop: threading.Event | None = None

    def __enter__(self) -> None:
        if self._verbose:
            stop_event = threading.Event()
            label = self._tag

            def _beat() -> None:
                started = time.monotonic()
                while True:
                    # Event.wait returns True once the event is set, False on timeout.
                    if stop_event.wait(timeout=5.0):
                        break
                    seconds = int(time.monotonic() - started)
                    _verbose_stderr_line(f"{label} running … {seconds}s elapsed")

            self._stop = stop_event
            self._thr = threading.Thread(target=_beat, name=f"hb-{label}", daemon=True)
            self._thr.start()
        return None

    def __exit__(self, exc_type, exc, tb) -> bool:
        thread, stop_event = self._thr, self._stop
        if thread is None or stop_event is None:
            return False
        stop_event.set()
        # Wait at most 2 seconds for the worker to finish.
        thread.join(timeout=2.0)
        # False: an exception raised inside the ``with`` body is not suppressed.
        return False
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Simple names treated as `java.lang` types when guessing the FQN of an
# unresolved reference (see `_phantom_target`).
_JAVA_LANG_SIMPLE = frozenset(
    (
        "Object",
        "String",
        "Integer",
        "Long",
        "Short",
        "Byte",
        "Boolean",
        "Double",
        "Float",
        "Character",
        "Number",
        "Void",
        "Class",
        "Enum",
        "Record",
        "Throwable",
        "Exception",
        "RuntimeException",
        "Error",
        "Thread",
        "Runnable",
        "Iterable",
        "Comparable",
        "CharSequence",
        "StringBuilder",
        "StringBuffer",
        "Math",
        "System",
        "AutoCloseable",
        "Cloneable",
    )
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---------- dataclasses ----------
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass
class TypeIndexEntry:
    """Index record built in pass 1 for a single Java type declaration."""

    # Parsed declaration this entry wraps.
    decl: TypeDecl
    # Path of the declaring file, plus the module / microservice derived from it.
    file_path: str
    module: str
    microservice: str
    # Package of the enclosing top-level type ("" when none could be derived).
    package: str
    # FQN of the directly enclosing type; ``None`` for top-level types.
    outer_fqn: str | None
    # Graph node id computed for the declaration.
    node_id: str
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@dataclass
class MemberEntry:
    """Index record for one method or constructor declared inside a type."""

    # Either "method" or "constructor".
    kind: str
    decl: MethodDecl
    # Node id and FQN of the declaring type.
    parent_id: str
    parent_fqn: str
    # Location metadata, copied from the declaring type's registration.
    file_path: str
    module: str
    microservice: str
    # Graph node id computed for the member itself.
    node_id: str
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@dataclass
class EdgeRow:
    """One type-to-type edge row (used for the EXTENDS and IMPLEMENTS row lists)."""

    # Node id of the source type.
    src_id: str
    # Node id of the target: a known type, or a phantom placeholder.
    dst_id: str
    # Simple name and (possibly guessed) FQN of the target.
    dst_name: str
    dst_fqn: str
    # False when the target had to be represented by a phantom.
    resolved: bool
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@dataclass
class InjectsRow(EdgeRow):
    """INJECTS edge row: an ``EdgeRow`` plus details of how the dependency is injected."""

    # How the injection happens, e.g. "field", "constructor", "setter",
    # "lombok_required_args".
    mechanism: str = ""
    # Name of the annotation that triggered the edge ("" when there is none).
    annotation: str = ""
    # Name of the field or parameter that receives the dependency.
    field_or_param: str = ""
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@dataclass
class CallsRow:
    """One CALLS edge row from a caller symbol to a callee symbol.

    Only the two endpoint ids are required; every other field has a default.
    """

    src_id: str
    dst_id: str
    # Call-site position and argument count.
    call_site_line: int = 0
    call_site_byte: int = 0
    arg_count: int = 0
    # Confidence score and the name of the strategy that produced the edge.
    confidence: float = 0.0
    strategy: str = "phantom"
    source: str = "static"
    resolved: bool = True
    callee_declaring_role: str = "OTHER"
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@dataclass
class UnresolvedCallSiteRow:
    """Row describing a call site for which no callee could be resolved."""

    id: str
    # Node id of the calling member.
    caller_id: str
    # Call-site position and argument count.
    call_site_line: int
    call_site_byte: int
    arg_count: int
    # Simple name of the called method and the receiver expression text.
    callee_simple: str
    receiver_expr: str
    # Why resolution failed.
    reason: str
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@dataclass
class DeclaresRow:
    """Plain two-endpoint edge row (source node id → destination node id)."""

    src_id: str
    dst_id: str
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@dataclass
class CallResolutionStats:
    """Counters collected while resolving call sites."""

    total: int = 0
    # Count per resolution strategy; missing keys read as 0 (defaultdict).
    # NOTE(review): dataclasses.asdict() on a defaultdict field raises TypeError
    # before Python 3.12 — confirm callers do not rely on it for this class.
    by_strategy: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    phantom_chained: int = 0
    phantom_other: int = 0
    callee_unresolved: int = 0
    skipped_cross_service: int = 0
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@dataclass
class RouteRow:
    """One Route node row."""

    id: str
    kind: str
    framework: str
    # HTTP-style attributes.
    method: str
    path: str
    path_template: str
    path_regex: str
    # Messaging-style attributes.
    topic: str
    broker: str
    # Feign client attributes.
    feign_name: str
    feign_url: str
    # Declaration site.
    microservice: str
    module: str
    filename: str
    start_line: int
    end_line: int
    resolved: bool
    # B2a brownfield composition (PR-A3); not persisted on Kuzu `Route` nodes.
    source_layer: str = "builtin"
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
@dataclass
class ExposesRow:
    """EXPOSES edge row linking a symbol to a route, with a confidence and strategy."""

    symbol_id: str
    route_id: str
    confidence: float
    strategy: str
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@dataclass
class RouteExtractionStats:
    """Counters and percentages describing route extraction."""

    routes_skipped_unresolved: int = 0
    # Per-framework and per-kind counts; missing keys read as 0 (defaultdict).
    by_framework: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    by_kind: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    routes_resolved_pct: float = 100.0
    # Percentage of emitted `Route` rows whose `source_layer` is not `builtin`.
    # Brownfield layers: `layer_b_ann`, `layer_a_meta`, `layer_c_source`, `layer_b_fqn`.
    routes_from_brownfield_pct: float = 0.0
    # Plain dict, unlike the defaultdict counters above.
    routes_by_layer: dict[str, int] = field(default_factory=dict)
    exposes_suppressed_feign: int = 0
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
@dataclass
class HttpCallRow:
    """One HTTP call edge row from a client node to a route node."""

    client_id: str
    route_id: str
    confidence: float
    strategy: str
    method_call: str
    raw_uri: str
    match: str
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
@dataclass
class AsyncCallRow:
    """One asynchronous call edge row from a producer node to a route node."""

    producer_id: str
    route_id: str
    confidence: float
    strategy: str
    direction: str
    raw_topic: str
    match: str
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@dataclass
class ClientRow:
    """One Client node row (an outgoing HTTP client usage tied to a member)."""

    id: str
    client_kind: str
    target_service: str
    # Request target.
    path: str
    path_template: str
    path_regex: str
    method: str
    # Declaring member.
    member_fqn: str
    member_id: str
    # Declaration site.
    microservice: str
    module: str
    filename: str
    start_line: int
    end_line: int
    resolved: bool
    source_layer: str
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
@dataclass
class DeclaresClientRow:
    """Edge row linking a symbol to a client node, with a confidence and strategy."""

    symbol_id: str
    client_id: str
    confidence: float
    strategy: str
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
@dataclass
class ProducerRow:
    """One Producer node row (an asynchronous message producer tied to a member)."""

    id: str
    producer_kind: str
    # Messaging target.
    topic: str
    broker: str
    direction: str
    # Declaring member.
    member_fqn: str
    member_id: str
    # Declaration site.
    microservice: str
    module: str
    filename: str
    start_line: int
    end_line: int
    resolved: bool
    source_layer: str
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
@dataclass
class DeclaresProducerRow:
    """Edge row linking a symbol to a producer node, with a confidence and strategy."""

    symbol_id: str
    producer_id: str
    confidence: float
    strategy: str
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
@dataclass
class ClientExtractionStats:
    """Counters describing client extraction."""

    clients_total: int = 0
    declares_client_total: int = 0
    # Count per client kind; missing keys read as 0 (defaultdict).
    clients_by_kind: dict[str, int] = field(default_factory=lambda: defaultdict(int))
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
@dataclass
class ProducerExtractionStats:
    """Counters describing producer extraction."""

    producers_total: int = 0
    declares_producer_total: int = 0
    # Count per producer kind; missing keys read as 0 (defaultdict).
    producers_by_kind: dict[str, int] = field(default_factory=lambda: defaultdict(int))
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
@dataclass
class CallEdgeStats:
    """Counters and percentages for HTTP / async call edges.

    Every ``*_by_*`` and ``*_breakdown`` mapping is a ``defaultdict(int)``, so
    missing keys read as 0.
    """

    # Totals.
    http_calls_total: int = 0
    async_calls_total: int = 0
    # Breakdowns by client kind and by strategy.
    http_calls_by_client_kind: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    async_calls_by_client_kind: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    http_calls_by_strategy: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    async_calls_by_strategy: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    # Skipped (unresolved) counts.
    http_calls_skipped_unresolved: int = 0
    async_calls_skipped_unresolved: int = 0
    # Brownfield percentages.
    http_clients_from_brownfield_pct: float = 0.0
    async_producers_from_brownfield_pct: float = 0.0
    # Breakdowns by match value.
    http_calls_match_breakdown: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    async_calls_match_breakdown: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    cross_service_calls_total: int = 0
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
@dataclass
class GraphTables:
    """In-memory container for every index, row list, and counter the builder fills.

    All fields have defaults, so ``GraphTables()`` yields an empty set of tables.
    """

    # --- resolution indexes ---
    types: dict[str, TypeIndexEntry] = field(default_factory=dict)  # fqn -> entry
    by_simple_name: dict[str, list[TypeIndexEntry]] = field(default_factory=dict)
    by_package: dict[str, list[TypeIndexEntry]] = field(default_factory=dict)
    files: dict[str, str] = field(default_factory=dict)  # path -> node id
    packages: dict[str, str] = field(default_factory=dict)  # pkg -> node id
    members: list[MemberEntry] = field(default_factory=list)
    phantoms: dict[str, dict] = field(default_factory=dict)  # id -> row

    # --- edge and node rows ---
    extends_rows: list[EdgeRow] = field(default_factory=list)
    implements_rows: list[EdgeRow] = field(default_factory=list)
    injects_rows: list[InjectsRow] = field(default_factory=list)
    calls_rows: list[CallsRow] = field(default_factory=list)
    unresolved_call_site_rows: list[UnresolvedCallSiteRow] = field(default_factory=list)
    declares_rows: list[DeclaresRow] = field(default_factory=list)
    routes_rows: list[RouteRow] = field(default_factory=list)
    exposes_rows: list[ExposesRow] = field(default_factory=list)
    http_call_rows: list[HttpCallRow] = field(default_factory=list)
    async_call_rows: list[AsyncCallRow] = field(default_factory=list)
    client_rows: list[ClientRow] = field(default_factory=list)
    declares_client_rows: list[DeclaresClientRow] = field(default_factory=list)
    producer_rows: list[ProducerRow] = field(default_factory=list)
    declares_producer_rows: list[DeclaresProducerRow] = field(default_factory=list)
    overrides_rows: list[DeclaresRow] = field(default_factory=list)

    # --- statistics ---
    route_stats: RouteExtractionStats = field(default_factory=RouteExtractionStats)
    call_edge_stats: CallEdgeStats = field(default_factory=CallEdgeStats)
    client_stats: ClientExtractionStats = field(default_factory=ClientExtractionStats)
    producer_stats: ProducerExtractionStats = field(default_factory=ProducerExtractionStats)

    # Members grouped by declaring type FQN (rebuilt by `_build_member_indexes`).
    methods_by_type: dict[str, list[MemberEntry]] = field(default_factory=dict)

    # --- counters and settings ---
    parse_errors: int = 0
    skipped_files: int = 0
    pass3_skipped_cross_service: int = 0
    pass3_unresolved_phantom_receiver: int = 0
    pass3_unresolved_chained: int = 0
    cross_service_resolution: str = "auto"
    # Populated in _write_nodes (same overrides + meta_chain as Symbol.role).
    type_role_by_node_id: dict[str, str] = field(default_factory=dict)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# ---------- file walk (see `path_filtering.iter_java_source_files`) ----------
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
# ---------- pass 1 ----------
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _register_type(
    tables: GraphTables,
    decl: TypeDecl,
    *,
    file_path: str,
    module: str,
    microservice: str,
    outer_fqn: str | None,
) -> TypeIndexEntry:
    """Index one type declaration, its members, and (recursively) its nested types.

    Args:
        tables: Graph tables mutated in place (``types``, ``by_simple_name``,
            ``by_package`` and ``members``).
        decl: Parsed type declaration to register.
        file_path: Path of the declaring file, stored on every created entry.
        module: Module label stored on every created entry.
        microservice: Microservice label stored on every created entry.
        outer_fqn: FQN of the directly enclosing type, or ``None`` for a
            top-level type.

    Returns:
        The index entry created for ``decl``.
    """
    if outer_fqn is None:
        # Top-level type: the package is the FQN minus the trailing ".<name>".
        suffix = f".{decl.name}"
        package = decl.fqn[: -len(suffix)] if decl.fqn.endswith(suffix) else ""
    else:
        # Nested type: climb through already-registered enclosing types to the
        # outermost one; the package is everything before its simple name.
        top = outer_fqn
        while top in tables.types and tables.types[top].outer_fqn:
            top = tables.types[top].outer_fqn  # type: ignore[assignment]
        package = top.rpartition(".")[0]

    node_id = symbol_id(decl.kind, decl.fqn, file_path, decl.start_byte)
    entry = TypeIndexEntry(
        decl=decl,
        file_path=file_path,
        module=module,
        microservice=microservice,
        package=package,
        outer_fqn=outer_fqn,
        node_id=node_id,
    )
    tables.types[decl.fqn] = entry
    tables.by_simple_name.setdefault(decl.name, []).append(entry)
    tables.by_package.setdefault(package, []).append(entry)

    for m in decl.methods:
        kind = "constructor" if m.is_constructor else "method"
        # Member ids are keyed on "<type fqn>#<signature>".
        mid = symbol_id(kind, f"{decl.fqn}#{m.signature}", file_path, m.start_byte)
        tables.members.append(MemberEntry(
            kind=kind, decl=m, parent_id=node_id, parent_fqn=decl.fqn,
            file_path=file_path, module=module, microservice=microservice,
            node_id=mid,
        ))

    # The outer type is registered before its nested types, so the climb in the
    # nested branch above can find it in `tables.types`.
    for nested in decl.nested:
        _register_type(
            tables, nested, file_path=file_path,
            module=module, microservice=microservice, outer_fqn=decl.fqn,
        )

    return entry
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool) -> dict[str, JavaFileAst]:
    """Walk the Java sources under ``root``, parse each file, and fill the pass-1 indexes.

    For every readable, non-empty file this records a file node id, registers
    the package node id on first sight, and registers each top-level type
    (which in turn registers its members and nested types).

    Args:
        root: Source tree to scan.
        tables: Graph tables mutated in place.
        verbose: When true, progress lines are written to stderr.

    Returns:
        Mapping of file path (POSIX form, relative to ``root`` when possible)
        to its parsed AST.
    """
    asts: dict[str, JavaFileAst] = {}
    ignore = LayeredIgnore(root)
    # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
    t0 = time.monotonic()
    n_files = 0
    if verbose:
        _verbose_stderr_line(_PASS1_START)

    # Optional artificial delay, applied only in verbose mode. Judging by the
    # variable name this is a test hook; unparsable values are ignored.
    slow_sec = 0.0
    raw_slow = os.environ.get("JAVA_CODEBASE_RAG_TEST_GRAPH_SLOW_SEC", "").strip()
    if raw_slow:
        try:
            slow_sec = float(raw_slow)
        except ValueError:
            slow_sec = 0.0

    # Loop-invariant: resolve the root once instead of once per file.
    root_resolved = root.resolve()
    with _VerbosePassHeartbeats("[pass1]", verbose=verbose):
        if verbose and slow_sec > 0:
            time.sleep(slow_sec)
        for p in iter_java_source_files(root, ignore=ignore):
            n_files += 1
            try:
                content = p.read_bytes()
            except OSError:
                tables.skipped_files += 1
                continue
            if not content.strip():
                # Empty / whitespace-only file: nothing to index.
                continue
            try:
                rel = p.resolve().relative_to(root_resolved).as_posix()
            except ValueError:
                # Resolved path is not under the resolved root; use the path as given.
                rel = p.as_posix()
            try:
                ast = parse_java(content, filename=rel, verbose=verbose)
            except Exception:
                # Best-effort: one bad file must not abort the build, but keep a
                # trace of what failed for debugging.
                log.debug("parse_java failed for %s", rel, exc_info=True)
                tables.parse_errors += 1
                continue
            if ast.parse_error:
                tables.parse_errors += 1
                # Still index what tree-sitter gave us; robust to syntax errors.
            module = module_for_path(str(p), root)
            microservice = microservice_for_path(str(p), root)
            asts[rel] = ast

            # file node
            file_id = symbol_id("file", rel, rel, 0)
            tables.files[rel] = file_id

            # package node (created lazily; nodes deduped by id)
            if ast.package and ast.package not in tables.packages:
                tables.packages[ast.package] = symbol_id("package", ast.package, "", 0)

            for t in ast.top_level_types:
                _register_type(
                    tables, t, file_path=rel,
                    module=module, microservice=microservice, outer_fqn=None,
                )

    if verbose:
        elapsed = time.monotonic() - t0
        _verbose_stderr_line(
            f"[pass1] parsed {n_files} files in {elapsed:.2f}s: "
            f"{len(tables.types)} types, {len(tables.members)} members, "
            f"{tables.parse_errors} parse errors, {tables.skipped_files} skipped",
        )
    return asts
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
# ---------- pass 2: resolution + edges ----------
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def _resolve_simple(
    name: str,
    *,
    current: TypeIndexEntry,
    ast: JavaFileAst,
    tables: GraphTables,
) -> TypeIndexEntry | None:
    """Resolve a simple type name against the indexed types, Java-style.

    Lookup order: types nested in an enclosing type of ``current`` → same-file
    sibling → explicit import → same package → wildcard imports. Returns the
    matching entry, or ``None`` when the name is not an indexed type (the
    caller then creates a phantom).
    """
    # Drop any generic suffix the caller left on the name.
    simple = name.partition("<")[0].strip()
    if simple == "":
        return None

    known = tables.types

    # Walk outward through the enclosing types, trying `<Outer>.<simple>`.
    enclosing = current.outer_fqn
    while enclosing is not None and enclosing in known:
        nested_fqn = f"{enclosing}.{simple}"
        if nested_fqn in known:
            return known[nested_fqn]
        enclosing = known[enclosing].outer_fqn

    # Same file, same parent (enclosing type, or package for top-level types).
    own_parent = current.outer_fqn or current.package
    for sibling in tables.by_simple_name.get(simple, ()):
        if sibling.file_path != current.file_path:
            continue
        if (sibling.outer_fqn or sibling.package) == own_parent:
            return sibling

    # An explicit import is authoritative: either it names an indexed type, or
    # the reference is external and the lookup stops here.
    if simple in ast.explicit_imports:
        imported_fqn = ast.explicit_imports[simple]
        return known[imported_fqn] if imported_fqn in known else None

    # Same package first, then each wildcard import in declaration order.
    prefixes = [current.package] if current.package else []
    prefixes.extend(ast.wildcard_imports)
    for prefix in prefixes:
        fqn = f"{prefix}.{simple}"
        if fqn in known:
            return known[fqn]

    # Nothing matched (java.lang and other external names end up here).
    return None
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
def _phantom_target(
    tables: GraphTables,
    simple: str,
    ast: JavaFileAst,
    *,
    current: TypeIndexEntry,
) -> tuple[str, str, str]:
    """Register (once) and describe a phantom node for an unresolved type reference.

    The best-guess FQN is chosen in this order: explicit import → ``java.lang``
    (for names listed in ``_JAVA_LANG_SIMPLE``) → first wildcard import → the
    bare name itself.

    Args:
        tables: Graph tables; a placeholder row is added to ``tables.phantoms``
            the first time a given phantom id is seen.
        simple: Referenced type name; anything from the first ``<`` on is dropped.
        ast: AST of the file containing the reference (supplies the imports).
        current: Type containing the reference. Not used by the current
            implementation; kept so the call signature stays stable.

    Returns:
        ``(phantom_id, bare_name, guessed_fqn)``.
    """
    bare = simple.split("<", 1)[0].strip()
    guess_fqn = bare
    if bare in ast.explicit_imports:
        guess_fqn = ast.explicit_imports[bare]
    elif bare in _JAVA_LANG_SIMPLE:
        guess_fqn = f"java.lang.{bare}"
    elif ast.wildcard_imports:
        # Pick first wildcard as a hint (imperfect but useful for display).
        guess_fqn = f"{ast.wildcard_imports[0]}.{bare}"

    pid = phantom_id(guess_fqn)
    if pid not in tables.phantoms:
        # Placeholder row: location and metadata fields are left empty and
        # `resolved` is False.
        tables.phantoms[pid] = {
            "id": pid,
            "kind": "class",
            "name": bare,
            "fqn": guess_fqn,
            # Everything before the last dot, or "" when the guess has no dot.
            "package": guess_fqn.rpartition(".")[0],
            "module": "",
            "microservice": "",
            "filename": "",
            "start_line": 0,
            "end_line": 0,
            "start_byte": 0,
            "end_byte": 0,
            "modifiers": [],
            "annotations": [],
            "capabilities": [],
            "role": "OTHER",
            "signature": "",
            "parent_id": "",
            "resolved": False,
        }
    return pid, bare, guess_fqn
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def _edge_for(
    *,
    src: TypeIndexEntry,
    target_simple: str,
    ast: JavaFileAst,
    tables: GraphTables,
) -> tuple[str, str, str, bool]:
    """Work out the destination of an edge from ``src`` to the type named ``target_simple``.

    Returns ``(dst_id, dst_simple_name, dst_fqn, resolved)``. ``resolved`` is
    true when the name maps to an indexed type, and false when a phantom
    target was used instead.
    """
    hit = _resolve_simple(target_simple, current=src, ast=ast, tables=tables)
    if hit is None:
        phantom = _phantom_target(tables, target_simple, ast, current=src)
        return (*phantom, False)
    target_decl = hit.decl
    return hit.node_id, target_decl.name, target_decl.fqn, True
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
def _emit_extends_implements(
    entry: TypeIndexEntry,
    ast: JavaFileAst,
    tables: GraphTables,
    *,
    seen_ext: set[tuple[str, str]],
    seen_impl: set[tuple[str, str]],
) -> None:
    """Append EXTENDS and IMPLEMENTS rows for ``entry`` to ``tables``.

    Each ``(source id, destination id)`` pair is emitted at most once per edge
    kind; ``seen_ext`` / ``seen_impl`` carry that de-duplication state across
    calls and are updated in place.
    """
    # Supertype clauses are handled in order: all `extends` names first, then
    # all `implements` names, each with its own seen-set and output list.
    clauses = (
        (entry.decl.extends, seen_ext, tables.extends_rows),
        (entry.decl.implements, seen_impl, tables.implements_rows),
    )
    for type_names, seen, out_rows in clauses:
        for type_name in type_names:
            dst_id, dst_simple, dst_fqn, ok = _edge_for(
                src=entry, target_simple=type_name, ast=ast, tables=tables,
            )
            pair = (entry.node_id, dst_id)
            if pair not in seen:
                seen.add(pair)
                out_rows.append(EdgeRow(
                    src_id=entry.node_id,
                    dst_id=dst_id,
                    dst_name=dst_simple,
                    dst_fqn=dst_fqn,
                    resolved=ok,
                ))
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def _emit_injects(
    entry: TypeIndexEntry,
    ast: JavaFileAst,
    tables: GraphTables,
    *,
    seen: set[tuple[str, str, str, str]],
) -> None:
    """Append INJECTS rows for the dependencies injected into ``entry``.

    Four mechanisms are recognised: annotated fields, Lombok-generated
    constructors, an explicit constructor, and ``@Autowired`` setters.
    Interfaces are skipped. ``seen`` de-duplicates on
    ``(source id, destination id, mechanism, slot name)`` and is updated in place.
    """
    decl = entry.decl
    if decl.kind == "interface":
        return

    type_annotations = [a.name for a in decl.annotations]
    injectable = injection_annotation_names()
    lombok_ctor_annotations = lombok_required_args_annotations()
    uses_lombok_ctor = any(n in lombok_ctor_annotations for n in type_annotations)

    def record(target: str, mechanism: str, annotation: str, slot: str) -> None:
        # Resolve the target type and emit the row unless this key was seen before.
        dst_id, dst_simple, dst_fqn, ok = _edge_for(
            src=entry, target_simple=target, ast=ast, tables=tables,
        )
        key = (entry.node_id, dst_id, mechanism, slot)
        if key in seen:
            return
        seen.add(key)
        tables.injects_rows.append(InjectsRow(
            src_id=entry.node_id, dst_id=dst_id,
            dst_name=dst_simple, dst_fqn=dst_fqn, resolved=ok,
            mechanism=mechanism, annotation=annotation, field_or_param=slot,
        ))

    def is_autowired(method) -> bool:
        return any(a.name == "Autowired" for a in method.annotations)

    # Field injection: the first annotation that is a known injection
    # annotation decides; a field without one is not an injection point.
    for fld in decl.fields:
        for ann in fld.annotations:
            if ann.name in injectable:
                if ann.name:
                    record(fld.type_name, "field", ann.name, fld.name)
                break

    # Lombok constructors: @AllArgsConstructor covers every non-static field,
    # otherwise only `final` non-static fields count.
    if uses_lombok_ctor:
        all_args = "AllArgsConstructor" in type_annotations
        lombok_label = "AllArgsConstructor" if all_args else "RequiredArgsConstructor"
        for fld in decl.fields:
            if "static" in fld.modifiers:
                continue
            if all_args or "final" in fld.modifiers:
                record(fld.type_name, "lombok_required_args", lombok_label, fld.name)

    # Constructor injection: prefer the first @Autowired constructor; otherwise
    # accept a sole constructor when it has parameters.
    constructors = [m for m in decl.methods if m.is_constructor]
    if constructors:
        marked = [c for c in constructors if is_autowired(c)]
        if marked:
            selected = marked[0]
        elif len(constructors) == 1 and constructors[0].parameters:
            selected = constructors[0]
        else:
            selected = None
        if selected is not None:
            ctor_label = "Autowired" if is_autowired(selected) else ""
            for param in selected.parameters:
                record(param.type_name, "constructor", ctor_label, param.name)

    # Setter injection: a single-parameter `set*` method annotated @Autowired.
    for method in decl.methods:
        setter_shaped = (
            not method.is_constructor
            and method.name.startswith("set")
            and len(method.parameters) == 1
        )
        if setter_shaped and is_autowired(method):
            only_param = method.parameters[0]
            record(only_param.type_name, "setter", "Autowired", only_param.name)
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
def pass2_edges(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: bool) -> None:
    """Emit EXTENDS, IMPLEMENTS and INJECTS rows for every indexed type.

    Args:
        tables: Graph tables; the edge row lists and ``phantoms`` are extended
            in place.
        asts: Parsed ASTs keyed by file path. Types whose file has no AST in
            this mapping are skipped.
        verbose: When true, progress lines are written to stderr.
    """
    # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
    t0 = time.monotonic()
    # De-duplication state shared across all types.
    seen_ext: set[tuple[str, str]] = set()
    seen_impl: set[tuple[str, str]] = set()
    seen_inj: set[tuple[str, str, str, str]] = set()
    if verbose:
        _verbose_stderr_line(_PASS2_START)
    with _VerbosePassHeartbeats("[pass2]", verbose=verbose):
        # Only the entries are needed; the FQN keys are not used here.
        for entry in tables.types.values():
            ast = asts.get(entry.file_path)
            if ast is None:
                continue
            _emit_extends_implements(entry, ast, tables, seen_ext=seen_ext, seen_impl=seen_impl)
            _emit_injects(entry, ast, tables, seen=seen_inj)
    if verbose:
        elapsed = time.monotonic() - t0
        _verbose_stderr_line(
            f"[pass2] emitted {len(tables.extends_rows)} EXTENDS, "
            f"{len(tables.implements_rows)} IMPLEMENTS, "
            f"{len(tables.injects_rows)} INJECTS, "
            f"{len(tables.phantoms)} phantoms in {elapsed:.2f}s",
        )
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
# ---------- pass 3: call graph ----------
|
|
780
|
+
|
|
781
|
+
|
|
782
|
+
def _build_member_indexes(tables: GraphTables) -> None:
    """Rebuild ``tables.methods_by_type``: parent type FQN -> members, in ``tables.members`` order.

    Any previous content of ``tables.methods_by_type`` is discarded.
    """
    grouped = {}
    for member in tables.members:
        bucket = grouped.get(member.parent_fqn)
        if bucket is None:
            bucket = grouped[member.parent_fqn] = []
        bucket.append(member)
    tables.methods_by_type = grouped
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
def _direct_supertype_fqns(entry: TypeIndexEntry, tables: GraphTables) -> list[str]:
    """Return the indexed direct supertypes of ``entry``.

    Scans ``tables.extends_rows`` first, then ``tables.implements_rows``, keeping
    the ``dst_fqn`` of every row whose ``src_id`` is ``entry.node_id`` and whose
    target is present in ``tables.types``. Row order is preserved.
    """
    found: list[str] = []
    for rows in (tables.extends_rows, tables.implements_rows):
        found.extend(
            row.dst_fqn
            for row in rows
            if row.src_id == entry.node_id and row.dst_fqn in tables.types
        )
    return found
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
def _first_supertype_fqn(tables: GraphTables, type_fqn: str) -> str | None:
    """Return the first indexed supertype of ``type_fqn``, or ``None``.

    EXTENDS rows are consulted before IMPLEMENTS rows; only targets present in
    ``tables.types`` count. ``None`` is also returned when ``type_fqn`` itself
    is not indexed.
    """
    entry = tables.types.get(type_fqn)
    if entry is None:
        return None
    for rows in (tables.extends_rows, tables.implements_rows):
        for row in rows:
            if row.src_id == entry.node_id and row.dst_fqn in tables.types:
                return row.dst_fqn
    return None
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
def _is_chained_receiver_text(receiver_expr: str) -> bool:
    """Heuristic: true when the receiver text contains both ``(`` and ``)``.

    Such text is treated as a call chain / complex expression by the callers.
    """
    text = receiver_expr.strip()
    return all(paren in text for paren in ("(", ")"))
|
|
816
|
+
|
|
817
|
+
|
|
818
|
+
def _resolve_this_super_field_chain(
    expr: str,
    *,
    member: MemberEntry,
    ast: JavaFileAst,
    tables: GraphTables,
) -> str | None:
    """Resolve ``this.a.b`` / ``super.a`` (no calls) to the final field's type FQN.

    Returns ``None`` when the text contains parentheses or no dot, when it does
    not start with ``this``/``super``, when the caller's type (or, for ``super``,
    its first indexed supertype) is not indexed, or when any field in the chain
    is missing or its type does not resolve via ``_resolve_simple``.
    """
    text = expr.strip()
    if "." not in text or "(" in text or ")" in text:
        return None
    owner = tables.types.get(member.parent_fqn)
    if owner is None:
        return None

    head, *field_names = text.split(".")
    if head == "this":
        current = owner
    elif head == "super":
        parent_fqn = _first_supertype_fqn(tables, member.parent_fqn)
        if parent_fqn is None or parent_fqn not in tables.types:
            return None
        current = tables.types[parent_fqn]
    else:
        return None

    # Walk the chain one field at a time, hopping to each field's resolved type.
    # NOTE(review): `ast` is the caller's file even when `current` lives in
    # another file — confirm `_resolve_simple` tolerates that.
    for field_name in field_names:
        declared = None
        for candidate in current.decl.fields:
            if candidate.name == field_name:
                declared = candidate
                break
        if declared is None:
            return None
        next_type = _resolve_simple(declared.type_name, current=current, ast=ast, tables=tables)
        if next_type is None:
            return None
        current = next_type
    return current.decl.fqn
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
def _scope_table(member: MemberEntry, ast: JavaFileAst, tables: GraphTables) -> dict[str, str]:
    """Map simple variable/field/param name -> resolved declaring type FQN.

    Names are bound in this order, later bindings overwriting earlier ones
    unless noted: fields of the member's own type, fields of indexed supertypes
    (only for names not bound yet), parameters, then local variables. Entries
    whose type does not resolve via ``_resolve_simple`` are left out. Returns an
    empty dict when the member's parent type is not indexed.
    """
    bindings: dict[str, str] = {}
    owner = tables.types.get(member.parent_fqn)
    if owner is None:
        return bindings

    def bind(name: str, type_name: str, context: TypeIndexEntry, *, keep_existing: bool = False) -> None:
        # Inherited fields must not clobber a name that is already bound.
        if keep_existing and name in bindings:
            return
        target = _resolve_simple(type_name, current=context, ast=ast, tables=tables)
        if target is not None:
            bindings[name] = target.decl.fqn

    for own_field in owner.decl.fields:
        bind(own_field.name, own_field.type_name, owner)

    visited: set[str] = {member.parent_fqn}
    pending = list(_direct_supertype_fqns(owner, tables))
    while pending:
        sup_fqn = pending.pop()
        if sup_fqn in visited or sup_fqn not in tables.types:
            continue
        visited.add(sup_fqn)
        sup_entry = tables.types[sup_fqn]
        for inherited in sup_entry.decl.fields:
            bind(inherited.name, inherited.type_name, sup_entry, keep_existing=True)
        pending.extend(_direct_supertype_fqns(sup_entry, tables))

    for param in member.decl.parameters:
        bind(param.name, param.type_name, owner)

    # Locals shadow fields and parameters (same simple name → local wins).
    for local_name, local_type in member.decl.local_vars:
        bind(local_name, local_type, owner)

    return bindings
|
|
896
|
+
|
|
897
|
+
|
|
898
|
+
def _lookup_method_candidates(
    type_fqn: str,
    callee_simple: str,
    arg_count: int,
    tables: GraphTables,
    ast: JavaFileAst,
    *,
    visited: set[str] | None = None,
) -> tuple[list[MemberEntry], bool]:
    """Return ``(candidates, used_name_only_fallback)`` for a callee on a type.

    Walks ``type_fqn`` and its indexed supertypes breadth-first (plus the
    enclosing type of synthetic anonymous classes). Members whose parameter
    count equals ``arg_count`` are "exact"; all other name matches — and every
    match when ``arg_count`` is negative — are "name only". Exact matches win;
    otherwise the name-only list is returned with the flag set.

    When ``used_name_only_fallback`` is true and ``len(candidates) == 1``, the
    caller may reuse the receiver-resolution strategy (see ``_resolve_and_emit_call``)
    instead of tagging ``overload_ambiguous``.

    ``callee_simple == "<init>"`` selects constructors; any other name selects
    non-constructor members with that name. ``ast`` is not used here.
    NOTE(review): the supertype walk also applies to ``<init>``, so constructors
    declared on supertypes are collected too — confirm this is intended.
    """
    if visited is None:
        visited = set()
    exact: list[MemberEntry] = []
    name_only: list[MemberEntry] = []
    wants_ctor = callee_simple == "<init>"

    def collect_on_type(tfqn: str) -> None:
        for m in tables.methods_by_type.get(tfqn, ()):
            decl = m.decl
            if wants_ctor:
                if not decl.is_constructor:
                    continue
            elif decl.is_constructor or decl.name != callee_simple:
                continue
            param_count = len(decl.parameters)
            if arg_count >= 0 and param_count == arg_count:
                exact.append(m)
            else:
                name_only.append(m)

    # FIFO worklist driven by a cursor instead of popping from the front.
    pending = [type_fqn]
    cursor = 0
    while cursor < len(pending):
        tfqn = pending[cursor]
        cursor += 1
        if tfqn in visited or tfqn not in tables.types:
            continue
        visited.add(tfqn)
        collect_on_type(tfqn)
        te = tables.types[tfqn]
        pending.extend(
            sup for sup in _direct_supertype_fqns(te, tables) if sup not in visited
        )
        # Synthetic anonymous classes (`….<anon:byte>`): unqualified instance calls
        # may target the lexically enclosing type (D3), e.g. `pingFromAnon()` from
        # `NestedCalls` inside `new Runnable() { void run() { … } }`.
        if ".<anon:" in tfqn and te.outer_fqn and te.outer_fqn not in visited:
            pending.append(te.outer_fqn)

    if exact:
        return exact, False
    if name_only:
        return name_only, True
    return [], False
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
def _static_wildcard_resolve(
    callee_simple: str,
    ast: JavaFileAst,
    tables: GraphTables,
    current: TypeIndexEntry,
) -> str | None:
    """Find the first static-wildcard-imported type declaring ``callee_simple``.

    Iterates ``ast.file_imports.static_wildcards`` in order, skipping types not
    in ``tables.types``, and returns the first one that has a non-constructor
    member named ``callee_simple`` with ``"static"`` among its modifiers.
    Returns ``None`` when no import qualifies. ``current`` is not used.
    """
    for owner_fqn in ast.file_imports.static_wildcards:
        if owner_fqn not in tables.types:
            continue
        declares_static = any(
            m.decl.name == callee_simple
            and not m.decl.is_constructor
            and "static" in m.decl.modifiers
            for m in tables.methods_by_type.get(owner_fqn, ())
        )
        if declares_static:
            return owner_fqn
    return None
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
def _unique_type_simple_resolve(simple: str, tables: GraphTables) -> str | None:
    """Return the type FQN iff exactly one entry is registered under ``simple``.

    Looks ``simple`` up in ``tables.by_simple_name``; zero or several hits
    yield ``None``.

    Used only for receiver / static-qualifier disambiguation. Do not use the
    method index here: an unresolved identifier that equals some method's
    simple name elsewhere in the project is not evidence about the receiver type.
    """
    hits = tables.by_simple_name.get(simple, [])
    return hits[0].decl.fqn if len(hits) == 1 else None
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
def _suffix_resolve(receiver_simple: str, tables: GraphTables) -> str | None:
    """Return the only indexed FQN ending in ``.<receiver_simple>``, else ``None``.

    Zero matches and multiple matches both yield ``None``.
    """
    wanted_tail = "." + receiver_simple
    matched = [fqn for fqn in tables.types if fqn.endswith(wanted_tail)]
    return matched[0] if len(matched) == 1 else None
|
|
1004
|
+
|
|
1005
|
+
|
|
1006
|
+
def _resolve_receiver_type(
    call: CallSite,
    *,
    scope: dict[str, str],
    member: MemberEntry,
    ast: JavaFileAst,
    tables: GraphTables,
) -> tuple[str | None, str, float]:
    """Returns (receiver_type_fqn_or_none, strategy, confidence).

    Resolution tiers, in the order they are tried:

    * no receiver, non-static call: explicit static import (``static_import``,
      0.95), then static wildcard import (``static_import_wildcard``, 0.85);
    * static-qualified receiver (unless the qualifier is actually a name bound
      in ``scope``): ``_resolve_simple`` / explicit import (``import_map``,
      0.95), unique simple type name (``unique_type_name``, 0.75), FQN suffix
      (``suffix``, 0.55);
    * empty receiver / ``this`` -> the member's own type (``this_super``, 0.95);
    * ``super`` -> first indexed supertype (``this_super``, 0.95) or
      ``java.lang.Object`` (``implicit_super``, 0.90);
    * otherwise: scope table, ``this.``/``super.`` field chain, ``_resolve_simple``
      (all ``import_map``, 0.95), same-package type (``same_module``, 0.90),
      unique simple name (0.75), suffix (0.55).

    Receivers containing a completed call yield ``(None, "chained_receiver", 0.0)``;
    anything else that cannot be resolved yields ``(None, "phantom", 0.0)``.
    """
    expr = call.receiver_expr.strip()
    callee = call.callee_simple
    chained = _is_chained_receiver_text(expr)
    # Receiver text with any generic argument list stripped (`Foo<Bar>` -> `Foo`).
    bare = expr.split("<", 1)[0].strip()
    # Looked up once with `.get` so a member whose parent type is not indexed
    # degrades to an unresolved result instead of raising KeyError.
    entry = tables.types.get(member.parent_fqn)

    # A "static" qualifier that is really a variable/field/param in scope is an
    # instance receiver, so it must go through the non-static tiers below.
    effective_static = call.is_static_call
    if call.is_static_call and expr and not chained:
        if bare and "." not in bare and bare in scope:
            effective_static = False

    if not expr and not call.is_static_call:
        static_methods = ast.file_imports.static_methods
        if callee in static_methods:
            full = static_methods[callee]
            if "." in full:
                return full.rsplit(".", 1)[0], "static_import", 0.95
        if entry is not None:
            sw = _static_wildcard_resolve(callee, ast, tables, entry)
            if sw is not None:
                return sw, "static_import_wildcard", 0.85

    if effective_static and expr:
        if chained:
            return None, "chained_receiver", 0.0
        if entry is None:
            # Not a chained receiver: report it like the non-static branch does
            # so it is counted as an unresolved (phantom) receiver.
            return None, "phantom", 0.0
        resolved = _resolve_simple(bare, current=entry, ast=ast, tables=tables)
        if resolved is not None:
            return resolved.decl.fqn, "import_map", 0.95
        # External type not in the index but FQN is deterministic via an explicit import.
        # e.g. `import java.util.Objects; Objects.requireNonNull(x)` — we know the FQN
        # is "java.util.Objects" even though the type isn't indexed; return it so the
        # edge carries the correct receiver-tier confidence rather than collapsing to phantom.
        if bare in ast.explicit_imports:
            return ast.explicit_imports[bare], "import_map", 0.95
        # Fallbacks use the generic-stripped name, matching the non-static branch.
        uq = _unique_type_simple_resolve(bare, tables)
        if uq is not None:
            return uq, "unique_type_name", 0.75
        sf = _suffix_resolve(bare, tables)
        if sf is not None:
            return sf, "suffix", 0.55
        return None, "phantom", 0.0

    if expr in ("", "this"):
        return member.parent_fqn, "this_super", 0.95

    if expr == "super":
        sup = _first_supertype_fqn(tables, member.parent_fqn)
        if sup is not None:
            return sup, "this_super", 0.95
        # No indexed supertype — implicit super to java.lang.Object.
        # Keep strategy='implicit_super' and confidence=0.90 so this path is
        # distinguishable from a genuinely unresolvable receiver.
        return "java.lang.Object", "implicit_super", 0.90

    if chained:
        return None, "chained_receiver", 0.0

    if entry is None:
        return None, "phantom", 0.0

    if bare in scope:
        return scope[bare], "import_map", 0.95

    chain = _resolve_this_super_field_chain(expr, member=member, ast=ast, tables=tables)
    if chain is not None:
        return chain, "import_map", 0.95

    resolved = _resolve_simple(bare, current=entry, ast=ast, tables=tables)
    if resolved is not None:
        return resolved.decl.fqn, "import_map", 0.95

    if entry.package:
        cand = f"{entry.package}.{bare}"
        if cand in tables.types:
            return cand, "same_module", 0.90

    uq = _unique_type_simple_resolve(bare, tables)
    if uq is not None:
        return uq, "unique_type_name", 0.75

    sf = _suffix_resolve(bare, tables)
    if sf is not None:
        return sf, "suffix", 0.55

    return None, "phantom", 0.0
|
|
1103
|
+
|
|
1104
|
+
|
|
1105
|
+
def _phantom_method_id(
    tables: GraphTables,
    *,
    receiver_fqn: str | None,
    receiver_expr: str,
    callee: str,
    arg_count: int,
) -> str:
    """Return the phantom-node id for an unindexed callee, registering it if new.

    The id comes from ``phantom_id`` applied to a synthetic FQN. A placeholder
    method record (``resolved=False``, empty location fields) is stored in
    ``tables.phantoms`` the first time an id is seen.
    """
    if receiver_fqn:
        # Phantom node identity for a resolved receiver omits call-site arity so
        # method references (arg_count=-1) and normal invocations share one Symbol
        # per (receiver_fqn, callee) when the callee is not indexed (D1).
        sig = f"{callee}(?)"
        fqn = f"{receiver_fqn}#{callee}(?)"
    else:
        # Unresolved receiver: key on (truncated) receiver text plus arity.
        expr_short = receiver_expr[:50] if receiver_expr else "?"
        arity = f"({arg_count})" if arg_count >= 0 else "(?)"
        sig = f"{callee}{arity}"
        fqn = f"?{expr_short}#{callee}{arity}"

    pid = phantom_id(fqn)
    if pid in tables.phantoms:
        return pid

    record = {
        "id": pid,
        "kind": "method",
        "name": callee,
        "fqn": fqn,
        "package": "",
        "module": "",
        "microservice": "",
        "filename": "",
        "start_line": 0,
        "end_line": 0,
        "start_byte": 0,
        "end_byte": 0,
        "modifiers": [],
        "annotations": [],
        "capabilities": [],
        "role": "OTHER",
        "signature": sig,
        "parent_id": "",
        "resolved": False,
    }
    tables.phantoms[pid] = record
    return pid
|
|
1148
|
+
|
|
1149
|
+
|
|
1150
|
+
def _method_signature_matches_call(member: MemberEntry, call: CallSite) -> bool:
    """True when the member's parameter count fits the call site.

    A negative ``call.arg_count`` always matches; otherwise the member's
    parameter count must equal it exactly.
    """
    return call.arg_count < 0 or len(member.decl.parameters) == call.arg_count
|
|
1154
|
+
|
|
1155
|
+
|
|
1156
|
+
def _is_strict_supertype_of(tables: GraphTables, super_fqn: str, subtype_fqn: str) -> bool:
    """True when ``super_fqn`` is reachable from ``subtype_fqn`` via indexed supertype edges.

    A type is never a strict supertype of itself, and an unindexed
    ``subtype_fqn`` yields ``False``. The walk is breadth-first over
    ``_direct_supertype_fqns`` and expands each indexed type at most once.
    """
    if super_fqn == subtype_fqn:
        return False
    sub_entry = tables.types.get(subtype_fqn)
    if sub_entry is None:
        return False

    expanded: set[str] = set()
    frontier = list(_direct_supertype_fqns(sub_entry, tables))
    cursor = 0
    while cursor < len(frontier):
        candidate = frontier[cursor]
        cursor += 1
        if candidate == super_fqn:
            return True
        if candidate not in expanded and candidate in tables.types:
            expanded.add(candidate)
            frontier.extend(_direct_supertype_fqns(tables.types[candidate], tables))
    return False
|
|
1173
|
+
|
|
1174
|
+
|
|
1175
|
+
def _callee_declaring_role_at_write(
    tables: GraphTables,
    dst_id: str,
    *,
    member_by_id: dict[str, MemberEntry],
) -> str:
    """Match parent declaring-type Symbol.role (brownfield + meta_chain included).

    Returns ``"OTHER"`` for phantom callees, for ids missing from
    ``member_by_id``, and for parents with no entry in
    ``tables.type_role_by_node_id``.
    """
    if dst_id in tables.phantoms:
        return "OTHER"
    callee = member_by_id.get(dst_id)
    if callee is None:
        return "OTHER"
    return tables.type_role_by_node_id.get(callee.parent_id, "OTHER")
|
|
1188
|
+
|
|
1189
|
+
|
|
1190
|
+
def _collapse_supertype_duplicates(
    candidates: list[MemberEntry],
    recv_type_fqn: str,
    call: CallSite,
    tables: GraphTables,
) -> list[MemberEntry]:
    """§3.3.1 supertype-walk dedup — collapse interface + concrete duplicate sites.

    Collapses to the single member declared on the receiver type only when
    exactly one such member fits the call's arity and every other candidate
    lives on a strict supertype of the receiver and has the same
    ``decl.signature``. In every other situation the input list is returned
    unchanged.
    """
    if len(candidates) <= 1:
        return candidates

    on_receiver = [
        cand for cand in candidates
        if cand.parent_fqn == recv_type_fqn and _method_signature_matches_call(cand, call)
    ]
    if len(on_receiver) != 1:
        return candidates
    winner = on_receiver[0]

    shadowed = [
        cand for cand in candidates
        if cand is not winner
        and _is_strict_supertype_of(tables, cand.parent_fqn, recv_type_fqn)
        and cand.decl.signature == winner.decl.signature
    ]
    if not shadowed:
        return candidates

    # Any candidate that is neither the winner nor a shadowed duplicate means
    # the set is genuinely ambiguous — leave it alone.
    allowed_ids = {winner.node_id}
    allowed_ids.update(cand.node_id for cand in shadowed)
    if not all(cand.node_id in allowed_ids for cand in candidates):
        return candidates

    log.debug(
        "pass3 supertype dedup %s -> %s",
        [c.node_id for c in candidates],
        winner.node_id,
    )
    return [winner]
|
|
1223
|
+
|
|
1224
|
+
|
|
1225
|
+
def _unresolved_call_site_id(caller_id: str, call: CallSite) -> str:
    """Build the ``ucs:<caller_id>:<line>:<byte>`` id for an unresolved call site."""
    return ":".join(("ucs", str(caller_id), str(call.line), str(call.byte)))
|
|
1227
|
+
|
|
1228
|
+
|
|
1229
|
+
def _emit_unresolved_call_site(
    tables: GraphTables,
    stats: CallResolutionStats,
    *,
    caller_id: str,
    call: CallSite,
    reason: str,
) -> None:
    """Record a call site whose receiver could not be resolved.

    Appends one ``UnresolvedCallSiteRow`` to ``tables.unresolved_call_site_rows``
    and bumps ``stats.phantom_chained`` when ``reason`` is ``"chained_receiver"``,
    ``stats.phantom_other`` for any other reason.
    """
    row = UnresolvedCallSiteRow(
        id=_unresolved_call_site_id(caller_id, call),
        caller_id=caller_id,
        call_site_line=call.line,
        call_site_byte=call.byte,
        arg_count=call.arg_count,
        callee_simple=call.callee_simple,
        receiver_expr=call.receiver_expr or "",
        reason=reason,
    )
    tables.unresolved_call_site_rows.append(row)

    if reason != "chained_receiver":
        stats.phantom_other += 1
    else:
        stats.phantom_chained += 1
|
|
1251
|
+
|
|
1252
|
+
|
|
1253
|
+
def _emit_call_edge(
    tables: GraphTables,
    stats: CallResolutionStats,
    *,
    src_id: str,
    dst_id: str,
    call: CallSite,
    confidence: float,
    strategy: str,
    resolved: bool,
    edge_arg_count: int | None = None,
) -> None:
    """Append one CALLS row and update the resolution counters.

    The row's ``arg_count`` is ``edge_arg_count`` when given, otherwise the
    call site's own ``arg_count``. ``stats.total`` and the per-strategy counter
    always increase; ``stats.callee_unresolved`` increases when ``resolved``
    is false.
    """
    if edge_arg_count is None:
        arity = call.arg_count
    else:
        arity = edge_arg_count

    row = CallsRow(
        src_id=src_id,
        dst_id=dst_id,
        call_site_line=call.line,
        call_site_byte=call.byte,
        arg_count=arity,
        confidence=confidence,
        strategy=strategy,
        source="static",
        resolved=resolved,
    )
    tables.calls_rows.append(row)

    stats.total += 1
    stats.by_strategy[strategy] += 1
    if not resolved:
        stats.callee_unresolved += 1
|
|
1281
|
+
|
|
1282
|
+
|
|
1283
|
+
def _resolve_and_emit_call(
    call: CallSite,
    member: MemberEntry,
    ast: JavaFileAst,
    tables: GraphTables,
    stats: CallResolutionStats,
    *,
    scope: dict[str, str],
) -> None:
    """Emit CALLS rows for one call site.

    Steps: resolve the receiver type; if that fails, record an unresolved call
    site and stop. Otherwise look up candidates with
    ``_lookup_method_candidates`` (exact arity first, then name-only fallback on
    the type + supertype walk), prefer same-microservice candidates when both
    kinds are present, and emit one edge per surviving candidate — or a single
    unresolved edge to a phantom method when there is none.

    When ``used_name_only_fallback`` is true and exactly one name-only candidate
    exists, the edge ``strategy`` reuses the receiver-resolution tier (``strat``)
    rather than ``overload_ambiguous``: arity at the call site did not match any
    overload, but only one method of that name exists — the callee is unambiguous.
    """
    recv_type, strat, conf = _resolve_receiver_type(
        call, scope=scope, member=member, ast=ast, tables=tables,
    )

    # No usable receiver type: record the site and stop.
    if strat == "chained_receiver" or recv_type is None:
        reason = "chained_receiver" if strat == "chained_receiver" else "phantom_unresolved_receiver"
        _emit_unresolved_call_site(
            tables, stats, caller_id=member.node_id, call=call, reason=reason,
        )
        return

    candidates, name_only_fb = _lookup_method_candidates(
        recv_type, call.callee_simple, call.arg_count, tables, ast,
    )

    # Guard relies on `_lookup_method_candidates` returning a same-ms candidate when one exists; revisit if pass3 scopes lookups per-microservice.
    caller_ms = member.microservice
    if caller_ms:
        local_only = [c for c in candidates if c.microservice == caller_ms]
        if local_only and len(local_only) != len(candidates):
            for foreign in candidates:
                if foreign.microservice and foreign.microservice != caller_ms:
                    log.warning(
                        "skipping cross-microservice CALLS edge %s -> %s "
                        "(caller=%s, callee=%s)",
                        f"{member.parent_fqn}#{member.decl.signature}",
                        f"{foreign.parent_fqn}#{foreign.decl.signature}",
                        member.microservice, foreign.microservice,
                    )
                    stats.skipped_cross_service += 1
            candidates = local_only

    # Compute the call-shape strategy / confidence override BEFORE the
    # empty-candidates check so they are preserved even when the callee cannot
    # be located on the resolved receiver type (B3 fix).
    is_method_ref = call.arg_count < 0
    edge_conf = conf
    edge_strat = strat  # default: inherit the receiver-resolution tier
    if is_method_ref:
        edge_strat = "method_reference"
    elif call.callee_simple == "<init>":
        synthesized_super = (
            call.receiver_expr == "super"
            and call.byte == member.decl.start_byte
            and call.line == member.decl.start_line
        )
        if synthesized_super:
            # Synthesized implicit-super site from _parse_method.
            edge_strat = "implicit_super"
            edge_conf = 0.90
        else:
            # new Foo(…), this(…), super(…) — confidence inherited from receiver tier.
            edge_strat = "constructor"
    elif name_only_fb and len(candidates) > 1:
        edge_strat = "overload_ambiguous"
    # A single name-only candidate is not ambiguous and keeps `strat`.

    if not candidates:
        # Receiver was resolved but the callee method isn't indexed on that type
        # (e.g. JDK / Spring / external library). Preserve the receiver-tier
        # strategy and confidence — only resolved=False signals the phantom callee
        # (B3 fix: do NOT downgrade to confidence=0.0 / strategy='phantom' here).
        pid = _phantom_method_id(
            tables, receiver_fqn=recv_type, receiver_expr=call.receiver_expr,
            callee=call.callee_simple, arg_count=call.arg_count,
        )
        _emit_call_edge(
            tables, stats, src_id=member.node_id, dst_id=pid, call=call,
            confidence=edge_conf, strategy=edge_strat, resolved=False,
        )
        return

    if len(candidates) > 1 and edge_strat != "overload_ambiguous":
        candidates = _collapse_supertype_duplicates(candidates, recv_type, call, tables)

    # One survivor keeps the computed strategy; several are all tagged ambiguous.
    unambiguous = len(candidates) == 1
    for target in candidates:
        # Method references carry no call-site arity, so report the callee's own.
        ref_arity: int | None = len(target.decl.parameters) if is_method_ref else None
        _emit_call_edge(
            tables, stats, src_id=member.node_id, dst_id=target.node_id, call=call,
            confidence=edge_conf,
            strategy=edge_strat if unambiguous else "overload_ambiguous",
            resolved=True,
            edge_arg_count=ref_arity,
        )
|
|
1399
|
+
|
|
1400
|
+
|
|
1401
|
+
def _resolve_method_calls(
    member: MemberEntry,
    ast: JavaFileAst,
    tables: GraphTables,
    stats: CallResolutionStats,
) -> None:
    """Resolve every call site of one member, sharing a single scope table.

    A failure on one call site is logged as a warning and does not stop the
    remaining call sites from being processed.
    """
    scope = _scope_table(member, ast, tables)
    for site in member.decl.call_sites:
        try:
            _resolve_and_emit_call(site, member, ast, tables, stats, scope=scope)
        except Exception as exc:
            # Best effort: keep going with the next call site.
            log.warning("call resolution failed for %s: %s", member.decl.signature, exc)
|
|
1413
|
+
|
|
1414
|
+
|
|
1415
|
+
def _process_file_calls(
    file_ast: JavaFileAst,
    file_path: str,
    tables: GraphTables,
    stats: CallResolutionStats,
) -> None:
    """Resolve calls for every member of ``tables.members`` declared in ``file_path``.

    Each member is handled independently; an exception for one member is
    logged as a warning and the loop continues.
    """
    members_in_file = (m for m in tables.members if m.file_path == file_path)
    for member in members_in_file:
        try:
            _resolve_method_calls(member, file_ast, tables, stats)
        except Exception as exc:
            log.warning(
                "Failed to extract calls from %s#%s: %s",
                member.parent_fqn, member.decl.signature, exc,
            )
|
|
1428
|
+
|
|
1429
|
+
|
|
1430
|
+
def pass3_calls(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: bool) -> None:
    """Run pass 3: resolve call sites into CALLS rows and summarise the outcome.

    Rebuilds the per-type member index, resolves the call sites of every member
    whose file has an AST in ``asts``, stores the unresolved / skipped counters
    on ``tables`` and logs a one-line summary (also written to stderr when
    ``verbose`` is true).
    """
    if verbose:
        _verbose_stderr_line(_PASS3_START)
    _build_member_indexes(tables)
    stats = CallResolutionStats()

    # Group members by file once. Scanning all of `tables.members` again for
    # every file would cost files x members comparisons.
    members_by_file: dict[str, list[MemberEntry]] = {}
    for member in tables.members:
        members_by_file.setdefault(member.file_path, []).append(member)

    with _VerbosePassHeartbeats("[pass3]", verbose=verbose):
        for rel_path, file_ast in asts.items():
            try:
                for member in members_by_file.get(rel_path, ()):
                    try:
                        _resolve_method_calls(member, file_ast, tables, stats)
                    except Exception as e:
                        # Best effort per member: log and continue with the next one.
                        log.warning(
                            "Failed to extract calls from %s#%s: %s",
                            member.parent_fqn, member.decl.signature, e,
                        )
            except Exception as e:
                log.error("Call extraction failed for %s: %s", rel_path, e)

    # max(1, …) keeps the percentages defined when nothing was emitted.
    denom_calls = max(1, stats.total)
    denom_sites = max(1, stats.total + stats.phantom_chained + stats.phantom_other)
    pct_chained = 100.0 * stats.phantom_chained / denom_sites
    pct_callee_unres = 100.0 * stats.callee_unresolved / denom_calls
    pct_phantom_recv = 100.0 * stats.phantom_other / denom_sites
    tables.pass3_skipped_cross_service = int(stats.skipped_cross_service)
    tables.pass3_unresolved_phantom_receiver = int(stats.phantom_other)
    tables.pass3_unresolved_chained = int(stats.phantom_chained)
    msg = (
        f"Call resolution: {stats.total} CALLS rows, {stats.phantom_chained} chained unresolved "
        f"({pct_chained:.1f}%), {stats.callee_unresolved} unresolved callee on CALLS "
        f"({pct_callee_unres:.1f}%), {stats.phantom_other} phantom-receiver unresolved "
        f"({pct_phantom_recv:.1f}%), {stats.skipped_cross_service} skipped cross-service, "
        f"strategies: {dict(stats.by_strategy)}"
    )
    log.info(msg)
    if verbose:
        _verbose_stderr_line(f"[pass3] {msg}")
|
|
1459
|
+
|
|
1460
|
+
|
|
1461
|
+
_PATH_VAR_SEG = re.compile(r"^\{([^:{}]+)(?::([^}]*))?\}$")  # whole path segment


def _normalize_path(raw_path: str) -> tuple[str, str]:
    """Return `(path_template, path_regex)` for a servlet-style path pattern.

    `/api/users/{id}` → ``("/api/users/{}", "^/api/users/[^/]+/?$")``.
    `{id:\\d+}` constraints strip to ``{}`` in the template while preserving the
    regex constraint for that segment. Deterministic for shared use by B2b/B6.

    A constraint containing ``|`` is wrapped in a non-capturing group so the
    alternation stays confined to its own segment; other constraints are
    emitted verbatim. A missing leading slash is added, trailing slashes are
    dropped, and empty segments are ignored. Empty / ``None`` input yields
    ``("", "")``; a path consisting only of slashes yields ``("/", "^/?$")``.
    """
    raw_path = (raw_path or "").strip()
    if not raw_path:
        return "", ""
    p = raw_path if raw_path.startswith("/") else "/" + raw_path
    trimmed = p.rstrip("/")
    if trimmed == "":
        return "/", "^/?$"

    tmpl_parts: list[str] = []
    re_parts: list[str] = []
    for seg in trimmed.split("/"):
        if seg == "":
            continue
        m = _PATH_VAR_SEG.fullmatch(seg)
        if m is None:
            # Literal segment: kept in the template, escaped in the regex.
            tmpl_parts.append(seg)
            re_parts.append(re.escape(seg))
            continue
        tmpl_parts.append("{}")
        constraint = m.group(2)
        if not constraint:
            re_parts.append("[^/]+")
        elif "|" in constraint:
            # Ungrouped, `a|b` would split the whole anchored pattern in two
            # (`^/x/a` OR `b/?$`) instead of constraining this one segment.
            re_parts.append(f"(?:{constraint})")
        else:
            re_parts.append(constraint)

    tmpl = "/" + "/".join(tmpl_parts)
    body = "/".join(re_parts)
    if not body.startswith("/"):
        body = "/" + body
    return tmpl, f"^{body}/?$"
|
|
1495
|
+
|
|
1496
|
+
|
|
1497
|
+
def _route_id(
    framework: str,
    kind: str,
    http_method: str,
    path_template: str,
    path_raw: str,
    topic: str,
    broker: str,
    microservice: str,
) -> str:
    """Stable id; `path_raw` disambiguates HTTP routes when `path_template` is empty (SpEL / const).

    The id is ``r:`` followed by the first 16 hex characters of the SHA-1 of the
    ``|``-joined identity fields. The digest is used as an identifier only.
    """
    path_key = path_template or path_raw
    identity = (framework, kind, http_method, path_key, topic, broker, microservice)
    key = "|".join(str(part) for part in identity)
    digest = hashlib.sha1(key.encode()).hexdigest()
    return "r:" + digest[:16]
|
|
1514
|
+
|
|
1515
|
+
|
|
1516
|
+
def _client_id(
    *,
    microservice: str,
    member_fqn: str,
    client_kind: str,
    path: str,
    method: str,
) -> str:
    """Return ``c:`` + the first 16 hex chars of the SHA-1 of the ``|``-joined client identity fields."""
    identity = (microservice, member_fqn, client_kind, path, method)
    key = "|".join(str(part) for part in identity)
    return "c:" + hashlib.sha1(key.encode()).hexdigest()[:16]
|
|
1526
|
+
|
|
1527
|
+
|
|
1528
|
+
def _producer_id(
    *,
    microservice: str,
    member_fqn: str,
    producer_kind: str,
    topic: str,
) -> str:
    """Return ``p:`` + the first 16 hex chars of the SHA-1 of the ``|``-joined producer identity fields."""
    # Topic-level identity per method+kind; broker is intentionally omitted so the same
    # resolved topic on one method shares one Producer node across call sites.
    identity = (microservice, member_fqn, producer_kind, topic)
    key = "|".join(str(part) for part in identity)
    return "p:" + hashlib.sha1(key.encode()).hexdigest()[:16]
|
|
1539
|
+
|
|
1540
|
+
|
|
1541
|
+
def _client_source_layer(strategy: str) -> str:
    """Map a client resolution strategy to the source layer stored on the Client row.

    The four brownfield layer names pass through unchanged; every other value
    collapses to ``"builtin"``.
    """
    brownfield_layers = {"layer_a_meta", "layer_b_ann", "layer_b_fqn", "layer_c_source"}
    if strategy in brownfield_layers:
        return strategy
    # Some caller extraction paths emit the client kind as the strategy; those
    # are builtin-source declarations and must not warn on every row. Only a
    # value that is neither a client kind nor "builtin" is worth a warning.
    if strategy not in VALID_CLIENT_KINDS and strategy != "builtin":
        log.warning("unknown client source strategy %r, falling back to builtin", strategy)
    return "builtin"
|
|
1551
|
+
|
|
1552
|
+
|
|
1553
|
+
def _producer_source_layer(strategy: str) -> str:
    """Map a producer resolution strategy to the source layer stored on the Producer row.

    The four brownfield layer names pass through unchanged; every other value
    collapses to ``"builtin"``.
    """
    brownfield_layers = {"layer_a_meta", "layer_b_ann", "layer_b_fqn", "layer_c_source"}
    if strategy in brownfield_layers:
        return strategy
    # A producer kind used as the strategy is treated as a builtin declaration
    # without a warning; only values that are neither a producer kind nor
    # "builtin" are logged.
    if strategy not in VALID_PRODUCER_KINDS and strategy != "builtin":
        log.warning("unknown producer source strategy %r, falling back to builtin", strategy)
    return "builtin"
|
|
1561
|
+
|
|
1562
|
+
|
|
1563
|
+
# Precedence of route source layers, lowest first. When pass 4 derives the same
# route id from more than one layer, the layer with the higher rank is the one
# recorded on the row; unknown layers are looked up with a default rank of 0.
_ROUTE_LAYER_RANK: dict[str, int] = {
    layer: rank
    for rank, layer in enumerate(
        (
            "builtin",
            "layer_b_ann",
            "layer_a_meta",
            "layer_c_source",
            "layer_b_fqn",
        )
    )
}
|
|
1570
|
+
|
|
1571
|
+
|
|
1572
|
+
def pass4_routes(
    tables: GraphTables,
    asts: dict[str, JavaFileAst],
    *,
    source_root: Path,
    verbose: bool,
) -> None:
    """Pass 4: emit Route rows and EXPOSES edges for every routed method.

    For each non-constructor member whose file has a parsed AST, the final
    route declarations are resolved (builtin routes plus brownfield overrides
    and the annotation meta chain). Each declaration becomes one `RouteRow`
    keyed by `_route_id`; when several declarations map to the same id, only
    the row's `source_layer` is upgraded, following `_ROUTE_LAYER_RANK`.
    An `ExposesRow` links the member to the route, except for
    ``http_consumer`` routes, which are only counted in
    ``exposes_suppressed_feign``.

    Side effects: sets ``tables.cross_service_resolution``, appends to
    ``tables.exposes_rows``, replaces ``tables.routes_rows`` (sorted by id) and
    updates ``tables.route_stats``; the summary is logged and, when `verbose`,
    echoed to stderr.
    """
    stats = tables.route_stats
    overrides = load_brownfield_overrides(source_root)
    try:
        root_str = str(source_root.resolve())
    except OSError:
        # Resolution failed; use the path exactly as it was given.
        root_str = str(source_root)
    tables.cross_service_resolution = _load_config_cross_service_resolution(root_str)
    meta_chain = collect_annotation_meta_chain(root_str)
    if verbose:
        _verbose_stderr_line(_PASS4_START)
    with _VerbosePassHeartbeats("[pass4]", verbose=verbose):

        for file_ast in asts.values():
            stats.routes_skipped_unresolved += file_ast.routes_skipped_unresolved

        routes_by_id: dict[str, RouteRow] = {}
        exposes_seen: set[tuple[str, str]] = set()

        http_kinds = frozenset({"http_endpoint", "http_consumer"})

        # Sorted iteration keeps the emitted rows deterministic.
        for member in sorted(tables.members, key=lambda m: m.node_id):
            if member.decl.is_constructor:
                continue
            if asts.get(member.file_path) is None:
                continue
            type_decl = tables.types[member.parent_fqn].decl
            final_routes = resolve_routes_for_method(
                method_decl=member.decl,
                enclosing_type=type_decl,
                overrides=overrides,
                meta_chain=meta_chain,
                builtin_routes=member.decl.routes,
            )
            for decl in final_routes or ():
                # Only resolved HTTP declarations from these two strategies get
                # a normalized template/regex; everything else keeps them empty
                # and is told apart by its raw path inside `_route_id`.
                normalizable = (
                    decl.kind in http_kinds
                    and decl.resolved
                    and decl.resolution_strategy in ("annotation", "codebase_route")
                )
                if normalizable:
                    path_template, path_regex = _normalize_path(decl.path)
                else:
                    path_template, path_regex = "", ""
                rid = _route_id(
                    decl.framework,
                    decl.kind,
                    decl.http_method,
                    path_template,
                    decl.path,
                    decl.topic,
                    decl.broker,
                    member.microservice,
                )
                layer = decl.route_source_layer
                known = routes_by_id.get(rid)
                if known is None:
                    routes_by_id[rid] = RouteRow(
                        id=rid,
                        kind=decl.kind,
                        framework=decl.framework,
                        method=decl.http_method,
                        path=decl.path,
                        path_template=path_template,
                        path_regex=path_regex,
                        topic=decl.topic,
                        broker=decl.broker,
                        feign_name=decl.feign_name,
                        feign_url=decl.feign_url,
                        microservice=member.microservice,
                        module=member.module,
                        filename=decl.filename,
                        start_line=decl.start_line,
                        end_line=decl.end_line,
                        resolved=decl.resolved,
                        source_layer=layer,
                    )
                else:
                    new_rank = _ROUTE_LAYER_RANK.get(layer, 0)
                    old_rank = _ROUTE_LAYER_RANK.get(known.source_layer, 0)
                    if new_rank > old_rank:
                        # The first declaration keeps all other fields.
                        routes_by_id[rid] = replace(known, source_layer=layer)

                edge_key = (member.node_id, rid)
                if edge_key in exposes_seen:
                    continue
                if routes_by_id[rid].kind == "http_consumer":
                    # Consumer-side routes get no EXPOSES edge, only a counter.
                    stats.exposes_suppressed_feign += 1
                    continue
                exposes_seen.add(edge_key)
                tables.exposes_rows.append(
                    ExposesRow(
                        symbol_id=member.node_id,
                        route_id=rid,
                        confidence=decl.confidence,
                        strategy=decl.resolution_strategy,
                    ),
                )

        tables.routes_rows = sorted(routes_by_id.values(), key=lambda r: r.id)

        by_layer: dict[str, int] = defaultdict(int)
        for row in tables.routes_rows:
            stats.by_framework[row.framework] += 1
            stats.by_kind[row.kind] += 1
            by_layer[row.source_layer] += 1

        n_routes = len(tables.routes_rows)
        if n_routes:
            resolved_count = sum(1 for r in tables.routes_rows if r.resolved)
            brownfield_count = sum(
                1 for r in tables.routes_rows if r.source_layer != "builtin"
            )
            stats.routes_resolved_pct = 100.0 * resolved_count / n_routes
            stats.routes_from_brownfield_pct = 100.0 * brownfield_count / n_routes
        else:
            # No routes at all is reported as fully resolved, nothing brownfield.
            stats.routes_resolved_pct = 100.0
            stats.routes_from_brownfield_pct = 0.0

        stats.routes_by_layer = dict(sorted(by_layer.items()))

        msg = (
            f"Route extraction: emitted={n_routes}, exposes={len(tables.exposes_rows)}, "
            f"exposes_suppressed_feign={stats.exposes_suppressed_feign}, "
            f"skipped_unresolved={stats.routes_skipped_unresolved}, "
            f"routes_resolved_pct={stats.routes_resolved_pct:.1f}, "
            f"routes_from_brownfield_pct={stats.routes_from_brownfield_pct:.1f}, "
            f"by_framework={dict(stats.by_framework)}"
        )
        log.info(msg)
        if verbose:
            _verbose_stderr_line(f"[pass4] {msg}")
|
|
1714
|
+
|
|
1715
|
+
|
|
1716
|
+
def pass5_imperative_edges(
    tables: GraphTables,
    asts: dict[str, JavaFileAst],
    *,
    source_root: Path,
    verbose: bool,
) -> None:
    """Pass 5: record outgoing HTTP/async calls as Client/Producer nodes and call edges.

    For every non-constructor member the HTTP-client and async-producer
    resolvers are run, and each resulting call produces:

    * a ``ClientRow`` / ``ProducerRow`` (deduplicated by id) and a
      ``DeclaresClientRow`` / ``DeclaresProducerRow`` from the member;
    * a target route: for ``feign_method`` clients the first route the member
      already exposes, otherwise a synthetic phantom ``RouteRow``
      (``resolved=False``, empty microservice);
    * an ``HttpCallRow`` / ``AsyncCallRow`` with ``match="unresolved"`` and a
      provisional confidence of ``confidence_base * 0.3 * micro_factor``.

    Afterwards the route, client and producer tables are re-sorted by id and
    the client, producer and call-edge statistics are refreshed.

    Args:
        tables: Graph tables mutated in place.
        asts: Unused; accepted for signature parity with the other passes.
        source_root: Root of the analysed source tree.
        verbose: Emit progress and a summary line to stderr.
    """
    del asts
    overrides = load_brownfield_overrides(source_root)
    try:
        prs = str(source_root.resolve())
    except OSError:
        # Resolution failed; use the path exactly as it was given.
        prs = str(source_root)
    tables.cross_service_resolution = _load_config_cross_service_resolution(prs)
    meta_chain = collect_annotation_meta_chain(prs)
    existing_route_ids = {r.id for r in tables.routes_rows}
    http_seen: set[tuple[str, str]] = set()
    async_seen: set[tuple[str, str]] = set()
    client_seen: set[str] = set()
    producer_seen: set[str] = set()
    declares_client_seen: set[tuple[str, str]] = set()
    declares_producer_seen: set[tuple[str, str]] = set()
    route_rows = list(tables.routes_rows)

    # First exposed route per member symbol, built once. `tables.exposes_rows`
    # is not modified in this pass, so this equals scanning the list for the
    # first matching row on every Feign call, without the repeated scan.
    first_exposed_route_id: dict[str, str] = {}
    for exposes in tables.exposes_rows:
        first_exposed_route_id.setdefault(exposes.symbol_id, exposes.route_id)

    def _micro_factor(member: MemberEntry) -> float:
        # NOTE(review): pass 6 divides the stored confidence by a factor based
        # on `member.microservice`, not `microservice_for_path(...)`; confirm
        # the two always agree, otherwise the recovered base confidence drifts.
        ms = microservice_for_path(member.file_path, source_root)
        return 1.0 if ms else 0.85

    def _append_route(row: RouteRow) -> None:
        # Routes already known (from pass 4 or an earlier call) are kept as-is.
        if row.id in existing_route_ids:
            return
        existing_route_ids.add(row.id)
        route_rows.append(row)

    def _phantom_http_route_id(call: OutgoingCallDecl) -> str:
        # Calls with both path and method share one phantom per (method, path);
        # anything else gets a per-call-site id.
        if call.path_template_call and call.method_call:
            return _route_id("", "http_endpoint", call.method_call, call.path_template_call, call.path_template_call, "", "", "")
        uniq = hashlib.sha1(f"{call.filename}:{call.start_line}:{call.raw_uri}".encode()).hexdigest()[:12]
        return f"r:phantom:{uniq}"

    def _phantom_async_route_id(call: OutgoingCallDecl) -> str:
        # Calls with a known topic share one phantom per (topic, broker);
        # anything else gets a per-call-site id.
        if call.topic_call:
            return _route_id("", "kafka_topic", "", "", "", call.topic_call, call.broker_call, "")
        uniq = hashlib.sha1(f"{call.filename}:{call.start_line}:{call.raw_topic}".encode()).hexdigest()[:12]
        return f"r:phantom:{uniq}"

    if verbose:
        _verbose_stderr_line(_PASS5_START)
    with _VerbosePassHeartbeats("[pass5]", verbose=verbose):
        # Sorted iteration keeps the emitted rows deterministic.
        for member in sorted(tables.members, key=lambda x: x.node_id):
            if member.decl.is_constructor:
                continue
            type_decl = tables.types[member.parent_fqn].decl
            final_http_calls = resolve_http_client_for_method(
                method_decl=member.decl,
                enclosing_type=type_decl,
                overrides=overrides,
                meta_chain=meta_chain,
                builtin_calls=member.decl.outgoing_calls,
            )
            final_async_calls = resolve_async_producer_for_method(
                method_decl=member.decl,
                enclosing_type=type_decl,
                overrides=overrides,
                meta_chain=meta_chain,
                builtin_calls=member.decl.outgoing_calls,
            )
            micro_factor = _micro_factor(member)
            for call in final_http_calls + final_async_calls:
                if call.channel == "http":
                    client_path = (call.path_template_call or "").strip()
                    client_method = (call.method_call or "").strip().upper()
                    # Keep normalized path fields on Client now so LC3 filter semantics
                    # (`path_prefix`) can use persisted columns without extra transforms.
                    client_path_template = ""
                    client_path_regex = ""
                    if client_path:
                        client_path_template, client_path_regex = _normalize_path(client_path)
                    cid = _client_id(
                        microservice=member.microservice,
                        member_fqn=call.method_fqn,
                        client_kind=call.client_kind,
                        path=client_path,
                        method=client_method,
                    )
                    if cid not in client_seen:
                        client_seen.add(cid)
                        tables.client_rows.append(
                            ClientRow(
                                id=cid,
                                client_kind=call.client_kind,
                                target_service=call.feign_target_name,
                                path=client_path,
                                path_template=client_path_template,
                                path_regex=client_path_regex,
                                method=client_method,
                                member_fqn=call.method_fqn,
                                member_id=member.node_id,
                                microservice=member.microservice,
                                module=member.module,
                                filename=call.filename,
                                start_line=call.start_line,
                                end_line=call.end_line,
                                resolved=call.resolved,
                                source_layer=_client_source_layer(call.resolution_strategy),
                            ),
                        )
                    dkey = (member.node_id, cid)
                    if dkey not in declares_client_seen:
                        declares_client_seen.add(dkey)
                        tables.declares_client_rows.append(
                            DeclaresClientRow(
                                symbol_id=member.node_id,
                                client_id=cid,
                                confidence=call.confidence_base,
                                strategy=call.resolution_strategy,
                            ),
                        )
                    rid = ""
                    strategy = call.resolution_strategy
                    if call.client_kind == "feign_method":
                        # Reuse the route this member already exposes, if any.
                        rid = first_exposed_route_id.get(member.node_id, "")
                    if not rid:
                        rid = _phantom_http_route_id(call)
                        _append_route(
                            RouteRow(
                                id=rid,
                                kind="http_endpoint",
                                framework="",
                                method=call.method_call,
                                path=call.path_template_call,
                                path_template=call.path_template_call,
                                path_regex="",
                                topic="",
                                broker="",
                                feign_name=call.feign_target_name,
                                feign_url=call.feign_target_url,
                                microservice="",
                                module="",
                                filename=call.filename,
                                start_line=call.start_line,
                                end_line=call.end_line,
                                resolved=False,
                                source_layer="builtin",
                            )
                        )
                    key = (cid, rid)
                    if key in http_seen:
                        continue
                    http_seen.add(key)
                    # Provisional confidence for an edge still marked "unresolved".
                    conf = call.confidence_base * 0.3 * micro_factor
                    tables.http_call_rows.append(
                        HttpCallRow(
                            client_id=cid,
                            route_id=rid,
                            confidence=conf,
                            strategy=strategy,
                            method_call=call.method_call,
                            raw_uri=call.raw_uri,
                            match="unresolved",
                        )
                    )
                    tables.call_edge_stats.http_calls_total += 1
                    tables.call_edge_stats.http_calls_by_client_kind[call.client_kind] += 1
                    tables.call_edge_stats.http_calls_by_strategy[strategy] += 1
                elif call.channel == "async":
                    topic_atom = (call.topic_call or "").strip()
                    pid = _producer_id(
                        microservice=member.microservice,
                        member_fqn=call.method_fqn,
                        producer_kind=call.client_kind,
                        topic=topic_atom,
                    )
                    if pid not in producer_seen:
                        producer_seen.add(pid)
                        tables.producer_rows.append(
                            ProducerRow(
                                id=pid,
                                producer_kind=call.client_kind,
                                topic=topic_atom,
                                broker=call.broker_call,
                                direction="producer",
                                member_fqn=call.method_fqn,
                                member_id=member.node_id,
                                microservice=member.microservice,
                                module=member.module,
                                filename=call.filename,
                                start_line=call.start_line,
                                end_line=call.end_line,
                                resolved=call.resolved,
                                source_layer=_producer_source_layer(call.resolution_strategy),
                            ),
                        )
                    dpkey = (member.node_id, pid)
                    if dpkey not in declares_producer_seen:
                        declares_producer_seen.add(dpkey)
                        tables.declares_producer_rows.append(
                            DeclaresProducerRow(
                                symbol_id=member.node_id,
                                producer_id=pid,
                                confidence=call.confidence_base,
                                strategy=call.resolution_strategy,
                            ),
                        )
                    rid = _phantom_async_route_id(call)
                    _append_route(
                        RouteRow(
                            id=rid,
                            kind="kafka_topic",
                            framework="",
                            method="",
                            path="",
                            path_template="",
                            path_regex="",
                            topic=call.topic_call,
                            broker=call.broker_call,
                            feign_name="",
                            feign_url="",
                            microservice="",
                            module="",
                            filename=call.filename,
                            start_line=call.start_line,
                            end_line=call.end_line,
                            resolved=False,
                            source_layer="builtin",
                        )
                    )
                    key = (pid, rid)
                    if key in async_seen:
                        continue
                    async_seen.add(key)
                    # Provisional confidence for an edge still marked "unresolved".
                    conf = call.confidence_base * 0.3 * micro_factor
                    strategy = call.resolution_strategy
                    tables.async_call_rows.append(
                        AsyncCallRow(
                            producer_id=pid,
                            route_id=rid,
                            confidence=conf,
                            strategy=strategy,
                            direction="producer",
                            raw_topic=call.raw_topic,
                            match="unresolved",
                        )
                    )
                    tables.call_edge_stats.async_calls_total += 1
                    tables.call_edge_stats.async_calls_by_client_kind[call.client_kind] += 1
                    tables.call_edge_stats.async_calls_by_strategy[strategy] += 1

        # Re-sort every table touched above so output order is deterministic.
        tables.routes_rows = sorted(route_rows, key=lambda r: r.id)
        tables.client_rows = sorted(tables.client_rows, key=lambda c: c.id)
        tables.declares_client_rows = sorted(
            tables.declares_client_rows,
            key=lambda e: (e.symbol_id, e.client_id),
        )
        tables.client_stats.clients_total = len(tables.client_rows)
        tables.client_stats.declares_client_total = len(tables.declares_client_rows)
        tables.client_stats.clients_by_kind = defaultdict(int)
        for row in tables.client_rows:
            tables.client_stats.clients_by_kind[row.client_kind] += 1
        tables.producer_rows = sorted(tables.producer_rows, key=lambda p: p.id)
        tables.declares_producer_rows = sorted(
            tables.declares_producer_rows,
            key=lambda e: (e.symbol_id, e.producer_id),
        )
        tables.producer_stats.producers_total = len(tables.producer_rows)
        tables.producer_stats.declares_producer_total = len(tables.declares_producer_rows)
        tables.producer_stats.producers_by_kind = defaultdict(int)
        for row in tables.producer_rows:
            tables.producer_stats.producers_by_kind[row.producer_kind] += 1
        brownfield_strategies = frozenset(
            (
                "layer_b_ann",
                "layer_a_meta",
                "layer_c_source",
                "layer_b_fqn",
                "codebase_client",
                "codebase_producer",
            ),
        )
        # The brownfield percentages are only refreshed when edges were counted.
        if tables.call_edge_stats.http_calls_total:
            n_http = sum(
                v for k, v in tables.call_edge_stats.http_calls_by_strategy.items()
                if k in brownfield_strategies
            )
            tables.call_edge_stats.http_clients_from_brownfield_pct = (
                100.0 * float(n_http) / float(tables.call_edge_stats.http_calls_total)
            )
        if tables.call_edge_stats.async_calls_total:
            n_async = sum(
                v for k, v in tables.call_edge_stats.async_calls_by_strategy.items()
                if k in brownfield_strategies
            )
            tables.call_edge_stats.async_producers_from_brownfield_pct = (
                100.0 * float(n_async) / float(tables.call_edge_stats.async_calls_total)
            )
        if verbose:
            http_client = dict(sorted(tables.call_edge_stats.http_calls_by_client_kind.items()))
            async_client = dict(sorted(tables.call_edge_stats.async_calls_by_client_kind.items()))
            http_strategy = dict(sorted(tables.call_edge_stats.http_calls_by_strategy.items()))
            async_strategy = dict(sorted(tables.call_edge_stats.async_calls_by_strategy.items()))
            _verbose_stderr_line(
                f"[pass5] HTTP_CALLS: {len(tables.http_call_rows)} edges, "
                f"ASYNC_CALLS: {len(tables.async_call_rows)} edges; "
                f"http_by_client_kind={http_client}, async_by_client_kind={async_client}, "
                f"http_by_strategy={http_strategy}, async_by_strategy={async_strategy}",
            )
|
|
2026
|
+
|
|
2027
|
+
|
|
2028
|
+
def _matching_http_endpoints(
    routes: list[RouteRow],
    path_value: str,
    method_value: str,
    target_service: str = "",
) -> list[RouteRow]:
    """Return the ``http_endpoint`` routes whose method and path regex accept the call.

    An empty method on either side matches any method. When `target_service`
    is non-empty, only routes of that microservice are considered. Routes with
    no `path_regex`, or one that does not compile, are skipped.
    """
    matched: list[RouteRow] = []
    for route in routes:
        if route.kind != "http_endpoint":
            continue
        if target_service and route.microservice != target_service:
            continue
        if route.method != "" and method_value != "" and route.method != method_value:
            continue
        if not route.path_regex:
            continue
        try:
            if re.fullmatch(route.path_regex, path_value or "") is None:
                continue
        except re.error:
            # An unusable stored regex must not abort matching for other routes.
            continue
        matched.append(route)
    return matched


def _match_call_edge(
    call: OutgoingCallDecl,
    routes: list[RouteRow],
    caller_microservice: str,
) -> tuple[str, list[RouteRow]]:
    """Return (match_outcome, candidate_routes) for an outgoing call.

    Outcomes:
        ``"unresolved"``: the call is unresolved and has neither path nor topic.
        ``"phantom"``: no route matched (candidates are returned empty).
        ``"ambiguous"``: more than one route matched.
        ``"intra_service"``: exactly one match, in the caller's own microservice.
        ``"cross_service"``: exactly one match, in any other microservice.
    """
    if (
        (not call.resolved)
        and call.path_template_call == ""
        and call.topic_call == ""
    ):
        return "unresolved", []

    candidates: list[RouteRow] = []
    if call.client_kind == "feign_method":
        # Prefer endpoint matching by target service + path/method for Feign declarations.
        if call.path_template_call:
            candidates = _matching_http_endpoints(
                routes,
                call.path_template_call,
                call.method_call,
                call.feign_target_name,
            )
        if not candidates:
            # Fallback for legacy/manual routes that only expose Feign target names.
            candidates = [
                r for r in routes
                if r.feign_name and call.feign_target_name and r.feign_name == call.feign_target_name
            ]
    elif call.channel == "http":
        candidates = _matching_http_endpoints(
            routes,
            call.path_template_call,
            call.method_call,
        )
    elif call.channel == "async":
        # An empty topic would compare equal to the empty `topic` carried by
        # routes that have no topic at all, so only a real topic may match.
        if call.topic_call:
            candidates = [
                r for r in routes
                if r.topic == call.topic_call and r.broker == call.broker_call
            ]

    if not candidates:
        return "phantom", []
    if len(candidates) > 1:
        return "ambiguous", candidates
    if candidates[0].microservice and candidates[0].microservice == caller_microservice:
        return "intra_service", candidates
    return "cross_service", candidates
|
|
2097
|
+
|
|
2098
|
+
|
|
2099
|
+
# Source-layer names that count as brownfield (everything except "builtin").
_BROWNFIELD_LAYERS = frozenset(
    (
        "layer_a_meta",
        "layer_b_ann",
        "layer_b_fqn",
        "layer_c_source",
    )
)


def _is_brownfield_sourced(
    call_strategy: str,
    candidates: list[RouteRow],
) -> bool:
    """Tell whether an edge is authoritative under brownfield_only mode.

    Both sides must come from brownfield layers: the call's strategy and the
    `source_layer` of every candidate route. A candidate without a
    `source_layer` attribute is treated as ``"builtin"``. An empty candidate
    list is never brownfield-sourced.
    """
    if not candidates or call_strategy not in _BROWNFIELD_LAYERS:
        return False
    for candidate in candidates:
        if getattr(candidate, "source_layer", "builtin") not in _BROWNFIELD_LAYERS:
            return False
    return True
|
|
2121
|
+
|
|
2122
|
+
|
|
2123
|
+
def pass6_match_edges(
|
|
2124
|
+
tables: GraphTables,
|
|
2125
|
+
*,
|
|
2126
|
+
verbose: bool,
|
|
2127
|
+
) -> None:
|
|
2128
|
+
match_factor: dict[str, float] = {
|
|
2129
|
+
"cross_service": 1.0,
|
|
2130
|
+
"intra_service": 0.6,
|
|
2131
|
+
"ambiguous": 0.5,
|
|
2132
|
+
"phantom": 0.4,
|
|
2133
|
+
"unresolved": 0.3,
|
|
2134
|
+
}
|
|
2135
|
+
route_by_id = {r.id: r for r in tables.routes_rows}
|
|
2136
|
+
all_routes = [r for r in tables.routes_rows if r.microservice]
|
|
2137
|
+
member_by_id = {m.node_id: m for m in tables.members}
|
|
2138
|
+
clients_by_id = {c.id: c for c in tables.client_rows}
|
|
2139
|
+
producers_by_id = {p.id: p for p in tables.producer_rows}
|
|
2140
|
+
client_hints_by_member: dict[str, list[ClientRow]] = defaultdict(list)
|
|
2141
|
+
for edge in tables.declares_client_rows:
|
|
2142
|
+
client = clients_by_id.get(edge.client_id)
|
|
2143
|
+
if client is None:
|
|
2144
|
+
continue
|
|
2145
|
+
# `DECLARES_CLIENT.symbol_id` targets `Symbol.id` for member symbols,
|
|
2146
|
+
# and member symbols are emitted with `id == MemberEntry.node_id`.
|
|
2147
|
+
client_hints_by_member[edge.symbol_id].append(client)
|
|
2148
|
+
for member_symbol_id in list(client_hints_by_member.keys()):
|
|
2149
|
+
# Deterministic fallback when a method carries multiple feign declarations.
|
|
2150
|
+
client_hints_by_member[member_symbol_id].sort(key=lambda c: c.id)
|
|
2151
|
+
|
|
2152
|
+
# Pass 6 is idempotent for full rebuilds: each run fully re-derives match outcomes.
|
|
2153
|
+
# If incremental rebuild lands later (Tier-2 follow-up), this reset must remain pass-scoped.
|
|
2154
|
+
tables.call_edge_stats.http_calls_match_breakdown.clear()
|
|
2155
|
+
tables.call_edge_stats.async_calls_match_breakdown.clear()
|
|
2156
|
+
tables.call_edge_stats.cross_service_calls_total = 0
|
|
2157
|
+
|
|
2158
|
+
brownfield_only = tables.cross_service_resolution == "brownfield_only"
|
|
2159
|
+
suppressed_auto_cross_http: list[str] = []
|
|
2160
|
+
suppressed_auto_cross_async: list[str] = []
|
|
2161
|
+
suppressed_auto_cross_count = 0
|
|
2162
|
+
|
|
2163
|
+
def _micro_factor(member: MemberEntry | None) -> float:
|
|
2164
|
+
return 1.0 if (member and member.microservice) else 0.85
|
|
2165
|
+
|
|
2166
|
+
if verbose:
|
|
2167
|
+
_verbose_stderr_line(_PASS6_START)
|
|
2168
|
+
with _VerbosePassHeartbeats("[pass6]", verbose=verbose):
|
|
2169
|
+
for row in tables.http_call_rows:
|
|
2170
|
+
if row.match != "unresolved":
|
|
2171
|
+
continue
|
|
2172
|
+
client = clients_by_id.get(row.client_id)
|
|
2173
|
+
member = member_by_id.get(client.member_id) if client else None
|
|
2174
|
+
base = row.confidence / max(1e-9, (0.3 * _micro_factor(member)))
|
|
2175
|
+
src_route = route_by_id.get(row.route_id)
|
|
2176
|
+
if src_route is None and member is not None:
|
|
2177
|
+
# Recover feign caller hints from persisted caller-side Client declarations.
|
|
2178
|
+
for client in client_hints_by_member.get(member.node_id, ()):
|
|
2179
|
+
if client.client_kind != "feign_method":
|
|
2180
|
+
continue
|
|
2181
|
+
path_template, path_regex = _normalize_path(client.path)
|
|
2182
|
+
src_route = RouteRow(
|
|
2183
|
+
id="",
|
|
2184
|
+
kind="http_consumer",
|
|
2185
|
+
framework="feign",
|
|
2186
|
+
method=client.method,
|
|
2187
|
+
path=client.path,
|
|
2188
|
+
path_template=path_template,
|
|
2189
|
+
path_regex=path_regex,
|
|
2190
|
+
topic="",
|
|
2191
|
+
broker="",
|
|
2192
|
+
feign_name=client.target_service,
|
|
2193
|
+
# `Client` stores service-name hints, not feign URL; matcher keys off feign_name.
|
|
2194
|
+
feign_url="",
|
|
2195
|
+
microservice=member.microservice,
|
|
2196
|
+
module=member.module,
|
|
2197
|
+
filename=client.filename,
|
|
2198
|
+
start_line=client.start_line,
|
|
2199
|
+
end_line=client.end_line,
|
|
2200
|
+
resolved=client.resolved,
|
|
2201
|
+
source_layer=client.source_layer,
|
|
2202
|
+
)
|
|
2203
|
+
break
|
|
2204
|
+
# Feign caller hints are synthesized as transient `http_consumer` routes in pass6;
|
|
2205
|
+
# synthetic phantoms from imperative clients are `http_endpoint` even when `feign_name` is populated from
|
|
2206
|
+
# `@CodebaseHttpClient.targetService` / YAML hints — those must path-match like RestTemplate.
|
|
2207
|
+
_feign_like = (
|
|
2208
|
+
src_route is not None
|
|
2209
|
+
and src_route.kind == "http_consumer"
|
|
2210
|
+
and bool(src_route.feign_name)
|
|
2211
|
+
)
|
|
2212
|
+
call = OutgoingCallDecl(
|
|
2213
|
+
method_fqn=f"{member.parent_fqn}#{member.decl.signature}" if member else "",
|
|
2214
|
+
method_sig=member.decl.signature if member else "",
|
|
2215
|
+
client_kind="feign_method" if _feign_like else "rest_template",
|
|
2216
|
+
channel="http",
|
|
2217
|
+
feign_target_name=src_route.feign_name if src_route else "",
|
|
2218
|
+
feign_target_url=src_route.feign_url if src_route else "",
|
|
2219
|
+
path_template_call=src_route.path_template if src_route else "",
|
|
2220
|
+
method_call=row.method_call,
|
|
2221
|
+
topic_call="",
|
|
2222
|
+
broker_call="",
|
|
2223
|
+
raw_uri=row.raw_uri,
|
|
2224
|
+
raw_topic="",
|
|
2225
|
+
resolution_strategy=row.strategy,
|
|
2226
|
+
confidence_base=base,
|
|
2227
|
+
resolved=(row.strategy != "unresolved"),
|
|
2228
|
+
filename=member.file_path if member else "",
|
|
2229
|
+
start_line=member.decl.start_line if member else 0,
|
|
2230
|
+
end_line=member.decl.end_line if member else 0,
|
|
2231
|
+
)
|
|
2232
|
+
outcome, candidates = _match_call_edge(call, all_routes, member.microservice if member else "")
|
|
2233
|
+
if (
|
|
2234
|
+
brownfield_only
|
|
2235
|
+
and outcome == "cross_service"
|
|
2236
|
+
and not _is_brownfield_sourced(row.strategy, candidates)
|
|
2237
|
+
):
|
|
2238
|
+
outcome = "unresolved"
|
|
2239
|
+
candidates = []
|
|
2240
|
+
suppressed_auto_cross_count += 1
|
|
2241
|
+
if len(suppressed_auto_cross_http) < 5:
|
|
2242
|
+
suppressed_auto_cross_http.append(call.method_fqn)
|
|
2243
|
+
if outcome in VALID_HTTP_CALL_MATCHES:
|
|
2244
|
+
row.match = outcome
|
|
2245
|
+
if outcome in ("cross_service", "intra_service") and len(candidates) == 1:
|
|
2246
|
+
row.route_id = candidates[0].id
|
|
2247
|
+
row.confidence = call.confidence_base * match_factor[row.match] * _micro_factor(member)
|
|
2248
|
+
tables.call_edge_stats.http_calls_match_breakdown[row.match] += 1
|
|
2249
|
+
if row.match == "cross_service":
|
|
2250
|
+
tables.call_edge_stats.cross_service_calls_total += 1
|
|
2251
|
+
|
|
2252
|
+
for row in tables.async_call_rows:
|
|
2253
|
+
if row.match != "unresolved":
|
|
2254
|
+
continue
|
|
2255
|
+
producer = producers_by_id.get(row.producer_id)
|
|
2256
|
+
member = member_by_id.get(producer.member_id) if producer else None
|
|
2257
|
+
base = row.confidence / max(1e-9, (0.3 * _micro_factor(member)))
|
|
2258
|
+
src_route = route_by_id.get(row.route_id)
|
|
2259
|
+
async_kind = producer.producer_kind if producer else "kafka_send"
|
|
2260
|
+
call = OutgoingCallDecl(
|
|
2261
|
+
method_fqn=f"{member.parent_fqn}#{member.decl.signature}" if member else "",
|
|
2262
|
+
method_sig=member.decl.signature if member else "",
|
|
2263
|
+
client_kind=async_kind,
|
|
2264
|
+
channel="async",
|
|
2265
|
+
feign_target_name="",
|
|
2266
|
+
feign_target_url="",
|
|
2267
|
+
path_template_call="",
|
|
2268
|
+
method_call="",
|
|
2269
|
+
topic_call=src_route.topic if src_route else "",
|
|
2270
|
+
broker_call=src_route.broker if src_route else "",
|
|
2271
|
+
raw_uri="",
|
|
2272
|
+
raw_topic=row.raw_topic,
|
|
2273
|
+
resolution_strategy=row.strategy,
|
|
2274
|
+
confidence_base=base,
|
|
2275
|
+
resolved=(row.strategy != "unresolved"),
|
|
2276
|
+
filename=member.file_path if member else "",
|
|
2277
|
+
start_line=member.decl.start_line if member else 0,
|
|
2278
|
+
end_line=member.decl.end_line if member else 0,
|
|
2279
|
+
)
|
|
2280
|
+
outcome, candidates = _match_call_edge(call, all_routes, member.microservice if member else "")
|
|
2281
|
+
if (
|
|
2282
|
+
brownfield_only
|
|
2283
|
+
and outcome == "cross_service"
|
|
2284
|
+
and not _is_brownfield_sourced(row.strategy, candidates)
|
|
2285
|
+
):
|
|
2286
|
+
outcome = "unresolved"
|
|
2287
|
+
candidates = []
|
|
2288
|
+
suppressed_auto_cross_count += 1
|
|
2289
|
+
if len(suppressed_auto_cross_async) < 5:
|
|
2290
|
+
suppressed_auto_cross_async.append(call.method_fqn)
|
|
2291
|
+
if outcome in VALID_HTTP_CALL_MATCHES:
|
|
2292
|
+
row.match = outcome
|
|
2293
|
+
if outcome in ("cross_service", "intra_service") and len(candidates) == 1:
|
|
2294
|
+
row.route_id = candidates[0].id
|
|
2295
|
+
row.confidence = call.confidence_base * match_factor[row.match] * _micro_factor(member)
|
|
2296
|
+
tables.call_edge_stats.async_calls_match_breakdown[row.match] += 1
|
|
2297
|
+
if row.match == "cross_service":
|
|
2298
|
+
tables.call_edge_stats.cross_service_calls_total += 1
|
|
2299
|
+
|
|
2300
|
+
inbound_route_ids = {r.route_id for r in tables.http_call_rows} | {r.route_id for r in tables.async_call_rows}
|
|
2301
|
+
tables.routes_rows = sorted(
|
|
2302
|
+
[
|
|
2303
|
+
r for r in tables.routes_rows
|
|
2304
|
+
if not (
|
|
2305
|
+
(r.microservice == "")
|
|
2306
|
+
and (r.framework == "")
|
|
2307
|
+
and (not r.resolved)
|
|
2308
|
+
and (r.id not in inbound_route_ids)
|
|
2309
|
+
)
|
|
2310
|
+
],
|
|
2311
|
+
key=lambda r: r.id,
|
|
2312
|
+
)
|
|
2313
|
+
|
|
2314
|
+
if verbose:
|
|
2315
|
+
if brownfield_only:
|
|
2316
|
+
n_bf = tables.call_edge_stats.cross_service_calls_total
|
|
2317
|
+
first_http = ", ".join(suppressed_auto_cross_http)
|
|
2318
|
+
first_async = ", ".join(suppressed_auto_cross_async)
|
|
2319
|
+
_verbose_stderr_line(
|
|
2320
|
+
f"[pass6] cross_service_resolution=brownfield_only:\n"
|
|
2321
|
+
f" {n_bf} cross_service edges from brownfield layers,\n"
|
|
2322
|
+
f" {suppressed_auto_cross_count} auto-cross-service candidates suppressed -> unresolved\n"
|
|
2323
|
+
f" (first 5 http: {first_http})\n"
|
|
2324
|
+
f" (first 5 async: {first_async})",
|
|
2325
|
+
)
|
|
2326
|
+
_verbose_stderr_line(
|
|
2327
|
+
f"[pass6] http_match={dict(sorted(tables.call_edge_stats.http_calls_match_breakdown.items()))}, "
|
|
2328
|
+
f"async_match={dict(sorted(tables.call_edge_stats.async_calls_match_breakdown.items()))}, "
|
|
2329
|
+
f"cross_service_calls_total={tables.call_edge_stats.cross_service_calls_total}",
|
|
2330
|
+
)
|
|
2331
|
+
|
|
2332
|
+
|
|
2333
|
+
# ---------- Kuzu write ----------
|
|
2334
|
+
|
|
2335
|
+
|
|
2336
|
+
# Kuzu DDL for every table the builder creates. Node tables list one column
# per entry; relationship tables are short enough to stay on one or two lines.

# Symbol: packages, files, types, members and phantom placeholders.
_SCHEMA_NODE = "CREATE NODE TABLE Symbol(" + ", ".join((
    "id STRING PRIMARY KEY",
    "kind STRING",
    "name STRING",
    "fqn STRING",
    "package STRING",
    "module STRING",
    "microservice STRING",
    "filename STRING",
    "start_line INT64",
    "end_line INT64",
    "start_byte INT64",
    "end_byte INT64",
    "modifiers STRING[]",
    "annotations STRING[]",
    "capabilities STRING[]",
    "role STRING",
    "signature STRING",
    "parent_id STRING",
    "resolved BOOLEAN",
)) + ")"

# GraphMeta: build metadata and aggregate statistics.
_SCHEMA_META = "CREATE NODE TABLE GraphMeta(" + ", ".join((
    "key STRING PRIMARY KEY",
    "ontology_version INT64",
    "built_at INT64",
    "source_root STRING",
    "counts_json STRING",
    "parse_errors INT64",
    "routes_total INT64",
    "exposes_total INT64",
    # JSON map {framework: count}; STRING avoids Kuzu Python MAP↔STRUCT binder mismatch.
    "routes_by_framework STRING",
    "routes_resolved_pct DOUBLE",
    "routes_from_brownfield_pct DOUBLE",
    "routes_by_layer STRING",
    "clients_total INT64",
    "declares_client_total INT64",
    "clients_by_kind STRING",
    "producers_total INT64",
    "declares_producer_total INT64",
    "producers_by_kind STRING",
    "http_calls_total INT64",
    "async_calls_total INT64",
    "http_calls_by_strategy STRING",
    "async_calls_by_strategy STRING",
    "http_calls_resolved_pct DOUBLE",
    "async_calls_resolved_pct DOUBLE",
    "http_clients_from_brownfield_pct DOUBLE",
    "async_producers_from_brownfield_pct DOUBLE",
    "http_calls_match_breakdown STRING",
    "async_calls_match_breakdown STRING",
    "cross_service_calls_total INT64",
    "pass3_skipped_cross_service INT64",
    "pass3_unresolved_phantom_receiver INT64",
    "pass3_unresolved_chained INT64",
    "pass4_exposes_suppressed_feign INT64",
    "cross_service_resolution STRING",
)) + ")"

# Route: HTTP / messaging route declarations.
_SCHEMA_ROUTE = "CREATE NODE TABLE Route(" + ", ".join((
    "id STRING",
    "kind STRING",
    "framework STRING",
    "method STRING",
    "path STRING",
    "path_template STRING",
    "path_regex STRING",
    "topic STRING",
    "broker STRING",
    "feign_name STRING",
    "feign_url STRING",
    "microservice STRING",
    "module STRING",
    "filename STRING",
    "start_line INT64",
    "end_line INT64",
    "resolved BOOLEAN",
    "PRIMARY KEY(id)",
)) + ")"

# Client: outgoing HTTP client declarations.
_SCHEMA_CLIENT = "CREATE NODE TABLE Client(" + ", ".join((
    "id STRING",
    "client_kind STRING",
    "target_service STRING",
    "path STRING",
    "path_template STRING",
    "path_regex STRING",
    "method STRING",
    "member_fqn STRING",
    "member_id STRING",
    "microservice STRING",
    "module STRING",
    "filename STRING",
    "start_line INT64",
    "end_line INT64",
    "resolved BOOLEAN",
    "source_layer STRING",
    "PRIMARY KEY(id)",
)) + ")"

# Producer: asynchronous message producer declarations.
_SCHEMA_PRODUCER = "CREATE NODE TABLE Producer(" + ", ".join((
    "id STRING",
    "producer_kind STRING",
    "topic STRING",
    "broker STRING",
    "direction STRING",
    "member_fqn STRING",
    "member_id STRING",
    "microservice STRING",
    "module STRING",
    "filename STRING",
    "start_line INT64",
    "end_line INT64",
    "resolved BOOLEAN",
    "source_layer STRING",
    "PRIMARY KEY(id)",
)) + ")"

# Symbol -> Symbol relationship tables.
_SCHEMA_EXTENDS = (
    "CREATE REL TABLE EXTENDS(FROM Symbol TO Symbol, dst_name STRING, dst_fqn STRING, resolved BOOLEAN)"
)
_SCHEMA_IMPLEMENTS = (
    "CREATE REL TABLE IMPLEMENTS(FROM Symbol TO Symbol, dst_name STRING, dst_fqn STRING, resolved BOOLEAN)"
)
_SCHEMA_INJECTS = (
    "CREATE REL TABLE INJECTS(FROM Symbol TO Symbol, dst_name STRING, dst_fqn STRING, "
    "resolved BOOLEAN, mechanism STRING, annotation STRING, field_or_param STRING)"
)
_SCHEMA_DECLARES = "CREATE REL TABLE DECLARES(FROM Symbol TO Symbol)"
_SCHEMA_OVERRIDES = "CREATE REL TABLE OVERRIDES(FROM Symbol TO Symbol)"
_SCHEMA_CALLS = "CREATE REL TABLE CALLS(FROM Symbol TO Symbol, " + ", ".join((
    "call_site_line INT64",
    "call_site_byte INT64",
    "arg_count INT64",
    "confidence DOUBLE",
    "strategy STRING",
    "source STRING",
    "resolved BOOLEAN",
    "callee_declaring_role STRING",
)) + ")"

# UnresolvedCallSite: call sites that could not be bound to a callee,
# linked from the calling Symbol through UNRESOLVED_AT.
_SCHEMA_UNRESOLVED_CALL_SITE = "CREATE NODE TABLE UnresolvedCallSite(" + ", ".join((
    "id STRING",
    "caller_id STRING",
    "call_site_line INT64",
    "call_site_byte INT64",
    "arg_count INT64",
    "callee_simple STRING",
    "receiver_expr STRING",
    "reason STRING",
    "PRIMARY KEY(id)",
)) + ")"
_SCHEMA_UNRESOLVED_AT = "CREATE REL TABLE UNRESOLVED_AT(FROM Symbol TO UnresolvedCallSite)"

# Symbol -> Route / Client / Producer declaration edges.
_SCHEMA_EXPOSES = "CREATE REL TABLE EXPOSES(FROM Symbol TO Route, confidence DOUBLE, strategy STRING)"
_SCHEMA_DECLARES_CLIENT = (
    "CREATE REL TABLE DECLARES_CLIENT(FROM Symbol TO Client, confidence DOUBLE, strategy STRING)"
)
_SCHEMA_DECLARES_PRODUCER = (
    "CREATE REL TABLE DECLARES_PRODUCER(FROM Symbol TO Producer, confidence DOUBLE, strategy STRING)"
)

# Client / Producer -> Route call edges.
_SCHEMA_HTTP_CALLS = (
    "CREATE REL TABLE HTTP_CALLS(FROM Client TO Route, confidence DOUBLE, strategy STRING, "
    "method_call STRING, raw_uri STRING, match STRING)"
)
_SCHEMA_ASYNC_CALLS = (
    "CREATE REL TABLE ASYNC_CALLS(FROM Producer TO Route, confidence DOUBLE, strategy STRING, "
    "direction STRING, raw_topic STRING, match STRING)"
)
|
|
2465
|
+
|
|
2466
|
+
|
|
2467
|
+
def _drop_all(conn: kuzu.Connection) -> None:
    """Drop every table this builder creates, on a best-effort basis.

    Relationship tables are dropped before the node tables they connect.
    Each ``DROP TABLE IF EXISTS`` statement is attempted independently: a
    failure is logged at debug level and the remaining statements still run,
    so no exception from ``conn.execute`` propagates to the caller.

    Args:
        conn: Connection on which the ``DROP`` statements are executed.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    table_names = (
        # relationship tables
        "DECLARES_CLIENT",
        "DECLARES_PRODUCER",
        "HTTP_CALLS",
        "ASYNC_CALLS",
        "EXPOSES",
        "UNRESOLVED_AT",
        "EXTENDS",
        "IMPLEMENTS",
        "INJECTS",
        "CALLS",
        "OVERRIDES",
        "DECLARES",
        # node tables
        "UnresolvedCallSite",
        "Symbol",
        "Route",
        "Client",
        "Producer",
        "GraphMeta",
    )
    for table in table_names:
        stmt = f"DROP TABLE IF EXISTS {table}"
        try:
            conn.execute(stmt)
        except Exception as exc:
            # Deliberately non-fatal, but no longer silent: leave a trace so a
            # later "table already exists" error can be diagnosed.
            log.debug("ignored failure of %r: %s", stmt, exc)
|
|
2492
|
+
|
|
2493
|
+
|
|
2494
|
+
def _create_schema(conn: kuzu.Connection) -> None:
    """Create all node and relationship tables on ``conn``.

    The six node-table statements run first, followed by the relationship
    tables. Any exception raised by ``conn.execute`` propagates to the caller.
    """
    node_tables = (
        _SCHEMA_NODE,
        _SCHEMA_UNRESOLVED_CALL_SITE,
        _SCHEMA_ROUTE,
        _SCHEMA_CLIENT,
        _SCHEMA_PRODUCER,
        _SCHEMA_META,
    )
    rel_tables = (
        _SCHEMA_EXTENDS,
        _SCHEMA_IMPLEMENTS,
        _SCHEMA_INJECTS,
        _SCHEMA_DECLARES,
        _SCHEMA_OVERRIDES,
        _SCHEMA_CALLS,
        _SCHEMA_UNRESOLVED_AT,
        _SCHEMA_EXPOSES,
        _SCHEMA_DECLARES_CLIENT,
        _SCHEMA_DECLARES_PRODUCER,
        _SCHEMA_HTTP_CALLS,
        _SCHEMA_ASYNC_CALLS,
    )
    for ddl in node_tables + rel_tables:
        conn.execute(ddl)
|
|
2516
|
+
|
|
2517
|
+
|
|
2518
|
+
def _node_row(**kwargs) -> dict:
    """Return a complete ``Symbol`` parameter dict.

    Every property except ``id`` gets a default value; keyword arguments
    override the defaults and may add further keys (such as ``id``). A new
    dict, with new empty lists for the list-valued defaults, is built on
    each call.
    """
    return {
        "kind": "",
        "name": "",
        "fqn": "",
        "package": "",
        "module": "",
        "microservice": "",
        "filename": "",
        "start_line": 0,
        "end_line": 0,
        "start_byte": 0,
        "end_byte": 0,
        "modifiers": [],
        "annotations": [],
        "capabilities": [],
        "role": "OTHER",
        "signature": "",
        "parent_id": "",
        "resolved": True,
        **kwargs,
    }
|
|
2529
|
+
|
|
2530
|
+
|
|
2531
|
+
# Parameterised Cypher that inserts one Symbol node; every property is bound
# to a query parameter of the same name.
_CREATE_SYMBOL = "CREATE (:Symbol {" + ", ".join(
    f"{prop}: ${prop}"
    for prop in (
        "id",
        "kind",
        "name",
        "fqn",
        "package",
        "module",
        "microservice",
        "filename",
        "start_line",
        "end_line",
        "start_byte",
        "end_byte",
        "modifiers",
        "annotations",
        "capabilities",
        "role",
        "signature",
        "parent_id",
        "resolved",
    )
) + "})"
|
|
2540
|
+
|
|
2541
|
+
|
|
2542
|
+
def _write_nodes(
    conn: kuzu.Connection,
    tables: GraphTables,
    *,
    project_root: Path,
    meta_chain: dict[str, frozenset[str]] | None,
) -> None:
    """Insert one ``Symbol`` node per package, file, type, member and phantom.

    Besides writing to ``conn``, this updates ``tables`` in two ways:
    ``cross_service_resolution`` is set from the project configuration, and
    ``type_role_by_node_id`` records the role resolved for each type.

    Args:
        conn: Connection used to execute the ``CREATE`` statements.
        tables: In-memory graph tables to persist.
        project_root: Root passed to the brownfield-override and config loaders.
        meta_chain: Forwarded unchanged to ``resolve_role_and_capabilities``.
    """
    brownfield = load_brownfield_overrides(project_root)
    try:
        root_str = str(project_root.resolve())
    except OSError:
        # Fall back to the path as given when it cannot be resolved.
        root_str = str(project_root)
    tables.cross_service_resolution = _load_config_cross_service_resolution(root_str)

    known_types = tables.types

    # packages
    for pkg_name, pkg_id in tables.packages.items():
        params = _node_row(
            id=pkg_id,
            kind="package",
            name=pkg_name.rsplit(".", 1)[-1],
            fqn=pkg_name,
            package=pkg_name,
        )
        conn.execute(_CREATE_SYMBOL, params)

    # files
    for file_path, file_id in tables.files.items():
        params = _node_row(
            id=file_id,
            kind="file",
            name=Path(file_path).name,
            fqn=file_path,
            filename=file_path,
        )
        conn.execute(_CREATE_SYMBOL, params)

    # types
    for type_entry in known_types.values():
        decl = type_entry.decl
        role, capabilities = resolve_role_and_capabilities(
            decl,
            overrides=brownfield,
            meta_chain=meta_chain,
        )
        tables.type_role_by_node_id[type_entry.node_id] = role
        # A nested type points at its enclosing type when that type is known.
        outer = type_entry.outer_fqn
        if outer and outer in known_types:
            enclosing_id = known_types[outer].node_id
        else:
            enclosing_id = ""
        params = _node_row(
            id=type_entry.node_id,
            kind=decl.kind,
            name=decl.name,
            fqn=decl.fqn,
            package=type_entry.package,
            module=type_entry.module,
            microservice=type_entry.microservice,
            filename=type_entry.file_path,
            start_line=decl.start_line,
            end_line=decl.end_line,
            start_byte=decl.start_byte,
            end_byte=decl.end_byte,
            modifiers=list(decl.modifiers),
            annotations=[ann.name for ann in decl.annotations],
            capabilities=capabilities,
            role=role,
            signature="",
            parent_id=enclosing_id,
        )
        conn.execute(_CREATE_SYMBOL, params)

    # members (methods / constructors)
    for member in tables.members:
        mdecl = member.decl
        if member.parent_fqn in known_types:
            owner_package = known_types[member.parent_fqn].package
        else:
            owner_package = ""
        params = _node_row(
            id=member.node_id,
            kind=member.kind,
            name=mdecl.name,
            fqn=f"{member.parent_fqn}#{mdecl.signature}",
            package=owner_package,
            module=member.module,
            microservice=member.microservice,
            filename=member.file_path,
            start_line=mdecl.start_line,
            end_line=mdecl.end_line,
            start_byte=mdecl.start_byte,
            end_byte=mdecl.end_byte,
            modifiers=list(mdecl.modifiers),
            annotations=[ann.name for ann in mdecl.annotations],
            signature=mdecl.signature,
            parent_id=member.parent_id,
        )
        conn.execute(_CREATE_SYMBOL, params)

    # phantoms: rows are stored ready-made and passed through unchanged
    for phantom_row in tables.phantoms.values():
        conn.execute(_CREATE_SYMBOL, phantom_row)
|
|
2606
|
+
|
|
2607
|
+
|
|
2608
|
+
# Parameterised Cypher statements for edges and for Route / Client / Producer
# nodes. Edge statements MATCH both endpoints by id before creating the edge.

# --- Symbol -> Symbol edges ---
_CREATE_EXT = (
    "MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) "
    "CREATE (a)-[:EXTENDS {"
    "dst_name: $dst_name, "
    "dst_fqn: $dst_fqn, "
    "resolved: $resolved"
    "}]->(b)"
)
_CREATE_IMPL = (
    "MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) "
    "CREATE (a)-[:IMPLEMENTS {"
    "dst_name: $dst_name, "
    "dst_fqn: $dst_fqn, "
    "resolved: $resolved"
    "}]->(b)"
)
_CREATE_INJ = (
    "MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) "
    "CREATE (a)-[:INJECTS {"
    "dst_name: $dst_name, "
    "dst_fqn: $dst_fqn, "
    "resolved: $resolved, "
    "mechanism: $mechanism, "
    "annotation: $annotation, "
    "field_or_param: $field_or_param"
    "}]->(b)"
)
_CREATE_DECL = "MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) CREATE (a)-[:DECLARES]->(b)"
_CREATE_OVERRIDES = "MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) CREATE (a)-[:OVERRIDES]->(b)"
_CREATE_CALL = (
    "MATCH (a:Symbol {id: $src}), (b:Symbol {id: $dst}) "
    "CREATE (a)-[:CALLS {"
    "call_site_line: $line, "
    "call_site_byte: $byte, "
    "arg_count: $argc, "
    "confidence: $conf, "
    "strategy: $strat, "
    "source: $src_kind, "
    "resolved: $resolved, "
    "callee_declaring_role: $callee_declaring_role"
    "}]->(b)"
)

# --- Route / Client nodes ---
_CREATE_ROUTE = (
    "CREATE (:Route {"
    "id: $id, "
    "kind: $kind, "
    "framework: $framework, "
    "method: $method, "
    "path: $path, "
    "path_template: $path_template, "
    "path_regex: $path_regex, "
    "topic: $topic, "
    "broker: $broker, "
    "feign_name: $feign_name, "
    "feign_url: $feign_url, "
    "microservice: $microservice, "
    "module: $module, "
    "filename: $filename, "
    "start_line: $start_line, "
    "end_line: $end_line, "
    "resolved: $resolved"
    "})"
)
_CREATE_CLIENT = (
    "CREATE (:Client {"
    "id: $id, "
    "client_kind: $client_kind, "
    "target_service: $target_service, "
    "path: $path, "
    "path_template: $path_template, "
    "path_regex: $path_regex, "
    "method: $method, "
    "member_fqn: $member_fqn, "
    "member_id: $member_id, "
    "microservice: $microservice, "
    "module: $module, "
    "filename: $filename, "
    "start_line: $start_line, "
    "end_line: $end_line, "
    "resolved: $resolved, "
    "source_layer: $source_layer"
    "})"
)

# --- Symbol -> Route / Client edges ---
_CREATE_EXPOSES = (
    "MATCH (s:Symbol {id: $sid}), (r:Route {id: $rid}) "
    "CREATE (s)-[:EXPOSES {"
    "confidence: $confidence, "
    "strategy: $strategy"
    "}]->(r)"
)
_CREATE_DECLARES_CLIENT = (
    "MATCH (s:Symbol {id: $sid}), (c:Client {id: $cid}) "
    "CREATE (s)-[:DECLARES_CLIENT {"
    "confidence: $confidence, "
    "strategy: $strategy"
    "}]->(c)"
)

# --- Producer node and its declaration edge ---
_CREATE_PRODUCER = (
    "CREATE (:Producer {"
    "id: $id, "
    "producer_kind: $producer_kind, "
    "topic: $topic, "
    "broker: $broker, "
    "direction: $direction, "
    "member_fqn: $member_fqn, "
    "member_id: $member_id, "
    "microservice: $microservice, "
    "module: $module, "
    "filename: $filename, "
    "start_line: $start_line, "
    "end_line: $end_line, "
    "resolved: $resolved, "
    "source_layer: $source_layer"
    "})"
)
_CREATE_DECLARES_PRODUCER = (
    "MATCH (s:Symbol {id: $sid}), (p:Producer {id: $pid}) "
    "CREATE (s)-[:DECLARES_PRODUCER {"
    "confidence: $confidence, "
    "strategy: $strategy"
    "}]->(p)"
)

# --- Client / Producer -> Route call edges ---
_CREATE_HTTP_CALL = (
    "MATCH (c:Client {id: $cid}), (r:Route {id: $rid}) "
    "CREATE (c)-[:HTTP_CALLS {"
    "confidence: $confidence, "
    "strategy: $strategy, "
    "method_call: $method_call, "
    "raw_uri: $raw_uri, "
    "match: $match"
    "}]->(r)"
)
_CREATE_ASYNC_CALL = (
    "MATCH (p:Producer {id: $pid}), (r:Route {id: $rid}) "
    "CREATE (p)-[:ASYNC_CALLS {"
    "confidence: $confidence, "
    "strategy: $strategy, "
    "direction: $direction, "
    "raw_topic: $raw_topic, "
    "match: $match"
    "}]->(r)"
)
|
|
2688
|
+
|
|
2689
|
+
|
|
2690
|
+
def _populate_declares_rows(tables: GraphTables) -> None:
    """Rebuild ``tables.declares_rows`` with one parent -> member row per member.

    The previous list is replaced; rows follow the order of ``tables.members``.
    """
    rows = []
    for member in tables.members:
        rows.append(DeclaresRow(src_id=member.parent_id, dst_id=member.node_id))
    tables.declares_rows = rows
|
|
2694
|
+
|
|
2695
|
+
|
|
2696
|
+
def _direct_supertype_ids(tables: GraphTables, type_id: str) -> list[str]:
    """Return the ids of the direct supertypes of ``type_id``.

    Targets of its EXTENDS rows come first, then targets of its IMPLEMENTS
    rows, each group in table order. Duplicates are not removed. Returns an
    empty list when the type has no outgoing rows.
    """
    via_extends = [edge.dst_id for edge in tables.extends_rows if edge.src_id == type_id]
    via_implements = [edge.dst_id for edge in tables.implements_rows if edge.src_id == type_id]
    return via_extends + via_implements
|
|
2705
|
+
|
|
2706
|
+
|
|
2707
|
+
def _populate_overrides_rows(tables: GraphTables) -> None:
    """Materialize (subtype_method)-[:OVERRIDES]->(supertype_method) for one supertype hop.

    Matches ``KuzuGraph.override_axis_rollup_for`` (direct ``IMPLEMENTS`` / ``EXTENDS``
    only, same ``signature``, distinct method ids, non-static instance methods).

    A pair is emitted when a non-static member of kind ``"method"`` has the
    same signature as a member of kind ``"method"`` declared on one of its
    type's direct supertypes. The static check applies to the subtype side
    only. ``tables.overrides_rows`` is replaced with the de-duplicated pairs,
    sorted by ``(src_id, dst_id)``.

    Both lookups are indexed once up front, so the cost is linear in the
    number of members and inheritance rows rather than rescanning every
    EXTENDS / IMPLEMENTS row for each method.
    """
    # type id -> ids of its direct supertypes (EXTENDS targets, then IMPLEMENTS targets)
    supertypes_of: dict[str, list[str]] = defaultdict(list)
    for edge in tables.extends_rows:
        supertypes_of[edge.src_id].append(edge.dst_id)
    for edge in tables.implements_rows:
        supertypes_of[edge.src_id].append(edge.dst_id)

    # (declaring type id, signature) -> ids of the methods declared there
    methods_at: dict[tuple[str, str], list[str]] = defaultdict(list)
    for m in tables.members:
        if m.kind == "method":
            methods_at[(m.parent_id, m.decl.signature)].append(m.node_id)

    pairs: set[tuple[str, str]] = set()
    for m in tables.members:
        if m.kind != "method" or "static" in m.decl.modifiers:
            continue
        signature = m.decl.signature
        # .get() keeps the defaultdicts from growing on lookups that miss.
        for sup_id in supertypes_of.get(m.parent_id, ()):
            for other_id in methods_at.get((sup_id, signature), ()):
                if other_id != m.node_id:
                    pairs.add((m.node_id, other_id))

    tables.overrides_rows = [
        DeclaresRow(src_id=a, dst_id=b) for a, b in sorted(pairs)
    ]
|
|
2733
|
+
|
|
2734
|
+
|
|
2735
|
+
def _write_edges(conn: kuzu.Connection, tables: GraphTables) -> None:
    """Write the Symbol-to-Symbol edges and the unresolved call sites.

    Statements are issued in this order: EXTENDS, IMPLEMENTS, INJECTS,
    DECLARES, OVERRIDES, CALLS, then one ``UnresolvedCallSite`` node plus its
    ``UNRESOLVED_AT`` edge per unresolved row.

    CALLS rows are de-duplicated on ``(src_id, dst_id, arg_count,
    call_site_line)`` and unresolved call sites on ``id``; in both cases the
    first row seen is the one written.
    """
    # EXTENDS and IMPLEMENTS rows carry the same properties.
    inheritance = (
        (_CREATE_EXT, tables.extends_rows),
        (_CREATE_IMPL, tables.implements_rows),
    )
    for query, edges in inheritance:
        for edge in edges:
            conn.execute(query, {
                "src": edge.src_id,
                "dst": edge.dst_id,
                "dst_name": edge.dst_name,
                "dst_fqn": edge.dst_fqn,
                "resolved": edge.resolved,
            })

    for inj in tables.injects_rows:
        conn.execute(_CREATE_INJ, {
            "src": inj.src_id,
            "dst": inj.dst_id,
            "dst_name": inj.dst_name,
            "dst_fqn": inj.dst_fqn,
            "resolved": inj.resolved,
            "mechanism": inj.mechanism,
            "annotation": inj.annotation,
            "field_or_param": inj.field_or_param,
        })

    # DECLARES and OVERRIDES are property-less edges.
    for decl_edge in tables.declares_rows:
        conn.execute(_CREATE_DECL, {"src": decl_edge.src_id, "dst": decl_edge.dst_id})
    for ovr_edge in tables.overrides_rows:
        conn.execute(_CREATE_OVERRIDES, {"src": ovr_edge.src_id, "dst": ovr_edge.dst_id})

    # Keep the first CALLS row for each key; dict insertion order preserves
    # the original row order.
    first_call_by_key: dict[tuple[str, str, int, int], CallsRow] = {}
    for call in tables.calls_rows:
        first_call_by_key.setdefault(
            (call.src_id, call.dst_id, call.arg_count, call.call_site_line),
            call,
        )

    member_by_id = {m.node_id: m for m in tables.members}
    for call in first_call_by_key.values():
        declaring_role = _callee_declaring_role_at_write(
            tables, call.dst_id, member_by_id=member_by_id,
        )
        conn.execute(_CREATE_CALL, {
            "src": call.src_id,
            "dst": call.dst_id,
            "line": call.call_site_line,
            "byte": call.call_site_byte,
            "argc": call.arg_count,
            "conf": call.confidence,
            "strat": call.strategy,
            "src_kind": call.source,
            "resolved": call.resolved,
            "callee_declaring_role": declaring_role,
        })

    create_site_query = (
        "CREATE (:UnresolvedCallSite {"
        "id: $id, caller_id: $caller_id, call_site_line: $line, call_site_byte: $byte, "
        "arg_count: $argc, callee_simple: $callee, receiver_expr: $recv, reason: $reason"
        "})"
    )
    link_site_query = (
        "MATCH (a:Symbol {id: $caller}), (u:UnresolvedCallSite {id: $ucs}) "
        "CREATE (a)-[:UNRESOLVED_AT]->(u)"
    )
    written_site_ids: set[str] = set()
    for site in tables.unresolved_call_site_rows:
        if site.id in written_site_ids:
            continue
        written_site_ids.add(site.id)
        conn.execute(create_site_query, {
            "id": site.id,
            "caller_id": site.caller_id,
            "line": site.call_site_line,
            "byte": site.call_site_byte,
            "argc": site.arg_count,
            "callee": site.callee_simple,
            "recv": site.receiver_expr,
            "reason": site.reason,
        })
        conn.execute(link_site_query, {"caller": site.caller_id, "ucs": site.id})
|
|
2810
|
+
|
|
2811
|
+
|
|
2812
|
+
def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> None:
    """Write routes, clients, producers and the edges that attach to them.

    Statements are issued in this order: Route nodes, EXPOSES edges, Client
    nodes, DECLARES_CLIENT edges, Producer nodes, DECLARES_PRODUCER edges,
    HTTP_CALLS edges, ASYNC_CALLS edges. Each edge statement MATCHes its
    endpoints by id, so the nodes are written before the edges that use them.
    """
    # Route rows are written field by field: only these attributes are bound.
    route_fields = (
        "id",
        "kind",
        "framework",
        "method",
        "path",
        "path_template",
        "path_regex",
        "topic",
        "broker",
        "feign_name",
        "feign_url",
        "microservice",
        "module",
        "filename",
        "start_line",
        "end_line",
        "resolved",
    )
    for route in tables.routes_rows:
        conn.execute(_CREATE_ROUTE, {field: getattr(route, field) for field in route_fields})

    for exposes in tables.exposes_rows:
        conn.execute(_CREATE_EXPOSES, {
            "sid": exposes.symbol_id,
            "rid": exposes.route_id,
            "confidence": exposes.confidence,
            "strategy": exposes.strategy,
        })

    # Client rows are passed whole, as their dataclass dict.
    for client in tables.client_rows:
        conn.execute(_CREATE_CLIENT, asdict(client))
    for declares in tables.declares_client_rows:
        conn.execute(_CREATE_DECLARES_CLIENT, {
            "sid": declares.symbol_id,
            "cid": declares.client_id,
            "confidence": declares.confidence,
            "strategy": declares.strategy,
        })

    # Producer rows are passed whole as well.
    for producer in tables.producer_rows:
        conn.execute(_CREATE_PRODUCER, asdict(producer))
    for declares in tables.declares_producer_rows:
        conn.execute(_CREATE_DECLARES_PRODUCER, {
            "sid": declares.symbol_id,
            "pid": declares.producer_id,
            "confidence": declares.confidence,
            "strategy": declares.strategy,
        })

    for http_call in tables.http_call_rows:
        conn.execute(_CREATE_HTTP_CALL, {
            "cid": http_call.client_id,
            "rid": http_call.route_id,
            "confidence": http_call.confidence,
            "strategy": http_call.strategy,
            "method_call": http_call.method_call,
            "raw_uri": http_call.raw_uri,
            "match": http_call.match,
        })

    for async_call in tables.async_call_rows:
        conn.execute(_CREATE_ASYNC_CALL, {
            "pid": async_call.producer_id,
            "rid": async_call.route_id,
            "confidence": async_call.confidence,
            "strategy": async_call.strategy,
            "direction": async_call.direction,
            "raw_topic": async_call.raw_topic,
            "match": async_call.match,
        })
|
|
2878
|
+
|
|
2879
|
+
|
|
2880
|
+
def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) -> None:
    """Create the single ``GraphMeta`` node that summarises the finished build.

    The node stores the ontology version, the build time (whole seconds since
    the epoch), the resolved ``source_root``, per-table row counts
    (``counts_json``) and the route / client / producer / call-edge statistics
    held on ``tables``.  Dict-valued statistics are written as JSON text with
    their keys sorted.

    Args:
        conn: Connection on which the ``CREATE`` statement is executed.
        tables: Build state whose row lists and ``*_stats`` objects are read.
        source_root: Source tree root; stored as ``str(source_root.resolve())``.
    """

    def _key_sorted_json(mapping) -> str:
        # Sort by key before dumping so the stored JSON text has a stable order.
        return json.dumps(dict(sorted(mapping.items())))

    def _resolved_ratio(by_strategy, total) -> float:
        # PR-D1 definition: "resolved_pct" is strategy-based (strategy != 'unresolved'),
        # not match-based (all PR-D1 edges keep match='unresolved').
        # The value is a plain ratio (resolved / total); it is not multiplied by 100.
        if not total:
            return 0.0
        resolved = sum(n for name, n in by_strategy.items() if name != "unresolved")
        return float(resolved) / float(total)

    route_stats = tables.route_stats
    edge_stats = tables.call_edge_stats
    client_stats = tables.client_stats
    producer_stats = tables.producer_stats

    # CALLS rows are counted once per distinct
    # (src_id, dst_id, arg_count, call_site_line) combination.
    distinct_calls = len(
        {
            (call.src_id, call.dst_id, call.arg_count, call.call_site_line)
            for call in tables.calls_rows
        }
    )

    counts = {
        "packages": len(tables.packages),
        "files": len(tables.files),
        "types": len(tables.types),
        "members": len(tables.members),
        "phantoms": len(tables.phantoms),
        "extends": len(tables.extends_rows),
        "implements": len(tables.implements_rows),
        "injects": len(tables.injects_rows),
        "declares": len(tables.declares_rows),
        "overrides": len(tables.overrides_rows),
        "calls": distinct_calls,
        "routes": len(tables.routes_rows),
        "exposes": len(tables.exposes_rows),
        "clients": len(tables.client_rows),
        "declares_client": len(tables.declares_client_rows),
        "producers": len(tables.producer_rows),
        "declares_producer": len(tables.declares_producer_rows),
        "http_calls": len(tables.http_call_rows),
        "async_calls": len(tables.async_call_rows),
    }

    create_stmt = (
        "CREATE (:GraphMeta {key: $k, ontology_version: $ov, built_at: $t, "
        "source_root: $sr, counts_json: $cj, parse_errors: $pe, "
        "routes_total: $routes_total, exposes_total: $exposes_total, "
        "routes_by_framework: $routes_by_framework, routes_resolved_pct: $routes_resolved_pct, "
        "routes_from_brownfield_pct: $routes_from_brownfield_pct, routes_by_layer: $routes_by_layer, "
        "clients_total: $clients_total, declares_client_total: $declares_client_total, "
        "clients_by_kind: $clients_by_kind, "
        "producers_total: $producers_total, declares_producer_total: $declares_producer_total, "
        "producers_by_kind: $producers_by_kind, "
        "http_calls_total: $http_calls_total, async_calls_total: $async_calls_total, "
        "http_calls_by_strategy: $http_calls_by_strategy, async_calls_by_strategy: $async_calls_by_strategy, "
        "http_calls_resolved_pct: $http_calls_resolved_pct, async_calls_resolved_pct: $async_calls_resolved_pct, "
        "http_clients_from_brownfield_pct: $http_clients_from_brownfield_pct, "
        "async_producers_from_brownfield_pct: $async_producers_from_brownfield_pct, "
        "http_calls_match_breakdown: $http_calls_match_breakdown, "
        "async_calls_match_breakdown: $async_calls_match_breakdown, "
        "cross_service_calls_total: $cross_service_calls_total, "
        "pass3_skipped_cross_service: $pass3_skipped_cross_service, "
        "pass3_unresolved_phantom_receiver: $pass3_unresolved_phantom_receiver, "
        "pass3_unresolved_chained: $pass3_unresolved_chained, "
        "pass4_exposes_suppressed_feign: $pass4_exposes_suppressed_feign, "
        "cross_service_resolution: $cross_service_resolution})"
    )
    params = {
        "k": "graph",
        "ov": ONTOLOGY_VERSION,
        "t": int(time.time()),
        "sr": str(source_root.resolve()),
        "cj": json.dumps(counts),
        "pe": tables.parse_errors,
        "routes_total": len(tables.routes_rows),
        "exposes_total": len(tables.exposes_rows),
        "routes_by_framework": _key_sorted_json(route_stats.by_framework),
        "routes_resolved_pct": float(route_stats.routes_resolved_pct),
        "routes_from_brownfield_pct": float(route_stats.routes_from_brownfield_pct),
        "routes_by_layer": _key_sorted_json(route_stats.routes_by_layer),
        "clients_total": int(client_stats.clients_total),
        "declares_client_total": int(client_stats.declares_client_total),
        "clients_by_kind": _key_sorted_json(client_stats.clients_by_kind),
        "producers_total": int(producer_stats.producers_total),
        "declares_producer_total": int(producer_stats.declares_producer_total),
        "producers_by_kind": _key_sorted_json(producer_stats.producers_by_kind),
        "http_calls_total": edge_stats.http_calls_total,
        "async_calls_total": edge_stats.async_calls_total,
        "http_calls_by_strategy": _key_sorted_json(edge_stats.http_calls_by_strategy),
        "async_calls_by_strategy": _key_sorted_json(edge_stats.async_calls_by_strategy),
        "http_calls_resolved_pct": _resolved_ratio(
            edge_stats.http_calls_by_strategy, edge_stats.http_calls_total
        ),
        "async_calls_resolved_pct": _resolved_ratio(
            edge_stats.async_calls_by_strategy, edge_stats.async_calls_total
        ),
        "http_clients_from_brownfield_pct": edge_stats.http_clients_from_brownfield_pct,
        "async_producers_from_brownfield_pct": edge_stats.async_producers_from_brownfield_pct,
        "http_calls_match_breakdown": _key_sorted_json(edge_stats.http_calls_match_breakdown),
        "async_calls_match_breakdown": _key_sorted_json(edge_stats.async_calls_match_breakdown),
        "cross_service_calls_total": int(edge_stats.cross_service_calls_total),
        "pass3_skipped_cross_service": int(tables.pass3_skipped_cross_service),
        "pass3_unresolved_phantom_receiver": int(tables.pass3_unresolved_phantom_receiver),
        "pass3_unresolved_chained": int(tables.pass3_unresolved_chained),
        "pass4_exposes_suppressed_feign": int(route_stats.exposes_suppressed_feign),
        "cross_service_resolution": str(tables.cross_service_resolution),
    }
    conn.execute(create_stmt, params)
|
|
2991
|
+
|
|
2992
|
+
|
|
2993
|
+
def write_kuzu(
    db_path: Path,
    tables: GraphTables,
    *,
    source_root: Path,
    verbose: bool,
    meta_chain: dict[str, frozenset[str]] | None = None,
) -> None:
    """Write the in-memory graph in ``tables`` to the Kuzu database at ``db_path``.

    Steps, in order: drop existing tables, create the schema, write nodes,
    populate the DECLARES / OVERRIDES rows on ``tables``, write edges, write
    routes and their related rows, then write the ``GraphMeta`` node.

    Args:
        db_path: Database location handed to ``kuzu.Database``; its parent
            directory is created if missing.
        tables: Build state to persist.  ``declares_rows`` / ``overrides_rows``
            are populated here as a side effect before edges are written.
        source_root: Source tree root, passed on to the node and meta writers.
        verbose: When true, progress and timing lines are emitted through
            ``_verbose_stderr_line``.
        meta_chain: Annotation meta-chain to use when writing nodes.  When
            ``None`` it is collected from ``source_root``.

    Raises:
        Any exception raised by the underlying writers propagates unchanged;
        the connection is closed first.
    """
    if meta_chain is None:
        meta_chain = collect_annotation_meta_chain(
            str(source_root.resolve()),
        )
    if verbose:
        _verbose_stderr_line(_WRITE_START)
    with _VerbosePassHeartbeats("[write]", verbose=verbose):
        db_path.parent.mkdir(parents=True, exist_ok=True)
        db = kuzu.Database(str(db_path))
        conn = kuzu.Connection(db)
        # Close the connection on every exit path: a failure part-way through
        # the write must not leave the connection open.
        try:
            _drop_all(conn)
            _create_schema(conn)
            t0 = time.time()
            _write_nodes(
                conn,
                tables,
                project_root=source_root,
                meta_chain=meta_chain,
            )
            if verbose:
                _verbose_stderr_line(f"[write] nodes written in {time.time() - t0:.2f}s")
            # These rows are derived from state on ``tables`` and must exist
            # before the edge writer runs.
            _populate_declares_rows(tables)
            _populate_overrides_rows(tables)
            t1 = time.time()
            _write_edges(conn, tables)
            if verbose:
                _verbose_stderr_line(f"[write] edges written in {time.time() - t1:.2f}s")
            t2 = time.time()
            _write_routes_and_exposes(conn, tables)
            if verbose:
                _verbose_stderr_line(f"[write] routes/exposes written in {time.time() - t2:.2f}s")
            _write_meta(conn, tables, source_root)
        finally:
            conn.close()
|
|
3034
|
+
|
|
3035
|
+
|
|
3036
|
+
# ---------- CLI ----------
|
|
3037
|
+
|
|
3038
|
+
|
|
3039
|
+
def _default_kuzu_path() -> Path:
    """Return the default location of the Kuzu graph database.

    If ``JAVA_CODEBASE_RAG_INDEX_DIR`` is set to a non-blank value that does
    not start with ``s3://``, ``gs://`` or ``az://``, the database lives at
    ``<that dir>/code_graph.kuzu`` (with ``~`` expanded).  Otherwise it lives
    at ``<cwd>/.java-codebase-rag/code_graph.kuzu``.

    Returns:
        The database path; nothing is created on disk.
    """
    idx = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
    if idx and not idx.startswith(("s3://", "gs://", "az://")):
        # Path() already normalises trailing separators.  Stripping them by
        # hand turned a root directory ("/") into "", which silently resolved
        # to a path relative to the current working directory.
        return Path(os.path.expanduser(idx)) / "code_graph.kuzu"
    return Path.cwd() / ".java-codebase-rag" / "code_graph.kuzu"
|
|
3044
|
+
|
|
3045
|
+
|
|
3046
|
+
def main() -> int:
    """Command-line entry point: parse arguments, run every pass, write the graph.

    Returns:
        ``0`` after the graph has been written, or ``2`` when the source root
        is not a directory (a message is printed to stderr in that case).
    """
    cli = argparse.ArgumentParser(description="Build an AST-derived Kuzu graph for Java sources.")
    cli.add_argument(
        "--source-root",
        default=None,
        help="Repository / monorepo root to scan for .java (defaults to current working directory)",
    )
    cli.add_argument(
        "--kuzu-path",
        default=None,
        help=(
            "Kuzu database path (file/dir as used by kuzu.Database; "
            "default: $JAVA_CODEBASE_RAG_INDEX_DIR/code_graph.kuzu or ./.java-codebase-rag/code_graph.kuzu)"
        ),
    )
    cli.add_argument("--verbose", action="store_true")
    opts = cli.parse_args()
    verbose = opts.verbose

    # Source tree: explicit --source-root (with ~ expanded) or the working directory.
    if opts.source_root:
        source_dir = Path(opts.source_root).expanduser().resolve()
    else:
        source_dir = Path.cwd().resolve()
    if not source_dir.is_dir():
        print(f"source-root not a directory: {source_dir}", file=sys.stderr)
        return 2

    # Database location: explicit --kuzu-path or the environment/cwd default.
    if opts.kuzu_path:
        graph_db = Path(opts.kuzu_path).expanduser()
    else:
        graph_db = _default_kuzu_path()

    # Passes run in sequence over one shared GraphTables instance.
    tables = GraphTables()
    asts = pass1_parse(source_dir, tables, verbose=verbose)
    pass2_edges(tables, asts, verbose=verbose)
    pass3_calls(tables, asts, verbose=verbose)
    pass4_routes(tables, asts, source_root=source_dir, verbose=verbose)
    pass5_imperative_edges(tables, asts, source_root=source_dir, verbose=verbose)
    pass6_match_edges(tables, verbose=verbose)
    write_kuzu(graph_db, tables, source_root=source_dir, verbose=verbose)

    if verbose:
        _verbose_stderr_line(f"[done] kuzu at {graph_db}")
    return 0
|
|
3078
|
+
|
|
3079
|
+
|
|
3080
|
+
if __name__ == "__main__":
    # Script entry point: the process exit status is whatever main() returns.
    raise SystemExit(main())
|