codespine 0.7.3__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.7.3 → codespine-0.9.0}/PKG-INFO +2 -2
- {codespine-0.7.3 → codespine-0.9.0}/README.md +1 -1
- {codespine-0.7.3 → codespine-0.9.0}/codespine/__init__.py +1 -1
- {codespine-0.7.3 → codespine-0.9.0}/codespine/analysis/crossmodule.py +26 -28
- {codespine-0.7.3 → codespine-0.9.0}/codespine/analysis/impact.py +55 -2
- {codespine-0.7.3 → codespine-0.9.0}/codespine/db/schema.py +11 -1
- {codespine-0.7.3 → codespine-0.9.0}/codespine/db/store.py +157 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/indexer/call_resolver.py +7 -3
- codespine-0.9.0/codespine/indexer/di_resolver.py +210 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/indexer/engine.py +100 -1
- {codespine-0.7.3 → codespine-0.9.0}/codespine/indexer/java_parser.py +75 -6
- {codespine-0.7.3 → codespine-0.9.0}/codespine/mcp/server.py +1100 -30
- codespine-0.9.0/codespine/noise/blocklist.py +33 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/search/hybrid.py +23 -8
- codespine-0.9.0/codespine/watch/git_hook.py +205 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/watch/watcher.py +144 -75
- {codespine-0.7.3 → codespine-0.9.0}/codespine.egg-info/PKG-INFO +2 -2
- {codespine-0.7.3 → codespine-0.9.0}/codespine.egg-info/SOURCES.txt +2 -0
- {codespine-0.7.3 → codespine-0.9.0}/pyproject.toml +1 -1
- {codespine-0.7.3 → codespine-0.9.0}/tests/test_overlay.py +3 -2
- codespine-0.7.3/codespine/noise/blocklist.py +0 -37
- {codespine-0.7.3 → codespine-0.9.0}/LICENSE +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/analysis/__init__.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/analysis/community.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/analysis/context.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/analysis/coupling.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/analysis/flow.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/cli.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/config.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/db/__init__.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/diff/__init__.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/guide.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/indexer/__init__.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/mcp/__init__.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/noise/__init__.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/overlay/__init__.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/overlay/git_state.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/overlay/merge.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/overlay/store.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/search/__init__.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/search/bm25.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/search/fuzzy.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/search/rrf.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/search/vector.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine/watch/__init__.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/gindex.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/setup.cfg +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/tests/test_call_resolver.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/tests/test_community_detection.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/tests/test_deadcode.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/tests/test_java_parser.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/tests/test_multimodule_index.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/tests/test_search_ranking.py +0 -0
- {codespine-0.7.3 → codespine-0.9.0}/tests/test_store_recovery.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codespine
|
|
3
|
-
Version: 0.7.3
|
|
3
|
+
Version: 0.9.0
|
|
4
4
|
Summary: Local Java code intelligence indexer backed by a graph database
|
|
5
5
|
Author: CodeSpine contributors
|
|
6
6
|
License: MIT License
|
|
@@ -267,7 +267,7 @@ codespine guide --json # structured JSON for tooling
|
|
|
267
267
|
| `detect_dead_code(limit, project, strict)` | Methods with no callers (Java-aware exemptions). |
|
|
268
268
|
| `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points. |
|
|
269
269
|
| `get_symbol_community(symbol)` | Architectural community cluster for a symbol. |
|
|
270
|
-
| `get_change_coupling(
|
|
270
|
+
| `get_change_coupling(days, min_strength, min_cochanges)` | Files that changed together in the last N days (default 5). |
|
|
271
271
|
|
|
272
272
|
**Git**
|
|
273
273
|
|
|
@@ -203,7 +203,7 @@ codespine guide --json # structured JSON for tooling
|
|
|
203
203
|
| `detect_dead_code(limit, project, strict)` | Methods with no callers (Java-aware exemptions). |
|
|
204
204
|
| `trace_execution_flows(entry_symbol, max_depth, project)` | Execution paths from entry points. |
|
|
205
205
|
| `get_symbol_community(symbol)` | Architectural community cluster for a symbol. |
|
|
206
|
-
| `get_change_coupling(
|
|
206
|
+
| `get_change_coupling(days, min_strength, min_cochanges)` | Files that changed together in the last N days (default 5). |
|
|
207
207
|
|
|
208
208
|
**Git**
|
|
209
209
|
|
|
@@ -17,11 +17,10 @@ Two linking strategies are applied:
|
|
|
17
17
|
parameter count as a method M_dst in the referenced class. This catches
|
|
18
18
|
delegation, interface-implementation forwarding, and adapter patterns.
|
|
19
19
|
|
|
20
|
-
Strategy B —
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
cross-module from appearing as dead code.
|
|
20
|
+
Strategy B — Direct parameter/return type reference (confidence 0.6)
|
|
21
|
+
When the referenced class name appears directly as a parameter type or
|
|
22
|
+
return type of the source method, create an edge to the class's
|
|
23
|
+
constructor (if any). This catches model/DTO/context instantiation.
|
|
25
24
|
"""
|
|
26
25
|
from __future__ import annotations
|
|
27
26
|
|
|
@@ -165,29 +164,28 @@ def link_cross_module_calls(store, project_ids: list[str] | None = None, progres
|
|
|
165
164
|
LOGGER.debug("Name-match edge failed: %s", exc)
|
|
166
165
|
matched_dst_mids.add(dm["mid"])
|
|
167
166
|
|
|
168
|
-
# Strategy B:
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
LOGGER.debug("Fallback edge failed: %s", exc)
|
|
167
|
+
# Strategy B: if the referenced class name appears directly
|
|
168
|
+
# in the source method's parameter types or return type,
|
|
169
|
+
# link to the class's constructor (model/DTO instantiation).
|
|
170
|
+
if not matched_dst_mids:
|
|
171
|
+
rtype_tokens = set(_TOKEN_RE.findall(rtype))
|
|
172
|
+
sig_tokens = set(_TOKEN_RE.findall(sig))
|
|
173
|
+
if class_name in rtype_tokens or class_name in sig_tokens:
|
|
174
|
+
for dm in dst_methods:
|
|
175
|
+
if not dm.get("is_ctor"):
|
|
176
|
+
continue
|
|
177
|
+
pair = (sm["mid"], dm["mid"])
|
|
178
|
+
if pair in seen:
|
|
179
|
+
continue
|
|
180
|
+
seen.add(pair)
|
|
181
|
+
try:
|
|
182
|
+
store.add_call(
|
|
183
|
+
sm["mid"], dm["mid"],
|
|
184
|
+
0.6, "cross_module_ctor_ref",
|
|
185
|
+
)
|
|
186
|
+
new_edges += 1
|
|
187
|
+
except Exception as exc:
|
|
188
|
+
LOGGER.debug("Ctor-ref edge failed: %s", exc)
|
|
191
189
|
|
|
192
190
|
_ping(f"{new_edges} edges created")
|
|
193
191
|
LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
|
|
@@ -115,6 +115,41 @@ def analyze_impact(store, symbol_query: str, max_depth: int = 4, project: str |
|
|
|
115
115
|
"""
|
|
116
116
|
)
|
|
117
117
|
|
|
118
|
+
# Augment with DI injection edges: for each target method's class, find all
|
|
119
|
+
# classes that @Inject it (or bind it via @Component/@Service) and add their
|
|
120
|
+
# methods as implicit callers at depth+1 with edge_type "DI_INJECT".
|
|
121
|
+
try:
|
|
122
|
+
di_edges = store.query_records(
|
|
123
|
+
"""
|
|
124
|
+
MATCH (a:Method), (ca:Class), (b:Method), (cb:Class),
|
|
125
|
+
(ca)-[r:INJECTS]->(cb)
|
|
126
|
+
WHERE a.class_id = ca.id AND b.class_id = cb.id
|
|
127
|
+
RETURN a.id as src, b.id as dst, 'DI_INJECT' as edge_type,
|
|
128
|
+
coalesce(r.confidence, 0.8) as confidence,
|
|
129
|
+
coalesce(r.binding_type, 'field_inject') as reason
|
|
130
|
+
"""
|
|
131
|
+
)
|
|
132
|
+
edges = list(edges) + di_edges
|
|
133
|
+
except Exception:
|
|
134
|
+
pass # INJECTS table may not exist on old DBs
|
|
135
|
+
|
|
136
|
+
# Also follow BINDS_INTERFACE — any class implementing the target's interface
|
|
137
|
+
# counts as an indirect caller.
|
|
138
|
+
try:
|
|
139
|
+
bi_edges = store.query_records(
|
|
140
|
+
"""
|
|
141
|
+
MATCH (a:Method), (ca:Class), (b:Method), (cb:Class),
|
|
142
|
+
(ca)-[r:BINDS_INTERFACE]->(cb)
|
|
143
|
+
WHERE a.class_id = ca.id AND b.class_id = cb.id
|
|
144
|
+
RETURN a.id as src, b.id as dst, 'INTERFACE_BINDING' as edge_type,
|
|
145
|
+
coalesce(r.confidence, 0.9) as confidence,
|
|
146
|
+
coalesce(r.reason, 'implements') as reason
|
|
147
|
+
"""
|
|
148
|
+
)
|
|
149
|
+
edges = list(edges) + bi_edges
|
|
150
|
+
except Exception:
|
|
151
|
+
pass
|
|
152
|
+
|
|
118
153
|
reverse_adj: dict[str, list[dict]] = defaultdict(list)
|
|
119
154
|
for edge in edges:
|
|
120
155
|
reverse_adj[edge["dst"]].append(edge)
|
|
@@ -184,13 +219,31 @@ def analyze_impact(store, symbol_query: str, max_depth: int = 4, project: str |
|
|
|
184
219
|
for mid in target_method_ids
|
|
185
220
|
]
|
|
186
221
|
|
|
222
|
+
# FR-06: Separate "self_callers" (same class as target, depth=1) from
|
|
223
|
+
# impacted_callers so the output is unambiguous.
|
|
224
|
+
target_class_fqcns = {
|
|
225
|
+
target_meta.get(mid, {}).get("class_fqcn")
|
|
226
|
+
for mid in target_method_ids
|
|
227
|
+
if target_meta.get(mid, {}).get("class_fqcn")
|
|
228
|
+
}
|
|
229
|
+
self_callers: list[dict] = []
|
|
230
|
+
impacted_depth1: list[dict] = []
|
|
231
|
+
for item in depth_groups["1"]:
|
|
232
|
+
if item.get("class_fqcn") and item["class_fqcn"] in target_class_fqcns:
|
|
233
|
+
self_callers.append(item)
|
|
234
|
+
else:
|
|
235
|
+
impacted_depth1.append(item)
|
|
236
|
+
depth_groups["1"] = impacted_depth1
|
|
237
|
+
|
|
187
238
|
return {
|
|
188
239
|
"target": symbol_query,
|
|
189
|
-
"
|
|
190
|
-
"
|
|
240
|
+
"resolved_to": resolved_targets,
|
|
241
|
+
"self_callers": self_callers,
|
|
242
|
+
"impacted_callers": depth_groups,
|
|
191
243
|
"summary": {
|
|
192
244
|
"direct": len(depth_groups["1"]),
|
|
193
245
|
"indirect": len(depth_groups["2"]),
|
|
194
246
|
"transitive": len(depth_groups["3+"]),
|
|
247
|
+
"self_callers": len(self_callers),
|
|
195
248
|
},
|
|
196
249
|
}
|
|
@@ -51,6 +51,16 @@ REL_TABLES: Iterable[tuple[str, str]] = [
|
|
|
51
51
|
"CO_CHANGED_WITH",
|
|
52
52
|
"CREATE REL TABLE CO_CHANGED_WITH(FROM File TO File, strength DOUBLE, cochanges INT64, days INT64)",
|
|
53
53
|
),
|
|
54
|
+
# v5: Dependency-injection edges — tracks @Inject/@Autowired/@Provides/@Bean bindings
|
|
55
|
+
(
|
|
56
|
+
"INJECTS",
|
|
57
|
+
"CREATE REL TABLE INJECTS(FROM Class TO Class, framework STRING, binding_type STRING, confidence DOUBLE)",
|
|
58
|
+
),
|
|
59
|
+
# v5: Interface-to-implementation bindings discovered via @Component/@Service annotations
|
|
60
|
+
(
|
|
61
|
+
"BINDS_INTERFACE",
|
|
62
|
+
"CREATE REL TABLE BINDS_INTERFACE(FROM Class TO Class, confidence DOUBLE, reason STRING)",
|
|
63
|
+
),
|
|
54
64
|
]
|
|
55
65
|
|
|
56
66
|
|
|
@@ -81,7 +91,7 @@ def ensure_schema(conn) -> None:
|
|
|
81
91
|
|
|
82
92
|
_safe_execute(
|
|
83
93
|
conn,
|
|
84
|
-
"MERGE (s:SchemaMeta {key: 'schema_version'}) SET s.value = '
|
|
94
|
+
"MERGE (s:SchemaMeta {key: 'schema_version'}) SET s.value = '5'",
|
|
85
95
|
)
|
|
86
96
|
|
|
87
97
|
_safe_execute(conn, "ALTER TABLE Project ADD indexed_commit STRING DEFAULT ''")
|
|
@@ -476,6 +476,163 @@ class GraphStore:
|
|
|
476
476
|
confidence=float(record["confidence"]),
|
|
477
477
|
)
|
|
478
478
|
|
|
479
|
+
def add_injection(
    self,
    src_class_id: str,
    dst_class_id: str,
    framework: str,
    binding_type: str,
    confidence: float,
) -> None:
    """Record an INJECTS relationship from one Class node to another.

    The edge is MERGEd with ``framework``, ``binding_type`` and
    ``confidence`` as properties.  Any exception raised while preparing or
    executing the statement is logged at DEBUG level and swallowed, so a
    single bad edge never aborts the caller.
    """
    cypher = """
        MATCH (a:Class {id: $src}), (b:Class {id: $dst})
        MERGE (a)-[:INJECTS {framework: $fw, binding_type: $bt, confidence: $conf}]->(b)
        """
    try:
        # Parameters are built inside the try so that a non-numeric
        # confidence is treated like any other skipped edge.
        params = {
            "src": src_class_id,
            "dst": dst_class_id,
            "fw": framework,
            "bt": binding_type,
            "conf": float(confidence),
        }
        self.execute(cypher, params)
    except Exception as exc:
        LOGGER.debug("add_injection: skipping edge %s→%s: %s", src_class_id, dst_class_id, exc)
|
|
504
|
+
|
|
505
|
+
def add_injections_batch(self, records: list[dict[str, Any]]) -> None:
    """Write one INJECTS edge per record via ``add_injection``.

    Each record must carry ``src`` and ``dst``; ``framework`` and
    ``binding_type`` fall back to ``"unknown"`` and ``confidence`` to 0.8.
    """
    for record in records:
        source = record["src"]
        target = record["dst"]
        fw_name = record.get("framework", "unknown")
        kind = record.get("binding_type", "unknown")
        score = float(record.get("confidence", 0.8))
        self.add_injection(
            src_class_id=source,
            dst_class_id=target,
            framework=fw_name,
            binding_type=kind,
            confidence=score,
        )
|
|
514
|
+
|
|
515
|
+
def add_interface_binding(
    self,
    src_class_id: str,
    dst_class_id: str,
    confidence: float,
    reason: str,
) -> None:
    """Record a BINDS_INTERFACE relationship from one Class node to another.

    The edge is MERGEd with ``confidence`` and ``reason`` as properties.
    Any exception raised while preparing or executing the statement is
    logged at DEBUG level and swallowed.
    """
    cypher = """
        MATCH (a:Class {id: $src}), (b:Class {id: $dst})
        MERGE (a)-[:BINDS_INTERFACE {confidence: $conf, reason: $reason}]->(b)
        """
    try:
        # Built inside the try so a bad confidence value is skipped, not raised.
        params = {
            "src": src_class_id,
            "dst": dst_class_id,
            "conf": float(confidence),
            "reason": reason,
        }
        self.execute(cypher, params)
    except Exception as exc:
        LOGGER.debug("add_interface_binding: skipping edge %s→%s: %s", src_class_id, dst_class_id, exc)
|
|
538
|
+
|
|
539
|
+
def add_interface_bindings_batch(self, records: list[dict[str, Any]]) -> None:
    """Write one BINDS_INTERFACE edge per record via ``add_interface_binding``.

    Each record must carry ``src`` and ``dst``; ``confidence`` falls back to
    0.9 and ``reason`` to ``"implements"``.
    """
    for record in records:
        source = record["src"]
        target = record["dst"]
        score = float(record.get("confidence", 0.9))
        why = record.get("reason", "implements")
        self.add_interface_binding(
            src_class_id=source,
            dst_class_id=target,
            confidence=score,
            reason=why,
        )
|
|
547
|
+
|
|
548
|
+
# Sub-batch sizes for direct-to-graph file writes (same policy as engine.py)
_FILE_METHOD_SUB_BATCH = 200
_FILE_SYMBOL_SUB_BATCH = 200
_FILE_CALL_SUB_BATCH = 500
_FILE_REL_SUB_BATCH = 500

def upsert_file_from_entry(self, entry: dict, project_path: str) -> None:
    """Replace one file's graph data from a build_overlay_file_entry() dict.

    Clears all existing nodes/edges for the file first, then writes the
    full parsed content (file, classes, methods, symbols, calls, type rels)
    in sub-batched transactions to prevent Kuzu buffer pool OOM.

    This is the primary path for watch-mode incremental writes — it
    bypasses the overlay JSON store and writes directly to the write DB
    so changes are immediately visible after snapshot_to_read_replica().

    NOTE: the replace is NOT atomic.  Every stage (and every sub-batch
    within a stage) runs in its own transaction followed by
    ``_recycle_conn()``; an exception part-way through leaves the file
    cleared or only partially rewritten.

    Parameters
    ----------
    entry : dict with required keys ``file_id``, ``file_path`` and
        ``project_id``; optional keys ``is_test``, ``file_hash``,
        ``classes``, ``methods``, ``symbols``, ``calls`` and ``types``
        (missing or ``None`` collections are treated as empty).
    project_path : accepted for signature compatibility; not used by this
        method.

    Raises
    ------
    KeyError
        If ``entry`` lacks one of the required keys, or a call record
        lacks ``src``/``dst``.
    """
    f_id = entry["file_id"]
    path = entry["file_path"]
    project_id = entry["project_id"]
    is_test = bool(entry.get("is_test", False))
    digest = entry.get("file_hash", "")
    classes = entry.get("classes") or []
    methods = entry.get("methods") or []
    symbols = entry.get("symbols") or []
    calls = entry.get("calls") or []
    type_rels = entry.get("types") or []

    # 1. Clear stale data for this file
    with self.transaction():
        self.clear_file(f_id)
    self._recycle_conn()

    # 2. Upsert file record
    with self.transaction():
        self.upsert_file(f_id, path, project_id, is_test, digest)
    self._recycle_conn()

    # 3. Upsert classes (typically very few per file, so no sub-batching)
    if classes:
        with self.transaction():
            self.upsert_classes_batch(classes)
        self._recycle_conn()

    # 4. Upsert methods, _FILE_METHOD_SUB_BATCH per transaction
    for i in range(0, len(methods), self._FILE_METHOD_SUB_BATCH):
        batch = methods[i: i + self._FILE_METHOD_SUB_BATCH]
        with self.transaction():
            self.upsert_methods_batch(batch)
        self._recycle_conn()

    # 5. Upsert symbols, _FILE_SYMBOL_SUB_BATCH per transaction
    for i in range(0, len(symbols), self._FILE_SYMBOL_SUB_BATCH):
        batch = symbols[i: i + self._FILE_SYMBOL_SUB_BATCH]
        with self.transaction():
            self.upsert_symbols_batch(batch)
        self._recycle_conn()

    # 6. Write call edges, _FILE_CALL_SUB_BATCH per transaction
    for i in range(0, len(calls), self._FILE_CALL_SUB_BATCH):
        batch = calls[i: i + self._FILE_CALL_SUB_BATCH]
        with self.transaction():
            for rec in batch:
                self.add_call(
                    source_id=rec["src"],
                    target_id=rec["dst"],
                    confidence=float(rec.get("confidence", 0.5)),
                    reason=rec.get("reason", "unknown"),
                )
        self._recycle_conn()

    # 7. Write type relations (IMPLEMENTS, OVERRIDES, REFERENCES_TYPE),
    #    _FILE_REL_SUB_BATCH per transaction
    for i in range(0, len(type_rels), self._FILE_REL_SUB_BATCH):
        batch = type_rels[i: i + self._FILE_REL_SUB_BATCH]
        with self.transaction():
            self.add_references_batch(batch)
        self._recycle_conn()
|
|
625
|
+
|
|
626
|
+
def clear_file_by_path(self, project_id: str, project_path: str, file_path: str) -> None:
    """Delete all graph data for a file identified by its filesystem path.

    The file id is derived from ``project_id`` and the path of
    ``file_path`` relative to ``project_path`` (both made absolute first),
    then the file is cleared in a single transaction and the connection is
    recycled.
    """
    # Local imports: use the `_os` alias consistently so this method does
    # not depend on a module-level `import os` being present.
    import os as _os

    from codespine.indexer.symbol_builder import file_id as _fid

    rel_path = _os.path.relpath(_os.path.abspath(file_path), _os.path.abspath(project_path))
    f_id = _fid(project_id, rel_path)
    with self.transaction():
        self.clear_file(f_id)
    self._recycle_conn()
|
|
635
|
+
|
|
479
636
|
def _recycle_conn(self) -> None:
|
|
480
637
|
"""Drop and recreate the per-thread connection to release buffer pages."""
|
|
481
638
|
try:
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from typing import Iterator
|
|
5
5
|
|
|
6
|
-
from codespine.noise.blocklist import NOISE_METHOD_NAMES
|
|
6
|
+
from codespine.noise.blocklist import MIN_FUZZY_NAME_LEN, NOISE_METHOD_NAMES
|
|
7
7
|
|
|
8
8
|
MAX_FUZZY_TARGETS = 12
|
|
9
9
|
|
|
@@ -84,8 +84,6 @@ def resolve_calls(
|
|
|
84
84
|
|
|
85
85
|
for call in call_sites:
|
|
86
86
|
call_name = call.name
|
|
87
|
-
if call_name in NOISE_METHOD_NAMES:
|
|
88
|
-
continue
|
|
89
87
|
|
|
90
88
|
key = (call_name, int(call.arg_count))
|
|
91
89
|
targets: list[str] = []
|
|
@@ -123,6 +121,12 @@ def resolve_calls(
|
|
|
123
121
|
reason = "intra_class_exact"
|
|
124
122
|
|
|
125
123
|
if not targets:
|
|
124
|
+
# Skip noise method names and short names in the fuzzy global
|
|
125
|
+
# fallback — they are too ambiguous without receiver context.
|
|
126
|
+
if call_name in NOISE_METHOD_NAMES:
|
|
127
|
+
continue
|
|
128
|
+
if len(call_name) < MIN_FUZZY_NAME_LEN:
|
|
129
|
+
continue
|
|
126
130
|
# Prefer same-package candidates before global fallback.
|
|
127
131
|
src_pkg = src_ctx.get("package", "")
|
|
128
132
|
same_pkg = []
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Dependency-injection binding resolver for CodeSpine.
|
|
2
|
+
|
|
3
|
+
Produces INJECTS and BINDS_INTERFACE edges by inspecting:
|
|
4
|
+
|
|
5
|
+
1. @Inject / @Autowired fields → INJECTS(consumer_class → provider_class)
|
|
6
|
+
2. @Provides / @Bean methods → INJECTS(config_class → consumer_classes)
|
|
7
|
+
3. @Component/@Service classes → BINDS_INTERFACE(impl → interface) when
|
|
8
|
+
the class implements an indexed interface.
|
|
9
|
+
|
|
10
|
+
All edges are written in sub-batched transactions by the engine after
|
|
11
|
+
call resolution completes.
|
|
12
|
+
|
|
13
|
+
Edge schemas
|
|
14
|
+
------------
|
|
15
|
+
INJECTS(FROM Class TO Class,
|
|
16
|
+
framework STRING, # "spring" | "guice" | "jakarta" | "javax" | "unknown"
|
|
17
|
+
binding_type STRING, # "field_inject" | "constructor_inject" |
|
|
18
|
+
# "provides_binding" | "bean_method" | "component_scan"
|
|
19
|
+
confidence DOUBLE)
|
|
20
|
+
|
|
21
|
+
BINDS_INTERFACE(FROM Class TO Class,
|
|
22
|
+
confidence DOUBLE,
|
|
23
|
+
reason STRING) # "implements" | "extends"
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
from typing import Any, Iterator
|
|
30
|
+
|
|
31
|
+
LOGGER = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
# Annotations that mark a field/constructor parameter as injection point.
|
|
34
|
+
_INJECT_ANNOTATIONS = frozenset({
|
|
35
|
+
"Inject", "Autowired", "Resource",
|
|
36
|
+
"javax.inject.Inject", "jakarta.inject.Inject",
|
|
37
|
+
"org.springframework.beans.factory.annotation.Autowired",
|
|
38
|
+
})
|
|
39
|
+
|
|
40
|
+
# Annotations that mark a method as a DI provider.
|
|
41
|
+
_PROVIDES_ANNOTATIONS = frozenset({
|
|
42
|
+
"Provides", "Bean",
|
|
43
|
+
"com.google.inject.Provides",
|
|
44
|
+
"org.springframework.context.annotation.Bean",
|
|
45
|
+
})
|
|
46
|
+
|
|
47
|
+
# Component-scan annotations: classes with these are treated as injectable.
|
|
48
|
+
_COMPONENT_ANNOTATIONS = frozenset({
|
|
49
|
+
"Component", "Service", "Repository", "Controller", "RestController",
|
|
50
|
+
"Configuration", "ManagedBean", "Named",
|
|
51
|
+
"org.springframework.stereotype.Component",
|
|
52
|
+
"org.springframework.stereotype.Service",
|
|
53
|
+
"javax.inject.Named", "jakarta.inject.Named",
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
# DI frameworks by annotation prefix
|
|
57
|
+
_FRAMEWORK_MAP: dict[str, str] = {
|
|
58
|
+
"Autowired": "spring",
|
|
59
|
+
"Component": "spring", "Service": "spring", "Repository": "spring",
|
|
60
|
+
"Controller": "spring", "RestController": "spring", "Bean": "spring",
|
|
61
|
+
"Configuration": "spring",
|
|
62
|
+
"Inject": "guice",
|
|
63
|
+
"Provides": "guice",
|
|
64
|
+
"Named": "guice",
|
|
65
|
+
"Resource": "jakarta",
|
|
66
|
+
"ManagedBean": "jakarta",
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _framework(annotation: str) -> str:
|
|
71
|
+
simple = annotation.split(".")[-1]
|
|
72
|
+
if "springframework" in annotation:
|
|
73
|
+
return "spring"
|
|
74
|
+
if "google.inject" in annotation:
|
|
75
|
+
return "guice"
|
|
76
|
+
if "jakarta.inject" in annotation:
|
|
77
|
+
return "jakarta"
|
|
78
|
+
if "javax.inject" in annotation:
|
|
79
|
+
return "javax"
|
|
80
|
+
return _FRAMEWORK_MAP.get(simple, "unknown")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _simple(annotation: str) -> str:
|
|
84
|
+
return annotation.split(".")[-1]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def resolve_di_bindings(
|
|
88
|
+
class_catalog: dict[str, list[str]],
|
|
89
|
+
class_meta: dict[str, dict[str, Any]],
|
|
90
|
+
parsed_classes: list[dict[str, Any]],
|
|
91
|
+
fqcn_to_class_ids: dict[str, list[str]],
|
|
92
|
+
) -> Iterator[tuple[str, str, str, str, float, str]]:
|
|
93
|
+
"""Yield (src_class_id, dst_class_id, framework, binding_type, confidence, edge_type) tuples.
|
|
94
|
+
|
|
95
|
+
edge_type is "INJECTS" or "BINDS_INTERFACE".
|
|
96
|
+
|
|
97
|
+
Parameters
|
|
98
|
+
----------
|
|
99
|
+
class_catalog : name → [fqcn, ...] (full project catalog)
|
|
100
|
+
class_meta : class_id → {fqcn, annotations, interfaces, extends, imports, package}
|
|
101
|
+
parsed_classes : list of dicts from build_overlay_file_entry / engine — each has:
|
|
102
|
+
id, fqcn, name, package, file_id, and extra DI metadata fields
|
|
103
|
+
injected_fields, methods_with_provides
|
|
104
|
+
fqcn_to_class_ids : fqcn → [class_id, ...]
|
|
105
|
+
"""
|
|
106
|
+
# Build a reverse FQCN lookup: fqcn → class_id (first match)
|
|
107
|
+
fqcn_to_id: dict[str, str] = {}
|
|
108
|
+
for fqcn, ids in fqcn_to_class_ids.items():
|
|
109
|
+
if ids:
|
|
110
|
+
fqcn_to_id[fqcn] = ids[0]
|
|
111
|
+
|
|
112
|
+
# Build simple-name → [fqcn] lookup for type resolution.
|
|
113
|
+
name_to_fqcns: dict[str, list[str]] = {}
|
|
114
|
+
for fqcn in fqcn_to_id:
|
|
115
|
+
simple = fqcn.split(".")[-1]
|
|
116
|
+
name_to_fqcns.setdefault(simple, []).append(fqcn)
|
|
117
|
+
# Also add class_catalog entries.
|
|
118
|
+
for name, fqcns in class_catalog.items():
|
|
119
|
+
for fqcn in fqcns:
|
|
120
|
+
if fqcn not in name_to_fqcns.get(name, []):
|
|
121
|
+
name_to_fqcns.setdefault(name, []).append(fqcn)
|
|
122
|
+
|
|
123
|
+
# Build a set of class_ids for component-scan eligible classes.
|
|
124
|
+
component_class_ids: set[str] = set()
|
|
125
|
+
for meta in class_meta.values():
|
|
126
|
+
anns = [_simple(a) for a in (meta.get("annotations") or [])]
|
|
127
|
+
if any(a in _COMPONENT_ANNOTATIONS for a in anns):
|
|
128
|
+
cid = meta.get("id") or ""
|
|
129
|
+
if cid:
|
|
130
|
+
component_class_ids.add(cid)
|
|
131
|
+
|
|
132
|
+
def _resolve_type(type_name: str, package: str, imports: list[str]) -> list[str]:
|
|
133
|
+
"""Best-effort resolution of a simple type name to known class IDs."""
|
|
134
|
+
if not type_name:
|
|
135
|
+
return []
|
|
136
|
+
# Strip generics: List<Foo> → Foo
|
|
137
|
+
if "<" in type_name:
|
|
138
|
+
type_name = type_name[type_name.index("<") + 1:].rstrip(">").strip()
|
|
139
|
+
simple = type_name.split(".")[-1]
|
|
140
|
+
candidates: list[str] = []
|
|
141
|
+
# 1. Exact FQ match.
|
|
142
|
+
if "." in type_name and type_name in fqcn_to_id:
|
|
143
|
+
candidates.append(fqcn_to_id[type_name])
|
|
144
|
+
# 2. Same package.
|
|
145
|
+
same_pkg = f"{package}.{simple}" if package else simple
|
|
146
|
+
if same_pkg in fqcn_to_id:
|
|
147
|
+
candidates.append(fqcn_to_id[same_pkg])
|
|
148
|
+
# 3. From imports.
|
|
149
|
+
for imp in imports:
|
|
150
|
+
if imp.endswith(f".{simple}") and imp in fqcn_to_id:
|
|
151
|
+
candidates.append(fqcn_to_id[imp])
|
|
152
|
+
# 4. Name catalog.
|
|
153
|
+
for fqcn in name_to_fqcns.get(simple, []):
|
|
154
|
+
cid = fqcn_to_id.get(fqcn)
|
|
155
|
+
if cid:
|
|
156
|
+
candidates.append(cid)
|
|
157
|
+
return list(dict.fromkeys(candidates)) # deduplicate preserving order
|
|
158
|
+
|
|
159
|
+
# ---------------------------------------------------------------
|
|
160
|
+
# Process each parsed class.
|
|
161
|
+
# ---------------------------------------------------------------
|
|
162
|
+
for cls in parsed_classes:
|
|
163
|
+
src_id = cls.get("id") or cls.get("class_id") or ""
|
|
164
|
+
if not src_id:
|
|
165
|
+
continue
|
|
166
|
+
meta = class_meta.get(src_id, {})
|
|
167
|
+
package = str(meta.get("package") or cls.get("package") or "")
|
|
168
|
+
imports: list[str] = meta.get("imports") or []
|
|
169
|
+
cls_annotations: list[str] = [_simple(a) for a in (meta.get("annotations") or cls.get("annotations") or [])]
|
|
170
|
+
|
|
171
|
+
# --- 1. @Inject / @Autowired fields → INJECTS edges ---------
|
|
172
|
+
for fld in cls.get("injected_fields") or []:
|
|
173
|
+
inj_ann = fld.get("injection_annotation") or ""
|
|
174
|
+
if not inj_ann or _simple(inj_ann) not in frozenset({
|
|
175
|
+
"Inject", "Autowired", "Resource"
|
|
176
|
+
}):
|
|
177
|
+
continue
|
|
178
|
+
type_name = fld.get("type_name") or ""
|
|
179
|
+
dst_ids = _resolve_type(type_name, package, imports)
|
|
180
|
+
fw = _framework(inj_ann)
|
|
181
|
+
for dst_id in dst_ids:
|
|
182
|
+
if dst_id != src_id:
|
|
183
|
+
yield (src_id, dst_id, fw, "field_inject", 0.85, "INJECTS")
|
|
184
|
+
|
|
185
|
+
# --- 2. @Provides / @Bean methods → INJECTS (config → type) -
|
|
186
|
+
for method in cls.get("methods_with_provides") or []:
|
|
187
|
+
provides_type = method.get("provides_type") or ""
|
|
188
|
+
if not provides_type:
|
|
189
|
+
continue
|
|
190
|
+
ann = method.get("provides_annotation") or "Provides"
|
|
191
|
+
fw = _framework(ann)
|
|
192
|
+
dst_ids = _resolve_type(provides_type, package, imports)
|
|
193
|
+
for dst_id in dst_ids:
|
|
194
|
+
if dst_id != src_id:
|
|
195
|
+
binding_type = "bean_method" if _simple(ann) == "Bean" else "provides_binding"
|
|
196
|
+
yield (src_id, dst_id, fw, binding_type, 0.9, "INJECTS")
|
|
197
|
+
|
|
198
|
+
# --- 3. Component-scan: class implementing interfaces → BINDS_INTERFACE
|
|
199
|
+
if any(a in _COMPONENT_ANNOTATIONS for a in cls_annotations):
|
|
200
|
+
for iface_name in meta.get("interfaces") or []:
|
|
201
|
+
dst_ids = _resolve_type(iface_name, package, imports)
|
|
202
|
+
for dst_id in dst_ids:
|
|
203
|
+
if dst_id != src_id:
|
|
204
|
+
yield (src_id, dst_id, 1.0, "implements", 0.95, "BINDS_INTERFACE")
|
|
205
|
+
extends_name = meta.get("extends") or ""
|
|
206
|
+
if extends_name:
|
|
207
|
+
dst_ids = _resolve_type(extends_name, package, imports)
|
|
208
|
+
for dst_id in dst_ids:
|
|
209
|
+
if dst_id != src_id:
|
|
210
|
+
yield (src_id, dst_id, 0.7, "extends", 0.7, "BINDS_INTERFACE")
|