codespine 0.8.0__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.8.0 → codespine-0.9.0}/PKG-INFO +1 -1
- {codespine-0.8.0 → codespine-0.9.0}/codespine/__init__.py +1 -1
- {codespine-0.8.0 → codespine-0.9.0}/codespine/analysis/impact.py +55 -2
- {codespine-0.8.0 → codespine-0.9.0}/codespine/db/schema.py +11 -1
- {codespine-0.8.0 → codespine-0.9.0}/codespine/db/store.py +157 -0
- codespine-0.9.0/codespine/indexer/di_resolver.py +210 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/indexer/engine.py +71 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/indexer/java_parser.py +52 -2
- {codespine-0.8.0 → codespine-0.9.0}/codespine/mcp/server.py +1017 -24
- {codespine-0.8.0 → codespine-0.9.0}/codespine/search/hybrid.py +23 -8
- codespine-0.9.0/codespine/watch/git_hook.py +205 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/watch/watcher.py +138 -72
- {codespine-0.8.0 → codespine-0.9.0}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.8.0 → codespine-0.9.0}/codespine.egg-info/SOURCES.txt +2 -0
- {codespine-0.8.0 → codespine-0.9.0}/pyproject.toml +1 -1
- {codespine-0.8.0 → codespine-0.9.0}/tests/test_overlay.py +3 -2
- {codespine-0.8.0 → codespine-0.9.0}/LICENSE +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/README.md +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/analysis/__init__.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/analysis/community.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/analysis/context.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/analysis/coupling.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/analysis/crossmodule.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/analysis/flow.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/cli.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/config.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/db/__init__.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/diff/__init__.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/guide.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/indexer/__init__.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/mcp/__init__.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/noise/__init__.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/noise/blocklist.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/overlay/__init__.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/overlay/git_state.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/overlay/merge.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/overlay/store.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/search/__init__.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/search/bm25.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/search/fuzzy.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/search/rrf.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/search/vector.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine/watch/__init__.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/gindex.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/setup.cfg +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/tests/test_call_resolver.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/tests/test_community_detection.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/tests/test_deadcode.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/tests/test_java_parser.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/tests/test_multimodule_index.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/tests/test_search_ranking.py +0 -0
- {codespine-0.8.0 → codespine-0.9.0}/tests/test_store_recovery.py +0 -0
|
@@ -115,6 +115,41 @@ def analyze_impact(store, symbol_query: str, max_depth: int = 4, project: str |
|
|
|
115
115
|
"""
|
|
116
116
|
)
|
|
117
117
|
|
|
118
|
+
# Augment with DI injection edges: for each target method's class, find all
|
|
119
|
+
# classes that @Inject it (or bind it via @Component/@Service) and add their
|
|
120
|
+
# methods as implicit callers at depth+1 with edge_type "DI_INJECT".
|
|
121
|
+
try:
|
|
122
|
+
di_edges = store.query_records(
|
|
123
|
+
"""
|
|
124
|
+
MATCH (a:Method), (ca:Class), (b:Method), (cb:Class),
|
|
125
|
+
(ca)-[r:INJECTS]->(cb)
|
|
126
|
+
WHERE a.class_id = ca.id AND b.class_id = cb.id
|
|
127
|
+
RETURN a.id as src, b.id as dst, 'DI_INJECT' as edge_type,
|
|
128
|
+
coalesce(r.confidence, 0.8) as confidence,
|
|
129
|
+
coalesce(r.binding_type, 'field_inject') as reason
|
|
130
|
+
"""
|
|
131
|
+
)
|
|
132
|
+
edges = list(edges) + di_edges
|
|
133
|
+
except Exception:
|
|
134
|
+
pass # INJECTS table may not exist on old DBs
|
|
135
|
+
|
|
136
|
+
# Also follow BINDS_INTERFACE — any class implementing the target's interface
|
|
137
|
+
# counts as an indirect caller.
|
|
138
|
+
try:
|
|
139
|
+
bi_edges = store.query_records(
|
|
140
|
+
"""
|
|
141
|
+
MATCH (a:Method), (ca:Class), (b:Method), (cb:Class),
|
|
142
|
+
(ca)-[r:BINDS_INTERFACE]->(cb)
|
|
143
|
+
WHERE a.class_id = ca.id AND b.class_id = cb.id
|
|
144
|
+
RETURN a.id as src, b.id as dst, 'INTERFACE_BINDING' as edge_type,
|
|
145
|
+
coalesce(r.confidence, 0.9) as confidence,
|
|
146
|
+
coalesce(r.reason, 'implements') as reason
|
|
147
|
+
"""
|
|
148
|
+
)
|
|
149
|
+
edges = list(edges) + bi_edges
|
|
150
|
+
except Exception:
|
|
151
|
+
pass
|
|
152
|
+
|
|
118
153
|
reverse_adj: dict[str, list[dict]] = defaultdict(list)
|
|
119
154
|
for edge in edges:
|
|
120
155
|
reverse_adj[edge["dst"]].append(edge)
|
|
@@ -184,13 +219,31 @@ def analyze_impact(store, symbol_query: str, max_depth: int = 4, project: str |
|
|
|
184
219
|
for mid in target_method_ids
|
|
185
220
|
]
|
|
186
221
|
|
|
222
|
+
# FR-06: Separate "self_callers" (same class as target, depth=1) from
|
|
223
|
+
# impacted_callers so the output is unambiguous.
|
|
224
|
+
target_class_fqcns = {
|
|
225
|
+
target_meta.get(mid, {}).get("class_fqcn")
|
|
226
|
+
for mid in target_method_ids
|
|
227
|
+
if target_meta.get(mid, {}).get("class_fqcn")
|
|
228
|
+
}
|
|
229
|
+
self_callers: list[dict] = []
|
|
230
|
+
impacted_depth1: list[dict] = []
|
|
231
|
+
for item in depth_groups["1"]:
|
|
232
|
+
if item.get("class_fqcn") and item["class_fqcn"] in target_class_fqcns:
|
|
233
|
+
self_callers.append(item)
|
|
234
|
+
else:
|
|
235
|
+
impacted_depth1.append(item)
|
|
236
|
+
depth_groups["1"] = impacted_depth1
|
|
237
|
+
|
|
187
238
|
return {
|
|
188
239
|
"target": symbol_query,
|
|
189
|
-
"
|
|
190
|
-
"
|
|
240
|
+
"resolved_to": resolved_targets,
|
|
241
|
+
"self_callers": self_callers,
|
|
242
|
+
"impacted_callers": depth_groups,
|
|
191
243
|
"summary": {
|
|
192
244
|
"direct": len(depth_groups["1"]),
|
|
193
245
|
"indirect": len(depth_groups["2"]),
|
|
194
246
|
"transitive": len(depth_groups["3+"]),
|
|
247
|
+
"self_callers": len(self_callers),
|
|
195
248
|
},
|
|
196
249
|
}
|
|
@@ -51,6 +51,16 @@ REL_TABLES: Iterable[tuple[str, str]] = [
|
|
|
51
51
|
"CO_CHANGED_WITH",
|
|
52
52
|
"CREATE REL TABLE CO_CHANGED_WITH(FROM File TO File, strength DOUBLE, cochanges INT64, days INT64)",
|
|
53
53
|
),
|
|
54
|
+
# v5: Dependency-injection edges — tracks @Inject/@Autowired/@Provides/@Bean bindings
|
|
55
|
+
(
|
|
56
|
+
"INJECTS",
|
|
57
|
+
"CREATE REL TABLE INJECTS(FROM Class TO Class, framework STRING, binding_type STRING, confidence DOUBLE)",
|
|
58
|
+
),
|
|
59
|
+
# v5: Interface-to-implementation bindings discovered via @Component/@Service annotations
|
|
60
|
+
(
|
|
61
|
+
"BINDS_INTERFACE",
|
|
62
|
+
"CREATE REL TABLE BINDS_INTERFACE(FROM Class TO Class, confidence DOUBLE, reason STRING)",
|
|
63
|
+
),
|
|
54
64
|
]
|
|
55
65
|
|
|
56
66
|
|
|
@@ -81,7 +91,7 @@ def ensure_schema(conn) -> None:
|
|
|
81
91
|
|
|
82
92
|
_safe_execute(
|
|
83
93
|
conn,
|
|
84
|
-
"MERGE (s:SchemaMeta {key: 'schema_version'}) SET s.value = '
|
|
94
|
+
"MERGE (s:SchemaMeta {key: 'schema_version'}) SET s.value = '5'",
|
|
85
95
|
)
|
|
86
96
|
|
|
87
97
|
_safe_execute(conn, "ALTER TABLE Project ADD indexed_commit STRING DEFAULT ''")
|
|
@@ -476,6 +476,163 @@ class GraphStore:
|
|
|
476
476
|
confidence=float(record["confidence"]),
|
|
477
477
|
)
|
|
478
478
|
|
|
479
|
+
def add_injection(
    self,
    src_class_id: str,
    dst_class_id: str,
    framework: str,
    binding_type: str,
    confidence: float,
) -> None:
    """Persist one INJECTS relationship from ``src_class_id`` to ``dst_class_id``.

    Both endpoints are matched as ``Class`` nodes by ``id`` and the
    relationship is MERGEd with ``framework``, ``binding_type`` and
    ``confidence`` as its properties.  Any exception raised while building
    the parameters or running the statement is logged at DEBUG level and
    suppressed, so a single bad edge never aborts the caller.
    """
    statement = """
        MATCH (a:Class {id: $src}), (b:Class {id: $dst})
        MERGE (a)-[:INJECTS {framework: $fw, binding_type: $bt, confidence: $conf}]->(b)
        """
    try:
        # Built inside the ``try`` so a non-numeric confidence is skipped
        # the same way as a failing statement.
        bindings = {
            "src": src_class_id,
            "dst": dst_class_id,
            "fw": framework,
            "bt": binding_type,
            "conf": float(confidence),
        }
        self.execute(statement, bindings)
    except Exception as exc:
        LOGGER.debug("add_injection: skipping edge %s→%s: %s", src_class_id, dst_class_id, exc)
|
|
504
|
+
|
|
505
|
+
def add_injections_batch(self, records: list[dict[str, Any]]) -> None:
    """Write one INJECTS edge per record by delegating to ``add_injection``.

    Each record must carry ``src`` and ``dst``; ``framework`` and
    ``binding_type`` default to ``"unknown"`` and ``confidence`` to ``0.8``
    when absent.
    """
    for row in records:
        self.add_injection(
            **{
                "src_class_id": row["src"],
                "dst_class_id": row["dst"],
                "framework": row.get("framework", "unknown"),
                "binding_type": row.get("binding_type", "unknown"),
                "confidence": float(row.get("confidence", 0.8)),
            }
        )
|
|
514
|
+
|
|
515
|
+
def add_interface_binding(
    self,
    src_class_id: str,
    dst_class_id: str,
    confidence: float,
    reason: str,
) -> None:
    """Persist one BINDS_INTERFACE relationship between two ``Class`` nodes.

    The endpoints are matched by ``id`` and the relationship is MERGEd with
    ``confidence`` and ``reason`` as properties.  Failures (including a
    confidence that cannot be converted to ``float``) are logged at DEBUG
    level and otherwise ignored.
    """
    statement = """
        MATCH (a:Class {id: $src}), (b:Class {id: $dst})
        MERGE (a)-[:BINDS_INTERFACE {confidence: $conf, reason: $reason}]->(b)
        """
    try:
        bindings = {
            "src": src_class_id,
            "dst": dst_class_id,
            "conf": float(confidence),
            "reason": reason,
        }
        self.execute(statement, bindings)
    except Exception as exc:
        LOGGER.debug("add_interface_binding: skipping edge %s→%s: %s", src_class_id, dst_class_id, exc)
|
|
538
|
+
|
|
539
|
+
def add_interface_bindings_batch(self, records: list[dict[str, Any]]) -> None:
    """Write one BINDS_INTERFACE edge per record via ``add_interface_binding``.

    Each record must carry ``src`` and ``dst``; ``confidence`` defaults to
    ``0.9`` and ``reason`` to ``"implements"`` when absent.
    """
    for row in records:
        self.add_interface_binding(
            **{
                "src_class_id": row["src"],
                "dst_class_id": row["dst"],
                "confidence": float(row.get("confidence", 0.9)),
                "reason": row.get("reason", "implements"),
            }
        )
|
|
547
|
+
|
|
548
|
+
# Upper bounds on rows written per transaction when a single file is
# written straight to the graph (same policy as engine.py).
_FILE_METHOD_SUB_BATCH = 200
_FILE_SYMBOL_SUB_BATCH = 200
_FILE_CALL_SUB_BATCH = 500
_FILE_REL_SUB_BATCH = 500

def upsert_file_from_entry(self, entry: dict, project_path: str) -> None:
    """Replace one file's graph data from a build_overlay_file_entry() dict.

    Existing nodes/edges for the file are cleared first, then the parsed
    content (file, classes, methods, symbols, calls, type relations) is
    written back.  Every step runs in its own transaction followed by a
    connection recycle, and the larger collections are written in
    sub-batches to prevent Kuzu buffer pool OOM.

    This is the primary path for watch-mode incremental writes: it bypasses
    the overlay JSON store and writes directly to the write DB so changes
    are visible after snapshot_to_read_replica().

    ``entry`` must contain ``file_id``, ``file_path`` and ``project_id``;
    every other key is optional.  ``project_path`` is accepted but not read
    by this method.
    """

    def _slices(rows, size):
        # Consecutive windows of at most ``size`` rows.
        return (rows[start: start + size] for start in range(0, len(rows), size))

    file_key = entry["file_id"]
    file_path = entry["file_path"]
    owner_project = entry["project_id"]
    test_flag = bool(entry.get("is_test", False))
    content_hash = entry.get("file_hash", "")
    class_rows = entry.get("classes") or []
    method_rows = entry.get("methods") or []
    symbol_rows = entry.get("symbols") or []
    call_rows = entry.get("calls") or []
    relation_rows = entry.get("types") or []

    # 1. Drop whatever the graph currently holds for this file.
    with self.transaction():
        self.clear_file(file_key)
    self._recycle_conn()

    # 2. File record.
    with self.transaction():
        self.upsert_file(file_key, file_path, owner_project, test_flag, content_hash)
    self._recycle_conn()

    # 3. Classes, written in a single transaction.
    if class_rows:
        with self.transaction():
            self.upsert_classes_batch(class_rows)
        self._recycle_conn()

    # 4. Methods.
    for window in _slices(method_rows, self._FILE_METHOD_SUB_BATCH):
        with self.transaction():
            self.upsert_methods_batch(window)
        self._recycle_conn()

    # 5. Symbols.
    for window in _slices(symbol_rows, self._FILE_SYMBOL_SUB_BATCH):
        with self.transaction():
            self.upsert_symbols_batch(window)
        self._recycle_conn()

    # 6. Call edges, one add_call per record.
    for window in _slices(call_rows, self._FILE_CALL_SUB_BATCH):
        with self.transaction():
            for call in window:
                self.add_call(
                    source_id=call["src"],
                    target_id=call["dst"],
                    confidence=float(call.get("confidence", 0.5)),
                    reason=call.get("reason", "unknown"),
                )
        self._recycle_conn()

    # 7. Type relations (IMPLEMENTS, OVERRIDES, REFERENCES_TYPE).
    for window in _slices(relation_rows, self._FILE_REL_SUB_BATCH):
        with self.transaction():
            self.add_references_batch(window)
        self._recycle_conn()
|
|
625
|
+
|
|
626
|
+
def clear_file_by_path(self, project_id: str, project_path: str, file_path: str) -> None:
    """Delete all graph data for a file identified by its filesystem path.

    The file id is derived from ``project_id`` and the path of ``file_path``
    relative to ``project_path`` (both made absolute first).  The file is
    then cleared inside one transaction and the connection is recycled.
    """
    from codespine.indexer.symbol_builder import file_id as _fid
    import os as _os

    # Every path call goes through the locally imported alias.  The previous
    # version mixed ``_os`` with a bare ``os`` that this method never imports,
    # which raises NameError unless the module imports ``os`` itself.
    rel_path = _os.path.relpath(_os.path.abspath(file_path), _os.path.abspath(project_path))
    f_id = _fid(project_id, rel_path)
    with self.transaction():
        self.clear_file(f_id)
    self._recycle_conn()
|
|
635
|
+
|
|
479
636
|
def _recycle_conn(self) -> None:
|
|
480
637
|
"""Drop and recreate the per-thread connection to release buffer pages."""
|
|
481
638
|
try:
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Dependency-injection binding resolver for CodeSpine.
|
|
2
|
+
|
|
3
|
+
Produces INJECTS and BINDS_INTERFACE edges by inspecting:
|
|
4
|
+
|
|
5
|
+
1. @Inject / @Autowired fields → INJECTS(consumer_class → provider_class)
|
|
6
|
+
2. @Provides / @Bean methods → INJECTS(config_class → provided_class)
|
|
7
|
+
3. @Component/@Service classes → BINDS_INTERFACE(impl → interface) when
|
|
8
|
+
the class implements an indexed interface.
|
|
9
|
+
|
|
10
|
+
All edges are written in sub-batched transactions by the engine after
|
|
11
|
+
call resolution completes.
|
|
12
|
+
|
|
13
|
+
Edge schemas
|
|
14
|
+
------------
|
|
15
|
+
INJECTS(FROM Class TO Class,
|
|
16
|
+
framework STRING, # "spring" | "guice" | "jakarta" | "javax" | "unknown"
|
|
17
|
+
binding_type STRING, # "field_inject" | "constructor_inject" |
|
|
18
|
+
# "provides_binding" | "bean_method" | "component_scan"
|
|
19
|
+
confidence DOUBLE)
|
|
20
|
+
|
|
21
|
+
BINDS_INTERFACE(FROM Class TO Class,
|
|
22
|
+
confidence DOUBLE,
|
|
23
|
+
reason STRING) # "implements" | "extends"
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
from typing import Any, Iterator
|
|
30
|
+
|
|
31
|
+
LOGGER = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
# Injection-point annotations (fields / constructor parameters), listed by
# simple name and by fully-qualified name.
_INJECT_ANNOTATIONS = frozenset((
    "Inject",
    "Autowired",
    "Resource",
    "javax.inject.Inject",
    "jakarta.inject.Inject",
    "org.springframework.beans.factory.annotation.Autowired",
))

# Annotations that turn a method into a DI provider.
_PROVIDES_ANNOTATIONS = frozenset((
    "Provides",
    "Bean",
    "com.google.inject.Provides",
    "org.springframework.context.annotation.Bean",
))

# Component-scan annotations: a class carrying one of these is treated as
# injectable.
_COMPONENT_ANNOTATIONS = frozenset((
    "Component",
    "Service",
    "Repository",
    "Controller",
    "RestController",
    "Configuration",
    "ManagedBean",
    "Named",
    "org.springframework.stereotype.Component",
    "org.springframework.stereotype.Service",
    "javax.inject.Named",
    "jakarta.inject.Named",
))

# DI framework keyed by the annotation's simple (unqualified) name.
_FRAMEWORK_MAP: dict[str, str] = {
    **dict.fromkeys(
        (
            "Autowired",
            "Component",
            "Service",
            "Repository",
            "Controller",
            "RestController",
            "Bean",
            "Configuration",
        ),
        "spring",
    ),
    **dict.fromkeys(("Inject", "Provides", "Named"), "guice"),
    **dict.fromkeys(("Resource", "ManagedBean"), "jakarta"),
}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _framework(annotation: str) -> str:
    """Return the DI framework label for an annotation name.

    Package fragments in a qualified name are checked first, in a fixed
    order; if none matches, the simple name is looked up in
    ``_FRAMEWORK_MAP`` and ``"unknown"`` is returned when it is absent.
    """
    package_markers = (
        ("springframework", "spring"),
        ("google.inject", "guice"),
        ("jakarta.inject", "jakarta"),
        ("javax.inject", "javax"),
    )
    for marker, label in package_markers:
        if marker in annotation:
            return label
    return _FRAMEWORK_MAP.get(annotation.split(".")[-1], "unknown")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _simple(annotation: str) -> str:
    """Return the part of ``annotation`` after its last dot (the whole string if there is none)."""
    _, _, tail = annotation.rpartition(".")
    return tail
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def resolve_di_bindings(
    class_catalog: dict[str, list[str]],
    class_meta: dict[str, dict[str, Any]],
    parsed_classes: list[dict[str, Any]],
    fqcn_to_class_ids: dict[str, list[str]],
) -> Iterator[tuple[str, str, str | float, str, float, str]]:
    """Yield one 6-tuple per discovered DI edge.

    The last element is the edge type and fixes the meaning of the others:

    * ``"INJECTS"``:
      ``(src_class_id, dst_class_id, framework, binding_type, confidence, "INJECTS")``
    * ``"BINDS_INTERFACE"``:
      ``(src_class_id, dst_class_id, <float>, reason, <float>, "BINDS_INTERFACE")``
      where ``reason`` is ``"implements"`` or ``"extends"``.

    NOTE(review): for ``"implements"`` edges the third element is ``1.0``
    while the fifth is ``0.95``; confirm which of the two consumers are
    meant to store as the edge confidence.  The tuple shape is left
    untouched because callers unpack it positionally.

    Parameters
    ----------
    class_catalog : name → [fqcn, ...] (full project catalog)
    class_meta    : class_id → {fqcn, annotations, interfaces, extends, imports, package}
    parsed_classes : list of dicts, each with ``id`` (or ``class_id``) and
                     optionally ``package``, ``annotations``,
                     ``injected_fields`` and ``methods_with_provides``
    fqcn_to_class_ids : fqcn → [class_id, ...]; only the first id per fqcn is used
    """
    # fqcn → class_id (first match wins).
    fqcn_to_id: dict[str, str] = {
        fqcn: ids[0] for fqcn, ids in fqcn_to_class_ids.items() if ids
    }

    # simple name → [fqcn] for last-resort lookup, seeded from the known ids
    # and then extended with the catalog.
    name_to_fqcns: dict[str, list[str]] = {}
    for fqcn in fqcn_to_id:
        name_to_fqcns.setdefault(fqcn.split(".")[-1], []).append(fqcn)
    for name, fqcns in class_catalog.items():
        known = name_to_fqcns.setdefault(name, [])
        for fqcn in fqcns:
            if fqcn not in known:
                known.append(fqcn)

    def _type_arguments(type_name: str) -> list[str]:
        """Split ``Outer<A, B<C>>`` into its top-level arguments ``["A", "B<C>"]``."""
        start = type_name.index("<")
        end = type_name.rfind(">")
        # Tolerate a missing closing bracket by taking the rest of the string.
        inner = type_name[start + 1: end] if end > start else type_name[start + 1:]
        pieces: list[str] = []
        current: list[str] = []
        depth = 0
        for ch in inner:
            if ch == "<":
                depth += 1
            elif ch == ">":
                depth -= 1
            if ch == "," and depth == 0:
                pieces.append("".join(current))
                current = []
            else:
                current.append(ch)
        pieces.append("".join(current))

        arguments: list[str] = []
        for piece in pieces:
            piece = piece.strip()
            # Reduce bounded wildcards to their bound: "? extends Foo" → "Foo".
            for bound in ("? extends ", "? super "):
                if piece.startswith(bound):
                    piece = piece[len(bound):].strip()
            if piece and piece != "?":
                arguments.append(piece)
        return arguments

    def _resolve_type(type_name: str, package: str, imports: list[str]) -> list[str]:
        """Best-effort resolution of a type name to known class IDs."""
        if not type_name:
            return []
        if "<" in type_name:
            # Generic wrapper: resolve the type arguments, not the container
            # (List<Foo> → Foo).  Each top-level argument is handled
            # separately and nested generics recurse, so Map<String, Foo>
            # and Provider<List<Foo>> both reach Foo.
            resolved: list[str] = []
            for argument in _type_arguments(type_name):
                resolved.extend(_resolve_type(argument, package, imports))
            return list(dict.fromkeys(resolved))

        simple = type_name.split(".")[-1]
        candidates: list[str] = []
        # 1. Exact fully-qualified match.
        if "." in type_name and type_name in fqcn_to_id:
            candidates.append(fqcn_to_id[type_name])
        # 2. Same package.
        same_pkg = f"{package}.{simple}" if package else simple
        if same_pkg in fqcn_to_id:
            candidates.append(fqcn_to_id[same_pkg])
        # 3. Explicit imports.
        suffix = f".{simple}"
        for imp in imports:
            if imp.endswith(suffix) and imp in fqcn_to_id:
                candidates.append(fqcn_to_id[imp])
        # 4. Any class sharing the simple name.
        for fqcn in name_to_fqcns.get(simple, []):
            cid = fqcn_to_id.get(fqcn)
            if cid:
                candidates.append(cid)
        return list(dict.fromkeys(candidates))  # deduplicate preserving order

    for cls in parsed_classes:
        src_id = cls.get("id") or cls.get("class_id") or ""
        if not src_id:
            continue
        meta = class_meta.get(src_id, {})
        package = str(meta.get("package") or cls.get("package") or "")
        imports: list[str] = meta.get("imports") or []
        cls_annotations = [
            _simple(a) for a in (meta.get("annotations") or cls.get("annotations") or [])
        ]

        # --- 1. @Inject / @Autowired / @Resource fields → INJECTS ---------
        for fld in cls.get("injected_fields") or []:
            inj_ann = fld.get("injection_annotation") or ""
            # _simple() never returns a dotted name, so only the simple-name
            # entries of _INJECT_ANNOTATIONS can match here.
            if not inj_ann or _simple(inj_ann) not in _INJECT_ANNOTATIONS:
                continue
            fw = _framework(inj_ann)
            for dst_id in _resolve_type(fld.get("type_name") or "", package, imports):
                if dst_id != src_id:
                    yield (src_id, dst_id, fw, "field_inject", 0.85, "INJECTS")

        # --- 2. @Provides / @Bean methods → INJECTS (config → type) -------
        for method in cls.get("methods_with_provides") or []:
            provides_type = method.get("provides_type") or ""
            if not provides_type:
                continue
            ann = method.get("provides_annotation") or "Provides"
            fw = _framework(ann)
            binding_type = "bean_method" if _simple(ann) == "Bean" else "provides_binding"
            for dst_id in _resolve_type(provides_type, package, imports):
                if dst_id != src_id:
                    yield (src_id, dst_id, fw, binding_type, 0.9, "INJECTS")

        # --- 3. Component classes: interfaces / superclass → BINDS_INTERFACE
        if any(a in _COMPONENT_ANNOTATIONS for a in cls_annotations):
            for iface_name in meta.get("interfaces") or []:
                for dst_id in _resolve_type(iface_name, package, imports):
                    if dst_id != src_id:
                        yield (src_id, dst_id, 1.0, "implements", 0.95, "BINDS_INTERFACE")
            extends_name = meta.get("extends") or ""
            if extends_name:
                for dst_id in _resolve_type(extends_name, package, imports):
                    if dst_id != src_id:
                        yield (src_id, dst_id, 0.7, "extends", 0.7, "BINDS_INTERFACE")
|
|
@@ -230,6 +230,7 @@ class JavaIndexer:
|
|
|
230
230
|
fqcn_to_class_ids: dict[str, list[str]] = self._existing_class_ids_by_fqcn(project_id) if not full else {}
|
|
231
231
|
class_meta: dict[str, dict] = {}
|
|
232
232
|
class_methods: dict[str, dict[str, str]] = self._existing_class_methods(project_id) if not full else {}
|
|
233
|
+
di_classes: list[dict] = [] # accumulates DI metadata for resolver pass
|
|
233
234
|
|
|
234
235
|
# ── Parallel parse (CPU/IO) ──────────────────────────────────────────
|
|
235
236
|
# tree-sitter releases the GIL so ThreadPoolExecutor gives real speedup.
|
|
@@ -326,11 +327,13 @@ class JavaIndexer:
|
|
|
326
327
|
if c_id not in fqcn_to_class_ids[cls.fqcn]:
|
|
327
328
|
fqcn_to_class_ids[cls.fqcn].append(c_id)
|
|
328
329
|
class_meta[c_id] = {
|
|
330
|
+
"id": c_id,
|
|
329
331
|
"fqcn": cls.fqcn,
|
|
330
332
|
"package": parsed.package,
|
|
331
333
|
"imports": parsed.imports,
|
|
332
334
|
"extends": cls.extends,
|
|
333
335
|
"interfaces": cls.interfaces,
|
|
336
|
+
"annotations": cls.annotations,
|
|
334
337
|
"scope": scope,
|
|
335
338
|
}
|
|
336
339
|
class_methods.setdefault(c_id, {})
|
|
@@ -350,6 +353,38 @@ class JavaIndexer:
|
|
|
350
353
|
)
|
|
351
354
|
classes_indexed += 1
|
|
352
355
|
|
|
356
|
+
# Collect DI metadata for the resolver pass.
|
|
357
|
+
di_cls_entry: dict = {
|
|
358
|
+
"id": c_id,
|
|
359
|
+
"fqcn": cls.fqcn,
|
|
360
|
+
"name": cls.name,
|
|
361
|
+
"package": cls.package,
|
|
362
|
+
"annotations": cls.annotations,
|
|
363
|
+
"injected_fields": [
|
|
364
|
+
{
|
|
365
|
+
"name": f.name,
|
|
366
|
+
"type_name": f.type_name,
|
|
367
|
+
"injection_annotation": f.injection_annotation,
|
|
368
|
+
"qualifier": f.qualifier,
|
|
369
|
+
}
|
|
370
|
+
for f in cls.fields
|
|
371
|
+
if f.injection_annotation
|
|
372
|
+
],
|
|
373
|
+
"methods_with_provides": [
|
|
374
|
+
{
|
|
375
|
+
"name": m.name,
|
|
376
|
+
"provides_type": m.provides_type,
|
|
377
|
+
"provides_annotation": next(
|
|
378
|
+
(a for a in m.annotations if a.split(".")[-1] in {"Provides", "Bean"}),
|
|
379
|
+
"Provides",
|
|
380
|
+
),
|
|
381
|
+
}
|
|
382
|
+
for m in cls.methods
|
|
383
|
+
if m.provides_type
|
|
384
|
+
],
|
|
385
|
+
}
|
|
386
|
+
di_classes.append(di_cls_entry)
|
|
387
|
+
|
|
353
388
|
for fld in cls.fields:
|
|
354
389
|
fqfield = f"{cls.fqcn}#{fld.name}"
|
|
355
390
|
symbol_rows.append(
|
|
@@ -473,6 +508,42 @@ class JavaIndexer:
|
|
|
473
508
|
self.store._recycle_conn()
|
|
474
509
|
self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
|
|
475
510
|
|
|
511
|
+
# ── DI binding resolution ─────────────────────────────────────────────
|
|
512
|
+
# Resolve @Inject/@Autowired/@Provides/@Bean bindings into INJECTS and
|
|
513
|
+
# BINDS_INTERFACE graph edges. This runs after all classes/methods are
|
|
514
|
+
# written so the full cross-project catalog is available for type lookup.
|
|
515
|
+
di_inject_rows: list[dict] = []
|
|
516
|
+
di_bind_rows: list[dict] = []
|
|
517
|
+
try:
|
|
518
|
+
from codespine.indexer.di_resolver import resolve_di_bindings
|
|
519
|
+
for tup in resolve_di_bindings(class_catalog, class_meta, di_classes, fqcn_to_class_ids):
|
|
520
|
+
src, dst, fw_or_conf, bt_or_reason, conf, edge_type = tup
|
|
521
|
+
if edge_type == "INJECTS":
|
|
522
|
+
di_inject_rows.append({
|
|
523
|
+
"src": src, "dst": dst,
|
|
524
|
+
"framework": fw_or_conf, "binding_type": bt_or_reason,
|
|
525
|
+
"confidence": conf,
|
|
526
|
+
})
|
|
527
|
+
else:
|
|
528
|
+
di_bind_rows.append({
|
|
529
|
+
"src": src, "dst": dst,
|
|
530
|
+
"confidence": float(fw_or_conf), "reason": bt_or_reason,
|
|
531
|
+
})
|
|
532
|
+
except Exception as exc:
|
|
533
|
+
LOGGER.warning("DI resolver failed (non-fatal): %s", exc)
|
|
534
|
+
|
|
535
|
+
_DI_BATCH = 500
|
|
536
|
+
for chunk in self._chunked(di_inject_rows, _DI_BATCH):
|
|
537
|
+
with self.store.transaction():
|
|
538
|
+
self.store.add_injections_batch(chunk)
|
|
539
|
+
self.store._recycle_conn()
|
|
540
|
+
for chunk in self._chunked(di_bind_rows, _DI_BATCH):
|
|
541
|
+
with self.store.transaction():
|
|
542
|
+
self.store.add_interface_bindings_batch(chunk)
|
|
543
|
+
self.store._recycle_conn()
|
|
544
|
+
self._emit(progress, "di_done",
|
|
545
|
+
injections=len(di_inject_rows), interface_bindings=len(di_bind_rows))
|
|
546
|
+
|
|
476
547
|
self._prune_meta_cache(meta_cache, current_file_ids)
|
|
477
548
|
self._save_file_meta_cache(project_id, meta_cache)
|
|
478
549
|
|