codespine 0.9.3__tar.gz → 0.9.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.9.3 → codespine-0.9.5}/PKG-INFO +1 -1
- {codespine-0.9.3 → codespine-0.9.5}/codespine/__init__.py +1 -1
- {codespine-0.9.3 → codespine-0.9.5}/codespine/db/store.py +30 -16
- {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/call_resolver.py +1 -1
- {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/engine.py +33 -21
- {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.9.3 → codespine-0.9.5}/pyproject.toml +1 -1
- {codespine-0.9.3 → codespine-0.9.5}/LICENSE +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/README.md +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/__init__.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/community.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/context.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/coupling.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/crossmodule.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/flow.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/impact.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/cli.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/config.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/db/__init__.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/db/schema.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/diff/__init__.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/guide.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/__init__.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/di_resolver.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/mcp/__init__.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/mcp/server.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/noise/__init__.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/noise/blocklist.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/overlay/__init__.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/overlay/git_state.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/overlay/merge.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/overlay/store.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/search/__init__.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/search/bm25.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/search/fuzzy.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/search/hybrid.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/search/rrf.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/search/vector.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/watch/__init__.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/watch/git_hook.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine/watch/watcher.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/gindex.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/setup.cfg +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/tests/test_call_resolver.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/tests/test_community_detection.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/tests/test_deadcode.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/tests/test_java_parser.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/tests/test_multimodule_index.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/tests/test_overlay.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/tests/test_search_ranking.py +0 -0
- {codespine-0.9.3 → codespine-0.9.5}/tests/test_store_recovery.py +0 -0
|
@@ -527,15 +527,27 @@ class GraphStore:
|
|
|
527
527
|
rows = [{"source_id": r["source_id"], "target_id": r["target_id"],
|
|
528
528
|
"confidence": float(r["confidence"]), "reason": r["reason"]}
|
|
529
529
|
for r in records]
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
530
|
+
if create_mode:
|
|
531
|
+
self.execute(
|
|
532
|
+
"""
|
|
533
|
+
UNWIND $rows AS row
|
|
534
|
+
MATCH (src:Method {id: row.source_id}), (dst:Method {id: row.target_id})
|
|
535
|
+
CREATE (src)-[:CALLS {confidence: row.confidence, reason: row.reason}]->(dst)
|
|
536
|
+
""",
|
|
537
|
+
{"rows": rows},
|
|
538
|
+
)
|
|
539
|
+
else:
|
|
540
|
+
# Properties are SET, not part of the MERGE pattern — ensures at most
|
|
541
|
+
# one CALLS edge per (src, dst) pair regardless of confidence value.
|
|
542
|
+
self.execute(
|
|
543
|
+
"""
|
|
544
|
+
UNWIND $rows AS row
|
|
545
|
+
MATCH (src:Method {id: row.source_id}), (dst:Method {id: row.target_id})
|
|
546
|
+
MERGE (src)-[r:CALLS]->(dst)
|
|
547
|
+
SET r.confidence = row.confidence, r.reason = row.reason
|
|
548
|
+
""",
|
|
549
|
+
{"rows": rows},
|
|
550
|
+
)
|
|
539
551
|
|
|
540
552
|
def add_reference(self, rel: str, src_label: str, src_id: str, dst_label: str, dst_id: str, confidence: float) -> None:
|
|
541
553
|
if rel not in {"REFERENCES_TYPE", "IMPLEMENTS", "OVERRIDES"}:
|
|
@@ -671,28 +683,30 @@ class GraphStore:
|
|
|
671
683
|
self.clear_file(f_id)
|
|
672
684
|
self._recycle_conn()
|
|
673
685
|
|
|
674
|
-
# 2
|
|
686
|
+
# 2. Upsert file record
|
|
675
687
|
with self.transaction():
|
|
676
688
|
self.upsert_files_batch(
|
|
677
689
|
[{"id": f_id, "path": path, "project_id": project_id,
|
|
678
690
|
"is_test": is_test, "hash": digest}],
|
|
679
|
-
create_mode=True,
|
|
680
691
|
)
|
|
681
692
|
self._recycle_conn()
|
|
682
693
|
|
|
694
|
+
# 3. Upsert classes (typically very few per file)
|
|
683
695
|
if classes:
|
|
684
696
|
with self.transaction():
|
|
685
|
-
self.upsert_classes_batch(classes
|
|
697
|
+
self.upsert_classes_batch(classes)
|
|
686
698
|
self._recycle_conn()
|
|
687
699
|
|
|
700
|
+
# 4. Upsert methods in sub-batches of 200
|
|
688
701
|
for i in range(0, len(methods), self._FILE_METHOD_SUB_BATCH):
|
|
689
702
|
with self.transaction():
|
|
690
|
-
self.upsert_methods_batch(methods[i: i + self._FILE_METHOD_SUB_BATCH]
|
|
703
|
+
self.upsert_methods_batch(methods[i: i + self._FILE_METHOD_SUB_BATCH])
|
|
691
704
|
self._recycle_conn()
|
|
692
705
|
|
|
706
|
+
# 5. Upsert symbols in sub-batches of 200
|
|
693
707
|
for i in range(0, len(symbols), self._FILE_SYMBOL_SUB_BATCH):
|
|
694
708
|
with self.transaction():
|
|
695
|
-
self.upsert_symbols_batch(symbols[i: i + self._FILE_SYMBOL_SUB_BATCH]
|
|
709
|
+
self.upsert_symbols_batch(symbols[i: i + self._FILE_SYMBOL_SUB_BATCH])
|
|
696
710
|
self._recycle_conn()
|
|
697
711
|
|
|
698
712
|
# 6. Write call edges in sub-batches of 500 (normalise key names to match add_calls_batch)
|
|
@@ -705,14 +719,14 @@ class GraphStore:
|
|
|
705
719
|
for rec in batch
|
|
706
720
|
]
|
|
707
721
|
with self.transaction():
|
|
708
|
-
self.add_calls_batch(normalised
|
|
722
|
+
self.add_calls_batch(normalised)
|
|
709
723
|
self._recycle_conn()
|
|
710
724
|
|
|
711
725
|
# 7. Write type relations (IMPLEMENTS, OVERRIDES, REFERENCES_TYPE)
|
|
712
726
|
for i in range(0, len(type_rels), self._FILE_REL_SUB_BATCH):
|
|
713
727
|
batch = type_rels[i: i + self._FILE_REL_SUB_BATCH]
|
|
714
728
|
with self.transaction():
|
|
715
|
-
self.add_references_batch(batch
|
|
729
|
+
self.add_references_batch(batch)
|
|
716
730
|
self._recycle_conn()
|
|
717
731
|
|
|
718
732
|
def clear_file_by_path(self, project_id: str, project_path: str, file_path: str) -> None:
|
|
@@ -5,7 +5,7 @@ from typing import Iterator
|
|
|
5
5
|
|
|
6
6
|
from codespine.noise.blocklist import MIN_FUZZY_NAME_LEN, NOISE_METHOD_NAMES
|
|
7
7
|
|
|
8
|
-
MAX_FUZZY_TARGETS = 12
|
|
8
|
+
MAX_FUZZY_TARGETS = 6 # reduced from 12 — keeps precision, halves low-confidence edge fan-out
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def _simple_type_name(type_name: str | None) -> str:
|
|
@@ -221,7 +221,7 @@ class JavaIndexer:
|
|
|
221
221
|
calls_resolved = 0
|
|
222
222
|
type_relationships = 0
|
|
223
223
|
file_batch_size = max(1, int(getattr(SETTINGS, "index_file_batch_size", 64)))
|
|
224
|
-
edge_batch_size = max(1, int(getattr(SETTINGS, "edge_write_batch_size",
|
|
224
|
+
edge_batch_size = max(1, int(getattr(SETTINGS, "edge_write_batch_size", 5000)))
|
|
225
225
|
|
|
226
226
|
if not full:
|
|
227
227
|
method_catalog, class_catalog, fqcn_to_class_ids, class_methods = (
|
|
@@ -462,42 +462,54 @@ class JavaIndexer:
|
|
|
462
462
|
with self.store.transaction():
|
|
463
463
|
self.store.clear_files_batch(clear_sub)
|
|
464
464
|
self.store._recycle_conn()
|
|
465
|
-
# Always CREATE — full clears via clear_project, incremental clears
|
|
466
|
-
# per-file above, so nodes are guaranteed absent in both paths.
|
|
467
465
|
with self.store.transaction():
|
|
468
|
-
self.store.upsert_files_batch(file_rows
|
|
466
|
+
self.store.upsert_files_batch(file_rows)
|
|
469
467
|
self.store._recycle_conn()
|
|
470
468
|
with self.store.transaction():
|
|
471
|
-
self.store.upsert_classes_batch(class_rows
|
|
469
|
+
self.store.upsert_classes_batch(class_rows)
|
|
472
470
|
self.store._recycle_conn()
|
|
473
471
|
_METHOD_SUB_BATCH = 200
|
|
474
472
|
for method_sub in self._chunked(method_rows, _METHOD_SUB_BATCH):
|
|
475
473
|
with self.store.transaction():
|
|
476
|
-
self.store.upsert_methods_batch(method_sub
|
|
474
|
+
self.store.upsert_methods_batch(method_sub)
|
|
477
475
|
self.store._recycle_conn()
|
|
478
476
|
_SYMBOL_SUB_BATCH = 200
|
|
479
477
|
for symbol_sub in self._chunked(symbol_rows, _SYMBOL_SUB_BATCH):
|
|
480
478
|
with self.store.transaction():
|
|
481
|
-
self.store.upsert_symbols_batch(symbol_sub
|
|
479
|
+
self.store.upsert_symbols_batch(symbol_sub)
|
|
482
480
|
self.store._recycle_conn()
|
|
483
481
|
|
|
484
482
|
self._emit(progress, "resolve_calls_start")
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
483
|
+
# Deduplicate (src, dst) pairs — the same pair can appear many times when
|
|
484
|
+
# a method calls another method multiple times at different call sites.
|
|
485
|
+
# Keep the highest-confidence resolution to avoid N writes per pair.
|
|
486
|
+
best_calls: dict[tuple[str, str], tuple[float, str]] = {}
|
|
487
|
+
for src, dst, confidence, reason in resolve_calls(
|
|
488
|
+
method_catalog, method_calls, method_context, class_catalog
|
|
489
|
+
):
|
|
490
|
+
key = (src, dst)
|
|
491
|
+
if key not in best_calls or confidence > best_calls[key][0]:
|
|
492
|
+
best_calls[key] = (confidence, reason)
|
|
493
|
+
|
|
494
|
+
# Stream writes in batches — never hold the full set in RAM.
|
|
495
|
+
call_buf: list[dict] = []
|
|
496
|
+
for (src, dst), (confidence, reason) in best_calls.items():
|
|
497
|
+
call_buf.append(
|
|
498
|
+
{"source_id": src, "target_id": dst,
|
|
499
|
+
"confidence": confidence, "reason": reason}
|
|
494
500
|
)
|
|
495
|
-
|
|
501
|
+
if len(call_buf) >= edge_batch_size:
|
|
502
|
+
with self.store.transaction():
|
|
503
|
+
self.store.add_calls_batch(call_buf)
|
|
504
|
+
calls_resolved += len(call_buf)
|
|
505
|
+
self.store._recycle_conn()
|
|
506
|
+
self._emit(progress, "resolve_calls_progress", calls_resolved=calls_resolved)
|
|
507
|
+
call_buf = []
|
|
508
|
+
if call_buf:
|
|
496
509
|
with self.store.transaction():
|
|
497
|
-
self.store.add_calls_batch(
|
|
498
|
-
calls_resolved += len(
|
|
510
|
+
self.store.add_calls_batch(call_buf)
|
|
511
|
+
calls_resolved += len(call_buf)
|
|
499
512
|
self.store._recycle_conn()
|
|
500
|
-
self._emit(progress, "resolve_calls_progress", calls_resolved=calls_resolved)
|
|
501
513
|
self._emit(progress, "resolve_calls_done", calls_resolved=calls_resolved)
|
|
502
514
|
|
|
503
515
|
self._emit(progress, "resolve_types_start")
|
|
@@ -509,7 +521,7 @@ class JavaIndexer:
|
|
|
509
521
|
)
|
|
510
522
|
for rel_chunk in self._chunked(type_rows, edge_batch_size):
|
|
511
523
|
with self.store.transaction():
|
|
512
|
-
self.store.add_references_batch(rel_chunk
|
|
524
|
+
self.store.add_references_batch(rel_chunk)
|
|
513
525
|
type_relationships += len(rel_chunk)
|
|
514
526
|
self.store._recycle_conn()
|
|
515
527
|
self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|