codespine 0.9.3__tar.gz → 0.9.5__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (62)
  1. {codespine-0.9.3 → codespine-0.9.5}/PKG-INFO +1 -1
  2. {codespine-0.9.3 → codespine-0.9.5}/codespine/__init__.py +1 -1
  3. {codespine-0.9.3 → codespine-0.9.5}/codespine/db/store.py +30 -16
  4. {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/call_resolver.py +1 -1
  5. {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/engine.py +33 -21
  6. {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/PKG-INFO +1 -1
  7. {codespine-0.9.3 → codespine-0.9.5}/pyproject.toml +1 -1
  8. {codespine-0.9.3 → codespine-0.9.5}/LICENSE +0 -0
  9. {codespine-0.9.3 → codespine-0.9.5}/README.md +0 -0
  10. {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/__init__.py +0 -0
  11. {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/community.py +0 -0
  12. {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/context.py +0 -0
  13. {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/coupling.py +0 -0
  14. {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/crossmodule.py +0 -0
  15. {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/deadcode.py +0 -0
  16. {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/flow.py +0 -0
  17. {codespine-0.9.3 → codespine-0.9.5}/codespine/analysis/impact.py +0 -0
  18. {codespine-0.9.3 → codespine-0.9.5}/codespine/cli.py +0 -0
  19. {codespine-0.9.3 → codespine-0.9.5}/codespine/config.py +0 -0
  20. {codespine-0.9.3 → codespine-0.9.5}/codespine/db/__init__.py +0 -0
  21. {codespine-0.9.3 → codespine-0.9.5}/codespine/db/schema.py +0 -0
  22. {codespine-0.9.3 → codespine-0.9.5}/codespine/diff/__init__.py +0 -0
  23. {codespine-0.9.3 → codespine-0.9.5}/codespine/diff/branch_diff.py +0 -0
  24. {codespine-0.9.3 → codespine-0.9.5}/codespine/guide.py +0 -0
  25. {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/__init__.py +0 -0
  26. {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/di_resolver.py +0 -0
  27. {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/java_parser.py +0 -0
  28. {codespine-0.9.3 → codespine-0.9.5}/codespine/indexer/symbol_builder.py +0 -0
  29. {codespine-0.9.3 → codespine-0.9.5}/codespine/mcp/__init__.py +0 -0
  30. {codespine-0.9.3 → codespine-0.9.5}/codespine/mcp/server.py +0 -0
  31. {codespine-0.9.3 → codespine-0.9.5}/codespine/noise/__init__.py +0 -0
  32. {codespine-0.9.3 → codespine-0.9.5}/codespine/noise/blocklist.py +0 -0
  33. {codespine-0.9.3 → codespine-0.9.5}/codespine/overlay/__init__.py +0 -0
  34. {codespine-0.9.3 → codespine-0.9.5}/codespine/overlay/git_state.py +0 -0
  35. {codespine-0.9.3 → codespine-0.9.5}/codespine/overlay/merge.py +0 -0
  36. {codespine-0.9.3 → codespine-0.9.5}/codespine/overlay/store.py +0 -0
  37. {codespine-0.9.3 → codespine-0.9.5}/codespine/search/__init__.py +0 -0
  38. {codespine-0.9.3 → codespine-0.9.5}/codespine/search/bm25.py +0 -0
  39. {codespine-0.9.3 → codespine-0.9.5}/codespine/search/fuzzy.py +0 -0
  40. {codespine-0.9.3 → codespine-0.9.5}/codespine/search/hybrid.py +0 -0
  41. {codespine-0.9.3 → codespine-0.9.5}/codespine/search/rrf.py +0 -0
  42. {codespine-0.9.3 → codespine-0.9.5}/codespine/search/vector.py +0 -0
  43. {codespine-0.9.3 → codespine-0.9.5}/codespine/watch/__init__.py +0 -0
  44. {codespine-0.9.3 → codespine-0.9.5}/codespine/watch/git_hook.py +0 -0
  45. {codespine-0.9.3 → codespine-0.9.5}/codespine/watch/watcher.py +0 -0
  46. {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/SOURCES.txt +0 -0
  47. {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/dependency_links.txt +0 -0
  48. {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/entry_points.txt +0 -0
  49. {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/requires.txt +0 -0
  50. {codespine-0.9.3 → codespine-0.9.5}/codespine.egg-info/top_level.txt +0 -0
  51. {codespine-0.9.3 → codespine-0.9.5}/gindex.py +0 -0
  52. {codespine-0.9.3 → codespine-0.9.5}/setup.cfg +0 -0
  53. {codespine-0.9.3 → codespine-0.9.5}/tests/test_branch_diff_normalize.py +0 -0
  54. {codespine-0.9.3 → codespine-0.9.5}/tests/test_call_resolver.py +0 -0
  55. {codespine-0.9.3 → codespine-0.9.5}/tests/test_community_detection.py +0 -0
  56. {codespine-0.9.3 → codespine-0.9.5}/tests/test_deadcode.py +0 -0
  57. {codespine-0.9.3 → codespine-0.9.5}/tests/test_index_and_hybrid.py +0 -0
  58. {codespine-0.9.3 → codespine-0.9.5}/tests/test_java_parser.py +0 -0
  59. {codespine-0.9.3 → codespine-0.9.5}/tests/test_multimodule_index.py +0 -0
  60. {codespine-0.9.3 → codespine-0.9.5}/tests/test_overlay.py +0 -0
  61. {codespine-0.9.3 → codespine-0.9.5}/tests/test_search_ranking.py +0 -0
  62. {codespine-0.9.3 → codespine-0.9.5}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.9.3
3
+ Version: 0.9.5
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.9.3"
4
+ __version__ = "0.9.5"
@@ -527,15 +527,27 @@ class GraphStore:
527
527
  rows = [{"source_id": r["source_id"], "target_id": r["target_id"],
528
528
  "confidence": float(r["confidence"]), "reason": r["reason"]}
529
529
  for r in records]
530
- op = "CREATE" if create_mode else "MERGE"
531
- self.execute(
532
- f"""
533
- UNWIND $rows AS row
534
- MATCH (src:Method {{id: row.source_id}}), (dst:Method {{id: row.target_id}})
535
- {op} (src)-[:CALLS {{confidence: row.confidence, reason: row.reason}}]->(dst)
536
- """,
537
- {"rows": rows},
538
- )
530
+ if create_mode:
531
+ self.execute(
532
+ """
533
+ UNWIND $rows AS row
534
+ MATCH (src:Method {id: row.source_id}), (dst:Method {id: row.target_id})
535
+ CREATE (src)-[:CALLS {confidence: row.confidence, reason: row.reason}]->(dst)
536
+ """,
537
+ {"rows": rows},
538
+ )
539
+ else:
540
+ # Properties are SET, not part of the MERGE pattern — ensures at most
541
+ # one CALLS edge per (src, dst) pair regardless of confidence value.
542
+ self.execute(
543
+ """
544
+ UNWIND $rows AS row
545
+ MATCH (src:Method {id: row.source_id}), (dst:Method {id: row.target_id})
546
+ MERGE (src)-[r:CALLS]->(dst)
547
+ SET r.confidence = row.confidence, r.reason = row.reason
548
+ """,
549
+ {"rows": rows},
550
+ )
539
551
 
540
552
  def add_reference(self, rel: str, src_label: str, src_id: str, dst_label: str, dst_id: str, confidence: float) -> None:
541
553
  if rel not in {"REFERENCES_TYPE", "IMPLEMENTS", "OVERRIDES"}:
@@ -671,28 +683,30 @@ class GraphStore:
671
683
  self.clear_file(f_id)
672
684
  self._recycle_conn()
673
685
 
674
- # 2–5. Write all nodes with CREATE (clear_file above guarantees absence).
686
+ # 2. Upsert file record
675
687
  with self.transaction():
676
688
  self.upsert_files_batch(
677
689
  [{"id": f_id, "path": path, "project_id": project_id,
678
690
  "is_test": is_test, "hash": digest}],
679
- create_mode=True,
680
691
  )
681
692
  self._recycle_conn()
682
693
 
694
+ # 3. Upsert classes (typically very few per file)
683
695
  if classes:
684
696
  with self.transaction():
685
- self.upsert_classes_batch(classes, create_mode=True)
697
+ self.upsert_classes_batch(classes)
686
698
  self._recycle_conn()
687
699
 
700
+ # 4. Upsert methods in sub-batches of 200
688
701
  for i in range(0, len(methods), self._FILE_METHOD_SUB_BATCH):
689
702
  with self.transaction():
690
- self.upsert_methods_batch(methods[i: i + self._FILE_METHOD_SUB_BATCH], create_mode=True)
703
+ self.upsert_methods_batch(methods[i: i + self._FILE_METHOD_SUB_BATCH])
691
704
  self._recycle_conn()
692
705
 
706
+ # 5. Upsert symbols in sub-batches of 200
693
707
  for i in range(0, len(symbols), self._FILE_SYMBOL_SUB_BATCH):
694
708
  with self.transaction():
695
- self.upsert_symbols_batch(symbols[i: i + self._FILE_SYMBOL_SUB_BATCH], create_mode=True)
709
+ self.upsert_symbols_batch(symbols[i: i + self._FILE_SYMBOL_SUB_BATCH])
696
710
  self._recycle_conn()
697
711
 
698
712
  # 6. Write call edges in sub-batches of 500 (normalise key names to match add_calls_batch)
@@ -705,14 +719,14 @@ class GraphStore:
705
719
  for rec in batch
706
720
  ]
707
721
  with self.transaction():
708
- self.add_calls_batch(normalised, create_mode=True)
722
+ self.add_calls_batch(normalised)
709
723
  self._recycle_conn()
710
724
 
711
725
  # 7. Write type relations (IMPLEMENTS, OVERRIDES, REFERENCES_TYPE)
712
726
  for i in range(0, len(type_rels), self._FILE_REL_SUB_BATCH):
713
727
  batch = type_rels[i: i + self._FILE_REL_SUB_BATCH]
714
728
  with self.transaction():
715
- self.add_references_batch(batch, create_mode=True)
729
+ self.add_references_batch(batch)
716
730
  self._recycle_conn()
717
731
 
718
732
  def clear_file_by_path(self, project_id: str, project_path: str, file_path: str) -> None:
@@ -5,7 +5,7 @@ from typing import Iterator
5
5
 
6
6
  from codespine.noise.blocklist import MIN_FUZZY_NAME_LEN, NOISE_METHOD_NAMES
7
7
 
8
- MAX_FUZZY_TARGETS = 12
8
+ MAX_FUZZY_TARGETS = 6 # reduced from 12 — keeps precision, halves low-confidence edge fan-out
9
9
 
10
10
 
11
11
  def _simple_type_name(type_name: str | None) -> str:
@@ -221,7 +221,7 @@ class JavaIndexer:
221
221
  calls_resolved = 0
222
222
  type_relationships = 0
223
223
  file_batch_size = max(1, int(getattr(SETTINGS, "index_file_batch_size", 64)))
224
- edge_batch_size = max(1, int(getattr(SETTINGS, "edge_write_batch_size", 2000)))
224
+ edge_batch_size = max(1, int(getattr(SETTINGS, "edge_write_batch_size", 5000)))
225
225
 
226
226
  if not full:
227
227
  method_catalog, class_catalog, fqcn_to_class_ids, class_methods = (
@@ -462,42 +462,54 @@ class JavaIndexer:
462
462
  with self.store.transaction():
463
463
  self.store.clear_files_batch(clear_sub)
464
464
  self.store._recycle_conn()
465
- # Always CREATE — full clears via clear_project, incremental clears
466
- # per-file above, so nodes are guaranteed absent in both paths.
467
465
  with self.store.transaction():
468
- self.store.upsert_files_batch(file_rows, create_mode=True)
466
+ self.store.upsert_files_batch(file_rows)
469
467
  self.store._recycle_conn()
470
468
  with self.store.transaction():
471
- self.store.upsert_classes_batch(class_rows, create_mode=True)
469
+ self.store.upsert_classes_batch(class_rows)
472
470
  self.store._recycle_conn()
473
471
  _METHOD_SUB_BATCH = 200
474
472
  for method_sub in self._chunked(method_rows, _METHOD_SUB_BATCH):
475
473
  with self.store.transaction():
476
- self.store.upsert_methods_batch(method_sub, create_mode=True)
474
+ self.store.upsert_methods_batch(method_sub)
477
475
  self.store._recycle_conn()
478
476
  _SYMBOL_SUB_BATCH = 200
479
477
  for symbol_sub in self._chunked(symbol_rows, _SYMBOL_SUB_BATCH):
480
478
  with self.store.transaction():
481
- self.store.upsert_symbols_batch(symbol_sub, create_mode=True)
479
+ self.store.upsert_symbols_batch(symbol_sub)
482
480
  self.store._recycle_conn()
483
481
 
484
482
  self._emit(progress, "resolve_calls_start")
485
- call_rows: list[dict] = []
486
- for src, dst, confidence, reason in resolve_calls(method_catalog, method_calls, method_context, class_catalog):
487
- call_rows.append(
488
- {
489
- "source_id": src,
490
- "target_id": dst,
491
- "confidence": confidence,
492
- "reason": reason,
493
- }
483
+ # Deduplicate (src, dst) pairs — the same pair can appear many times when
484
+ # a method calls another method multiple times at different call sites.
485
+ # Keep the highest-confidence resolution to avoid N writes per pair.
486
+ best_calls: dict[tuple[str, str], tuple[float, str]] = {}
487
+ for src, dst, confidence, reason in resolve_calls(
488
+ method_catalog, method_calls, method_context, class_catalog
489
+ ):
490
+ key = (src, dst)
491
+ if key not in best_calls or confidence > best_calls[key][0]:
492
+ best_calls[key] = (confidence, reason)
493
+
494
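+ # Write the deduplicated edges in batches of edge_batch_size. Note: best_calls still holds every (src, dst) pair in RAM; only the per-batch row dicts in call_buf are bounded.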
495
+ call_buf: list[dict] = []
496
+ for (src, dst), (confidence, reason) in best_calls.items():
497
+ call_buf.append(
498
+ {"source_id": src, "target_id": dst,
499
+ "confidence": confidence, "reason": reason}
494
500
  )
495
- for call_chunk in self._chunked(call_rows, edge_batch_size):
501
+ if len(call_buf) >= edge_batch_size:
502
+ with self.store.transaction():
503
+ self.store.add_calls_batch(call_buf)
504
+ calls_resolved += len(call_buf)
505
+ self.store._recycle_conn()
506
+ self._emit(progress, "resolve_calls_progress", calls_resolved=calls_resolved)
507
+ call_buf = []
508
+ if call_buf:
496
509
  with self.store.transaction():
497
- self.store.add_calls_batch(call_chunk, create_mode=True)
498
- calls_resolved += len(call_chunk)
510
+ self.store.add_calls_batch(call_buf)
511
+ calls_resolved += len(call_buf)
499
512
  self.store._recycle_conn()
500
- self._emit(progress, "resolve_calls_progress", calls_resolved=calls_resolved)
501
513
  self._emit(progress, "resolve_calls_done", calls_resolved=calls_resolved)
502
514
 
503
515
  self._emit(progress, "resolve_types_start")
@@ -509,7 +521,7 @@ class JavaIndexer:
509
521
  )
510
522
  for rel_chunk in self._chunked(type_rows, edge_batch_size):
511
523
  with self.store.transaction():
512
- self.store.add_references_batch(rel_chunk, create_mode=True)
524
+ self.store.add_references_batch(rel_chunk)
513
525
  type_relationships += len(rel_chunk)
514
526
  self.store._recycle_conn()
515
527
  self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.9.3
3
+ Version: 0.9.5
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.9.3"
7
+ version = "0.9.5"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes