codespine 0.9.1__tar.gz → 0.9.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. {codespine-0.9.1 → codespine-0.9.3}/PKG-INFO +1 -1
  2. {codespine-0.9.1 → codespine-0.9.3}/codespine/__init__.py +1 -1
  3. {codespine-0.9.1 → codespine-0.9.3}/codespine/db/store.py +189 -97
  4. {codespine-0.9.1 → codespine-0.9.3}/codespine/indexer/engine.py +13 -12
  5. {codespine-0.9.1 → codespine-0.9.3}/codespine.egg-info/PKG-INFO +1 -1
  6. {codespine-0.9.1 → codespine-0.9.3}/pyproject.toml +1 -1
  7. {codespine-0.9.1 → codespine-0.9.3}/LICENSE +0 -0
  8. {codespine-0.9.1 → codespine-0.9.3}/README.md +0 -0
  9. {codespine-0.9.1 → codespine-0.9.3}/codespine/analysis/__init__.py +0 -0
  10. {codespine-0.9.1 → codespine-0.9.3}/codespine/analysis/community.py +0 -0
  11. {codespine-0.9.1 → codespine-0.9.3}/codespine/analysis/context.py +0 -0
  12. {codespine-0.9.1 → codespine-0.9.3}/codespine/analysis/coupling.py +0 -0
  13. {codespine-0.9.1 → codespine-0.9.3}/codespine/analysis/crossmodule.py +0 -0
  14. {codespine-0.9.1 → codespine-0.9.3}/codespine/analysis/deadcode.py +0 -0
  15. {codespine-0.9.1 → codespine-0.9.3}/codespine/analysis/flow.py +0 -0
  16. {codespine-0.9.1 → codespine-0.9.3}/codespine/analysis/impact.py +0 -0
  17. {codespine-0.9.1 → codespine-0.9.3}/codespine/cli.py +0 -0
  18. {codespine-0.9.1 → codespine-0.9.3}/codespine/config.py +0 -0
  19. {codespine-0.9.1 → codespine-0.9.3}/codespine/db/__init__.py +0 -0
  20. {codespine-0.9.1 → codespine-0.9.3}/codespine/db/schema.py +0 -0
  21. {codespine-0.9.1 → codespine-0.9.3}/codespine/diff/__init__.py +0 -0
  22. {codespine-0.9.1 → codespine-0.9.3}/codespine/diff/branch_diff.py +0 -0
  23. {codespine-0.9.1 → codespine-0.9.3}/codespine/guide.py +0 -0
  24. {codespine-0.9.1 → codespine-0.9.3}/codespine/indexer/__init__.py +0 -0
  25. {codespine-0.9.1 → codespine-0.9.3}/codespine/indexer/call_resolver.py +0 -0
  26. {codespine-0.9.1 → codespine-0.9.3}/codespine/indexer/di_resolver.py +0 -0
  27. {codespine-0.9.1 → codespine-0.9.3}/codespine/indexer/java_parser.py +0 -0
  28. {codespine-0.9.1 → codespine-0.9.3}/codespine/indexer/symbol_builder.py +0 -0
  29. {codespine-0.9.1 → codespine-0.9.3}/codespine/mcp/__init__.py +0 -0
  30. {codespine-0.9.1 → codespine-0.9.3}/codespine/mcp/server.py +0 -0
  31. {codespine-0.9.1 → codespine-0.9.3}/codespine/noise/__init__.py +0 -0
  32. {codespine-0.9.1 → codespine-0.9.3}/codespine/noise/blocklist.py +0 -0
  33. {codespine-0.9.1 → codespine-0.9.3}/codespine/overlay/__init__.py +0 -0
  34. {codespine-0.9.1 → codespine-0.9.3}/codespine/overlay/git_state.py +0 -0
  35. {codespine-0.9.1 → codespine-0.9.3}/codespine/overlay/merge.py +0 -0
  36. {codespine-0.9.1 → codespine-0.9.3}/codespine/overlay/store.py +0 -0
  37. {codespine-0.9.1 → codespine-0.9.3}/codespine/search/__init__.py +0 -0
  38. {codespine-0.9.1 → codespine-0.9.3}/codespine/search/bm25.py +0 -0
  39. {codespine-0.9.1 → codespine-0.9.3}/codespine/search/fuzzy.py +0 -0
  40. {codespine-0.9.1 → codespine-0.9.3}/codespine/search/hybrid.py +0 -0
  41. {codespine-0.9.1 → codespine-0.9.3}/codespine/search/rrf.py +0 -0
  42. {codespine-0.9.1 → codespine-0.9.3}/codespine/search/vector.py +0 -0
  43. {codespine-0.9.1 → codespine-0.9.3}/codespine/watch/__init__.py +0 -0
  44. {codespine-0.9.1 → codespine-0.9.3}/codespine/watch/git_hook.py +0 -0
  45. {codespine-0.9.1 → codespine-0.9.3}/codespine/watch/watcher.py +0 -0
  46. {codespine-0.9.1 → codespine-0.9.3}/codespine.egg-info/SOURCES.txt +0 -0
  47. {codespine-0.9.1 → codespine-0.9.3}/codespine.egg-info/dependency_links.txt +0 -0
  48. {codespine-0.9.1 → codespine-0.9.3}/codespine.egg-info/entry_points.txt +0 -0
  49. {codespine-0.9.1 → codespine-0.9.3}/codespine.egg-info/requires.txt +0 -0
  50. {codespine-0.9.1 → codespine-0.9.3}/codespine.egg-info/top_level.txt +0 -0
  51. {codespine-0.9.1 → codespine-0.9.3}/gindex.py +0 -0
  52. {codespine-0.9.1 → codespine-0.9.3}/setup.cfg +0 -0
  53. {codespine-0.9.1 → codespine-0.9.3}/tests/test_branch_diff_normalize.py +0 -0
  54. {codespine-0.9.1 → codespine-0.9.3}/tests/test_call_resolver.py +0 -0
  55. {codespine-0.9.1 → codespine-0.9.3}/tests/test_community_detection.py +0 -0
  56. {codespine-0.9.1 → codespine-0.9.3}/tests/test_deadcode.py +0 -0
  57. {codespine-0.9.1 → codespine-0.9.3}/tests/test_index_and_hybrid.py +0 -0
  58. {codespine-0.9.1 → codespine-0.9.3}/tests/test_java_parser.py +0 -0
  59. {codespine-0.9.1 → codespine-0.9.3}/tests/test_multimodule_index.py +0 -0
  60. {codespine-0.9.1 → codespine-0.9.3}/tests/test_overlay.py +0 -0
  61. {codespine-0.9.1 → codespine-0.9.3}/tests/test_search_ranking.py +0 -0
  62. {codespine-0.9.1 → codespine-0.9.3}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.9.1
3
+ Version: 0.9.3
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.9.1"
4
+ __version__ = "0.9.3"
@@ -147,13 +147,15 @@ class GraphStore:
147
147
 
148
148
  def clear_project(self, project_id: str) -> None:
149
149
  file_recs = self.query_records("MATCH (f:File) WHERE f.project_id = $pid RETURN f.id as id", {"pid": project_id})
150
- # Small batches (10 files per tx) prevent buffer pool OOM on large projects.
151
- for idx, rec in enumerate(file_recs, start=1):
152
- with self.transaction():
153
- self.clear_file(rec["id"])
154
- if idx % 10 == 0:
150
+ file_ids = [r["id"] for r in file_recs]
151
+ # Bulk-delete in chunks of 200 to avoid WAL exhaustion on very large projects.
152
+ _CLEAR_CHUNK = 200
153
+ for i in range(0, max(1, len(file_ids)), _CLEAR_CHUNK):
154
+ chunk = file_ids[i: i + _CLEAR_CHUNK]
155
+ if chunk:
156
+ with self.transaction():
157
+ self.clear_files_batch(chunk)
155
158
  self._recycle_conn()
156
- self._recycle_conn()
157
159
  self.execute("MATCH (p:Project) WHERE p.id = $pid DETACH DELETE p", {"pid": project_id})
158
160
  self._recycle_conn()
159
161
 
@@ -242,34 +244,32 @@ class GraphStore:
242
244
  return {r["id"]: {"path": r.get("path", ""), "hash": r.get("hash", "")} for r in recs}
243
245
 
244
246
  def clear_file(self, file_id: str) -> None:
247
+ self.clear_files_batch([file_id])
248
+
249
+ def clear_files_batch(self, file_ids: list[str]) -> None:
250
+ """Delete all graph data for a set of files in 4 bulk queries instead of 4×N."""
251
+ if not file_ids:
252
+ return
253
+ fids = list(file_ids)
245
254
  self.execute(
246
- """
247
- MATCH (s:Symbol) WHERE s.file_id = $fid
248
- DETACH DELETE s
249
- """,
250
- {"fid": file_id},
255
+ "MATCH (s:Symbol) WHERE s.file_id IN $fids DETACH DELETE s",
256
+ {"fids": fids},
251
257
  )
252
258
  self.execute(
253
259
  """
254
260
  MATCH (m:Method), (c:Class)
255
- WHERE m.class_id = c.id AND c.file_id = $fid
261
+ WHERE m.class_id = c.id AND c.file_id IN $fids
256
262
  DETACH DELETE m
257
263
  """,
258
- {"fid": file_id},
264
+ {"fids": fids},
259
265
  )
260
266
  self.execute(
261
- """
262
- MATCH (c:Class) WHERE c.file_id = $fid
263
- DETACH DELETE c
264
- """,
265
- {"fid": file_id},
267
+ "MATCH (c:Class) WHERE c.file_id IN $fids DETACH DELETE c",
268
+ {"fids": fids},
266
269
  )
267
270
  self.execute(
268
- """
269
- MATCH (f:File {id: $fid})
270
- DETACH DELETE f
271
- """,
272
- {"fid": file_id},
271
+ "MATCH (f:File) WHERE f.id IN $fids DETACH DELETE f",
272
+ {"fids": fids},
273
273
  )
274
274
 
275
275
  def list_methods(self) -> list[dict[str, Any]]:
@@ -296,14 +296,29 @@ class GraphStore:
296
296
  },
297
297
  )
298
298
 
299
- def upsert_files_batch(self, records: list[dict[str, Any]]) -> None:
300
- for record in records:
301
- self.upsert_file(
302
- file_id=record["id"],
303
- path=record["path"],
304
- project_id=record["project_id"],
305
- is_test=bool(record["is_test"]),
306
- digest=record["hash"],
299
+ def upsert_files_batch(self, records: list[dict[str, Any]], create_mode: bool = False) -> None:
300
+ if not records:
301
+ return
302
+ rows = [{"id": r["id"], "path": r["path"], "project_id": r["project_id"],
303
+ "is_test": bool(r["is_test"]), "hash": r["hash"]} for r in records]
304
+ if create_mode:
305
+ self.execute(
306
+ """
307
+ UNWIND $rows AS row
308
+ CREATE (f:File {id: row.id, path: row.path, project_id: row.project_id,
309
+ is_test: row.is_test, hash: row.hash})
310
+ """,
311
+ {"rows": rows},
312
+ )
313
+ else:
314
+ self.execute(
315
+ """
316
+ UNWIND $rows AS row
317
+ MERGE (f:File {id: row.id})
318
+ SET f.path = row.path, f.project_id = row.project_id,
319
+ f.is_test = row.is_test, f.hash = row.hash
320
+ """,
321
+ {"rows": rows},
307
322
  )
308
323
 
309
324
  def upsert_class(self, class_id: str, fqcn: str, name: str, package: str, file_id: str) -> None:
@@ -321,14 +336,29 @@ class GraphStore:
321
336
  },
322
337
  )
323
338
 
324
- def upsert_classes_batch(self, records: list[dict[str, Any]]) -> None:
325
- for record in records:
326
- self.upsert_class(
327
- class_id=record["id"],
328
- fqcn=record["fqcn"],
329
- name=record["name"],
330
- package=record["package"],
331
- file_id=record["file_id"],
339
+ def upsert_classes_batch(self, records: list[dict[str, Any]], create_mode: bool = False) -> None:
340
+ if not records:
341
+ return
342
+ rows = [{"id": r["id"], "fqcn": r["fqcn"], "name": r["name"],
343
+ "package": r["package"], "file_id": r["file_id"]} for r in records]
344
+ if create_mode:
345
+ self.execute(
346
+ """
347
+ UNWIND $rows AS row
348
+ CREATE (c:Class {id: row.id, fqcn: row.fqcn, name: row.name,
349
+ package: row.package, file_id: row.file_id})
350
+ """,
351
+ {"rows": rows},
352
+ )
353
+ else:
354
+ self.execute(
355
+ """
356
+ UNWIND $rows AS row
357
+ MERGE (c:Class {id: row.id})
358
+ SET c.fqcn = row.fqcn, c.name = row.name,
359
+ c.package = row.package, c.file_id = row.file_id
360
+ """,
361
+ {"rows": rows},
332
362
  )
333
363
 
334
364
  def upsert_method(
@@ -369,17 +399,41 @@ class GraphStore:
369
399
  {"cid": class_id, "mid": method_id},
370
400
  )
371
401
 
372
- def upsert_methods_batch(self, records: list[dict[str, Any]]) -> None:
373
- for record in records:
374
- self.upsert_method(
375
- method_id=record["id"],
376
- class_id=record["class_id"],
377
- name=record["name"],
378
- signature=record["signature"],
379
- return_type=record["return_type"],
380
- modifiers=record["modifiers"],
381
- is_constructor=bool(record["is_constructor"]),
382
- is_test=bool(record["is_test"]),
402
+ def upsert_methods_batch(self, records: list[dict[str, Any]], create_mode: bool = False) -> None:
403
+ if not records:
404
+ return
405
+ rows = [{"id": r["id"], "class_id": r["class_id"], "name": r["name"],
406
+ "signature": r["signature"], "return_type": r["return_type"],
407
+ "modifiers": r["modifiers"], "is_constructor": bool(r["is_constructor"]),
408
+ "is_test": bool(r["is_test"])} for r in records]
409
+ if create_mode:
410
+ # After clear_file, nodes are guaranteed absent — CREATE skips the
411
+ # primary-key existence check that MERGE pays on every row.
412
+ self.execute(
413
+ """
414
+ UNWIND $rows AS row
415
+ MATCH (c:Class {id: row.class_id})
416
+ CREATE (m:Method {id: row.id, class_id: row.class_id, name: row.name,
417
+ signature: row.signature, return_type: row.return_type,
418
+ modifiers: row.modifiers, is_constructor: row.is_constructor,
419
+ is_test: row.is_test})
420
+ CREATE (c)-[:HAS_METHOD]->(m)
421
+ """,
422
+ {"rows": rows},
423
+ )
424
+ else:
425
+ self.execute(
426
+ """
427
+ UNWIND $rows AS row
428
+ MATCH (c:Class {id: row.class_id})
429
+ MERGE (m:Method {id: row.id})
430
+ SET m.class_id = row.class_id, m.name = row.name,
431
+ m.signature = row.signature, m.return_type = row.return_type,
432
+ m.modifiers = row.modifiers, m.is_constructor = row.is_constructor,
433
+ m.is_test = row.is_test
434
+ MERGE (c)-[:HAS_METHOD]->(m)
435
+ """,
436
+ {"rows": rows},
383
437
  )
384
438
 
385
439
  def upsert_symbol(
@@ -420,17 +474,37 @@ class GraphStore:
420
474
  {"fid": file_id, "sid": symbol_id},
421
475
  )
422
476
 
423
- def upsert_symbols_batch(self, records: list[dict[str, Any]]) -> None:
424
- for record in records:
425
- self.upsert_symbol(
426
- symbol_id=record["id"],
427
- kind=record["kind"],
428
- name=record["name"],
429
- fqname=record["fqname"],
430
- file_id=record["file_id"],
431
- line=int(record["line"]),
432
- col=int(record["col"]),
433
- embedding=record.get("embedding"),
477
+ def upsert_symbols_batch(self, records: list[dict[str, Any]], create_mode: bool = False) -> None:
478
+ if not records:
479
+ return
480
+ rows = [{"id": r["id"], "kind": r["kind"], "name": r["name"],
481
+ "fqname": r["fqname"], "file_id": r["file_id"],
482
+ "line": int(r["line"]), "col": int(r["col"]),
483
+ "embedding": r.get("embedding")} for r in records]
484
+ if create_mode:
485
+ self.execute(
486
+ """
487
+ UNWIND $rows AS row
488
+ MATCH (f:File {id: row.file_id})
489
+ CREATE (s:Symbol {id: row.id, kind: row.kind, name: row.name,
490
+ fqname: row.fqname, file_id: row.file_id,
491
+ line: row.line, col: row.col, embedding: row.embedding})
492
+ CREATE (f)-[:DECLARES]->(s)
493
+ """,
494
+ {"rows": rows},
495
+ )
496
+ else:
497
+ self.execute(
498
+ """
499
+ UNWIND $rows AS row
500
+ MATCH (f:File {id: row.file_id})
501
+ MERGE (s:Symbol {id: row.id})
502
+ SET s.kind = row.kind, s.name = row.name, s.fqname = row.fqname,
503
+ s.file_id = row.file_id, s.line = row.line, s.col = row.col,
504
+ s.embedding = row.embedding
505
+ MERGE (f)-[:DECLARES]->(s)
506
+ """,
507
+ {"rows": rows},
434
508
  )
435
509
 
436
510
  def add_call(self, source_id: str, target_id: str, confidence: float, reason: str) -> None:
@@ -447,14 +521,21 @@ class GraphStore:
447
521
  },
448
522
  )
449
523
 
450
- def add_calls_batch(self, records: list[dict[str, Any]]) -> None:
451
- for record in records:
452
- self.add_call(
453
- source_id=record["source_id"],
454
- target_id=record["target_id"],
455
- confidence=float(record["confidence"]),
456
- reason=record["reason"],
457
- )
524
+ def add_calls_batch(self, records: list[dict[str, Any]], create_mode: bool = False) -> None:
525
+ if not records:
526
+ return
527
+ rows = [{"source_id": r["source_id"], "target_id": r["target_id"],
528
+ "confidence": float(r["confidence"]), "reason": r["reason"]}
529
+ for r in records]
530
+ op = "CREATE" if create_mode else "MERGE"
531
+ self.execute(
532
+ f"""
533
+ UNWIND $rows AS row
534
+ MATCH (src:Method {{id: row.source_id}}), (dst:Method {{id: row.target_id}})
535
+ {op} (src)-[:CALLS {{confidence: row.confidence, reason: row.reason}}]->(dst)
536
+ """,
537
+ {"rows": rows},
538
+ )
458
539
 
459
540
  def add_reference(self, rel: str, src_label: str, src_id: str, dst_label: str, dst_id: str, confidence: float) -> None:
460
541
  if rel not in {"REFERENCES_TYPE", "IMPLEMENTS", "OVERRIDES"}:
@@ -465,15 +546,27 @@ class GraphStore:
465
546
  )
466
547
  self.execute(query, {"src_id": src_id, "dst_id": dst_id, "confidence": confidence})
467
548
 
468
- def add_references_batch(self, records: list[dict[str, Any]]) -> None:
469
- for record in records:
470
- self.add_reference(
471
- rel=record["rel"],
472
- src_label=record["src_label"],
473
- src_id=record["src_id"],
474
- dst_label=record["dst_label"],
475
- dst_id=record["dst_id"],
476
- confidence=float(record["confidence"]),
549
+ def add_references_batch(self, records: list[dict[str, Any]], create_mode: bool = False) -> None:
550
+ if not records:
551
+ return
552
+ # Group by (rel, src_label, dst_label) so each group can use a single UNWIND.
553
+ from collections import defaultdict
554
+ groups: dict[tuple, list[dict]] = defaultdict(list)
555
+ for rec in records:
556
+ rel = rec.get("rel")
557
+ if rel not in {"REFERENCES_TYPE", "IMPLEMENTS", "OVERRIDES"}:
558
+ continue
559
+ groups[(rel, rec["src_label"], rec["dst_label"])].append(
560
+ {"src_id": rec["src_id"], "dst_id": rec["dst_id"],
561
+ "confidence": float(rec["confidence"])}
562
+ )
563
+ op = "CREATE" if create_mode else "MERGE"
564
+ for (rel, src_label, dst_label), batch in groups.items():
565
+ self.execute(
566
+ f"UNWIND $rows AS row "
567
+ f"MATCH (s:{src_label} {{id: row.src_id}}), (d:{dst_label} {{id: row.dst_id}}) "
568
+ f"{op} (s)-[:{rel} {{confidence: row.confidence}}]->(d)",
569
+ {"rows": batch},
477
570
  )
478
571
 
479
572
  def add_injection(
@@ -578,49 +671,48 @@ class GraphStore:
578
671
  self.clear_file(f_id)
579
672
  self._recycle_conn()
580
673
 
581
- # 2. Upsert file record
674
+ # 2–5. Write all nodes with CREATE (clear_file above guarantees absence).
582
675
  with self.transaction():
583
- self.upsert_file(f_id, path, project_id, is_test, digest)
676
+ self.upsert_files_batch(
677
+ [{"id": f_id, "path": path, "project_id": project_id,
678
+ "is_test": is_test, "hash": digest}],
679
+ create_mode=True,
680
+ )
584
681
  self._recycle_conn()
585
682
 
586
- # 3. Upsert classes (typically very few per file)
587
683
  if classes:
588
684
  with self.transaction():
589
- self.upsert_classes_batch(classes)
685
+ self.upsert_classes_batch(classes, create_mode=True)
590
686
  self._recycle_conn()
591
687
 
592
- # 4. Upsert methods in sub-batches of 200
593
688
  for i in range(0, len(methods), self._FILE_METHOD_SUB_BATCH):
594
- batch = methods[i: i + self._FILE_METHOD_SUB_BATCH]
595
689
  with self.transaction():
596
- self.upsert_methods_batch(batch)
690
+ self.upsert_methods_batch(methods[i: i + self._FILE_METHOD_SUB_BATCH], create_mode=True)
597
691
  self._recycle_conn()
598
692
 
599
- # 5. Upsert symbols in sub-batches of 200
600
693
  for i in range(0, len(symbols), self._FILE_SYMBOL_SUB_BATCH):
601
- batch = symbols[i: i + self._FILE_SYMBOL_SUB_BATCH]
602
694
  with self.transaction():
603
- self.upsert_symbols_batch(batch)
695
+ self.upsert_symbols_batch(symbols[i: i + self._FILE_SYMBOL_SUB_BATCH], create_mode=True)
604
696
  self._recycle_conn()
605
697
 
606
- # 6. Write call edges in sub-batches of 500
698
+ # 6. Write call edges in sub-batches of 500 (normalise key names to match add_calls_batch)
607
699
  for i in range(0, len(calls), self._FILE_CALL_SUB_BATCH):
608
700
  batch = calls[i: i + self._FILE_CALL_SUB_BATCH]
701
+ normalised = [
702
+ {"source_id": rec["src"], "target_id": rec["dst"],
703
+ "confidence": float(rec.get("confidence", 0.5)),
704
+ "reason": rec.get("reason", "unknown")}
705
+ for rec in batch
706
+ ]
609
707
  with self.transaction():
610
- for rec in batch:
611
- self.add_call(
612
- source_id=rec["src"],
613
- target_id=rec["dst"],
614
- confidence=float(rec.get("confidence", 0.5)),
615
- reason=rec.get("reason", "unknown"),
616
- )
708
+ self.add_calls_batch(normalised, create_mode=True)
617
709
  self._recycle_conn()
618
710
 
619
711
  # 7. Write type relations (IMPLEMENTS, OVERRIDES, REFERENCES_TYPE)
620
712
  for i in range(0, len(type_rels), self._FILE_REL_SUB_BATCH):
621
713
  batch = type_rels[i: i + self._FILE_REL_SUB_BATCH]
622
714
  with self.transaction():
623
- self.add_references_batch(batch)
715
+ self.add_references_batch(batch, create_mode=True)
624
716
  self._recycle_conn()
625
717
 
626
718
  def clear_file_by_path(self, project_id: str, project_path: str, file_path: str) -> None:
@@ -454,30 +454,31 @@ class JavaIndexer:
454
454
  class_methods[c_id][method.signature] = m_id
455
455
  files_indexed += 1
456
456
 
457
- # Split writes into smaller transactions and recycle between each
458
- # to prevent Kuzu WAL from exhausting the buffer pool on large
459
- # incremental re-indexes (GH feedback: 1,604-file OOM).
457
+ # For incremental re-indexes, clear files in bulk first, then use
458
+ # CREATE (not MERGE) for all writes — after the clear, the nodes are
459
+ # guaranteed absent, so we skip the costly existence check that MERGE pays.
460
460
  if not full:
461
- for clear_sub in self._chunked(file_rows, 10):
461
+ for clear_sub in self._chunked([r["id"] for r in file_rows], 100):
462
462
  with self.store.transaction():
463
- for row in clear_sub:
464
- self.store.clear_file(row["id"])
463
+ self.store.clear_files_batch(clear_sub)
465
464
  self.store._recycle_conn()
465
+ # Always CREATE — full indexes clear via clear_project, incremental
466
+ # indexes clear files in batches above, so nodes are absent in both paths.
466
467
  with self.store.transaction():
467
- self.store.upsert_files_batch(file_rows)
468
+ self.store.upsert_files_batch(file_rows, create_mode=True)
468
469
  self.store._recycle_conn()
469
470
  with self.store.transaction():
470
- self.store.upsert_classes_batch(class_rows)
471
+ self.store.upsert_classes_batch(class_rows, create_mode=True)
471
472
  self.store._recycle_conn()
472
473
  _METHOD_SUB_BATCH = 200
473
474
  for method_sub in self._chunked(method_rows, _METHOD_SUB_BATCH):
474
475
  with self.store.transaction():
475
- self.store.upsert_methods_batch(method_sub)
476
+ self.store.upsert_methods_batch(method_sub, create_mode=True)
476
477
  self.store._recycle_conn()
477
478
  _SYMBOL_SUB_BATCH = 200
478
479
  for symbol_sub in self._chunked(symbol_rows, _SYMBOL_SUB_BATCH):
479
480
  with self.store.transaction():
480
- self.store.upsert_symbols_batch(symbol_sub)
481
+ self.store.upsert_symbols_batch(symbol_sub, create_mode=True)
481
482
  self.store._recycle_conn()
482
483
 
483
484
  self._emit(progress, "resolve_calls_start")
@@ -493,7 +494,7 @@ class JavaIndexer:
493
494
  )
494
495
  for call_chunk in self._chunked(call_rows, edge_batch_size):
495
496
  with self.store.transaction():
496
- self.store.add_calls_batch(call_chunk)
497
+ self.store.add_calls_batch(call_chunk, create_mode=True)
497
498
  calls_resolved += len(call_chunk)
498
499
  self.store._recycle_conn()
499
500
  self._emit(progress, "resolve_calls_progress", calls_resolved=calls_resolved)
@@ -508,7 +509,7 @@ class JavaIndexer:
508
509
  )
509
510
  for rel_chunk in self._chunked(type_rows, edge_batch_size):
510
511
  with self.store.transaction():
511
- self.store.add_references_batch(rel_chunk)
512
+ self.store.add_references_batch(rel_chunk, create_mode=True)
512
513
  type_relationships += len(rel_chunk)
513
514
  self.store._recycle_conn()
514
515
  self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.9.1
3
+ Version: 0.9.3
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.9.1"
7
+ version = "0.9.3"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes