java-codebase-rag 0.5.3__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ast_java.py CHANGED
@@ -13,6 +13,7 @@ Python with no tree-sitter dependency.
13
13
  from __future__ import annotations
14
14
 
15
15
  import posixpath
16
+ import sys
16
17
  from dataclasses import dataclass, field
17
18
  from functools import lru_cache
18
19
  from typing import Iterable
@@ -325,7 +326,7 @@ class RouteDecl:
325
326
  filename: str
326
327
  start_line: int
327
328
  end_line: int
328
- # brownfield / B2a composition (graph_enrich.resolve_routes_for_method); not a Kuzu column.
329
+ # brownfield / B2a composition (graph_enrich.resolve_routes_for_method); not a graph column.
329
330
  route_source_layer: str = "builtin"
330
331
 
331
332
 
@@ -1642,9 +1643,17 @@ def _parse_codebase_http_client_annotation(
1642
1643
  pairs, _ = _annotation_kv_nodes(ann, src)
1643
1644
  client_kind = ""
1644
1645
  if "clientKind" in pairs:
1645
- val, _kind = _annotation_value(pairs["clientKind"], src)
1646
- if val and _kind == "enum":
1647
- client_kind = str(val)
1646
+ val, vkind = _annotation_value(pairs["clientKind"], src)
1647
+ if val and vkind == "enum":
1648
+ kind_val = str(val)
1649
+ from java_ontology import VALID_CLIENT_KINDS # deferred: java_ontology imports ast_java
1650
+ if kind_val in VALID_CLIENT_KINDS:
1651
+ client_kind = kind_val
1652
+ else:
1653
+ print(
1654
+ f"[lancedb-mcp] CodebaseHttpClient: invalid clientKind {kind_val!r} — ignored",
1655
+ file=sys.stderr,
1656
+ )
1648
1657
  target_service = ""
1649
1658
  if "targetService" in pairs:
1650
1659
  atoms = _string_value_atoms(pairs["targetService"], src, ctx)
@@ -1714,9 +1723,17 @@ def _parse_codebase_producer_annotation(
1714
1723
  client_kind = "kafka_send"
1715
1724
  kind_node = pairs.get("producerKind") or pairs.get("clientKind")
1716
1725
  if kind_node is not None:
1717
- val, _kind = _annotation_value(kind_node, src)
1718
- if val and _kind == "enum":
1719
- client_kind = str(val)
1726
+ val, vkind = _annotation_value(kind_node, src)
1727
+ if val and vkind == "enum":
1728
+ kind_val = str(val)
1729
+ from java_ontology import VALID_PRODUCER_KINDS # deferred: java_ontology imports ast_java
1730
+ if kind_val in VALID_PRODUCER_KINDS:
1731
+ client_kind = kind_val
1732
+ else:
1733
+ print(
1734
+ f"[lancedb-mcp] CodebaseProducer: invalid producerKind {kind_val!r} — ignored",
1735
+ file=sys.stderr,
1736
+ )
1720
1737
  topic = ""
1721
1738
  if "topic" in pairs:
1722
1739
  atoms = _string_value_atoms(pairs["topic"], src, ctx)
build_ast_graph.py CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- """Four-pass AST-derived Knowledge Base builder (Kuzu).
2
+ """Four-pass AST-derived Knowledge Base builder (LadybugDB).
3
3
 
4
4
  Walks a Java source tree with `tree_sitter_java`, writes a deterministic graph of:
5
5
  Symbol nodes: package, file, class, interface, enum, record, annotation, method, constructor
@@ -13,14 +13,14 @@ Pass 3 resolves static call sites into confidence-scored CALLS edges and DECLARE
13
13
  Pass 4 emits Route rows plus Symbol→Route EXPOSES edges from literal annotation metadata.
14
14
 
15
15
  Usage:
16
- build_ast_graph.py --source-root <repo> [--kuzu-path <path>] [--verbose]
16
+ build_ast_graph.py --source-root <repo> [--ladybug-path <path>] [--verbose]
17
17
 
18
- Default Kuzu database path resolution order:
19
- --kuzu-path CLI arg (path passed to kuzu.Database(...))
20
- JAVA_CODEBASE_RAG_INDEX_DIR/code_graph.kuzu (if set and local)
21
- ./.java-codebase-rag/code_graph.kuzu under cwd
18
+ Default LadybugDB database path resolution order:
19
+ --ladybug-path CLI arg (path passed to ladybug.Database(...))
20
+ JAVA_CODEBASE_RAG_INDEX_DIR/code_graph.lbug (if set and local)
21
+ ./.java-codebase-rag/code_graph.lbug under cwd
22
22
 
23
- The Kuzu DB is dropped and rebuilt on every run (Phase 1 is a full rebuild).
23
+ The LadybugDB database is dropped and rebuilt on every run (Phase 1 is a full rebuild).
24
24
  """
25
25
  from __future__ import annotations
26
26
 
@@ -37,7 +37,7 @@ from collections import defaultdict
37
37
  from dataclasses import asdict, dataclass, field, replace
38
38
  from pathlib import Path
39
39
 
40
- import kuzu
40
+ import ladybug
41
41
 
42
42
  from ast_java import (
43
43
  ONTOLOGY_VERSION,
@@ -76,7 +76,7 @@ _PASS3_START = "[graph] pass 3 · call resolution (outgoing calls per site)"
76
76
  _PASS4_START = "[graph] pass 4 · route and EXPOSES extraction"
77
77
  _PASS5_START = "[graph] pass 5 · imperative HTTP_CALLS / ASYNC_CALLS edges"
78
78
  _PASS6_START = "[graph] pass 6 · cross-service call-edge matching"
79
- _WRITE_START = "[graph] writing · Kuzu graph to disk"
79
+ _WRITE_START = "[graph] writing · LadybugDB graph to disk"
80
80
 
81
81
 
82
82
  def _verbose_stderr_line(content: str) -> None:
@@ -230,7 +230,7 @@ class RouteRow:
230
230
  start_line: int
231
231
  end_line: int
232
232
  resolved: bool
233
- # B2a brownfield composition (PR-A3); not persisted on Kuzu `Route` nodes.
233
+ # B2a brownfield composition (PR-A3); not persisted on LadybugDB `Route` nodes.
234
234
  source_layer: str = "builtin"
235
235
 
236
236
 
@@ -499,8 +499,8 @@ def _hash_file(abs_path: Path) -> str:
499
499
  # ---------- incremental rebuild helpers ----------
500
500
 
501
501
 
502
- def _load_existing_types(conn: kuzu.Connection, tables: GraphTables, exclude_files: set[str] | None = None) -> None:
503
- """Load type entries from existing Kuzu graph into tables for cross-file resolution.
502
+ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_files: set[str] | None = None) -> None:
503
+ """Load type entries from existing LadybugDB graph into tables for cross-file resolution.
504
504
 
505
505
  When exclude_files is provided, only load types from files NOT in the set.
506
506
  """
@@ -543,8 +543,8 @@ def _load_existing_types(conn: kuzu.Connection, tables: GraphTables, exclude_fil
543
543
  tables.by_package.setdefault(package, []).append(entry)
544
544
 
545
545
 
546
- def _load_existing_members(conn: kuzu.Connection, tables: GraphTables, exclude_files: set[str] | None = None) -> None:
547
- """Load member entries from existing Kuzu graph into tables.members.
546
+ def _load_existing_members(conn: ladybug.Connection, tables: GraphTables, exclude_files: set[str] | None = None) -> None:
547
+ """Load member entries from existing LadybugDB graph into tables.members.
548
548
 
549
549
  When exclude_files is provided, only load members from files NOT in the set.
550
550
  """
@@ -588,15 +588,25 @@ def _load_existing_members(conn: kuzu.Connection, tables: GraphTables, exclude_f
588
588
  ))
589
589
 
590
590
 
591
- def _find_dependents(conn: kuzu.Connection, changed_node_ids: set[str]) -> set[str]:
591
+ # Every Symbol->Symbol REL TABLE type in the graph schema. A Symbol node can
592
+ # only have an INCOMING edge of one of these types, so `_find_dependents` MUST
593
+ # walk all of them: that completeness is what makes the changed-node DETACH
594
+ # DELETE in `_delete_file_scope` Phase 3 safe (every real caller of a changed
595
+ # node is pulled into scope, so Phase 1 removes the edge before the node delete).
596
+ # If you add a new Symbol->Symbol edge type to the schema, add it here too —
597
+ # otherwise changed-node deletion would silently drop its surviving edges.
598
+ _SYMBOL_TO_SYMBOL_EDGE_TYPES = (
599
+ "EXTENDS", "IMPLEMENTS", "INJECTS", "CALLS", "DECLARES", "OVERRIDES",
600
+ )
601
+
602
+
603
+ def _find_dependents(conn: ladybug.Connection, changed_node_ids: set[str]) -> set[str]:
592
604
  """Find files whose nodes have edges pointing into changed nodes. Returns set of filenames."""
593
605
  dependent_files: set[str] = set()
594
606
 
595
- # Query each Symbol-to-Symbol edge table for incoming edges
596
- edge_types = ["EXTENDS", "IMPLEMENTS", "INJECTS", "CALLS", "DECLARES", "OVERRIDES"]
597
607
  params = {"changed_ids": list(changed_node_ids)}
598
608
 
599
- for edge_type in edge_types:
609
+ for edge_type in _SYMBOL_TO_SYMBOL_EDGE_TYPES:
600
610
  query = f"""
601
611
  MATCH (src:Symbol)-[e:{edge_type}]->(dst:Symbol)
602
612
  WHERE dst.id IN $changed_ids
@@ -612,23 +622,52 @@ def _find_dependents(conn: kuzu.Connection, changed_node_ids: set[str]) -> set[s
612
622
  return dependent_files
613
623
 
614
624
 
615
- def _delete_file_scope(conn: kuzu.Connection, filenames: set[str]) -> None:
616
- """Delete all nodes and edges originating from the given files.
617
-
618
- Skip phantom nodes (filename=""). Deletes ALL edge types in Phase 1,
619
- then nodes in subsequent phases. Route/Client/Producer nodes use
620
- DETACH DELETE as a safety net for any edges missed in Phase 1.
621
-
622
- Edges are deleted in batch across all filenames first to avoid Kuzu
623
- "has connected edges" errors when edges from one file point to nodes
624
- in another file within the same scope.
625
+ def _delete_file_scope(
626
+ conn: ladybug.Connection,
627
+ changed_files: set[str],
628
+ dependent_files: set[str],
629
+ ) -> None:
630
+ """Delete nodes and edges for a scope split into changed vs dependent files.
631
+
632
+ ``changed_files`` are files whose content actually changed (added/modified/
633
+ removed): their Symbol nodes are deleted (and re-created by ``_scoped_write``).
634
+ ``dependent_files`` are files pulled in only to re-resolve their OUTGOING
635
+ edges against the changed nodes; their node definitions did not change, so
636
+ their nodes are deliberately PRESERVED (they re-MERGE in place on the same
637
+ deterministic ``symbol_id``). Phantom nodes (filename="") are skipped.
638
+
639
+ Why dependents are preserved (issue #305): the orchestrator computes
640
+ dependents from the *changed* nodes only, so a dependent file's node can
641
+ have an incoming CALLS edge from an out-of-scope caller. The ``source_file``
642
+ on every Symbol->Symbol edge is the CALLER's file (pinned by
643
+ ``test_source_file_value_matches_symbol_filename``), so Phase 1 below only
644
+ deletes edges ORIGINATING in scope; incoming edges from out-of-scope callers
645
+ survive. If we then tried to DELETE the dependent node, LadybugDB rejects it
646
+ ("Node ... has connected edges in table CALLS in the bwd direction, ...
647
+ Please delete the edges first or try DETACH DELETE") and the rebuild falls
648
+ back to a full rebuild. A naive fix (DETACH DELETE on dependents, or an
649
+ extra incoming-edge pass) would silence the crash but permanently drop those
650
+ out-of-scope edges, corrupting the graph. Preserving dependent nodes keeps
651
+ both the nodes and their incoming edges intact.
652
+
653
+ Phase 1 deletes ALL edge types across the whole scope (changed + dependent)
654
+ first to avoid LadybugDB "has connected edges" errors when edges from one
655
+ file point to nodes in another file within the same scope. Route/Client/
656
+ Producer nodes use DETACH DELETE as a safety net for any edges missed in
657
+ Phase 1.
625
658
  """
626
- filename_list = list(filenames)
627
-
628
- # Phase 1: Delete ALL edges from ALL scope files at once.
629
- # This avoids ordering issues where file A has an edge from file B
630
- # pointing into it; if we delete A's nodes before B's edges, Kuzu
631
- # raises "has connected edges" errors.
659
+ scope_files = changed_files | dependent_files
660
+ scope_list = list(scope_files)
661
+ changed_list = list(changed_files)
662
+
663
+ # Phase 1: Delete ALL edges ORIGINATING from any scope file (changed +
664
+ # dependent). Because `source_file` is the caller's file, this deletes edges
665
+ # whose source is in scope (including dependents' outgoing edges to changed
666
+ # nodes) while intentionally leaving incoming edges from out-of-scope callers
667
+ # intact — those must survive so the dependent nodes below can be preserved.
668
+ # This list is a superset of `_SYMBOL_TO_SYMBOL_EDGE_TYPES` (it also covers
669
+ # Symbol->Route/Client/Producer/UCS and Client/Producer->Route edges); keep
670
+ # both lists in sync with the schema.
632
671
  edge_tables = [
633
672
  "EXTENDS", "IMPLEMENTS", "INJECTS", "CALLS", "DECLARES", "OVERRIDES",
634
673
  "UNRESOLVED_AT", "EXPOSES", "DECLARES_CLIENT", "DECLARES_PRODUCER",
@@ -640,7 +679,7 @@ def _delete_file_scope(conn: kuzu.Connection, filenames: set[str]) -> None:
640
679
  WHERE e.source_file IN $filenames
641
680
  DELETE e
642
681
  """
643
- conn.execute(query, {"filenames": filename_list})
682
+ conn.execute(query, {"filenames": scope_list})
644
683
 
645
684
  # Phase 2: Collect all Symbol node IDs for UnresolvedCallSite cleanup.
646
685
  symbol_ids: list[str] = []
@@ -649,12 +688,15 @@ def _delete_file_scope(conn: kuzu.Connection, filenames: set[str]) -> None:
649
688
  WHERE s.filename IN $filenames
650
689
  RETURN s.id
651
690
  """
652
- result = conn.execute(symbol_ids_query, {"filenames": filename_list})
691
+ result = conn.execute(symbol_ids_query, {"filenames": scope_list})
653
692
  while result.has_next():
654
693
  row = result.get_next()
655
694
  symbol_ids.append(row[0])
656
695
 
657
- # Delete UnresolvedCallSite nodes whose caller_id is in the collected set
696
+ # Delete UnresolvedCallSite nodes whose caller_id is in the collected set.
697
+ # These are children of scope symbols (including preserved dependents);
698
+ # deleting them is safe because every scope file — dependents included — is
699
+ # reprocessed and re-emits its UnresolvedCallSite nodes in `_scoped_write`.
658
700
  if symbol_ids:
659
701
  unresolved_query = """
660
702
  MATCH (u:UnresolvedCallSite)
@@ -663,27 +705,37 @@ def _delete_file_scope(conn: kuzu.Connection, filenames: set[str]) -> None:
663
705
  """
664
706
  conn.execute(unresolved_query, {"symbol_ids": symbol_ids})
665
707
 
666
- # Phase 3: Delete Symbol nodes.
708
+ # Phase 3: Delete Symbol nodes ONLY for changed files (not dependents).
709
+ # Dependent-file nodes are deliberately PRESERVED so their incoming edges
710
+ # from out-of-scope callers survive; the dependents are re-MERGEd in place
711
+ # by `_scoped_write` on the same deterministic node id. A changed node's
712
+ # real incoming edges all come from dependent files (callers pulled into
713
+ # scope by `_find_dependents`, which walks every type in
714
+ # `_SYMBOL_TO_SYMBOL_EDGE_TYPES`), so Phase 1 already removed them and the
715
+ # dependents re-emit them when reprocessed. DETACH DELETE is only a safety
716
+ # net for the rare surviving edge whose source was NOT pulled into scope
717
+ # (e.g. a phantom caller with filename="", which `_find_dependents` skips);
718
+ # such an edge is stale once the node is recreated, so dropping it is fine.
667
719
  delete_symbols_query = """
668
720
  MATCH (s:Symbol)
669
721
  WHERE s.filename IN $filenames
670
- DELETE s
722
+ DETACH DELETE s
671
723
  """
672
- conn.execute(delete_symbols_query, {"filenames": filename_list})
724
+ conn.execute(delete_symbols_query, {"filenames": changed_list})
673
725
 
674
726
  # Phase 4: Delete Route, Client, Producer nodes.
675
727
  # Use DETACH DELETE as a safety net in case any edges were missed in Phase 1.
676
728
  for label in ["Route", "Client", "Producer"]:
677
729
  conn.execute(
678
730
  f"MATCH (n:{label}) WHERE n.filename IN $filenames DETACH DELETE n",
679
- {"filenames": filename_list},
731
+ {"filenames": scope_list},
680
732
  )
681
733
 
682
734
 
683
- def _scoped_write(conn: kuzu.Connection, tables: GraphTables, *, project_root: Path, meta_chain: dict[str, frozenset[str]] | None) -> None:
684
- """Write nodes and edges to existing Kuzu database without drop/create schema.
735
+ def _scoped_write(conn: ladybug.Connection, tables: GraphTables, *, project_root: Path, meta_chain: dict[str, frozenset[str]] | None) -> None:
736
+ """Write nodes and edges to existing LadybugDB database without drop/create schema.
685
737
 
686
- Like write_kuzu() but without _drop_all()/_create_schema(). The caller is
738
+ Like write_ladybug() but without _drop_all()/_create_schema(). The caller is
687
739
  responsible for calling _populate_declares_rows() and _populate_overrides_rows()
688
740
  before invoking this function.
689
741
 
@@ -715,13 +767,13 @@ def _scoped_write(conn: kuzu.Connection, tables: GraphTables, *, project_root: P
715
767
 
716
768
 
717
769
  def _write_nodes_merge(
718
- conn: kuzu.Connection,
770
+ conn: ladybug.Connection,
719
771
  tables: GraphTables,
720
772
  *,
721
773
  project_root: Path,
722
774
  meta_chain: dict[str, frozenset[str]] | None,
723
775
  ) -> None:
724
- """Write nodes to existing Kuzu database using MERGE to handle existing nodes."""
776
+ """Write nodes to existing LadybugDB database using MERGE to handle existing nodes."""
725
777
  _write_nodes_impl(conn, tables, project_root=project_root, meta_chain=meta_chain, symbol_query=_MERGE_SYMBOL)
726
778
 
727
779
 
@@ -2664,7 +2716,7 @@ def pass6_match_edges(
2664
2716
  )
2665
2717
 
2666
2718
 
2667
- # ---------- Kuzu write ----------
2719
+ # ---------- LadybugDB write ----------
2668
2720
 
2669
2721
 
2670
2722
  _SCHEMA_NODE = (
@@ -2685,7 +2737,7 @@ _SCHEMA_META = (
2685
2737
  "ontology_version INT64, built_at INT64, source_root STRING, "
2686
2738
  "counts_json STRING, parse_errors INT64, "
2687
2739
  "routes_total INT64, exposes_total INT64, "
2688
- # JSON map {framework: count}; STRING avoids Kuzu Python MAP↔STRUCT binder mismatch.
2740
+ # JSON map {framework: count}; STRING avoids LadybugDB Python MAP↔STRUCT binder mismatch.
2689
2741
  "routes_by_framework STRING, "
2690
2742
  "routes_resolved_pct DOUBLE, "
2691
2743
  "routes_from_brownfield_pct DOUBLE, "
@@ -2798,7 +2850,7 @@ _SCHEMA_ASYNC_CALLS = (
2798
2850
  )
2799
2851
 
2800
2852
 
2801
- def _drop_all(conn: kuzu.Connection) -> None:
2853
+ def _drop_all(conn: ladybug.Connection) -> None:
2802
2854
  for stmt in (
2803
2855
  "DROP TABLE IF EXISTS DECLARES_CLIENT",
2804
2856
  "DROP TABLE IF EXISTS DECLARES_PRODUCER",
@@ -2825,7 +2877,7 @@ def _drop_all(conn: kuzu.Connection) -> None:
2825
2877
  pass
2826
2878
 
2827
2879
 
2828
- def _create_schema(conn: kuzu.Connection) -> None:
2880
+ def _create_schema(conn: ladybug.Connection) -> None:
2829
2881
  for stmt in (
2830
2882
  _SCHEMA_NODE,
2831
2883
  _SCHEMA_UNRESOLVED_CALL_SITE,
@@ -2885,7 +2937,7 @@ _MERGE_SYMBOL = (
2885
2937
 
2886
2938
 
2887
2939
  def _write_nodes_impl(
2888
- conn: kuzu.Connection,
2940
+ conn: ladybug.Connection,
2889
2941
  tables: GraphTables,
2890
2942
  *,
2891
2943
  project_root: Path,
@@ -2952,7 +3004,7 @@ def _write_nodes_impl(
2952
3004
 
2953
3005
 
2954
3006
  def _write_nodes(
2955
- conn: kuzu.Connection,
3007
+ conn: ladybug.Connection,
2956
3008
  tables: GraphTables,
2957
3009
  *,
2958
3010
  project_root: Path,
@@ -3064,7 +3116,7 @@ def _direct_supertype_ids(tables: GraphTables, type_id: str) -> list[str]:
3064
3116
  def _populate_overrides_rows(tables: GraphTables) -> None:
3065
3117
  """Materialize (subtype_method)-[:OVERRIDES]->(supertype_method) for one supertype hop.
3066
3118
 
3067
- Matches ``KuzuGraph.override_axis_rollup_for`` (direct ``IMPLEMENTS`` / ``EXTENDS``
3119
+ Matches ``LadybugDBGraph.override_axis_rollup_for`` (direct ``IMPLEMENTS`` / ``EXTENDS``
3068
3120
  only, same ``signature``, distinct method ids, non-static instance methods).
3069
3121
  """
3070
3122
  by_declaring_type: dict[str, list[MemberEntry]] = defaultdict(list)
@@ -3099,7 +3151,7 @@ def _build_file_by_node_id(tables: GraphTables) -> dict[str, str]:
3099
3151
  return lookup
3100
3152
 
3101
3153
 
3102
- def _write_edges(conn: kuzu.Connection, tables: GraphTables, _file_by_node_id: dict[str, str] | None = None) -> None:
3154
+ def _write_edges(conn: ladybug.Connection, tables: GraphTables, _file_by_node_id: dict[str, str] | None = None) -> None:
3103
3155
  # Build node_id -> file_path lookup for source_file resolution.
3104
3156
  if _file_by_node_id is None:
3105
3157
  _file_by_node_id = _build_file_by_node_id(tables)
@@ -3193,7 +3245,7 @@ def _write_edges(conn: kuzu.Connection, tables: GraphTables, _file_by_node_id: d
3193
3245
  })
3194
3246
 
3195
3247
 
3196
- def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables, _file_by_node_id: dict[str, str] | None = None) -> None:
3248
+ def _write_routes_and_exposes(conn: ladybug.Connection, tables: GraphTables, _file_by_node_id: dict[str, str] | None = None) -> None:
3197
3249
  # Build node_id -> file_path lookup for source_file resolution (for Symbol sources).
3198
3250
  if _file_by_node_id is None:
3199
3251
  _file_by_node_id = _build_file_by_node_id(tables)
@@ -3276,7 +3328,7 @@ def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables, _file_
3276
3328
  })
3277
3329
 
3278
3330
 
3279
- def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) -> None:
3331
+ def _write_meta(conn: ladybug.Connection, tables: GraphTables, source_root: Path) -> None:
3280
3332
  seen_calls: set[tuple[str, str, int, int]] = set()
3281
3333
  calls_unique = 0
3282
3334
  for row in tables.calls_rows:
@@ -3392,12 +3444,12 @@ def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) -
3392
3444
 
3393
3445
  def incremental_rebuild(
3394
3446
  source_root: Path,
3395
- kuzu_path: Path,
3447
+ ladybug_path: Path,
3396
3448
  *,
3397
3449
  verbose: bool,
3398
3450
  expansion_cap: int = 50,
3399
3451
  ) -> IncrementalResult:
3400
- """Incrementally rebuild the Kuzu graph, processing only changed files and their dependents.
3452
+ """Incrementally rebuild the LadybugDB graph, processing only changed files and their dependents.
3401
3453
 
3402
3454
  Returns IncrementalResult with statistics about the rebuild.
3403
3455
  Falls back to full rebuild if:
@@ -3409,7 +3461,7 @@ def incremental_rebuild(
3409
3461
  t_start = time.time()
3410
3462
 
3411
3463
  # Step 1: Load existing graph and detect changes
3412
- if not kuzu_path.exists():
3464
+ if not ladybug_path.exists():
3413
3465
  if verbose:
3414
3466
  _verbose_stderr_line("[increment] no existing graph; falling back to full rebuild")
3415
3467
  # Fall back to full rebuild
@@ -3420,7 +3472,7 @@ def incremental_rebuild(
3420
3472
  pass4_routes(tables, asts, source_root=source_root, verbose=verbose)
3421
3473
  pass5_imperative_edges(tables, asts, source_root=source_root, verbose=verbose)
3422
3474
  pass6_match_edges(tables, verbose=verbose)
3423
- write_kuzu(kuzu_path, tables, source_root=source_root, verbose=verbose)
3475
+ write_ladybug(ladybug_path, tables, source_root=source_root, verbose=verbose)
3424
3476
 
3425
3477
  return IncrementalResult(
3426
3478
  mode="full_fallback",
@@ -3431,8 +3483,8 @@ def incremental_rebuild(
3431
3483
  elapsed_sec=time.time() - t_start,
3432
3484
  )
3433
3485
 
3434
- db = kuzu.Database(str(kuzu_path))
3435
- conn = kuzu.Connection(db)
3486
+ db = ladybug.Database(str(ladybug_path))
3487
+ conn = ladybug.Connection(db)
3436
3488
 
3437
3489
  # Check ontology version
3438
3490
  try:
@@ -3445,7 +3497,7 @@ def incremental_rebuild(
3445
3497
  _verbose_stderr_line(f"[increment] ontology version {version} < 17; falling back to full rebuild")
3446
3498
  conn.close()
3447
3499
  del conn, db
3448
- return _fallback_to_full(source_root, kuzu_path, verbose, t_start)
3500
+ return _fallback_to_full(source_root, ladybug_path, verbose, t_start)
3449
3501
  except Exception as e:
3450
3502
  if verbose:
3451
3503
  _verbose_stderr_line(f"[increment] failed to read ontology version: {e}; falling back to full rebuild")
@@ -3454,9 +3506,9 @@ def incremental_rebuild(
3454
3506
  except Exception:
3455
3507
  pass
3456
3508
  del conn, db
3457
- return _fallback_to_full(source_root, kuzu_path, verbose, t_start)
3509
+ return _fallback_to_full(source_root, ladybug_path, verbose, t_start)
3458
3510
 
3459
- index_dir = kuzu_path.parent
3511
+ index_dir = ladybug_path.parent
3460
3512
  tracker = FileHashTracker(index_dir)
3461
3513
  tracker.load()
3462
3514
 
@@ -3488,7 +3540,7 @@ def incremental_rebuild(
3488
3540
  _verbose_stderr_line("[increment] crash marker exists; falling back to full rebuild")
3489
3541
  conn.close()
3490
3542
  crash_marker_path.unlink(missing_ok=True)
3491
- return _fallback_to_full(source_root, kuzu_path, verbose, t_start)
3543
+ return _fallback_to_full(source_root, ladybug_path, verbose, t_start)
3492
3544
 
3493
3545
  # Write crash marker
3494
3546
  crash_marker_path.write_text("", encoding="utf-8")
@@ -3516,7 +3568,7 @@ def incremental_rebuild(
3516
3568
  _verbose_stderr_line(f"[increment] dependent expansion cap ({expansion_cap}) exceeded ({len(scope_files)} files); falling back to full rebuild")
3517
3569
  conn.close()
3518
3570
  crash_marker_path.unlink(missing_ok=True)
3519
- return _fallback_to_full(source_root, kuzu_path, verbose, t_start)
3571
+ return _fallback_to_full(source_root, ladybug_path, verbose, t_start)
3520
3572
 
3521
3573
  if verbose:
3522
3574
  _verbose_stderr_line(f"[increment] processing {len(scope_files)} files ({len(changed_files)} changed + {len(dependent_files)} dependents)")
@@ -3524,7 +3576,7 @@ def incremental_rebuild(
3524
3576
  # Step 4: Scoped deletion
3525
3577
  if verbose:
3526
3578
  _verbose_stderr_line("[increment] deleting outdated nodes and edges")
3527
- _delete_file_scope(conn, scope_files)
3579
+ _delete_file_scope(conn, changed_files, dependent_files)
3528
3580
 
3529
3581
  # Force deletion to be applied by running a dummy query
3530
3582
  conn.execute("MATCH (s:Symbol) RETURN count(*)")
@@ -3612,14 +3664,21 @@ def incremental_rebuild(
3612
3664
  _verbose_stderr_line(f"[increment] error during incremental rebuild: {e}; falling back to full rebuild")
3613
3665
  conn.close()
3614
3666
  crash_marker_path.unlink(missing_ok=True)
3615
- return _fallback_to_full(source_root, kuzu_path, verbose, t_start)
3667
+ return _fallback_to_full(source_root, ladybug_path, verbose, t_start)
3668
+
3616
3669
 
3670
+ def _init_hash_tracker(source_root: Path, ladybug_path: Path) -> int:
3671
+ """Initialize hash tracker for all Java files. Returns number of files hashed.
3617
3672
 
3618
- def _init_hash_tracker(source_root: Path, kuzu_path: Path) -> int:
3619
- """Initialize hash tracker for all Java files. Returns number of files hashed."""
3620
- index_dir = kuzu_path.parent
3673
+ Called right after a full graph rebuild (``write_ladybug``), so the store must
3674
+ mirror exactly the files that were just indexed. We deliberately do NOT
3675
+ ``load()`` the existing store: ``update`` re-hashes every current file anyway,
3676
+ and preserving old entries would leave stale hashes for files that no longer
3677
+ exist (deleted or now-ignored). Those ghosts would be re-detected as "removed"
3678
+ on every subsequent ``increment``, sustaining an endless full-rebuild loop.
3679
+ """
3680
+ index_dir = ladybug_path.parent
3621
3681
  tracker = FileHashTracker(index_dir)
3622
- tracker.load()
3623
3682
  ignore = LayeredIgnore(source_root)
3624
3683
  all_files: set[str] = set()
3625
3684
  source_root_resolved = source_root.resolve()
@@ -3635,7 +3694,7 @@ def _init_hash_tracker(source_root: Path, kuzu_path: Path) -> int:
3635
3694
  return len(all_files)
3636
3695
 
3637
3696
 
3638
- def _fallback_to_full(source_root: Path, kuzu_path: Path, verbose: bool, t_start: float) -> IncrementalResult:
3697
+ def _fallback_to_full(source_root: Path, ladybug_path: Path, verbose: bool, t_start: float) -> IncrementalResult:
3639
3698
  """Fallback to full rebuild."""
3640
3699
  tables = GraphTables()
3641
3700
  asts = pass1_parse(source_root, tables, verbose=verbose)
@@ -3644,7 +3703,7 @@ def _fallback_to_full(source_root: Path, kuzu_path: Path, verbose: bool, t_start
3644
3703
  pass4_routes(tables, asts, source_root=source_root, verbose=verbose)
3645
3704
  pass5_imperative_edges(tables, asts, source_root=source_root, verbose=verbose)
3646
3705
  pass6_match_edges(tables, verbose=verbose)
3647
- write_kuzu(kuzu_path, tables, source_root=source_root, verbose=verbose)
3706
+ write_ladybug(ladybug_path, tables, source_root=source_root, verbose=verbose)
3648
3707
 
3649
3708
  return IncrementalResult(
3650
3709
  mode="full_fallback",
@@ -3656,12 +3715,12 @@ def _fallback_to_full(source_root: Path, kuzu_path: Path, verbose: bool, t_start
3656
3715
  )
3657
3716
 
3658
3717
 
3659
- def _write_clients_producers_and_calls(conn: kuzu.Connection, tables: GraphTables) -> None:
3660
- """Write Route, Client, Producer, and cross-service edges to Kuzu.
3718
+ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTables) -> None:
3719
+ """Write Route, Client, Producer, and cross-service edges to LadybugDB.
3661
3720
 
3662
3721
  Used by the incremental rebuild's global pass 5-6 step. Writes phantom
3663
3722
  Route nodes (created by pass5 for cross-service calls) that wouldn't
3664
- otherwise exist in Kuzu.
3723
+ otherwise exist in LadybugDB.
3665
3724
  """
3666
3725
  # Write phantom routes that don't already exist (pass5 creates these for cross-service calls)
3667
3726
  for row in tables.routes_rows:
@@ -3690,7 +3749,7 @@ def _write_clients_producers_and_calls(conn: kuzu.Connection, tables: GraphTable
3690
3749
 
3691
3750
  # Write declares_client edges
3692
3751
  for row in tables.declares_client_rows:
3693
- source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="")).file_path
3752
+ source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="", node_id="")).file_path
3694
3753
  conn.execute(_CREATE_DECLARES_CLIENT, {
3695
3754
  "sid": row.symbol_id,
3696
3755
  "cid": row.client_id,
@@ -3701,7 +3760,7 @@ def _write_clients_producers_and_calls(conn: kuzu.Connection, tables: GraphTable
3701
3760
 
3702
3761
  # Write declares_producer edges
3703
3762
  for row in tables.declares_producer_rows:
3704
- source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="")).file_path
3763
+ source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="", node_id="")).file_path
3705
3764
  conn.execute(_CREATE_DECLARES_PRODUCER, {
3706
3765
  "sid": row.symbol_id,
3707
3766
  "pid": row.producer_id,
@@ -3739,7 +3798,7 @@ def _write_clients_producers_and_calls(conn: kuzu.Connection, tables: GraphTable
3739
3798
  })
3740
3799
 
3741
3800
 
3742
- def write_kuzu(
3801
+ def write_ladybug(
3743
3802
  db_path: Path,
3744
3803
  tables: GraphTables,
3745
3804
  *,
@@ -3755,8 +3814,8 @@ def write_kuzu(
3755
3814
  _verbose_stderr_line(_WRITE_START)
3756
3815
  with _VerbosePassHeartbeats("[graph] writing", verbose=verbose):
3757
3816
  db_path.parent.mkdir(parents=True, exist_ok=True)
3758
- db = kuzu.Database(str(db_path))
3759
- conn = kuzu.Connection(db)
3817
+ db = ladybug.Database(str(db_path))
3818
+ conn = ladybug.Connection(db)
3760
3819
  _drop_all(conn)
3761
3820
  _create_schema(conn)
3762
3821
  t0 = time.time()
@@ -3787,22 +3846,22 @@ def write_kuzu(
3787
3846
  # ---------- CLI ----------
3788
3847
 
3789
3848
 
3790
- def _default_kuzu_path() -> Path:
3849
+ def _default_ladybug_path() -> Path:
3791
3850
  idx = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
3792
3851
  if idx and not idx.startswith(("s3://", "gs://", "az://")):
3793
- return Path(os.path.expanduser(idx.rstrip("/"))) / "code_graph.kuzu"
3794
- return Path.cwd() / ".java-codebase-rag" / "code_graph.kuzu"
3852
+ return Path(os.path.expanduser(idx.rstrip("/"))) / "code_graph.lbug"
3853
+ return Path.cwd() / ".java-codebase-rag" / "code_graph.lbug"
3795
3854
 
3796
3855
 
3797
3856
  def main() -> int:
3798
- parser = argparse.ArgumentParser(description="Build an AST-derived Kuzu graph for Java sources.")
3857
+ parser = argparse.ArgumentParser(description="Build an AST-derived LadybugDB graph for Java sources.")
3799
3858
  parser.add_argument("--source-root", default=None, help="Repository / monorepo root to scan for .java (defaults to current working directory)")
3800
3859
  parser.add_argument(
3801
- "--kuzu-path",
3860
+ "--ladybug-path",
3802
3861
  default=None,
3803
3862
  help=(
3804
- "Kuzu database path (file/dir as used by kuzu.Database; "
3805
- "default: $JAVA_CODEBASE_RAG_INDEX_DIR/code_graph.kuzu or ./.java-codebase-rag/code_graph.kuzu)"
3863
+ "LadybugDB database path (file/dir as used by ladybug.Database; "
3864
+ "default: $JAVA_CODEBASE_RAG_INDEX_DIR/code_graph.lbug or ./.java-codebase-rag/code_graph.lbug)"
3806
3865
  ),
3807
3866
  )
3808
3867
  parser.add_argument("--verbose", action="store_true")
@@ -3814,10 +3873,10 @@ def main() -> int:
3814
3873
  print(f"source-root not a directory: {root}", file=sys.stderr)
3815
3874
  return 2
3816
3875
 
3817
- kuzu_path = Path(args.kuzu_path).expanduser() if args.kuzu_path else _default_kuzu_path()
3876
+ ladybug_path = Path(args.ladybug_path).expanduser() if args.ladybug_path else _default_ladybug_path()
3818
3877
 
3819
3878
  if args.incremental:
3820
- result = incremental_rebuild(root, kuzu_path, verbose=args.verbose)
3879
+ result = incremental_rebuild(root, ladybug_path, verbose=args.verbose)
3821
3880
  print(json.dumps({
3822
3881
  "mode": result.mode,
3823
3882
  "files_changed": result.files_changed,
@@ -3837,9 +3896,9 @@ def main() -> int:
3837
3896
  pass4_routes(tables, asts, source_root=root, verbose=args.verbose)
3838
3897
  pass5_imperative_edges(tables, asts, source_root=root, verbose=args.verbose)
3839
3898
  pass6_match_edges(tables, verbose=args.verbose)
3840
- write_kuzu(kuzu_path, tables, source_root=root, verbose=args.verbose)
3899
+ write_ladybug(ladybug_path, tables, source_root=root, verbose=args.verbose)
3841
3900
  if args.verbose:
3842
- _verbose_stderr_line(f"[graph] done · kuzu at {kuzu_path}")
3901
+ _verbose_stderr_line(f"[graph] done · ladybug at {ladybug_path}")
3843
3902
  return 0
3844
3903
 
3845
3904