java-codebase-rag 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  """Subprocess helpers for cocoindex + graph builder (no heavy ML imports at import time)."""
2
2
  from __future__ import annotations
3
3
 
4
+ import asyncio
4
5
  import os
5
6
  import shutil
6
7
  import subprocess
@@ -11,6 +12,7 @@ from pathlib import Path
11
12
 
12
13
  from java_codebase_rag.cli_format import Spinner, is_noise_line, stderr_is_tty
13
14
  from java_codebase_rag.cli_progress import emit_vectors_finish, emit_vectors_start
15
+ from java_codebase_rag.config import cocoindex_subprocess_env_defaults
14
16
 
15
17
  COCOINDEX_TARGET = "java_index_flow_lancedb.py:JavaCodeIndexLance"
16
18
 
@@ -110,6 +112,57 @@ def run_cocoindex_update(
110
112
  quiet: bool,
111
113
  verbose: bool = True,
112
114
  lance_project_root: Path | None = None,
115
+ ) -> subprocess.CompletedProcess[str]:
116
+ result = _run_cocoindex_update_impl(
117
+ env,
118
+ full_reprocess=full_reprocess,
119
+ quiet=quiet,
120
+ verbose=verbose,
121
+ lance_project_root=lance_project_root,
122
+ )
123
+ # After cocoindex returns exit 0 there are no concurrent writers, so this
124
+ # is the safe window to compact the Lance tables. The flow disabled its
125
+ # in-flight background optimize (see java_index_flow_lancedb.py), making
126
+ # this serialized pass the sole optimizer. Optimize failure does not flip
127
+ # the cocoindex CompletedProcess (a successful index is still usable, just
128
+ # not compacted); the outcome is logged to stderr only.
129
+ if result.returncode == 0:
130
+ _maybe_run_serialized_optimize(env, quiet=quiet)
131
+ return result
132
+
133
+
134
+ def _maybe_run_serialized_optimize(env: dict[str, str], *, quiet: bool) -> None:
135
+ """Resolve the index dir from *env* and run the serialized Lance optimize.
136
+
137
+ The flow's lifespan reads ``JAVA_CODEBASE_RAG_INDEX_DIR`` (set by the CLI /
138
+ config.subprocess_env), so it is guaranteed present when cocoindex ran.
139
+ If it is somehow absent we skip optimize with a stderr warning rather than
140
+ crash — a successful index is still searchable un-compacted.
141
+ """
142
+ idx_raw = env.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
143
+ if not idx_raw:
144
+ print(
145
+ "java-codebase-rag: optimize skipped — JAVA_CODEBASE_RAG_INDEX_DIR "
146
+ "not set in subprocess env",
147
+ file=sys.stderr,
148
+ )
149
+ return
150
+ try:
151
+ from java_codebase_rag.lance_optimize import optimize_lance_tables
152
+
153
+ asyncio.run(optimize_lance_tables(Path(idx_raw), quiet=quiet))
154
+ except Exception as exc:
155
+ # Never crash the CLI on an optimize failure — surface on stderr only.
156
+ print(f"java-codebase-rag: optimize failed: {exc}", file=sys.stderr)
157
+
158
+
159
+ def _run_cocoindex_update_impl(
160
+ env: dict[str, str],
161
+ *,
162
+ full_reprocess: bool,
163
+ quiet: bool,
164
+ verbose: bool = True,
165
+ lance_project_root: Path | None = None,
113
166
  ) -> subprocess.CompletedProcess[str]:
114
167
  exe = cocoindex_bin()
115
168
  if not exe.is_file():
@@ -128,6 +181,11 @@ def run_cocoindex_update(
128
181
  stdout="",
129
182
  stderr=f"java_index_flow_lancedb.py not found under {bd}",
130
183
  )
184
+ # Cap CocoIndex concurrency to avoid EMFILE ("too many open files") under
185
+ # default OS fd limits. See: https://github.com/HumanBean17/java-codebase-rag/issues/306
186
+ env = env.copy()
187
+ for _k, _v in cocoindex_subprocess_env_defaults().items():
188
+ env.setdefault(_k, _v)
131
189
  cmd: list[str] = [str(exe), "update", COCOINDEX_TARGET]
132
190
  if full_reprocess:
133
191
  cmd.extend(["--full-reprocess", "-f"])
@@ -197,7 +255,7 @@ def run_cocoindex_drop(env: dict[str, str], *, quiet: bool) -> subprocess.Comple
197
255
  def run_build_ast_graph(
198
256
  *,
199
257
  source_root: Path,
200
- kuzu_path: Path,
258
+ ladybug_path: Path,
201
259
  verbose: bool,
202
260
  quiet: bool = False,
203
261
  env: dict[str, str] | None = None,
@@ -215,8 +273,8 @@ def run_build_ast_graph(
215
273
  str(builder),
216
274
  "--source-root",
217
275
  str(source_root),
218
- "--kuzu-path",
219
- str(kuzu_path),
276
+ "--ladybug-path",
277
+ str(ladybug_path),
220
278
  ]
221
279
  # Three-tier: --quiet (silent) / default (filtered progress) / --verbose (raw).
222
280
  # Default passes --verbose so the builder emits per-pass progress lines,
@@ -250,7 +308,7 @@ def run_build_ast_graph(
250
308
  def run_incremental_graph(
251
309
  *,
252
310
  source_root: Path,
253
- kuzu_path: Path,
311
+ ladybug_path: Path,
254
312
  verbose: bool,
255
313
  quiet: bool = False,
256
314
  env: dict[str, str] | None = None,
@@ -269,8 +327,8 @@ def run_incremental_graph(
269
327
  str(builder),
270
328
  "--source-root",
271
329
  str(source_root),
272
- "--kuzu-path",
273
- str(kuzu_path),
330
+ "--ladybug-path",
331
+ str(ladybug_path),
274
332
  "--incremental",
275
333
  ]
276
334
  # Three-tier: --quiet (silent) / default (filtered progress) / --verbose (raw).
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.5.2
3
+ Version: 0.6.0
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/HumanBean17/java-codebase-rag
8
8
  Project-URL: Repository, https://github.com/HumanBean17/java-codebase-rag
9
9
  Project-URL: Issues, https://github.com/HumanBean17/java-codebase-rag/issues
10
- Keywords: mcp,java,rag,code-search,graph,lancedb,kuzu
10
+ Keywords: mcp,java,rag,code-search,graph,lancedb,ladybug
11
11
  Classifier: Development Status :: 3 - Alpha
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: Programming Language :: Python :: 3
@@ -19,7 +19,7 @@ Requires-Python: >=3.11
19
19
  Description-Content-Type: text/markdown
20
20
  License-File: LICENSE
21
21
  Requires-Dist: cocoindex[lancedb]<2,>=1.0.0a43
22
- Requires-Dist: kuzu<0.12,>=0.11.3
22
+ Requires-Dist: ladybug<0.18,>=0.17.1
23
23
  Requires-Dist: lancedb<0.31,>=0.25.3
24
24
  Requires-Dist: mcp<2,>=1.27.0
25
25
  Requires-Dist: numpy<2.5,>=1.26.4
@@ -103,7 +103,7 @@ java-codebase-rag install
103
103
  java-codebase-rag install --non-interactive --agent claude-code
104
104
  ```
105
105
 
106
- After `pip install --upgrade java-codebase-rag`, run `java-codebase-rag update` to refresh shipped artifacts.
106
+ After `pip install --upgrade java-codebase-rag`, run `java-codebase-rag update` to refresh shipped artifacts and catch up the index (Lance + graph).
107
107
 
108
108
  ### Manual registration
109
109
 
@@ -235,7 +235,7 @@ Run `java-codebase-rag --help` to list grouped subcommands. Operator playbook wi
235
235
  | Group | Subcommand | What it does |
236
236
  |---|---|---|
237
237
  | Setup | `install` | Interactive setup wizard: config, MCP registration, skill/agent deployment, indexing. |
238
- | Setup | `update` | Refresh shipped artifacts (skill, agent, MCP entry) after pip upgrade. |
238
+ | Setup | `update` | Refresh shipped artifacts (skill, agent, MCP entry) + incremental Lance/graph catch-up after pip upgrade. |
239
239
  | Lifecycle | `init` | First-time index. Refuses if artifacts already exist. |
240
240
  | Lifecycle | `increment` | CocoIndex catch-up + incremental Ladybug update. `--vectors-only` for Lance only. |
241
241
  | Lifecycle | `reprocess` | Full Lance + Ladybug rebuild. `--vectors-only` / `--graph-only` for a single phase. |
@@ -0,0 +1,33 @@
1
+ ast_java.py,sha256=TMesuv4SYqzkwfKxf_Pps0KaPLZNZOrhU8mL20bwqeQ,98882
2
+ brownfield_events.py,sha256=yxXkKDgMb3VPtaiakGzncHM_EGnda8xIue6w90yYp8s,2055
3
+ build_ast_graph.py,sha256=GNbjiIAwsXaJQ9Je6gbR-dB9SbnaLThya2pEw3tggQs,152396
4
+ chunk_heuristics.py,sha256=aQk2NOKxzUdqoUAJUO3G3LE0MN_bYZWNLQ0tkmj5uts,1813
5
+ graph_enrich.py,sha256=POT4LwSkTsrjUmP67bsm2UezUam70cunuPDYDh-v1Bs,63332
6
+ index_common.py,sha256=HT6FKHFJ084eFvd3fR1j8z8gf4eWoPHVW8GXLpw464I,285
7
+ java_index_flow_lancedb.py,sha256=MH9iTNF6HDHDTt5Jn7TOVE5hQ4WUPNt7PlQoh1tuh9o,13212
8
+ java_index_v1_common.py,sha256=nF1KrSqboF_RRvWerG9knRRFmWwsrG_CvhgnsoZ8KqA,1154
9
+ java_ontology.py,sha256=FcnOq1XWhUP03OfnTkRStslqrNyukzUKH7VNuK6Bme4,16425
10
+ ladybug_queries.py,sha256=912j9VAYDjcU4ReVorWQ6R4DZl0tteKic-Pqu0jyBS0,90837
11
+ mcp_hints.py,sha256=3swh05LSiWur3tm3-yssndBsLxIxFhy501kBtJI8jJ0,42509
12
+ mcp_v2.py,sha256=64UDrQ27hAQtlz3pFp9A3Xlk95bUjYZ4VBscsyAPCIY,79116
13
+ path_filtering.py,sha256=-oX16SYLWYwX9pcV1fu3vbVTIhY1GzFflT7J1E2tqPY,17122
14
+ pr_analysis.py,sha256=3-5L8_G5XupdJsl9RN73Lq-ejPoK11B3m_VzAx2fGG8,18413
15
+ search_lancedb.py,sha256=scG6HBUrsgIeSWFrGcLcGdhWv1qODOx4JOBMAlLDY_E,36793
16
+ server.py,sha256=uGKT0PdM-bVrzIsfbxF6ZuHGcuRMSSlvkJk0e7Ff43Y,30556
17
+ java_codebase_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
18
+ java_codebase_rag/_fdlimit.py,sha256=WroFdfSNbcriKok6q8znTf74dqlznxea_1Fd5bHl_3o,1930
19
+ java_codebase_rag/cli.py,sha256=a5IFLWAsh77mfLv1Z9OdpvLaYvj4i0KR3_kLtL-ans8,34156
20
+ java_codebase_rag/cli_format.py,sha256=arU7P9W6Fvm7X_wzR1wJ8EfyxK1rDP_ESEhdA0ub4Mo,2579
21
+ java_codebase_rag/cli_progress.py,sha256=9jCqEagYOXs32SYVA31_sOCrONvYy7cl1CrdBD2Pg44,3168
22
+ java_codebase_rag/config.py,sha256=u4OomvglTWHUmMpcxN8wPRnRGfXVp3qK_GJ5pY96O98,16267
23
+ java_codebase_rag/installer.py,sha256=DlBuVVWbHXgcjaQkuXUeT9fNdmk7XZefVT3zzw47k18,45965
24
+ java_codebase_rag/lance_optimize.py,sha256=MzACYlgwxmkJCK64qQLyIAdizSq5BARqaMYSZONlc1I,6069
25
+ java_codebase_rag/pipeline.py,sha256=UcgluFAW9Ghnas8u40x45bVic0mQv6rjzcliDKsnYJI,11936
26
+ java_codebase_rag/install_data/agents/explorer-rag-enhanced.md,sha256=APl9d-No12qZNZLjU7mwNRwxHIgnT3ZtQZiD4clWlyU,14413
27
+ java_codebase_rag/install_data/skills/explore-codebase/SKILL.md,sha256=pIM-Xdwq_fXkhhBJCdb-fA2nes5c_mMPcdUXb7Adyxo,12040
28
+ java_codebase_rag-0.6.0.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
29
+ java_codebase_rag-0.6.0.dist-info/METADATA,sha256=GoMO3zFTb98w4rVV5SMXpcLK-irlDs7aUH0wBGlv5cQ,16887
30
+ java_codebase_rag-0.6.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
31
+ java_codebase_rag-0.6.0.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
32
+ java_codebase_rag-0.6.0.dist-info/top_level.txt,sha256=syQgi8XPBwY2ws_NZ1uRCxTf_s41NpshwEHNdcdnk3A,245
33
+ java_codebase_rag-0.6.0.dist-info/RECORD,,
@@ -8,7 +8,7 @@ java_codebase_rag
8
8
  java_index_flow_lancedb
9
9
  java_index_v1_common
10
10
  java_ontology
11
- kuzu_queries
11
+ ladybug_queries
12
12
  mcp_hints
13
13
  mcp_v2
14
14
  path_filtering
@@ -4,7 +4,7 @@ CocoIndex 1.0 app: index Java, Flyway SQL, and YAML into LanceDB.
4
4
  LanceDB requires a single primary key per table; each chunk gets a UUID `id`.
5
5
 
6
6
  Environment:
7
- JAVA_CODEBASE_RAG_INDEX_DIR — Lance tables + Kuzu + cocoindex state (default: ./.java-codebase-rag)
7
+ JAVA_CODEBASE_RAG_INDEX_DIR — Lance tables + LadybugDB + cocoindex state (default: ./.java-codebase-rag)
8
8
  JAVA_CODEBASE_RAG_SOURCE_ROOT — Java repo root for indexing (optional; else cocoindex cwd)
9
9
  SBERT_MODEL / SBERT_DEVICE — embedding (optional; YAML also supported via java-codebase-rag CLI)
10
10
 
@@ -36,6 +36,7 @@ from cocoindex.ops.text import RecursiveSplitter, detect_code_language
36
36
  from cocoindex.resources.file import PatternFilePathMatcher
37
37
 
38
38
  from java_codebase_rag.config import resolved_sbert_model_for_process_env
39
+ from java_codebase_rag.lance_optimize import LANCE_TABLE_NAMES
39
40
  from java_index_v1_common import (
40
41
  JAVA_CHUNK,
41
42
  SBERT_MODEL,
@@ -68,6 +69,20 @@ else:
68
69
 
69
70
  splitter = RecursiveSplitter()
70
71
 
72
+ # cocoindex 1.0.7 schedules ``table.optimize()`` (a LanceDB Rewrite/compaction
73
+ # transaction) as a *background* asyncio task after every
74
+ # ``num_transactions_before_optimize`` mutation batches (default 50). That
75
+ # background Rewrite races the concurrent ``table.delete()`` (Delete)
76
+ # transactions emitted by later batches, and LanceDB does not allow a Rewrite
77
+ # to commit concurrently with a Delete (upstream lancedb#1504), which floods
78
+ # stderr with "Retryable commit conflict ... preempted by concurrent
79
+ # transaction Delete". Setting this effectively to infinity disables the
80
+ # in-flight background optimize; the serialized post-flow optimize in
81
+ # ``lance_optimize.optimize_lance_tables`` then compacts the table with no
82
+ # concurrent writers. ``optimize()`` is pure maintenance (compact/prune/index);
83
+ # upsert/delete correctness via merge_insert does not depend on it.
84
+ _NUM_TXN_BEFORE_OPTIMIZE = 10**12
85
+
71
86
 
72
87
  @dataclass
73
88
  class JavaLanceChunk:
@@ -317,8 +332,9 @@ async def app_main() -> None:
317
332
  )
318
333
  java_table = await lancedb.mount_table_target(
319
334
  LANCE_DB,
320
- "javacodeindex_java_code",
335
+ LANCE_TABLE_NAMES[0],
321
336
  java_schema,
337
+ num_transactions_before_optimize=_NUM_TXN_BEFORE_OPTIMIZE,
322
338
  )
323
339
 
324
340
  sql_schema = await lancedb.TableSchema.from_class(
@@ -327,8 +343,9 @@ async def app_main() -> None:
327
343
  )
328
344
  sql_table = await lancedb.mount_table_target(
329
345
  LANCE_DB,
330
- "sqlschemaindex_sql_schema",
346
+ LANCE_TABLE_NAMES[1],
331
347
  sql_schema,
348
+ num_transactions_before_optimize=_NUM_TXN_BEFORE_OPTIMIZE,
332
349
  )
333
350
 
334
351
  yaml_schema = await lancedb.TableSchema.from_class(
@@ -337,8 +354,9 @@ async def app_main() -> None:
337
354
  )
338
355
  yaml_table = await lancedb.mount_table_target(
339
356
  LANCE_DB,
340
- "yamlconfigindex_yaml_config",
357
+ LANCE_TABLE_NAMES[2],
341
358
  yaml_schema,
359
+ num_transactions_before_optimize=_NUM_TXN_BEFORE_OPTIMIZE,
342
360
  )
343
361
 
344
362
  project_root = coco.use_context(PROJECT_ROOT)
java_ontology.py CHANGED
@@ -141,7 +141,7 @@ Cardinality = Literal["many_to_many", "many_to_one", "one_to_many", "one_to_one"
141
141
  @dataclass(frozen=True)
142
142
  class EdgeAttr:
143
143
  name: str
144
- kuzu_type: str
144
+ graph_type: str
145
145
  purpose: str
146
146
 
147
147
 
@@ -1,9 +1,9 @@
1
- """Read-only Cypher helpers over the Kuzu AST graph built by `build_ast_graph.py`.
1
+ """Read-only Cypher helpers over the Ladybug AST graph built by `build_ast_graph.py`.
2
2
 
3
- Each function opens a Kuzu connection on demand and returns plain JSON-ish dicts
3
+ Each function opens a Ladybug connection on demand and returns plain JSON-ish dicts
4
4
  so the MCP server can serialize them without further mapping.
5
5
 
6
- The Kuzu database is opened read-only and cached per-process. This module is
6
+ The Ladybug database is opened read-only and cached per-process. This module is
7
7
  intentionally dependency-light: nothing here imports LanceDB or sentence-transformers.
8
8
 
9
9
  Cypher pitfalls (see also ``AGENTS.md``): avoid ``label(e) IN $list`` in ``WHERE`` for
@@ -16,17 +16,37 @@ from __future__ import annotations
16
16
  import json
17
17
  import logging
18
18
  import os
19
+ import re
19
20
  import threading
20
21
  from dataclasses import asdict, dataclass
21
22
  from pathlib import Path
22
23
  from typing import Any, Literal
23
24
 
24
- import kuzu
25
+ import ladybug
25
26
 
26
27
  from ast_java import ONTOLOGY_VERSION as _ONTOLOGY_VERSION
27
28
 
28
29
  log = logging.getLogger(__name__)
29
30
 
31
+
32
+ def _parse_ladybug_json(raw: str | None) -> dict[str, Any]:
33
+ """Parse JSON from LadybugDB which returns unquoted keys like {key: value}."""
34
+ if not raw:
35
+ return {}
36
+ # LadybugDB returns JSON without quotes around keys: {packages: 1, files: 2}
37
+ # Convert to standard JSON: {"packages": 1, "files": 2}
38
+ # This regex matches word characters followed by ':' at the start of a key
39
+ quoted = re.sub(r'(\w+):', r'"\1":', raw)
40
+ try:
41
+ return json.loads(quoted)
42
+ except Exception:
43
+ try:
44
+ # Fallback: try parsing as-is (for standard JSON)
45
+ return json.loads(raw)
46
+ except Exception:
47
+ log.warning("Failed to parse counts_json: %s", raw[:100])
48
+ return {}
49
+
30
50
  # Composed describe / neighbors dot-keys (not stored graph edge labels).
31
51
  _MEMBER_EDGE_COMPOSED_REL_MAP: tuple[tuple[str, str], ...] = (
32
52
  ("DECLARES.DECLARES_CLIENT", "DECLARES_CLIENT"),
@@ -46,7 +66,7 @@ OVERRIDE_AXIS_COMPOSED_EDGE_TYPES: frozenset[str] = frozenset(_OVERRIDE_AXIS_COM
46
66
 
47
67
 
48
68
  def _coerce_id_list(raw: Any) -> list[str]:
49
- """Normalize Kuzu ``collect(DISTINCT ...)`` list results to string ids."""
69
+ """Normalize Ladybug ``collect(DISTINCT ...)`` list results to string ids."""
50
70
  if raw is None:
51
71
  return []
52
72
  if isinstance(raw, list):
@@ -56,8 +76,8 @@ def _coerce_id_list(raw: Any) -> list[str]:
56
76
 
57
77
 
58
78
  __all__ = [
59
- "KuzuGraph",
60
- "resolve_kuzu_path",
79
+ "LadybugGraph",
80
+ "resolve_ladybug_path",
61
81
  "SymbolHit",
62
82
  "EdgeHit",
63
83
  "CallEdge",
@@ -68,14 +88,14 @@ __all__ = [
68
88
  ]
69
89
 
70
90
 
71
- def resolve_kuzu_path(explicit: str | None = None) -> str:
72
- """Resolve the Kuzu DB path the same way the builder does."""
91
+ def resolve_ladybug_path(explicit: str | None = None) -> str:
92
+ """Resolve the Ladybug DB path the same way the builder does."""
73
93
  if explicit:
74
94
  return str(Path(explicit).expanduser())
75
95
  idx = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
76
96
  if idx and not idx.startswith(("s3://", "gs://", "az://")):
77
- return str(Path(os.path.expanduser(idx.rstrip("/"))) / "code_graph.kuzu")
78
- return str((Path.cwd() / ".java-codebase-rag" / "code_graph.kuzu").resolve())
97
+ return str(Path(os.path.expanduser(idx.rstrip("/"))) / "code_graph.lbug")
98
+ return str((Path.cwd() / ".java-codebase-rag" / "code_graph.lbug").resolve())
79
99
 
80
100
 
81
101
  @dataclass
@@ -165,10 +185,10 @@ class RouteCaller:
165
185
 
166
186
 
167
187
  def _symbol_return_for(alias: str) -> str:
168
- """Kuzu RETURN projection for Symbol properties, using the given node alias.
188
+ """Ladybug RETURN projection for Symbol properties, using the given node alias.
169
189
 
170
190
  Centralised so queries that bind Symbol under a non-`s` alias (e.g. `n` in
171
- graph-expansion / flow-tracing) don't emit `s.*` references that Kuzu
191
+ graph-expansion / flow-tracing) don't emit `s.*` references that Ladybug
172
192
  rejects with `Variable s is not in scope`.
173
193
  """
174
194
  return (
@@ -198,7 +218,7 @@ def _scope_filters(
198
218
 
199
219
  Mutates `params` to bind `$module` / `$microservice` only when the
200
220
  corresponding filter is set, so unused names don't leak into the
201
- Kuzu plan.
221
+ Ladybug plan.
202
222
  """
203
223
  out: list[str] = []
204
224
  if module:
@@ -274,7 +294,7 @@ _SYM_COLS = (
274
294
 
275
295
 
276
296
  def find_symbols_in_file_range(
277
- graph: "KuzuGraph",
297
+ graph: "LadybugGraph",
278
298
  *,
279
299
  filename: str,
280
300
  start_line: int,
@@ -324,25 +344,25 @@ def _call_graph_needle_phantom_arity_alt(needle: str) -> str | None:
324
344
  return needle[:i] + "(?)"
325
345
 
326
346
 
327
- class KuzuGraph:
328
- """Thin wrapper around a read-only Kuzu connection.
347
+ class LadybugGraph:
348
+ """Thin wrapper around a read-only Ladybug connection.
329
349
 
330
350
  Safe to share across threads: we hold a single `Connection`, guarded by a lock.
331
351
  """
332
352
 
333
353
  _lock = threading.Lock()
334
- _instance: "KuzuGraph | None" = None
354
+ _instance: "LadybugGraph | None" = None
335
355
  _instance_path: str | None = None
336
356
 
337
357
  def __init__(self, db_path: str) -> None:
338
358
  self.db_path = db_path
339
- self._db = kuzu.Database(db_path, read_only=True)
340
- self._conn = kuzu.Connection(self._db)
359
+ self._db = ladybug.Database(db_path, read_only=True)
360
+ self._conn = ladybug.Connection(self._db)
341
361
  self._conn_lock = threading.Lock()
342
362
 
343
363
  @classmethod
344
- def get(cls, db_path: str | None = None) -> "KuzuGraph":
345
- resolved = resolve_kuzu_path(db_path)
364
+ def get(cls, db_path: str | None = None) -> "LadybugGraph":
365
+ resolved = resolve_ladybug_path(db_path)
346
366
  with cls._lock:
347
367
  if cls._instance is None or cls._instance_path != resolved:
348
368
  instance = cls(resolved)
@@ -354,7 +374,7 @@ class KuzuGraph:
354
374
  f"required version {_ONTOLOGY_VERSION}. "
355
375
  "Rebuild the graph: `python build_ast_graph.py --source-root <repo>`, "
356
376
  "or run `java-codebase-rag reprocess --source-root <repo>` for a full "
357
- "Lance+Kuzu re-index."
377
+ "Lance+Ladybug re-index."
358
378
  )
359
379
  cls._instance = instance
360
380
  cls._instance_path = resolved
@@ -362,11 +382,11 @@ class KuzuGraph:
362
382
 
363
383
  @classmethod
364
384
  def exists(cls, db_path: str | None = None) -> bool:
365
- resolved = resolve_kuzu_path(db_path)
385
+ resolved = resolve_ladybug_path(db_path)
366
386
  p = Path(resolved)
367
387
  if not p.exists():
368
388
  return False
369
- # Kuzu represents DB as a directory; allow file form too (single-file DBs).
389
+ # Ladybug represents DB as a directory; allow file form too (single-file DBs).
370
390
  return True
371
391
 
372
392
  # ---- low-level ----
@@ -481,11 +501,15 @@ class KuzuGraph:
481
501
  if not rows:
482
502
  return {"error": "no GraphMeta node"}
483
503
  row = rows[0]
484
- counts: dict[str, Any]
485
- try:
486
- counts = json.loads(row.get("counts_json") or "{}")
487
- except Exception:
488
- counts = {}
504
+ counts: dict[str, Any] = _parse_ladybug_json(row.get("counts_json"))
505
+ # Ensure counts has expected keys even if empty
506
+ if not counts:
507
+ counts = {
508
+ "packages": 0, "files": 0, "types": 0, "members": 0, "phantoms": 0,
509
+ "extends": 0, "implements": 0, "injects": 0, "declares": 0, "overrides": 0,
510
+ "calls": 0, "routes": 0, "exposes": 0, "clients": 0, "declares_client": 0,
511
+ "producers": 0, "declares_producer": 0, "http_calls": 0, "async_calls": 0,
512
+ }
489
513
  routes_total = exposes_total = 0
490
514
  routes_resolved_pct = 0.0
491
515
  routes_by_framework: dict[str, Any] = {}
@@ -507,10 +531,7 @@ class KuzuGraph:
507
531
  cross_service_resolution: str | None = None
508
532
  if meta_mode != "legacy":
509
533
  rfw_raw = row.get("routes_by_framework") or "{}"
510
- try:
511
- routes_by_framework = json.loads(rfw_raw) if isinstance(rfw_raw, str) else (rfw_raw or {})
512
- except Exception:
513
- routes_by_framework = {}
534
+ routes_by_framework = _parse_ladybug_json(rfw_raw) if isinstance(rfw_raw, str) else (rfw_raw or {})
514
535
  if not isinstance(routes_by_framework, dict):
515
536
  routes_by_framework = {}
516
537
  routes_total = int(row.get("routes_total") or 0)
@@ -519,26 +540,17 @@ class KuzuGraph:
519
540
  if meta_mode in ("pr_f1", "pr_e3", "pre_e3"):
520
541
  routes_from_brownfield_pct = float(row.get("routes_from_brownfield_pct") or 0.0)
521
542
  rbl_raw = row.get("routes_by_layer") or "{}"
522
- try:
523
- routes_by_layer = json.loads(rbl_raw) if isinstance(rbl_raw, str) else (rbl_raw or {})
524
- except Exception:
525
- routes_by_layer = {}
543
+ routes_by_layer = _parse_ladybug_json(rbl_raw) if isinstance(rbl_raw, str) else (rbl_raw or {})
526
544
  if not isinstance(routes_by_layer, dict):
527
545
  routes_by_layer = {}
528
546
  http_calls_total = int(row.get("http_calls_total") or 0)
529
547
  async_calls_total = int(row.get("async_calls_total") or 0)
530
548
  hbs_raw = row.get("http_calls_by_strategy") or "{}"
531
549
  abs_raw = row.get("async_calls_by_strategy") or "{}"
532
- try:
533
- http_calls_by_strategy = json.loads(hbs_raw) if isinstance(hbs_raw, str) else (hbs_raw or {})
534
- except Exception:
535
- http_calls_by_strategy = {}
550
+ http_calls_by_strategy = _parse_ladybug_json(hbs_raw) if isinstance(hbs_raw, str) else (hbs_raw or {})
536
551
  if not isinstance(http_calls_by_strategy, dict):
537
552
  http_calls_by_strategy = {}
538
- try:
539
- async_calls_by_strategy = json.loads(abs_raw) if isinstance(abs_raw, str) else (abs_raw or {})
540
- except Exception:
541
- async_calls_by_strategy = {}
553
+ async_calls_by_strategy = _parse_ladybug_json(abs_raw) if isinstance(abs_raw, str) else (abs_raw or {})
542
554
  if not isinstance(async_calls_by_strategy, dict):
543
555
  async_calls_by_strategy = {}
544
556
  http_calls_resolved_pct = float(row.get("http_calls_resolved_pct") or 0.0)
@@ -547,16 +559,10 @@ class KuzuGraph:
547
559
  async_producers_from_brownfield_pct = float(row.get("async_producers_from_brownfield_pct") or 0.0)
548
560
  hmb_raw = row.get("http_calls_match_breakdown") or "{}"
549
561
  amb_raw = row.get("async_calls_match_breakdown") or "{}"
550
- try:
551
- http_calls_match_breakdown = json.loads(hmb_raw) if isinstance(hmb_raw, str) else (hmb_raw or {})
552
- except Exception:
553
- http_calls_match_breakdown = {}
562
+ http_calls_match_breakdown = _parse_ladybug_json(hmb_raw) if isinstance(hmb_raw, str) else (hmb_raw or {})
554
563
  if not isinstance(http_calls_match_breakdown, dict):
555
564
  http_calls_match_breakdown = {}
556
- try:
557
- async_calls_match_breakdown = json.loads(amb_raw) if isinstance(amb_raw, str) else (amb_raw or {})
558
- except Exception:
559
- async_calls_match_breakdown = {}
565
+ async_calls_match_breakdown = _parse_ladybug_json(amb_raw) if isinstance(amb_raw, str) else (amb_raw or {})
560
566
  if not isinstance(async_calls_match_breakdown, dict):
561
567
  async_calls_match_breakdown = {}
562
568
  cross_service_calls_total = int(row.get("cross_service_calls_total") or 0)
@@ -1013,7 +1019,7 @@ class KuzuGraph:
1013
1019
  microservice: str | None = None,
1014
1020
  capability: str | None = None,
1015
1021
  limit: int = 100) -> list[SymbolHit]:
1016
- # Kuzu supports `list_contains` for STRING[].
1022
+ # Ladybug supports `list_contains` for STRING[].
1017
1023
  filters = ["list_contains(s.annotations, $ann)"]
1018
1024
  params: dict[str, Any] = {"ann": annotation}
1019
1025
  if capability:
@@ -1454,7 +1460,7 @@ class KuzuGraph:
1454
1460
  ))
1455
1461
  if entry_roles:
1456
1462
  params["entry_roles"] = list(entry_roles)
1457
- # Kuzu 0.11.x does not support parameterized lists inside ANY
1463
+ # Ladybug 0.17.x does not support parameterized lists inside ANY
1458
1464
  # comprehensions, so we expand the fixed capability set as
1459
1465
  # individual list_contains predicates ORed together.
1460
1466
  cap_predicates = " OR ".join(