java-codebase-rag 0.5.3__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
1
+ """Serialized post-flow LanceDB optimize with commit-conflict retry.
2
+
3
+ cocoindex 1.0.7 schedules ``table.optimize()`` (a LanceDB **Rewrite**/compaction
4
+ transaction) as a *background* ``asyncio`` task that races concurrent
5
+ ``table.delete()`` (**Delete**) transactions emitted by later mutation batches.
6
+ LanceDB does not allow a Rewrite to commit concurrently with a Delete
7
+ (upstream lancedb#1504 — "We do not support concurrent deletes right now"),
8
+ which surfaces as a flood of::
9
+
10
+ RuntimeError: lance error: Retryable commit conflict for version N: \
11
+ This Rewrite transaction was preempted by concurrent transaction Delete ...
12
+
13
+ To eliminate the race, the flow (``java_index_flow_lancedb.py``) disables the
14
+ in-flight background optimize entirely by raising
15
+ ``num_transactions_before_optimize`` to a value that is effectively never
16
+ reached. This module then performs a *single*, serialized optimize after the
17
+ flow returns (exit 0 → no concurrent writers), retrying the rare residual
18
+ commit conflict that two internal compaction passes can still produce.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import asyncio
23
+ import sys
24
+ from pathlib import Path
25
+
26
+ # Single source of truth for the three Lance table names created by the flow.
27
+ # Keep in sync with ``search_lancedb.TABLES`` (the values there mirror these).
28
+ LANCE_TABLE_NAMES: tuple[str, ...] = (
29
+ "javacodeindex_java_code",
30
+ "sqlschemaindex_sql_schema",
31
+ "yamlconfigindex_yaml_config",
32
+ )
33
+
34
+ # Commit conflicts are transient; a handful of exponential-backoff retries is
35
+ # enough because, post-flow, there are no concurrent writers — only successive
36
+ # optimize/compaction passes within this single serialized call can still
37
+ # transiently preempt one another.
38
+ _MAX_ATTEMPTS = 6
39
+ _BASE_BACKOFF_S = 0.1
40
+
41
+ # Substrings identifying the retryable Lance commit-conflict error. LanceDB
42
+ # wraps the underlying lance error text into the raised ``RuntimeError`` str,
43
+ # so a substring match is the robust detector (no dedicated exception type).
44
_RETRYABLE_MARKERS = (
    "Retryable commit conflict",
    "preempted by concurrent transaction",
)


def _is_retryable(exc: BaseException) -> bool:
    """Return True when *exc* looks like a retryable Lance commit conflict.

    Detection is purely textual: the exception is rendered with ``str()`` and
    checked for any of the substrings in ``_RETRYABLE_MARKERS``. The exception
    type itself is never inspected.
    """
    message = str(exc)
    for marker in _RETRYABLE_MARKERS:
        if marker in message:
            return True
    return False
53
+
54
+
55
async def _list_table_names(db: object) -> set[str]:
    """Return the names of the tables that exist in *db* as a set.

    Two connection API shapes are supported. ``list_tables()`` is preferred
    whenever the connection object exposes it; its result is either an object
    carrying a ``tables`` attribute or the iterable of names itself. When
    ``list_tables`` is absent, ``table_names()`` is used instead.
    """
    if not hasattr(db, "list_tables"):
        legacy_names = await db.table_names()
        return set(legacy_names)

    listing = await db.list_tables()
    # A response object wraps the names in ``.tables``; a bare iterable is
    # used as-is.
    names = getattr(listing, "tables", listing)
    return set(names)
61
+
62
+
63
async def optimize_lance_tables(index_dir: Path, *, quiet: bool = False) -> dict[str, str]:
    """Run ``table.optimize()`` on every known Lance table, one at a time.

    Each name in :data:`LANCE_TABLE_NAMES` is handled in order. Tables missing
    from the database are skipped. An optimize call that fails with a
    retryable commit conflict is repeated with exponential backoff, up to
    ``_MAX_ATTEMPTS`` attempts in total. Any other failure, or running out of
    attempts, is recorded for that table and reported on **stderr**. Nothing
    is ever written to stdout.

    An exception raised while connecting to the database is not handled here
    and propagates to the caller.

    Args:
        index_dir: directory holding the Lance tables; passed to LanceDB as
            the connection URI.
        quiet: when True, the per-table "ok" / "skipped" info lines are
            suppressed. Error lines are always written.

    Returns:
        Mapping of table name → status: ``"ok"``, ``"skipped"`` (table not
        present), or a string starting with ``"error: "``. If listing the
        tables fails, every known table maps to ``"error: list failed: ..."``.
    """
    # Lazy import: the flow imports this module for LANCE_TABLE_NAMES and must
    # not pay the lancedb import cost at flow-definition time.
    import lancedb

    statuses: dict[str, str] = {}
    connection = await lancedb.connect_async(str(index_dir))
    try:
        try:
            present = await _list_table_names(connection)
        except Exception as exc:
            print(
                f"java-codebase-rag: optimize: failed to list tables in "
                f"{index_dir}: {exc}",
                file=sys.stderr,
            )
            failed: dict[str, str] = {}
            for table_name in LANCE_TABLE_NAMES:
                failed[table_name] = f"error: list failed: {exc}"
            return failed

        for name in LANCE_TABLE_NAMES:
            if name not in present:
                statuses[name] = "skipped"
                if not quiet:
                    print(
                        f"java-codebase-rag: optimize: {name} absent, skipped",
                        file=sys.stderr,
                    )
                continue

            try:
                table = await connection.open_table(name)
            except Exception as exc:
                statuses[name] = f"error: open failed: {exc}"
                print(
                    f"java-codebase-rag: optimize: {name} open failed: {exc}",
                    file=sys.stderr,
                )
                continue

            # ``failure`` stays None on success and otherwise holds the last
            # exception seen, whether non-retryable or the one that exhausted
            # the retry budget.
            failure: BaseException | None = None
            attempt = 0
            while attempt < _MAX_ATTEMPTS:
                try:
                    await table.optimize()
                except Exception as exc:
                    failure = exc
                    budget_spent = attempt >= _MAX_ATTEMPTS - 1
                    if budget_spent or not _is_retryable(exc):
                        # Stop retrying; the failure is surfaced below rather
                        # than swallowed.
                        break
                    # Exponential backoff: base delay doubled per attempt.
                    await asyncio.sleep(_BASE_BACKOFF_S * (2**attempt))
                    attempt += 1
                else:
                    failure = None
                    break

            if failure is not None:
                statuses[name] = f"error: {failure}"
                print(
                    f"java-codebase-rag: optimize: {name} failed: {failure}",
                    file=sys.stderr,
                )
                continue

            statuses[name] = "ok"
            if not quiet:
                print(
                    f"java-codebase-rag: optimize: {name} ok",
                    file=sys.stderr,
                )
    finally:
        # ``AsyncConnection.close`` is a *sync* method in lancedb 0.30.x, so
        # it is called without ``await``.
        connection.close()
    return statuses
@@ -1,6 +1,7 @@
1
1
  """Subprocess helpers for cocoindex + graph builder (no heavy ML imports at import time)."""
2
2
  from __future__ import annotations
3
3
 
4
+ import asyncio
4
5
  import os
5
6
  import shutil
6
7
  import subprocess
@@ -11,6 +12,7 @@ from pathlib import Path
11
12
 
12
13
  from java_codebase_rag.cli_format import Spinner, is_noise_line, stderr_is_tty
13
14
  from java_codebase_rag.cli_progress import emit_vectors_finish, emit_vectors_start
15
+ from java_codebase_rag.config import cocoindex_subprocess_env_defaults
14
16
 
15
17
  COCOINDEX_TARGET = "java_index_flow_lancedb.py:JavaCodeIndexLance"
16
18
 
@@ -110,6 +112,57 @@ def run_cocoindex_update(
110
112
  quiet: bool,
111
113
  verbose: bool = True,
112
114
  lance_project_root: Path | None = None,
115
+ ) -> subprocess.CompletedProcess[str]:
116
+ result = _run_cocoindex_update_impl(
117
+ env,
118
+ full_reprocess=full_reprocess,
119
+ quiet=quiet,
120
+ verbose=verbose,
121
+ lance_project_root=lance_project_root,
122
+ )
123
+ # After cocoindex returns exit 0 there are no concurrent writers, so this
124
+ # is the safe window to compact the Lance tables. The flow disabled its
125
+ # in-flight background optimize (see java_index_flow_lancedb.py), making
126
+ # this serialized pass the sole optimizer. Optimize failure does not flip
127
+ # the cocoindex CompletedProcess (a successful index is still usable, just
128
+ # not compacted); the outcome is logged to stderr only.
129
+ if result.returncode == 0:
130
+ _maybe_run_serialized_optimize(env, quiet=quiet)
131
+ return result
132
+
133
+
134
def _maybe_run_serialized_optimize(env: dict[str, str], *, quiet: bool) -> None:
    """Resolve the index dir from *env* and run the serialized Lance optimize.

    The index directory is read from ``JAVA_CODEBASE_RAG_INDEX_DIR`` in *env*.
    If the variable is missing or blank, optimize is skipped with a stderr
    warning rather than crashing — a successful index is still searchable
    un-compacted.

    ``asyncio.run()`` cannot be used from a thread that already has a running
    event loop. In that situation the optimize coroutine is executed on a
    fresh event loop in a short-lived worker thread, and this function blocks
    until it finishes. Without a running loop it runs directly via
    ``asyncio.run()``.

    Args:
        env: environment mapping that was passed to the cocoindex subprocess.
        quiet: forwarded to ``optimize_lance_tables`` to suppress its
            per-table info lines.

    Returns:
        None. Any failure is reported on stderr and never raised.
    """
    idx_raw = env.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
    if not idx_raw:
        print(
            "java-codebase-rag: optimize skipped — JAVA_CODEBASE_RAG_INDEX_DIR "
            "not set in subprocess env",
            file=sys.stderr,
        )
        return
    try:
        from java_codebase_rag.lance_optimize import optimize_lance_tables

        index_dir = Path(idx_raw)

        def _run_on_fresh_loop() -> None:
            # The coroutine is created here so it is always awaited by the
            # loop that ``asyncio.run`` starts in the executing thread.
            asyncio.run(optimize_lance_tables(index_dir, quiet=quiet))

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop is running in this thread: run directly.
            _run_on_fresh_loop()
        else:
            # A loop is already running in this thread, where asyncio.run()
            # would raise. Use a worker thread with its own loop and wait for
            # it; ``result()`` re-raises any exception from the worker.
            import concurrent.futures

            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                pool.submit(_run_on_fresh_loop).result()
    except Exception as exc:
        # Never crash the CLI on an optimize failure — surface on stderr only.
        print(f"java-codebase-rag: optimize failed: {exc}", file=sys.stderr)
157
+
158
+
159
+ def _run_cocoindex_update_impl(
160
+ env: dict[str, str],
161
+ *,
162
+ full_reprocess: bool,
163
+ quiet: bool,
164
+ verbose: bool = True,
165
+ lance_project_root: Path | None = None,
113
166
  ) -> subprocess.CompletedProcess[str]:
114
167
  exe = cocoindex_bin()
115
168
  if not exe.is_file():
@@ -128,10 +181,11 @@ def run_cocoindex_update(
128
181
  stdout="",
129
182
  stderr=f"java_index_flow_lancedb.py not found under {bd}",
130
183
  )
131
- # Set CocoIndex concurrency limits to prevent "too many open files" error
132
- # See: https://github.com/HumanBean17/java-codebase-rag/issues/293
184
+ # Cap CocoIndex concurrency to avoid EMFILE ("too many open files") under
185
+ # default OS fd limits. See: https://github.com/HumanBean17/java-codebase-rag/issues/306
133
186
  env = env.copy()
134
- env.setdefault("COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS", "256")
187
+ for _k, _v in cocoindex_subprocess_env_defaults().items():
188
+ env.setdefault(_k, _v)
135
189
  cmd: list[str] = [str(exe), "update", COCOINDEX_TARGET]
136
190
  if full_reprocess:
137
191
  cmd.extend(["--full-reprocess", "-f"])
@@ -201,7 +255,7 @@ def run_cocoindex_drop(env: dict[str, str], *, quiet: bool) -> subprocess.Comple
201
255
  def run_build_ast_graph(
202
256
  *,
203
257
  source_root: Path,
204
- kuzu_path: Path,
258
+ ladybug_path: Path,
205
259
  verbose: bool,
206
260
  quiet: bool = False,
207
261
  env: dict[str, str] | None = None,
@@ -219,8 +273,8 @@ def run_build_ast_graph(
219
273
  str(builder),
220
274
  "--source-root",
221
275
  str(source_root),
222
- "--kuzu-path",
223
- str(kuzu_path),
276
+ "--ladybug-path",
277
+ str(ladybug_path),
224
278
  ]
225
279
  # Three-tier: --quiet (silent) / default (filtered progress) / --verbose (raw).
226
280
  # Default passes --verbose so the builder emits per-pass progress lines,
@@ -254,7 +308,7 @@ def run_build_ast_graph(
254
308
  def run_incremental_graph(
255
309
  *,
256
310
  source_root: Path,
257
- kuzu_path: Path,
311
+ ladybug_path: Path,
258
312
  verbose: bool,
259
313
  quiet: bool = False,
260
314
  env: dict[str, str] | None = None,
@@ -273,8 +327,8 @@ def run_incremental_graph(
273
327
  str(builder),
274
328
  "--source-root",
275
329
  str(source_root),
276
- "--kuzu-path",
277
- str(kuzu_path),
330
+ "--ladybug-path",
331
+ str(ladybug_path),
278
332
  "--incremental",
279
333
  ]
280
334
  # Three-tier: --quiet (silent) / default (filtered progress) / --verbose (raw).
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.5.3
3
+ Version: 0.6.1
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/HumanBean17/java-codebase-rag
8
8
  Project-URL: Repository, https://github.com/HumanBean17/java-codebase-rag
9
9
  Project-URL: Issues, https://github.com/HumanBean17/java-codebase-rag/issues
10
- Keywords: mcp,java,rag,code-search,graph,lancedb,kuzu
10
+ Keywords: mcp,java,rag,code-search,graph,lancedb,ladybug
11
11
  Classifier: Development Status :: 3 - Alpha
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: Programming Language :: Python :: 3
@@ -19,7 +19,7 @@ Requires-Python: >=3.11
19
19
  Description-Content-Type: text/markdown
20
20
  License-File: LICENSE
21
21
  Requires-Dist: cocoindex[lancedb]<2,>=1.0.0a43
22
- Requires-Dist: kuzu<0.12,>=0.11.3
22
+ Requires-Dist: ladybug<0.18,>=0.17.1
23
23
  Requires-Dist: lancedb<0.31,>=0.25.3
24
24
  Requires-Dist: mcp<2,>=1.27.0
25
25
  Requires-Dist: numpy<2.5,>=1.26.4
@@ -35,6 +35,7 @@ Requires-Dist: unidiff<1,>=0.7.3
35
35
  Provides-Extra: dev
36
36
  Requires-Dist: pytest>=7; extra == "dev"
37
37
  Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
38
+ Requires-Dist: pytest-xdist>=3; extra == "dev"
38
39
  Requires-Dist: ruff>=0.4; extra == "dev"
39
40
  Dynamic: license-file
40
41
 
@@ -103,7 +104,7 @@ java-codebase-rag install
103
104
  java-codebase-rag install --non-interactive --agent claude-code
104
105
  ```
105
106
 
106
- After `pip install --upgrade java-codebase-rag`, run `java-codebase-rag update` to refresh shipped artifacts.
107
+ After `pip install --upgrade java-codebase-rag`, run `java-codebase-rag update` to refresh shipped artifacts and catch up the index (Lance + graph).
107
108
 
108
109
  ### Manual registration
109
110
 
@@ -235,7 +236,7 @@ Run `java-codebase-rag --help` to list grouped subcommands. Operator playbook wi
235
236
  | Group | Subcommand | What it does |
236
237
  |---|---|---|
237
238
  | Setup | `install` | Interactive setup wizard: config, MCP registration, skill/agent deployment, indexing. |
238
- | Setup | `update` | Refresh shipped artifacts (skill, agent, MCP entry) after pip upgrade. |
239
+ | Setup | `update` | Refresh shipped artifacts (skill, agent, MCP entry) + incremental Lance/graph catch-up after pip upgrade. |
239
240
  | Lifecycle | `init` | First-time index. Refuses if artifacts already exist. |
240
241
  | Lifecycle | `increment` | CocoIndex catch-up + incremental Kuzu update. `--vectors-only` for Lance only. |
241
242
  | Lifecycle | `reprocess` | Full Lance + Kuzu rebuild. `--vectors-only` / `--graph-only` for a single phase. |
@@ -0,0 +1,36 @@
1
+ ast_java.py,sha256=NQgZzstbsMq-PdowoD6r_ixJKxEEFzTP9xUzqDpiXeU,99661
2
+ brownfield_events.py,sha256=yxXkKDgMb3VPtaiakGzncHM_EGnda8xIue6w90yYp8s,2055
3
+ build_ast_graph.py,sha256=OKigswkUmWwUAKXXRNH4zplw2VonIdWUWzVjC-t5roo,152893
4
+ chunk_heuristics.py,sha256=aQk2NOKxzUdqoUAJUO3G3LE0MN_bYZWNLQ0tkmj5uts,1813
5
+ graph_enrich.py,sha256=POT4LwSkTsrjUmP67bsm2UezUam70cunuPDYDh-v1Bs,63332
6
+ index_common.py,sha256=HT6FKHFJ084eFvd3fR1j8z8gf4eWoPHVW8GXLpw464I,285
7
+ java_index_flow_lancedb.py,sha256=MH9iTNF6HDHDTt5Jn7TOVE5hQ4WUPNt7PlQoh1tuh9o,13212
8
+ java_index_v1_common.py,sha256=nF1KrSqboF_RRvWerG9knRRFmWwsrG_CvhgnsoZ8KqA,1154
9
+ java_ontology.py,sha256=71bCLDNvMy0SpZPzSR5apJ0qJXNd6y5ggkLdBEw_PFo,16682
10
+ kuzu_queries.py,sha256=9bQzrU311AOw_BcUp_KSGiZgPVSaLSU7y63XfcT_vqI,90137
11
+ ladybug_queries.py,sha256=912j9VAYDjcU4ReVorWQ6R4DZl0tteKic-Pqu0jyBS0,90837
12
+ mcp_hints.py,sha256=3swh05LSiWur3tm3-yssndBsLxIxFhy501kBtJI8jJ0,42509
13
+ mcp_v2.py,sha256=o94GJI7j6dLJDIA3R_1ZiQhjzQfMAEW3etdeZYnHOUc,80637
14
+ path_filtering.py,sha256=-oX16SYLWYwX9pcV1fu3vbVTIhY1GzFflT7J1E2tqPY,17122
15
+ pr_analysis.py,sha256=3-5L8_G5XupdJsl9RN73Lq-ejPoK11B3m_VzAx2fGG8,18413
16
+ search_lancedb.py,sha256=scG6HBUrsgIeSWFrGcLcGdhWv1qODOx4JOBMAlLDY_E,36793
17
+ server.py,sha256=Js3XDpV7ThAtj352StH6QdhHutf1D5qUkbR-8k3jO8g,31303
18
+ java_codebase_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
19
+ java_codebase_rag/_fdlimit.py,sha256=WroFdfSNbcriKok6q8znTf74dqlznxea_1Fd5bHl_3o,1930
20
+ java_codebase_rag/cli.py,sha256=a5IFLWAsh77mfLv1Z9OdpvLaYvj4i0KR3_kLtL-ans8,34156
21
+ java_codebase_rag/cli_format.py,sha256=arU7P9W6Fvm7X_wzR1wJ8EfyxK1rDP_ESEhdA0ub4Mo,2579
22
+ java_codebase_rag/cli_progress.py,sha256=9jCqEagYOXs32SYVA31_sOCrONvYy7cl1CrdBD2Pg44,3168
23
+ java_codebase_rag/config.py,sha256=Gn3LgxkTOtAvsL-3U2Xn7atOIhyOT2aGmY8SBBTLoQg,16975
24
+ java_codebase_rag/installer.py,sha256=DlBuVVWbHXgcjaQkuXUeT9fNdmk7XZefVT3zzw47k18,45965
25
+ java_codebase_rag/lance_optimize.py,sha256=MzACYlgwxmkJCK64qQLyIAdizSq5BARqaMYSZONlc1I,6069
26
+ java_codebase_rag/pipeline.py,sha256=UcgluFAW9Ghnas8u40x45bVic0mQv6rjzcliDKsnYJI,11936
27
+ java_codebase_rag/install_data/agents/explorer-rag-enhanced.md,sha256=APl9d-No12qZNZLjU7mwNRwxHIgnT3ZtQZiD4clWlyU,14413
28
+ java_codebase_rag/install_data/skills/explore-codebase/SKILL.md,sha256=pIM-Xdwq_fXkhhBJCdb-fA2nes5c_mMPcdUXb7Adyxo,12040
29
+ java_codebase_rag-0.6.1.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
30
+ user_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
31
+ user_rag/cli.py,sha256=TVcyfzwvmdYXJW6KrEYTKMHm7z2JSXMmz2uB-8kkjxY,5604
32
+ java_codebase_rag-0.6.1.dist-info/METADATA,sha256=aPiLbGD8xE-P3B_RI9gx7VuqrTd-VUriZ--ZPYNK02I,16934
33
+ java_codebase_rag-0.6.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
34
+ java_codebase_rag-0.6.1.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
35
+ java_codebase_rag-0.6.1.dist-info/top_level.txt,sha256=syQgi8XPBwY2ws_NZ1uRCxTf_s41NpshwEHNdcdnk3A,245
36
+ java_codebase_rag-0.6.1.dist-info/RECORD,,
@@ -8,7 +8,7 @@ java_codebase_rag
8
8
  java_index_flow_lancedb
9
9
  java_index_v1_common
10
10
  java_ontology
11
- kuzu_queries
11
+ ladybug_queries
12
12
  mcp_hints
13
13
  mcp_v2
14
14
  path_filtering
@@ -4,7 +4,7 @@ CocoIndex 1.0 app: index Java, Flyway SQL, and YAML into LanceDB.
4
4
  LanceDB requires a single primary key per table; each chunk gets a UUID `id`.
5
5
 
6
6
  Environment:
7
- JAVA_CODEBASE_RAG_INDEX_DIR — Lance tables + Kuzu + cocoindex state (default: ./.java-codebase-rag)
7
+ JAVA_CODEBASE_RAG_INDEX_DIR — Lance tables + LadybugDB + cocoindex state (default: ./.java-codebase-rag)
8
8
  JAVA_CODEBASE_RAG_SOURCE_ROOT — Java repo root for indexing (optional; else cocoindex cwd)
9
9
  SBERT_MODEL / SBERT_DEVICE — embedding (optional; YAML also supported via java-codebase-rag CLI)
10
10
 
@@ -36,6 +36,7 @@ from cocoindex.ops.text import RecursiveSplitter, detect_code_language
36
36
  from cocoindex.resources.file import PatternFilePathMatcher
37
37
 
38
38
  from java_codebase_rag.config import resolved_sbert_model_for_process_env
39
+ from java_codebase_rag.lance_optimize import LANCE_TABLE_NAMES
39
40
  from java_index_v1_common import (
40
41
  JAVA_CHUNK,
41
42
  SBERT_MODEL,
@@ -68,6 +69,20 @@ else:
68
69
 
69
70
  splitter = RecursiveSplitter()
70
71
 
72
+ # cocoindex 1.0.7 schedules ``table.optimize()`` (a LanceDB Rewrite/compaction
73
+ # transaction) as a *background* asyncio task after every
74
+ # ``num_transactions_before_optimize`` mutation batches (default 50). That
75
+ # background Rewrite races the concurrent ``table.delete()`` (Delete)
76
+ # transactions emitted by later batches, and LanceDB does not allow a Rewrite
77
+ # to commit concurrently with a Delete (upstream lancedb#1504), which floods
78
+ # stderr with "Retryable commit conflict ... preempted by concurrent
79
+ # transaction Delete". Setting this effectively to infinity disables the
80
+ # in-flight background optimize; the serialized post-flow optimize in
81
+ # ``lance_optimize.optimize_lance_tables`` then compacts the table with no
82
+ # concurrent writers. ``optimize()`` is pure maintenance (compact/prune/index);
83
+ # upsert/delete correctness via merge_insert does not depend on it.
84
+ _NUM_TXN_BEFORE_OPTIMIZE = 10**12
85
+
71
86
 
72
87
  @dataclass
73
88
  class JavaLanceChunk:
@@ -317,8 +332,9 @@ async def app_main() -> None:
317
332
  )
318
333
  java_table = await lancedb.mount_table_target(
319
334
  LANCE_DB,
320
- "javacodeindex_java_code",
335
+ LANCE_TABLE_NAMES[0],
321
336
  java_schema,
337
+ num_transactions_before_optimize=_NUM_TXN_BEFORE_OPTIMIZE,
322
338
  )
323
339
 
324
340
  sql_schema = await lancedb.TableSchema.from_class(
@@ -327,8 +343,9 @@ async def app_main() -> None:
327
343
  )
328
344
  sql_table = await lancedb.mount_table_target(
329
345
  LANCE_DB,
330
- "sqlschemaindex_sql_schema",
346
+ LANCE_TABLE_NAMES[1],
331
347
  sql_schema,
348
+ num_transactions_before_optimize=_NUM_TXN_BEFORE_OPTIMIZE,
332
349
  )
333
350
 
334
351
  yaml_schema = await lancedb.TableSchema.from_class(
@@ -337,8 +354,9 @@ async def app_main() -> None:
337
354
  )
338
355
  yaml_table = await lancedb.mount_table_target(
339
356
  LANCE_DB,
340
- "yamlconfigindex_yaml_config",
357
+ LANCE_TABLE_NAMES[2],
341
358
  yaml_schema,
359
+ num_transactions_before_optimize=_NUM_TXN_BEFORE_OPTIMIZE,
342
360
  )
343
361
 
344
362
  project_root = coco.use_context(PROJECT_ROOT)
java_ontology.py CHANGED
@@ -15,7 +15,10 @@ from ast_java import (
15
15
  _TYPE_ANN_TO_CAPABILITY,
16
16
  )
17
17
 
18
- # Roles: Spring stereotype values plus DTO from `infer_role_for_type`.
18
+ # Roles assignable by indexing: Spring stereotype values plus DTO. ``OTHER`` is the
19
+ # built-in inference fallback (ast_java.infer_role when nothing matches) and is
20
+ # deliberately excluded here — it is a read-side value (the mcp_v2 ``Role`` enum
21
+ # includes it) but not a role a user may set via @CodebaseRole / role_overrides.
19
22
  VALID_ROLES: frozenset[str] = frozenset((*ROLE_ANNOTATIONS.values(), "DTO"))
20
23
 
21
24
  VALID_CAPABILITIES: frozenset[str] = frozenset(
@@ -141,7 +144,7 @@ Cardinality = Literal["many_to_many", "many_to_one", "one_to_many", "one_to_one"
141
144
  @dataclass(frozen=True)
142
145
  class EdgeAttr:
143
146
  name: str
144
- kuzu_type: str
147
+ graph_type: str
145
148
  purpose: str
146
149
 
147
150