java-codebase-rag 0.6.1__tar.gz → 0.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. {java_codebase_rag-0.6.1/java_codebase_rag.egg-info → java_codebase_rag-0.6.2}/PKG-INFO +1 -1
  2. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/cli.py +18 -1
  3. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/config.py +54 -5
  4. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/installer.py +12 -2
  5. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2/java_codebase_rag.egg-info}/PKG-INFO +1 -1
  6. java_codebase_rag-0.6.2/java_codebase_rag.egg-info/entry_points.txt +3 -0
  7. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/pyproject.toml +2 -2
  8. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_config.py +149 -0
  9. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_installer.py +135 -0
  10. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_java_codebase_rag_cli.py +56 -0
  11. java_codebase_rag-0.6.1/java_codebase_rag.egg-info/entry_points.txt +0 -3
  12. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/LICENSE +0 -0
  13. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/README.md +0 -0
  14. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/ast_java.py +0 -0
  15. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/brownfield_events.py +0 -0
  16. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/build_ast_graph.py +0 -0
  17. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/chunk_heuristics.py +0 -0
  18. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/graph_enrich.py +0 -0
  19. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/index_common.py +0 -0
  20. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/__init__.py +0 -0
  21. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/_fdlimit.py +0 -0
  22. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/cli_format.py +0 -0
  23. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/cli_progress.py +0 -0
  24. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/install_data/agents/explorer-rag-enhanced.md +0 -0
  25. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md +0 -0
  26. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/lance_optimize.py +0 -0
  27. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag/pipeline.py +0 -0
  28. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/SOURCES.txt +0 -0
  29. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/dependency_links.txt +0 -0
  30. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/requires.txt +0 -0
  31. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/top_level.txt +0 -0
  32. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_index_flow_lancedb.py +0 -0
  33. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_index_v1_common.py +0 -0
  34. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/java_ontology.py +0 -0
  35. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/ladybug_queries.py +0 -0
  36. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/mcp_hints.py +0 -0
  37. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/mcp_v2.py +0 -0
  38. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/path_filtering.py +0 -0
  39. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/pr_analysis.py +0 -0
  40. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/search_lancedb.py +0 -0
  41. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/server.py +0 -0
  42. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/setup.cfg +0 -0
  43. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_agent_skills_static.py +0 -0
  44. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_assign_endpoint_client_extraction.py +0 -0
  45. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_ast_graph_build.py +0 -0
  46. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_ast_java_calls.py +0 -0
  47. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_ast_java_capabilities.py +0 -0
  48. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_bank_chat_brownfield_integration.py +0 -0
  49. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_brownfield_clients.py +0 -0
  50. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_brownfield_events.py +0 -0
  51. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_brownfield_overrides.py +0 -0
  52. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_brownfield_routes.py +0 -0
  53. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_call_edge_matching.py +0 -0
  54. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_call_edges_e2e.py +0 -0
  55. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_call_graph_receiver_resolution.py +0 -0
  56. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_call_graph_smoke_roundtrip.py +0 -0
  57. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_call_invariant.py +0 -0
  58. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_cli_progress_stdout_invariant.py +0 -0
  59. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_cli_quiet_parity.py +0 -0
  60. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_client_hint_recovery.py +0 -0
  61. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_client_node_extraction.py +0 -0
  62. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_client_role_rename.py +0 -0
  63. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_cross_service_resolution_flag.py +0 -0
  64. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_edge_navigation_doc.py +0 -0
  65. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_fd_limit.py +0 -0
  66. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_feign_not_exposer.py +0 -0
  67. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_graph_enrich.py +0 -0
  68. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_incremental_graph.py +0 -0
  69. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_installer_integration.py +0 -0
  70. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_ladybug_queries.py +0 -0
  71. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_lance_optimize.py +0 -0
  72. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_lancedb_e2e.py +0 -0
  73. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_mcp_hints.py +0 -0
  74. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_mcp_server_project_root.py +0 -0
  75. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_mcp_tools.py +0 -0
  76. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_mcp_v2.py +0 -0
  77. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_mcp_v2_compose.py +0 -0
  78. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_meta_chain_core.py +0 -0
  79. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_microservice_scope.py +0 -0
  80. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_outgoing_call_extraction.py +0 -0
  81. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_packaging_metadata.py +0 -0
  82. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_path_filtering.py +0 -0
  83. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_pr_analysis.py +0 -0
  84. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_resolve_routes_messaging_layer_c.py +0 -0
  85. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_route_extraction.py +0 -0
  86. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_schema_consistency.py +0 -0
  87. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_search_lancedb.py +0 -0
  88. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_search_lancedb_capability.py +0 -0
  89. {java_codebase_rag-0.6.1 → java_codebase_rag-0.6.2}/tests/test_string_value_atoms.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.6.1
3
+ Version: 0.6.2
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -6,6 +6,7 @@ from __future__ import annotations
6
6
  import argparse
7
7
  import asyncio
8
8
  import json
9
+ import os
9
10
  import pprint
10
11
  import shutil
11
12
  import sys
@@ -930,5 +931,21 @@ def main(argv: list[str] | None = None) -> int:
930
931
  return 2
931
932
 
932
933
 
934
+ def _console_script_main() -> None:
935
+ """Real CLI entry: terminate without interpreter finalization.
936
+
937
+ A pyarrow/lance worker thread (loaded via lancedb in lifecycle commands) can
938
+ outlive CPython finalization in a one-shot CLI subprocess and trip
939
+ ``PyGILState_Release`` (SIGABRT, exit -6). Flushing + ``os._exit`` skips that
940
+ racy teardown — the command has already done its work and emitted its result.
941
+ ``main()`` stays return-based so in-process test callers (``cli.main(...)``)
942
+ keep working.
943
+ """
944
+ rc = main()
945
+ sys.stdout.flush()
946
+ sys.stderr.flush()
947
+ os._exit(rc)
948
+
949
+
933
950
  if __name__ == "__main__":
934
- raise SystemExit(main())
951
+ _console_script_main()
@@ -52,14 +52,36 @@ _DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
52
52
  _UNRESOLVED_VAR_RE = re.compile(r"\$(\w+|\{[^}]+\})")
53
53
 
54
54
 
55
- def maybe_expand_embedding_model_path(value: str) -> str:
56
- """Expand ``~`` and ``$VAR`` when *value* is path-shaped.
55
+ def maybe_expand_embedding_model_path(
56
+ value: str,
57
+ *,
58
+ config_dir: Path | None = None,
59
+ source_root: Path | None = None,
60
+ source: SettingSource | None = None,
61
+ ) -> str:
62
+ """Expand ``~`` / ``$VAR`` for path-shaped values and resolve relatives to absolute.
57
63
 
58
64
  Path-shape: starts with ``/``, ``./``, ``../``, ``~``, or contains ``$``.
59
65
  Plain ``org/name`` (hub id) does not match and is passed through unchanged.
60
66
 
61
- Used for ``embedding.model`` after precedence resolution and for runtime
62
- ``SBERT_MODEL`` reads (e.g. MCP) so the string matches ``ResolvedOperatorConfig``.
67
+ Relative resolution mirrors :func:`_resolve_index_dir_path` so a committed
68
+ config is portable regardless of process CWD:
69
+
70
+ * YAML values (``source == "yaml"``) resolve against ``config_dir`` (the
71
+ directory holding ``.java-codebase-rag.yml``).
72
+ * CLI / env values resolve against ``source_root``.
73
+
74
+ Only a result that still starts with ``./`` or ``../`` *after* ``~`` /
75
+ ``$VAR`` expansion is re-based — so hub ids (``org/name``), absolute paths,
76
+ ``~/``-expanded paths, and an env var that already yielded an absolute path
77
+ are all left untouched.
78
+
79
+ When no base is supplied (the runtime ``SBERT_MODEL`` read via
80
+ :func:`resolved_sbert_model_for_process_env`), relative resolution is
81
+ skipped: the value is returned ``expandvars`` / ``expanduser``-expanded but
82
+ not re-based, matching the prior best-effort behavior. The main resolution
83
+ path (:func:`resolve_operator_config`) supplies a base, so the absolute path
84
+ it stores is what downstream loaders receive.
63
85
  """
64
86
  needs_expand = value.startswith(("/", "./", "../", "~")) or "$" in value
65
87
  if not needs_expand:
@@ -70,9 +92,31 @@ def maybe_expand_embedding_model_path(value: str) -> str:
70
92
  f"java-codebase-rag: path-shaped model string contains unresolved variable: {expanded}",
71
93
  file=sys.stderr,
72
94
  )
95
+ if expanded.startswith(("./", "../")):
96
+ base = _embedding_model_base(
97
+ source=source, config_dir=config_dir, source_root=source_root
98
+ )
99
+ if base is not None:
100
+ return str((base / expanded).resolve())
73
101
  return expanded
74
102
 
75
103
 
104
+ def _embedding_model_base(
105
+ *,
106
+ source: SettingSource | None,
107
+ config_dir: Path | None,
108
+ source_root: Path | None,
109
+ ) -> Path | None:
110
+ """Base directory for a relative ``embedding.model``.
111
+
112
+ Mirrors :func:`_resolve_index_dir_path`: YAML values anchor on the config
113
+ file's directory; CLI / env values anchor on the resolved ``source_root``.
114
+ """
115
+ if source == "yaml":
116
+ return config_dir
117
+ return source_root
118
+
119
+
76
120
  def resolved_sbert_model_for_process_env(import_time_default: str) -> str:
77
121
  """``SBERT_MODEL`` from the process environment, with the same expansion as YAML/CLI resolution.
78
122
 
@@ -387,7 +431,12 @@ def resolve_operator_config(
387
431
  yaml_path=("embedding", "model"),
388
432
  default=_DEFAULT_EMBEDDING_MODEL,
389
433
  )
390
- model = maybe_expand_embedding_model_path(model)
434
+ model = maybe_expand_embedding_model_path(
435
+ model,
436
+ config_dir=config_dir,
437
+ source_root=root,
438
+ source=model_src,
439
+ )
391
440
  device, device_src = _pick_optional_device(
392
441
  cli_val=cli_embedding_device,
393
442
  env_key="SBERT_DEVICE",
@@ -759,6 +759,11 @@ def generate_yaml_config(
759
759
  else:
760
760
  config["embedding"].pop("model", None)
761
761
 
762
+ # Seed cross-service resolution safe-by-default: only evidence-backed cross-service
763
+ # edges survive (see _is_brownfield_sourced in build_ast_graph). setdefault preserves
764
+ # an explicit user choice (e.g. `auto`) on re-run update.
765
+ config.setdefault("cross_service_resolution", "brownfield_only")
766
+
762
767
  # Keys NOT written by installer (preserved if present):
763
768
  # - source_root (config.py resolves from walk-up discovery)
764
769
  # - index_dir (config.py defaults to <source_root>/.java-codebase-rag)
@@ -1250,9 +1255,14 @@ def run_update(
1250
1255
  print("Skipping index update.")
1251
1256
  return EXIT_PARTIAL if has_artifact_failures else EXIT_SUCCESS
1252
1257
 
1253
- # Resolve configuration
1258
+ # Resolve configuration. Pass source_root=None so the YAML ``source_root``
1259
+ # field is honored exactly like increment/init/reprocess — passing the
1260
+ # discovered config dir here routes resolve_operator_config into the
1261
+ # explicit-override branch that SKIPS the YAML field, which made `update`
1262
+ # point cocoindex at the config dir (no Java) against the real index and
1263
+ # mass-delete it. Discovery still runs against the CLI's cwd.
1254
1264
  try:
1255
- cfg = resolve_operator_config(source_root=project_root, cli_index_dir=None)
1265
+ cfg = resolve_operator_config(source_root=None, cli_index_dir=None)
1256
1266
  index_dir = cfg.index_dir
1257
1267
  except Exception as e:
1258
1268
  print(f"\nWarning: Failed to resolve configuration: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.6.1
3
+ Version: 0.6.2
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ java-codebase-rag = java_codebase_rag.cli:_console_script_main
3
+ java-codebase-rag-mcp = server:main
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "java-codebase-rag"
7
- version = "0.6.1"
7
+ version = "0.6.2"
8
8
  description = "MCP server for semantic + structural search over Java codebases"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -53,7 +53,7 @@ Repository = "https://github.com/HumanBean17/java-codebase-rag"
53
53
  Issues = "https://github.com/HumanBean17/java-codebase-rag/issues"
54
54
 
55
55
  [project.scripts]
56
- java-codebase-rag = "java_codebase_rag.cli:main"
56
+ java-codebase-rag = "java_codebase_rag.cli:_console_script_main"
57
57
  java-codebase-rag-mcp = "server:main"
58
58
 
59
59
  [tool.setuptools]
@@ -292,6 +292,155 @@ class TestSourceRootPrecedence:
292
292
  assert result.index_dir == tmp_path / ".java-codebase-rag"
293
293
 
294
294
 
295
+ class TestEmbeddingModelRelativePath:
296
+ """``embedding.model`` relative paths resolve against a base directory.
297
+
298
+ Mirrors ``index_dir`` (see ``TestIndexDirRelativeToConfigDir``): a relative
299
+ model path in YAML resolves against the config file's directory; a relative
300
+ model path from CLI / env resolves against the resolved ``source_root``.
301
+ This makes a committed ``.java-codebase-rag.yml`` portable — the model loads
302
+ from the same absolute path for the CLI indexer and the MCP reader, instead
303
+ of resolving against an unreliable process CWD.
304
+ """
305
+
306
+ def test_yaml_relative_model_resolves_against_config_dir(self, tmp_path, monkeypatch):
307
+ """``embedding.model: ./models/minilm`` (YAML) -> <config_dir>/models/minilm."""
308
+ monkeypatch.delenv("SBERT_MODEL", raising=False)
309
+ monkeypatch.delenv("JAVA_CODEBASE_RAG_SOURCE_ROOT", raising=False)
310
+
311
+ config_dir = tmp_path / "ctx"
312
+ config_dir.mkdir()
313
+ (config_dir / YAML_CONFIG_FILENAMES[0]).write_text(
314
+ "embedding:\n model: ./models/minilm\n"
315
+ )
316
+ monkeypatch.chdir(config_dir)
317
+
318
+ result = resolve_operator_config(source_root=None)
319
+ assert result.embedding_model == str((config_dir / "models/minilm").resolve())
320
+ assert result.embedding_model_source == "yaml"
321
+
322
+ def test_yaml_double_dot_model_resolves_against_config_dir(self, tmp_path, monkeypatch):
323
+ """``embedding.model: ../shared/minilm`` (YAML) -> <config_dir>/../shared/minilm."""
324
+ monkeypatch.delenv("SBERT_MODEL", raising=False)
325
+ monkeypatch.delenv("JAVA_CODEBASE_RAG_SOURCE_ROOT", raising=False)
326
+
327
+ config_dir = tmp_path / "ctx"
328
+ config_dir.mkdir()
329
+ (config_dir / YAML_CONFIG_FILENAMES[0]).write_text(
330
+ "embedding:\n model: ../shared/minilm\n"
331
+ )
332
+ monkeypatch.chdir(config_dir)
333
+
334
+ result = resolve_operator_config(source_root=None)
335
+ assert result.embedding_model == str((tmp_path / "shared/minilm").resolve())
336
+
337
+ def test_env_relative_model_resolves_against_source_root(self, tmp_path, monkeypatch):
338
+ """``SBERT_MODEL=./models/minilm`` (env) -> <source_root>/models/minilm.
339
+
340
+ Config sets ``source_root: ../`` so source_root (tmp_path) differs from
341
+ config_dir (tmp_path/ctx); the env-sourced model must anchor on
342
+ source_root, not config_dir — matching ``index_dir``'s env base.
343
+ """
344
+ monkeypatch.delenv("JAVA_CODEBASE_RAG_SOURCE_ROOT", raising=False)
345
+
346
+ config_dir = tmp_path / "ctx"
347
+ config_dir.mkdir()
348
+ (config_dir / YAML_CONFIG_FILENAMES[0]).write_text("source_root: ../\n")
349
+ monkeypatch.chdir(config_dir)
350
+ monkeypatch.setenv("SBERT_MODEL", "./models/minilm")
351
+
352
+ result = resolve_operator_config(source_root=None)
353
+ assert result.source_root == tmp_path
354
+ assert result.embedding_model == str((tmp_path / "models/minilm").resolve())
355
+ assert result.embedding_model_source == "env"
356
+
357
+ def test_cli_relative_model_resolves_against_source_root(self, tmp_path, monkeypatch):
358
+ """``--embedding-model ./models/minilm`` (CLI) -> <source_root>/models/minilm."""
359
+ monkeypatch.delenv("SBERT_MODEL", raising=False)
360
+
361
+ result = resolve_operator_config(
362
+ source_root=tmp_path, cli_embedding_model="./models/minilm"
363
+ )
364
+ assert result.embedding_model == str((tmp_path / "models/minilm").resolve())
365
+ assert result.embedding_model_source == "cli"
366
+
367
+
368
+ class TestMaybeExpandEmbeddingModelPath:
369
+ """Unit tests pinning the expansion/resolution helper's contract."""
370
+
371
+ def test_no_base_leaves_relative_unchanged(self):
372
+ """Without a base dir, relative paths are NOT resolved.
373
+
374
+ ``resolved_sbert_model_for_process_env`` (the MCP runtime read of
375
+ ``SBERT_MODEL``) calls this with no base; it must stay a no-op for
376
+ relative values so MCP behavior is unchanged there. The main resolution
377
+ path supplies a base, so the absolute path it produces is what reaches
378
+ the lazy loader in practice.
379
+ """
380
+ from java_codebase_rag.config import maybe_expand_embedding_model_path
381
+
382
+ assert maybe_expand_embedding_model_path("./models/minilm") == "./models/minilm"
383
+ assert maybe_expand_embedding_model_path("../shared/minilm") == "../shared/minilm"
384
+
385
+ def test_hub_id_passthrough(self):
386
+ from java_codebase_rag.config import maybe_expand_embedding_model_path
387
+
388
+ assert maybe_expand_embedding_model_path("org/name") == "org/name"
389
+ assert (
390
+ maybe_expand_embedding_model_path("sentence-transformers/all-MiniLM-L6-v2")
391
+ == "sentence-transformers/all-MiniLM-L6-v2"
392
+ )
393
+
394
+ def test_absolute_passthrough(self):
395
+ from java_codebase_rag.config import maybe_expand_embedding_model_path
396
+
397
+ assert maybe_expand_embedding_model_path("/opt/models/minilm") == "/opt/models/minilm"
398
+
399
+ def test_env_var_expansion_preserved(self, monkeypatch):
400
+ from java_codebase_rag.config import maybe_expand_embedding_model_path
401
+
402
+ monkeypatch.setenv("MODEL_DIR", "/opt/models")
403
+ assert maybe_expand_embedding_model_path("${MODEL_DIR}/minilm") == "/opt/models/minilm"
404
+ assert maybe_expand_embedding_model_path("$MODEL_DIR/minilm") == "/opt/models/minilm"
405
+
406
+ def test_tilde_expansion_preserved(self, monkeypatch):
407
+ from java_codebase_rag.config import maybe_expand_embedding_model_path
408
+
409
+ monkeypatch.setenv("HOME", "/home/user")
410
+ assert maybe_expand_embedding_model_path("~/models/minilm") == "/home/user/models/minilm"
411
+
412
+ def test_yaml_base_resolves_relative(self, tmp_path):
413
+ from java_codebase_rag.config import maybe_expand_embedding_model_path
414
+
415
+ out = maybe_expand_embedding_model_path(
416
+ "./models/minilm", config_dir=tmp_path, source="yaml"
417
+ )
418
+ assert out == str((tmp_path / "models/minilm").resolve())
419
+
420
+ def test_cli_env_base_is_source_root(self, tmp_path):
421
+ from java_codebase_rag.config import maybe_expand_embedding_model_path
422
+
423
+ for src in ("cli", "env"):
424
+ out = maybe_expand_embedding_model_path(
425
+ "./models/minilm", source_root=tmp_path, source=src
426
+ )
427
+ assert out == str((tmp_path / "models/minilm").resolve())
428
+
429
+ def test_absolute_after_env_var_not_rebased(self, tmp_path, monkeypatch):
430
+ """An env var that already yields an absolute path is left absolute.
431
+
432
+ Guards the ``${HUB_ID}`` edge: only ``./`` / ``../``-prefixed results are
433
+ re-based, so a var holding ``org/name`` or an absolute path is untouched.
434
+ """
435
+ from java_codebase_rag.config import maybe_expand_embedding_model_path
436
+
437
+ monkeypatch.setenv("MODEL_DIR", "/opt/models")
438
+ out = maybe_expand_embedding_model_path(
439
+ "${MODEL_DIR}/minilm", config_dir=tmp_path, source="yaml"
440
+ )
441
+ assert out == "/opt/models/minilm"
442
+
443
+
295
444
  def test_cocoindex_subprocess_env_defaults_uses_real_inflight_env_var() -> None:
296
445
  """The throttle must use CocoIndex's REAL env var name.
297
446
 
@@ -696,6 +696,45 @@ class TestHandleRerun:
696
696
  assert result is None
697
697
 
698
698
 
699
+ class TestGenerateYamlConfigCrossService:
700
+ """cross_service_resolution is seeded safe-by-default; an explicit choice is never overridden."""
701
+
702
+ def test_fresh_install_seeds_brownfield_only(self):
703
+ import yaml
704
+ from java_codebase_rag.installer import generate_yaml_config
705
+
706
+ out = generate_yaml_config(
707
+ Path("."), model="auto", microservice_roots=None, existing_yaml=None
708
+ )
709
+ assert yaml.safe_load(out)["cross_service_resolution"] == "brownfield_only"
710
+
711
+ def test_explicit_auto_is_preserved_on_rerun(self):
712
+ import yaml
713
+ from java_codebase_rag.installer import generate_yaml_config
714
+
715
+ out = generate_yaml_config(
716
+ Path("."),
717
+ model="auto",
718
+ microservice_roots=None,
719
+ existing_yaml={"cross_service_resolution": "auto"},
720
+ )
721
+ assert yaml.safe_load(out)["cross_service_resolution"] == "auto"
722
+
723
+ def test_absent_key_seeded_and_existing_keys_preserved_on_rerun(self):
724
+ import yaml
725
+ from java_codebase_rag.installer import generate_yaml_config
726
+
727
+ out = generate_yaml_config(
728
+ Path("."),
729
+ model="auto",
730
+ microservice_roots=None,
731
+ existing_yaml={"brownfield_overrides": {"svc-a": {}}},
732
+ )
733
+ config = yaml.safe_load(out)
734
+ assert config["cross_service_resolution"] == "brownfield_only"
735
+ assert config["brownfield_overrides"] == {"svc-a": {}}
736
+
737
+
699
738
  class TestInstallIntegration:
700
739
  """Integration tests for install command."""
701
740
 
@@ -1161,6 +1200,102 @@ class TestRunUpdate:
1161
1200
  # Should succeed (no hosts is fatal, but no index is just a warning)
1162
1201
  assert result == 0
1163
1202
 
1203
+ def test_update_honors_yaml_source_root_for_nested_config_dir(
1204
+ self, tmp_path, monkeypatch
1205
+ ):
1206
+ """run_update must resolve source_root exactly like increment.
1207
+
1208
+ Regression for the "update mass-deletes the index" bug. run_update passed
1209
+ the discovered config dir as an explicit source_root, routing
1210
+ resolve_operator_config into the branch that SKIPS the YAML source_root
1211
+ field. With a config living in my-project-context/ next to
1212
+ ``source_root: ../``, update then indexed my-project-context/ (no Java)
1213
+ against the real index one level up — so cocoindex saw every indexed
1214
+ file as removed and deleted it (the "_deletions keeps growing" symptom
1215
+ after the run was ctrl+C'd mid-delete).
1216
+
1217
+ After the fix, the env handed to cocoindex carries the YAML-resolved
1218
+ source_root (one level above the config dir), NOT the config dir itself.
1219
+ """
1220
+ import json
1221
+ import shutil
1222
+ from subprocess import CompletedProcess
1223
+ from java_codebase_rag.installer import run_update
1224
+
1225
+ # Layout mirroring the reported bug:
1226
+ # tmp_path/
1227
+ # my-project-context/ <- cwd; config lives here
1228
+ # .java-codebase-rag.yml <- source_root: ../ ; index_dir: ../.java-codebase-rag
1229
+ # .java-codebase-rag/ <- real index, one level above the config
1230
+ # code_graph.lbug <- marker so "index exists"
1231
+ config_dir = tmp_path / "my-project-context"
1232
+ config_dir.mkdir()
1233
+ (config_dir / ".java-codebase-rag.yml").write_text(
1234
+ "source_root: ../\nindex_dir: ../.java-codebase-rag\n",
1235
+ encoding="utf-8",
1236
+ )
1237
+ index_dir = tmp_path / ".java-codebase-rag"
1238
+ index_dir.mkdir()
1239
+ (index_dir / "code_graph.lbug").write_text("", encoding="utf-8")
1240
+
1241
+ # A configured host so run_update reaches the index phase.
1242
+ (config_dir / ".mcp.json").write_text(
1243
+ json.dumps(
1244
+ {
1245
+ "mcpServers": {
1246
+ "java-codebase-rag": {
1247
+ "command": "/usr/local/bin/java-codebase-rag-mcp",
1248
+ "type": "stdio",
1249
+ }
1250
+ }
1251
+ }
1252
+ )
1253
+ )
1254
+ monkeypatch.setattr(shutil, "which", lambda x: "/usr/local/bin/java-codebase-rag-mcp")
1255
+ monkeypatch.setattr(
1256
+ "java_codebase_rag.installer._read_package_artifact",
1257
+ lambda path: "PACKAGE CONTENT",
1258
+ )
1259
+
1260
+ # The CLI invokes update from the config dir, so the process cwd is the
1261
+ # config dir — resolve_operator_config(source_root=None) discovers the
1262
+ # config via Path.cwd(), exactly as increment/init/reprocess do.
1263
+ # delenv: resolve_operator_config honors JAVA_CODEBASE_RAG_SOURCE_ROOT /
1264
+ # _INDEX_DIR from os.environ first, and apply_to_os_environ() writes them
1265
+ # unscoped — a sibling test can leak a value that overrides discovery.
1266
+ monkeypatch.delenv("JAVA_CODEBASE_RAG_SOURCE_ROOT", raising=False)
1267
+ monkeypatch.delenv("JAVA_CODEBASE_RAG_INDEX_DIR", raising=False)
1268
+ monkeypatch.chdir(config_dir)
1269
+
1270
+ # Capture the subprocess env run_update hands cocoindex: it carries the
1271
+ # resolved JAVA_CODEBASE_RAG_SOURCE_ROOT / _INDEX_DIR.
1272
+ captured: dict = {}
1273
+
1274
+ def capture_coco(env, *, full_reprocess, quiet, verbose=True, lance_project_root=None):
1275
+ captured["env"] = env
1276
+ return CompletedProcess(["cocoindex"], 0)
1277
+
1278
+ def noop_graph(**kwargs):
1279
+ return CompletedProcess(["build_ast_graph", "--incremental"], 0)
1280
+
1281
+ monkeypatch.setattr("java_codebase_rag.pipeline.run_cocoindex_update", capture_coco)
1282
+ monkeypatch.setattr("java_codebase_rag.pipeline.run_incremental_graph", noop_graph)
1283
+
1284
+ result = run_update(force=False, dry_run=False, cwd=config_dir)
1285
+
1286
+ # The index phase must have run (env captured), not been skipped.
1287
+ assert "env" in captured, "run_update did not reach the cocoindex update step"
1288
+ env = captured["env"]
1289
+ # source_root: ../ must resolve ONE level above the config dir (the real
1290
+ # Java tree), NOT the config dir itself.
1291
+ assert env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] == str(tmp_path.resolve())
1292
+ assert env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] != str(config_dir.resolve())
1293
+ # index_dir lands on the real index one level above the config dir.
1294
+ assert env["JAVA_CODEBASE_RAG_INDEX_DIR"] == str(index_dir.resolve())
1295
+ # result is independent of the source_root assertion (artifact refresh
1296
+ # may report partial failure unrelated to this regression); tolerate it.
1297
+ assert result in (0, 1)
1298
+
1164
1299
  def test_install_then_update_cycle(self, tmp_path, monkeypatch):
1165
1300
  """install then update: artifacts refreshed, no errors"""
1166
1301
  from java_codebase_rag.installer import run_install, run_update
@@ -1174,3 +1174,59 @@ def test_mcp_server_yaml_config_precedence_env_over_yaml(
1174
1174
 
1175
1175
  server_mod.resolve_operator_config.assert_called_once()
1176
1176
  assert server_mod.resolve_operator_config.call_args.kwargs["source_root"] == server_mod._project_root()
1177
+
1178
+
1179
+ def test_console_script_main_propagates_rc_via_os_exit_after_flush(
1180
+ monkeypatch: pytest.MonkeyPatch,
1181
+ ) -> None:
1182
+ """The installed CLI entry must flush streams and os._exit(rc) rather than
1183
+ return into normal interpreter finalization.
1184
+
1185
+ A pyarrow/lance worker thread can outlive CPython finalization in a one-shot
1186
+ CLI subprocess and trip ``PyGILState_Release`` (SIGABRT, exit -6). Routing the
1187
+ real entry through ``_console_script_main`` skips that racy teardown; ``main()``
1188
+ itself stays return-based so in-process test callers keep working.
1189
+ """
1190
+ import os as _os
1191
+
1192
+ from java_codebase_rag import cli as cli
1193
+
1194
+ class _StubStream:
1195
+ def __init__(self) -> None:
1196
+ self.flushed = False
1197
+
1198
+ def flush(self) -> None:
1199
+ self.flushed = True
1200
+
1201
+ for fake_rc in (0, 2):
1202
+ out = _StubStream()
1203
+ err = _StubStream()
1204
+ snapshot: dict[str, object] = {}
1205
+
1206
+ monkeypatch.setattr(cli, "main", lambda rc=fake_rc: rc)
1207
+ monkeypatch.setattr(sys, "stdout", out)
1208
+ monkeypatch.setattr(sys, "stderr", err)
1209
+
1210
+ def fake_exit(code: int) -> None:
1211
+ snapshot["exit_code"] = code
1212
+ snapshot["out_flushed_before_exit"] = out.flushed
1213
+ snapshot["err_flushed_before_exit"] = err.flushed
1214
+
1215
+ monkeypatch.setattr(_os, "_exit", fake_exit)
1216
+
1217
+ result = cli._console_script_main()
1218
+
1219
+ assert snapshot["exit_code"] == fake_rc, fake_rc
1220
+ assert snapshot["out_flushed_before_exit"] is True, fake_rc
1221
+ assert snapshot["err_flushed_before_exit"] is True, fake_rc
1222
+ assert result is None, fake_rc
1223
+
1224
+
1225
+ def test_console_script_entry_point_routes_through_wrapper() -> None:
1226
+ """``[project.scripts]`` must point ``java-codebase-rag`` at
1227
+ ``_console_script_main`` (not ``main``) so the deterministic-exit path is the
1228
+ one the installed CLI actually uses."""
1229
+ pyproject = (Path(__file__).resolve().parent.parent / "pyproject.toml").read_text(encoding="utf-8")
1230
+ assert 'java-codebase-rag = "java_codebase_rag.cli:_console_script_main"' in pyproject
1231
+ assert 'java-codebase-rag = "java-codebase-rag:main"' not in pyproject
1232
+ assert 'java-codebase-rag = "java_codebase_rag.cli:main"' not in pyproject
@@ -1,3 +0,0 @@
1
- [console_scripts]
2
- java-codebase-rag = java_codebase_rag.cli:main
3
- java-codebase-rag-mcp = server:main