java-codebase-rag 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1339 @@
1
+ """Interactive installer module for java-codebase-rag.
2
+
3
+ This module provides the `install` subcommand that walks users through:
4
+ 1. Java source detection
5
+ 2. Embedding model selection
6
+ 3. Agent host selection
7
+ 4. Scope selection (project/user)
8
+ 5. Artifact deployment (MCP config, skill, agent)
9
+ 6. YAML config generation and indexing
10
+ """
11
+
12
+ import json
13
+ import os
14
+ import shutil
15
+ import sys
16
+ import tempfile
17
+ from dataclasses import dataclass
18
+ from pathlib import Path
19
+ from typing import Literal, NamedTuple
20
+
21
+ import yaml
22
+
23
# Installation scope accepted throughout the installer.
Scope = Literal["project", "user"]

# Key of this tool's entry inside an MCP config's "mcpServers" object.
_MCP_SERVER_NAME = "java-codebase-rag"

# Process exit codes used by the installer.
EXIT_SUCCESS, EXIT_PARTIAL, EXIT_FATAL = 0, 1, 2
32
+
33
+
34
+ class ArtifactResult(NamedTuple):
35
+ """Result of deploying a single artifact."""
36
+
37
+ path: Path
38
+ success: bool
39
+ error: str | None
40
+
41
+
42
@dataclass(frozen=True)
class HostConfig:
    """Immutable description of where one agent host keeps its files.

    ``mcp_project`` is resolved against the working directory and
    ``mcp_user`` against the user's home directory.
    """

    name: str  # "claude-code", "qwen-code", "gigacode"
    dir_name: str  # ".claude", ".qwen", ".gigacode"
    mcp_project: str  # ".mcp.json", ".qwen/settings.json", ".gigacode/settings.json"
    mcp_user: str  # ".claude.json", ".qwen/settings.json", ".gigacode/settings.json"

    def scope_path(self, scope: Scope, cwd: Path) -> Path:
        """Return the host directory under ``cwd`` (project) or the home directory (user)."""
        base = cwd if scope == "project" else Path.home()
        return base / self.dir_name

    def mcp_config_path(self, scope: Scope, cwd: Path) -> Path:
        """Return the full path of the MCP config file for ``scope``."""
        return cwd / self.mcp_project if scope == "project" else Path.home() / self.mcp_user

    def skills_dir(self, scope: Scope, cwd: Path) -> Path:
        """Return the ``skills`` directory inside the host directory."""
        return self.scope_path(scope, cwd).joinpath("skills")

    def agents_dir(self, scope: Scope, cwd: Path) -> Path:
        """Return the ``agents`` directory inside the host directory."""
        return self.scope_path(scope, cwd).joinpath("agents")
72
+
73
+
74
# Registry of supported agent hosts, keyed by each host's ``name``.
HOSTS: dict[str, HostConfig] = {
    cfg.name: cfg
    for cfg in (
        HostConfig(
            name="claude-code",
            dir_name=".claude",
            mcp_project=".mcp.json",
            mcp_user=".claude.json",
        ),
        HostConfig(
            name="qwen-code",
            dir_name=".qwen",
            mcp_project=".qwen/settings.json",
            mcp_user=".qwen/settings.json",
        ),
        HostConfig(
            name="gigacode",
            dir_name=".gigacode",
            mcp_project=".gigacode/settings.json",
            mcp_user=".gigacode/settings.json",
        ),
    )
}
94
+
95
+
96
+ def prompt(
97
+ prompt_type: str,
98
+ message: str,
99
+ *,
100
+ choices=None,
101
+ default=None,
102
+ ) -> list[str] | str | bool:
103
+ """Interactive prompt that dispatches to questionary on TTY, returns default otherwise.
104
+
105
+ Args:
106
+ prompt_type: Type of prompt ("checkbox", "select", "text", "confirm")
107
+ message: Prompt message to display
108
+ choices: List of choices (for checkbox/select)
109
+ default: Default value to return when not interactive
110
+
111
+ Returns:
112
+ - checkbox: list[str] of selected values
113
+ - select: str of selected value
114
+ - text: str of entered text
115
+ - confirm: bool (True/False)
116
+ """
117
+ if not sys.stdin.isatty():
118
+ return default
119
+
120
+ # Lazy import questionary only when needed (TTY)
121
+ import questionary
122
+ from prompt_toolkit.styles import Style
123
+
124
+ # Strip default ANSI colors — rely on ●/○ indicators only, no fg/bg highlights
125
+ # noinherit prevents prompt_toolkit from merging in questionary's default fg colors
126
+ no_color_style = Style(
127
+ [
128
+ ("highlighted", "noinherit"),
129
+ ("selected", "noinherit"),
130
+ ("pointer", "noinherit bold"),
131
+ ]
132
+ )
133
+
134
+ try:
135
+ if prompt_type == "checkbox":
136
+ return questionary.checkbox(message, choices=choices, style=no_color_style).ask()
137
+ elif prompt_type == "select":
138
+ return questionary.select(message, choices=choices, style=no_color_style).ask()
139
+ elif prompt_type == "text":
140
+ return questionary.text(message, default=default, style=no_color_style).ask()
141
+ elif prompt_type == "confirm":
142
+ return questionary.confirm(message, style=no_color_style).ask()
143
+ else:
144
+ raise ValueError(f"Unknown prompt_type: {prompt_type}")
145
+ except KeyboardInterrupt:
146
+ # User Ctrl+C is a clean abort, not a traceback
147
+ raise SystemExit(2)
148
+
149
+
150
def detect_java_directories(source_root: Path) -> list[Path]:
    """Return Maven/Gradle module roots. If root has build file, returns [Path('.')].

    Checks if source_root itself contains a build file (pom.xml, build.gradle,
    build.gradle.kts).
    If YES: returns [Path(".")] — the entire project is indexed as one unit.
    If NO: scans immediate children for directories containing build files.

    Args:
        source_root: Root directory to scan for Java projects

    Returns:
        List of detected module roots (relative to source_root), sorted so the
        result does not depend on directory enumeration order

    Raises:
        SystemExit(2): If no build files found in source_root or immediate children
    """
    build_files = ("pom.xml", "build.gradle", "build.gradle.kts")

    def has_build_file(directory: Path) -> bool:
        return any((directory / name).is_file() for name in build_files)

    if has_build_file(source_root):
        return [Path(".")]

    # iterdir() yields entries in arbitrary order; sort for a stable result.
    detected = sorted(
        Path(child.name)
        for child in source_root.iterdir()
        if child.is_dir() and has_build_file(child)
    )

    if not detected:
        print(f"Error: No Java build files (pom.xml, build.gradle, build.gradle.kts) found in {source_root} or its immediate children.")
        raise SystemExit(2)

    return detected
189
+
190
+
191
def confirm_source_root(cwd: Path, *, non_interactive: bool) -> Path:
    """Offer ``cwd`` as the source root and let the user accept or replace it.

    Args:
        cwd: Current working directory (default source root)
        non_interactive: If True, return cwd without prompting

    Returns:
        ``cwd`` itself when accepted, otherwise the resolved directory the
        user entered
    """
    if non_interactive:
        return cwd

    default_text = str(cwd)
    question = f"Source root [{cwd}]:"

    # Keep asking until the answer is the default or an existing directory.
    while True:
        answer = prompt("text", question, default=default_text)
        if not answer or answer == default_text:
            return cwd

        # Expand $VARS first, then ~
        candidate = Path(os.path.expanduser(os.path.expandvars(answer.strip())))
        if candidate.is_dir():
            return candidate.resolve()

        print(f"Error: Path {candidate} does not exist or is not a directory.")
        question = "Source root:"
226
+
227
+
228
+ def resolve_model(model_input: str | None, *, non_interactive: bool) -> str:
229
+ """Resolve embedding model path or 'auto'.
230
+
231
+ Args:
232
+ model_input: User-provided model path or None
233
+ non_interactive: If True, return "auto" without prompting
234
+
235
+ Returns:
236
+ Resolved model string ("auto" or a valid path)
237
+ """
238
+ if model_input:
239
+ # Expand ~ and $HOME
240
+ expanded = os.path.expandvars(model_input.strip())
241
+ expanded = os.path.expanduser(expanded)
242
+ model_path = Path(expanded)
243
+
244
+ if model_path.exists():
245
+ return str(model_path)
246
+
247
+ # Path not found
248
+ if non_interactive:
249
+ print(f"Warning: Model path {model_input} not found, falling back to 'auto'.")
250
+ return "auto"
251
+
252
+ confirmed = prompt(
253
+ "confirm",
254
+ f"Model path {model_input} not found. Use 'auto' instead?",
255
+ )
256
+ if confirmed:
257
+ return "auto"
258
+ else:
259
+ # Re-prompt for model path
260
+ new_input = prompt("text", "Enter model path (or 'auto'):", default="auto")
261
+ if new_input == "auto" or not new_input:
262
+ return "auto"
263
+ return resolve_model(new_input, non_interactive=non_interactive)
264
+
265
+ if non_interactive:
266
+ return "auto"
267
+
268
+ # Interactive with no CLI input: prompt for model
269
+ user_input = prompt("text", "Embedding model path (or 'auto'):", default="auto")
270
+ if user_input == "auto" or not user_input:
271
+ return "auto"
272
+ return resolve_model(user_input, non_interactive=False)
273
+
274
+
275
def select_hosts(*, non_interactive: bool, cli_agents: list[str] | None) -> list[HostConfig]:
    """Select agent hosts from checkbox or CLI flags. Returns list of selected HostConfig.

    Args:
        non_interactive: If True, use CLI flags only
        cli_agents: List of agent names from CLI flags; repeated names are
            collapsed so each host is configured once

    Returns:
        List of selected HostConfig objects, in selection order, without duplicates

    Raises:
        SystemExit(2): If no agents selected or invalid agent name
    """
    if cli_agents:
        # Validate agent names
        for agent in cli_agents:
            if agent not in HOSTS:
                print(f"Error: Unknown agent '{agent}'. Valid agents: {', '.join(HOSTS.keys())}")
                raise SystemExit(2)
        # dict.fromkeys de-duplicates while keeping first-seen order, so a
        # repeated flag does not deploy to the same host twice.
        return [HOSTS[agent] for agent in dict.fromkeys(cli_agents)]

    if non_interactive:
        print("Error: --agent flag is required in non-interactive mode.")
        print(f"Valid agents: {', '.join(HOSTS.keys())}")
        raise SystemExit(2)

    # Interactive: show checkbox with claude-code pre-selected (most common)
    choices = [
        {"name": name, "value": name, "checked": (name == "claude-code")}
        for name in HOSTS
    ]

    while True:
        print("Note: You can select multiple agent hosts with Space. Navigate with arrow keys.")
        selected = prompt("checkbox", "Select agent hosts to configure:", choices=choices)
        if selected:
            break

        # Nothing selected: offer to re-select, otherwise abort
        retry = prompt(
            "confirm",
            "At least one agent host is required. Re-select hosts?",
        )
        if not retry:
            raise SystemExit(2)

    # Show confirmation of what will be deployed
    print(f"Will deploy to: {', '.join(selected)}")
    return [HOSTS[name] for name in selected]
326
+
327
+
328
def select_scope(*, non_interactive: bool, cli_scope: str | None) -> Scope:
    """Decide between the 'project' and 'user' installation scope.

    Args:
        non_interactive: If True, return "project" without prompting
        cli_scope: Scope from CLI flag; takes precedence when given

    Returns:
        Selected scope ("project" or "user")

    Raises:
        SystemExit(2): If ``cli_scope`` is neither "project" nor "user"
    """
    if cli_scope:
        if cli_scope in ("project", "user"):
            return cli_scope  # type: ignore
        print(f"Error: Invalid scope '{cli_scope}'. Must be 'project' or 'user'.")
        raise SystemExit(2)

    if non_interactive:
        return "project"

    # Interactive: explain the options, then ask
    print("Note: 'project' scope stores configs in the project directory.")
    print("      'user' scope stores configs in your home directory.")
    answer = prompt(
        "select",
        "Select installation scope:",
        choices=["project", "user"],
    )

    if answer:
        print(f"Selected scope: {answer}")
        return answer  # type: ignore

    # No answer: fall back to project scope
    return "project"
361
+
362
+
363
def resolve_mcp_command(*, non_interactive: bool) -> str:
    """Locate the java-codebase-rag-mcp executable for use as the MCP 'command'.

    The executable is looked up on PATH first; in interactive mode the user
    is asked for its location when the lookup fails.

    Args:
        non_interactive: If True, exit with code 2 when not found

    Returns:
        Path to java-codebase-rag-mcp (as found on PATH, or the resolved
        path the user entered)

    Raises:
        SystemExit(2): If not found and non-interactive, or user aborts
    """
    located = shutil.which("java-codebase-rag-mcp")
    if located:
        return located

    # Not found on PATH
    if non_interactive:
        print("Error: `java-codebase-rag-mcp` not found on PATH.")
        print("Ensure `java-codebase-rag` is installed, then re-run with `--non-interactive --agent <host>`.")
        raise SystemExit(2)

    print("Warning: `java-codebase-rag-mcp` not found on PATH.")

    # Ask until the user aborts or names an existing file
    while True:
        answer = prompt(
            "text",
            "Enter the full path to java-codebase-rag-mcp (or 'abort'):",
            default="abort",
        )
        if answer == "abort" or not answer:
            raise SystemExit(2)

        # Expand $VARS first, then ~
        candidate = Path(os.path.expanduser(os.path.expandvars(answer.strip())))
        if candidate.is_file():
            break
        print(f"Error: Path {candidate} does not exist or is not a file.")

    # A non-executable file is accepted, but the user is warned
    if not os.access(candidate, os.X_OK):
        print(f"Warning: {candidate} is not executable. This may cause issues.")

    return str(candidate.resolve())
422
+
423
+
424
def merge_mcp_config(config_path: Path, host: HostConfig, *, mcp_command: str) -> bool:
    """Read, merge, write MCP config. Returns True if entry was added/updated.

    Other keys in the file and other entries under "mcpServers" are kept.
    The file is rewritten through a temporary file in the same directory so
    a failed write does not leave a truncated config behind.

    Args:
        config_path: Path to MCP config file
        host: HostConfig for the agent host (not used by the merge itself)
        mcp_command: Resolved absolute path to java-codebase-rag-mcp

    Returns:
        True if entry was added/updated, False if no change needed

    Raises:
        ValueError: If the existing config is not valid JSON, is not a JSON
            object, or has a "mcpServers" value that is not an object
        RuntimeError: If the updated config cannot be written
    """
    # Read existing config (or start with empty dict)
    if config_path.is_file():
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                config = json.load(f)
        except json.JSONDecodeError as e:
            raise ValueError(f"Failed to parse {config_path}: {e}") from e
        if not isinstance(config, dict):
            raise ValueError(f"Failed to parse {config_path}: top-level JSON value must be an object")
    else:
        config = {}

    # Ensure mcpServers exists and is a mapping we can merge into
    servers = config.setdefault("mcpServers", {})
    if not isinstance(servers, dict):
        raise ValueError(f"Failed to parse {config_path}: 'mcpServers' must be an object")

    new_entry = {"command": mcp_command, "type": "stdio"}

    # Nothing to do when the entry is already exactly what we would write
    if servers.get(_MCP_SERVER_NAME) == new_entry:
        return False

    servers[_MCP_SERVER_NAME] = new_entry

    # Write atomically (write to tmp, then replace)
    tmp_name = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            dir=config_path.parent,
            prefix=f".{config_path.name}.",
            delete=False,
        ) as tmp:
            # Record the name first so cleanup works even if the dump fails
            tmp_name = tmp.name
            json.dump(config, tmp, indent=2)
            tmp.flush()
            os.fsync(tmp.fileno())

        # os.replace overwrites an existing destination on every platform;
        # os.rename raises on Windows when the config file already exists.
        os.replace(tmp_name, config_path)
        return True
    except OSError as e:
        if tmp_name:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
        raise RuntimeError(f"Failed to write {config_path}: {e}") from e
487
+
488
+
489
def _read_package_artifact(relative_path: str) -> str:
    """Return the UTF-8 text of a file shipped under ``java_codebase_rag.install_data``.

    Args:
        relative_path: Path of the artifact relative to the install_data package
    """
    import importlib.resources

    data_root = importlib.resources.files("java_codebase_rag.install_data")
    artifact = data_root.joinpath(relative_path)
    return artifact.read_text(encoding="utf-8")
495
+
496
+
497
def deploy_artifacts(
    hosts: list[HostConfig],
    scope: Scope,
    cwd: Path,
    *,
    non_interactive: bool,
    mcp_command: str,
) -> list[ArtifactResult]:
    """Deploy the MCP config, the skill file and the agent file to every host.

    Args:
        hosts: List of HostConfig objects to deploy to
        scope: Installation scope ("project" or "user")
        cwd: Current working directory
        non_interactive: If True, skip overwrite prompts
        mcp_command: Resolved absolute path to java-codebase-rag-mcp

    Returns:
        One ArtifactResult per artifact, ordered per host as
        MCP config, skill, agent
    """
    outcomes: list[ArtifactResult] = []

    for host in hosts:
        # MCP config comes first for each host
        outcomes.append(
            _deploy_mcp_config(
                host.mcp_config_path(scope, cwd),
                host,
                non_interactive=non_interactive,
                mcp_command=mcp_command,
            )
        )

        # (destination, path inside package data, artifact type)
        file_artifacts = (
            (
                host.skills_dir(scope, cwd) / "explore-codebase" / "SKILL.md",
                "skills/explore-codebase/SKILL.md",
                "skill",
            ),
            (
                host.agents_dir(scope, cwd) / "explorer-rag-enhanced.md",
                "agents/explorer-rag-enhanced.md",
                "agent",
            ),
        )
        for destination, package_path, kind in file_artifacts:
            outcomes.append(
                _deploy_file(
                    destination,
                    package_path,
                    artifact_type=kind,
                    non_interactive=non_interactive,
                )
            )

    return outcomes
553
+
554
+
555
def _deploy_mcp_config(
    config_path: Path,
    host: HostConfig,
    *,
    non_interactive: bool,
    mcp_command: str,
) -> ArtifactResult:
    """Create or update the MCP config file and report the outcome.

    Any exception raised while preparing the directory or merging the
    config is turned into a failed ArtifactResult carrying its message.
    """
    try:
        target_dir = config_path.parent
        # Make sure the directory exists before probing it
        target_dir.mkdir(parents=True, exist_ok=True)

        if _is_writable(target_dir):
            # An unchanged config (merge returns False) still counts as success
            merge_mcp_config(config_path, host, mcp_command=mcp_command)
            return ArtifactResult(path=config_path, success=True, error=None)

        return ArtifactResult(
            path=config_path,
            success=False,
            error=f"Directory not writable: {target_dir}",
        )
    except Exception as exc:
        return ArtifactResult(path=config_path, success=False, error=str(exc))
582
+
583
+
584
def _deploy_file(
    dest_path: Path,
    package_relative_path: str,
    *,
    artifact_type: str,
    non_interactive: bool,
) -> ArtifactResult:
    """Deploy a single file from package data to destination.

    An existing file with different content is only replaced when the user
    explicitly chooses "Overwrite". Any other outcome of the prompt,
    including no answer at all, leaves the file untouched.

    Args:
        dest_path: Where the file should be written
        package_relative_path: Path of the artifact inside the package data
        artifact_type: Label used in the overwrite prompt (e.g. "skill")
        non_interactive: If True, never prompt; existing differing files are skipped

    Returns:
        ArtifactResult describing success, skip, or failure

    Raises:
        SystemExit(2): If the user chooses "Abort" at the overwrite prompt
    """
    try:
        # Ensure parent directory exists
        dest_path.parent.mkdir(parents=True, exist_ok=True)

        # Check writability
        if not _is_writable(dest_path.parent):
            return ArtifactResult(
                path=dest_path,
                success=False,
                error=f"Directory not writable: {dest_path.parent}",
            )

        # Read package data
        content = _read_package_artifact(package_relative_path)

        if dest_path.is_file():
            # Identical content: nothing to do
            if dest_path.read_text(encoding="utf-8") == content:
                return ArtifactResult(path=dest_path, success=True, error=None)

            # File exists with different content
            if non_interactive:
                return ArtifactResult(
                    path=dest_path,
                    success=False,
                    error="File exists (skipped in non-interactive mode)",
                )

            choice = prompt(
                "select",
                f"{artifact_type.capitalize()} file exists at {dest_path}",
                choices=[
                    {"name": "Overwrite", "value": "overwrite"},
                    {"name": "Skip", "value": "skip"},
                    {"name": "Abort", "value": "abort"},
                ],
            )

            if choice == "abort":
                raise SystemExit(2)
            if choice == "skip":
                return ArtifactResult(
                    path=dest_path,
                    success=False,
                    error="Skipped by user",
                )
            if choice != "overwrite":
                # prompt() yields None when stdin is not a TTY; never treat a
                # missing answer as consent to replace the user's file.
                return ArtifactResult(
                    path=dest_path,
                    success=False,
                    error="Skipped (no overwrite confirmation received)",
                )

        # Write file
        dest_path.write_text(content, encoding="utf-8")
        return ArtifactResult(path=dest_path, success=True, error=None)
    except Exception as e:
        # SystemExit is not an Exception subclass, so "Abort" still propagates.
        return ArtifactResult(path=dest_path, success=False, error=str(e))
650
+
651
+
652
def _is_writable(path: Path) -> bool:
    """Report whether a probe file can be created in and removed from ``path``."""
    probe = path / ".write_test_java_codebase_rag"
    try:
        probe.touch()
        probe.unlink()
    except OSError:
        # IOError is an alias of OSError, so this covers both
        return False
    return True
661
+
662
+
663
def generate_yaml_config(
    source_root: Path,
    model: str,
    microservice_roots: list[str] | None,
    existing_yaml: dict | None,
) -> str:
    """Generate .java-codebase-rag.yml content from installer answers.

    The caller's ``existing_yaml`` is never modified; the function works on
    a deep copy of it.

    Args:
        source_root: Source root directory (not written to the config)
        model: Embedding model path or "auto"
        microservice_roots: List of microservice roots (None/empty means all)
        existing_yaml: Existing YAML data for re-run update mode

    Returns:
        YAML configuration string
    """
    import copy

    # Deep copy: a shallow copy would share the nested "embedding" dict with
    # the caller, and the edits below would leak into their data.
    config = copy.deepcopy(existing_yaml) if existing_yaml else {}

    # Write microservice_roots only if subset selected
    if microservice_roots:
        config["microservice_roots"] = microservice_roots
    else:
        # Remove if not needed (was set before but user wants all)
        config.pop("microservice_roots", None)

    embedding = config.get("embedding")
    if model != "auto":
        # Write embedding.model, replacing a malformed (non-mapping) section
        if not isinstance(embedding, dict):
            embedding = {}
            config["embedding"] = embedding
        embedding["model"] = model
    elif isinstance(embedding, dict) and "model" in embedding:
        # "auto" is expressed by omitting the key; drop the section as well
        # when the model was its only entry, instead of leaving "embedding: {}".
        del embedding["model"]
        if not embedding:
            del config["embedding"]

    # Keys NOT written by installer (preserved if present):
    # - source_root (config.py resolves from walk-up discovery)
    # - index_dir (config.py defaults to <source_root>/.java-codebase-rag)
    # - embedding.device (user can add manually)
    # - hints.enabled (defaults to True in config.py)
    # - brownfield_overrides (user-managed)

    return yaml.dump(config, default_flow_style=False, sort_keys=False)
710
+
711
+
712
def update_gitignore(cwd: Path) -> None:
    """Add .java-codebase-rag/ to .gitignore if not already present.

    Does nothing unless ``cwd`` contains a ``.git`` entry. Both a ``.git``
    directory and a ``.git`` file (as used by worktrees and submodules)
    count. Existing content is kept as-is and the new lines are appended.

    Args:
        cwd: Current working directory
    """
    if not (cwd / ".git").exists():
        return

    gitignore_path = cwd / ".gitignore"
    existing = gitignore_path.read_text(encoding="utf-8") if gitignore_path.is_file() else ""

    # Accept the pattern with or without trailing slash, anchored or not
    known_forms = {".java-codebase-rag", "/.java-codebase-rag"}
    if any(line.strip().rstrip("/") in known_forms for line in existing.splitlines()):
        return

    addition = "# java-codebase-rag index directory\n.java-codebase-rag/\n"
    if existing:
        # Separate from prior content by exactly one blank line
        separator = "\n" if existing.endswith("\n") else "\n\n"
        updated = existing + separator + addition
    else:
        updated = addition

    gitignore_path.write_text(updated, encoding="utf-8")
742
+
743
+
744
def run_init_if_needed(
    source_root: Path,
    index_dir: Path,
    model: str,
    *,
    non_interactive: bool,
    quiet: bool,
) -> bool:
    """Build the index unless ``index_dir`` already holds artifacts.

    Args:
        source_root: Source root directory
        index_dir: Index directory checked for existing artifacts
        model: Embedding model path or "auto"
        non_interactive: Accepted for interface symmetry; not read here
        quiet: Forwarded to the pipeline steps to suppress their output

    Returns:
        True if both pipeline steps ran successfully; False if the index
        already existed or a step returned a non-zero code
    """
    from java_codebase_rag.config import (
        index_dir_has_existing_artifacts,
        resolve_operator_config,
    )
    from java_codebase_rag.pipeline import run_build_ast_graph, run_cocoindex_update

    already_indexed, _details = index_dir_has_existing_artifacts(index_dir)
    if already_indexed:
        print("Index already exists. Run `java-codebase-rag reprocess` to rebuild.")
        return False

    print("Creating index...")

    # "auto" means no explicit model override is passed on
    explicit_model = None if model == "auto" else model
    cfg = resolve_operator_config(
        source_root=source_root,
        cli_index_dir=None,  # use default (<source_root>/.java-codebase-rag)
        cli_embedding_model=explicit_model,
    )
    cfg.apply_to_os_environ()
    env = cfg.subprocess_env()

    # Step 1: CocoIndex update
    coco_result = run_cocoindex_update(env, full_reprocess=False, quiet=quiet)
    if coco_result.returncode != 0:
        print(f"Error: CocoIndex update failed with code {coco_result.returncode}")
        return False

    # Step 2: AST graph build
    graph_result = run_build_ast_graph(
        source_root=cfg.source_root,
        kuzu_path=cfg.kuzu_path,
        verbose=not quiet,
        quiet=quiet,
        env=env,
    )
    if graph_result.returncode != 0:
        print(f"Error: AST graph build failed with code {graph_result.returncode}")
        return False

    print("Index created successfully.")
    return True
805
+
806
+
807
+ def handle_rerun(cwd: Path, *, non_interactive: bool) -> dict | None:
808
+ """If .java-codebase-rag.yml exists, offer update/fresh-start. Return existing YAML data or None.
809
+
810
+ Args:
811
+ cwd: Current working directory
812
+ non_interactive: If True, default to "Update" mode
813
+
814
+ Returns:
815
+ Parsed existing YAML data if updating, None if starting fresh
816
+ """
817
+ config_path = cwd / ".java-codebase-rag.yml"
818
+
819
+ if not config_path.is_file():
820
+ return None
821
+
822
+ try:
823
+ with open(config_path, "r") as f:
824
+ existing_config = yaml.safe_load(f) or {}
825
+ except yaml.YAMLError as e:
826
+ print(f"Warning: Failed to parse existing config: {e}")
827
+ return None
828
+
829
+ if non_interactive:
830
+ # Default to update mode in non-interactive
831
+ print(f"Found existing config at {config_path}")
832
+ return existing_config
833
+
834
+ # Interactive: show current values and ask
835
+ print(f"Found existing config at {config_path}")
836
+ print("Current configuration:")
837
+ for key, value in existing_config.items():
838
+ print(f" {key}: {value}")
839
+
840
+ choice = prompt(
841
+ "select",
842
+ "Choose an action:",
843
+ choices=[
844
+ {"name": "Update (keep existing values)", "value": "update"},
845
+ {"name": "Start fresh (new config)", "value": "fresh"},
846
+ {"name": "Abort", "value": "abort"},
847
+ ],
848
+ )
849
+
850
+ if choice == "abort":
851
+ raise SystemExit(2)
852
+ elif choice == "fresh":
853
+ return None
854
+ else: # update
855
+ return existing_config
856
+
857
+
858
def detect_configured_hosts(cwd: Path) -> list[tuple[HostConfig, str]]:
    """Find every host/scope whose MCP config already has a java-codebase-rag entry.

    Args:
        cwd: Current working directory (for project-scope configs)

    Returns:
        List of (host_config, scope) tuples where scope is "project" or
        "user"; for each host the project scope is checked before the user scope
    """
    return [
        (host_config, scope)
        for host_config in HOSTS.values()
        for scope in ("project", "user")
        if _has_java_codebase_rag_entry(host_config.mcp_config_path(scope, cwd))
    ]
882
+
883
+
884
def _has_java_codebase_rag_entry(config_path: Path) -> bool:
    """Check if MCP config file has a java-codebase-rag entry.

    Never raises for a malformed file: anything that is not a readable JSON
    object with an object-valued "mcpServers" key counts as "no entry".

    Args:
        config_path: Path to MCP config file

    Returns:
        True if file exists and contains java-codebase-rag in mcpServers
    """
    if not config_path.is_file():
        return False

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (ValueError, OSError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        return False

    if not isinstance(config, dict):
        return False

    mcp_servers = config.get("mcpServers")
    return isinstance(mcp_servers, dict) and _MCP_SERVER_NAME in mcp_servers
904
+
905
+
906
def refresh_artifacts(
    host: HostConfig,
    scope: str,
    cwd: Path,
    *,
    force: bool,
    dry_run: bool,
) -> list[ArtifactResult]:
    """Refresh the skill file, the agent file and the MCP entry of one host.

    Args:
        host: HostConfig for the agent host
        scope: Installation scope ("project" or "user")
        cwd: Current working directory
        force: If True, overwrite all files even if matching
        dry_run: If True, print changes without writing

    Returns:
        ArtifactResult objects in the order skill, agent, MCP config
    """
    # (destination, path inside package data, artifact type)
    file_artifacts = (
        (
            host.skills_dir(scope, cwd) / "explore-codebase" / "SKILL.md",
            "skills/explore-codebase/SKILL.md",
            "skill",
        ),
        (
            host.agents_dir(scope, cwd) / "explorer-rag-enhanced.md",
            "agents/explorer-rag-enhanced.md",
            "agent",
        ),
    )

    outcomes = [
        _refresh_file(
            destination,
            package_path,
            artifact_type=kind,
            force=force,
            dry_run=dry_run,
        )
        for destination, package_path, kind in file_artifacts
    ]

    # MCP config last (update command path if needed)
    outcomes.append(
        _refresh_mcp_config(
            host.mcp_config_path(scope, cwd),
            host,
            force=force,
            dry_run=dry_run,
        )
    )
    return outcomes
958
+
959
+
960
def _refresh_file(
    dest_path: Path,
    package_relative_path: str,
    *,
    artifact_type: str,
    force: bool,
    dry_run: bool,
) -> ArtifactResult:
    """Bring one deployed file in line with its packaged version.

    Args:
        dest_path: Destination file path
        package_relative_path: Path relative to install_data
        artifact_type: Type of artifact (used in printed messages)
        force: If True, rewrite even when the content already matches
        dry_run: If True, only print what would happen

    Returns:
        ArtifactResult with success status; any exception is reported as a
        failed result carrying its message
    """
    try:
        packaged = _read_package_artifact(package_relative_path)
        done = ArtifactResult(path=dest_path, success=True, error=None)

        if dest_path.is_file():
            current = dest_path.read_text(encoding="utf-8")
            # Up to date and not forced: nothing to do
            if not force and current == packaged:
                return done
            dry_run_notice = f"Would update {artifact_type} file at {dest_path}"
        else:
            dry_run_notice = f"Would create {artifact_type} file at {dest_path}"

        if dry_run:
            print(dry_run_notice)
            return done

        # From here on we really write
        target_dir = dest_path.parent
        target_dir.mkdir(parents=True, exist_ok=True)

        if not _is_writable(target_dir):
            return ArtifactResult(
                path=dest_path,
                success=False,
                error=f"Directory not writable: {target_dir}",
            )

        dest_path.write_text(packaged, encoding="utf-8")
        print(f"Updated {artifact_type} file at {dest_path}")
        return done

    except Exception as exc:
        return ArtifactResult(path=dest_path, success=False, error=str(exc))
1022
+
1023
+
1024
def _refresh_mcp_config(
    config_path: Path,
    host: HostConfig,
    *,
    force: bool,
    dry_run: bool,
) -> ArtifactResult:
    """Make sure the MCP config file registers this package's server.

    The entry ``{"command": <resolved command>, "type": "stdio"}`` is stored
    under ``mcpServers[_MCP_SERVER_NAME]``. Other keys already present in the
    file are kept. The file is rewritten through a temporary file in the same
    directory followed by ``os.replace`` so readers never observe a
    half-written config.

    Args:
        config_path: Path to the JSON config file (created if missing).
        host: Agent host the config belongs to. Not read by this function;
            accepted so the signature matches the other refresh helpers.
        force: Rewrite the entry even when it already matches.
        dry_run: Only report what would happen; never touch the disk.

    Returns:
        ArtifactResult for ``config_path``. ``success`` is False when the MCP
        command cannot be resolved, the existing file is not valid JSON or
        does not have the expected object structure, the directory is not
        writable, or writing fails.
    """
    try:
        # resolve_mcp_command raises SystemExit when the binary is not found;
        # report that as a failed artifact instead of ending the process.
        try:
            mcp_command = resolve_mcp_command(non_interactive=True)
        except SystemExit:
            return ArtifactResult(
                path=config_path,
                success=False,
                error="java-codebase-rag-mcp not found on PATH",
            )

        new_entry = {"command": mcp_command, "type": "stdio"}

        # Load the current config. JSON is read as UTF-8 explicitly so the
        # result does not depend on the platform's default encoding.
        if config_path.is_file():
            try:
                with open(config_path, "r", encoding="utf-8") as f:
                    config = json.load(f)
            except json.JSONDecodeError as e:
                return ArtifactResult(
                    path=config_path,
                    success=False,
                    error=f"Failed to parse {config_path}: {e}",
                )
        else:
            config = {}

        # Valid JSON is not necessarily an object; refuse to edit anything
        # else rather than failing later with an opaque TypeError.
        if not isinstance(config, dict):
            return ArtifactResult(
                path=config_path,
                success=False,
                error=f"Unexpected content in {config_path}: top-level JSON value is not an object",
            )

        servers = config.setdefault("mcpServers", {})
        if not isinstance(servers, dict):
            return ArtifactResult(
                path=config_path,
                success=False,
                error=f"Unexpected content in {config_path}: 'mcpServers' is not an object",
            )

        # Already up to date: nothing to write unless forced.
        if servers.get(_MCP_SERVER_NAME) == new_entry and not force:
            return ArtifactResult(path=config_path, success=True, error=None)

        if dry_run:
            print(f"Would update MCP config at {config_path}")
            return ArtifactResult(path=config_path, success=True, error=None)

        servers[_MCP_SERVER_NAME] = new_entry

        config_path.parent.mkdir(parents=True, exist_ok=True)

        if not _is_writable(config_path.parent):
            return ArtifactResult(
                path=config_path,
                success=False,
                error=f"Directory not writable: {config_path.parent}",
            )

        # Write to a sibling temp file, then swap it into place.
        tmp_name = None
        try:
            with tempfile.NamedTemporaryFile(
                mode="w",
                encoding="utf-8",
                dir=config_path.parent,
                prefix=f".{config_path.name}.",
                delete=False,
            ) as tmp:
                # Record the name immediately so a failure while dumping or
                # syncing still lets the handler below remove the temp file.
                tmp_name = tmp.name
                json.dump(config, tmp, indent=2)
                tmp.flush()
                os.fsync(tmp.fileno())

            # os.replace overwrites an existing destination on every
            # platform; os.rename raises on Windows when the target exists.
            os.replace(tmp_name, config_path)
        except Exception as e:
            if tmp_name is not None:
                try:
                    os.unlink(tmp_name)
                except OSError:
                    # Best-effort cleanup: the temp file may already be gone.
                    pass
            if isinstance(e, OSError):
                raise RuntimeError(f"Failed to write {config_path}: {e}") from e
            raise

        print(f"Updated MCP config at {config_path}")
        return ArtifactResult(path=config_path, success=True, error=None)

    except SystemExit as e:
        # Safety net for any other helper that exits instead of raising.
        return ArtifactResult(path=config_path, success=False, error=f"Command failed: {e.code}")
    except Exception as e:
        return ArtifactResult(path=config_path, success=False, error=str(e))
1132
+
1133
+
1134
def run_update(
    *,
    force: bool,
    dry_run: bool,
    cwd: Path | None = None,
) -> int:
    """Refresh deployed artifacts for every configured host, then the index.

    Args:
        force: Overwrite artifacts even when they already match.
        dry_run: Report what would change without writing anything.
        cwd: Directory to operate from; ``Path.cwd()`` when omitted.

    Returns:
        ``EXIT_FATAL`` when no configured host is detected, ``EXIT_PARTIAL``
        when any artifact failed to refresh or the index update returned a
        non-zero code, ``EXIT_SUCCESS`` otherwise.
    """
    cwd = (Path.cwd() if cwd is None else cwd).resolve()

    configured_hosts = detect_configured_hosts(cwd)
    if not configured_hosts:
        print("No configured agent hosts found.")
        print("Run `java-codebase-rag install` first.")
        return EXIT_FATAL

    print(f"Found {len(configured_hosts)} configured host(s).")

    # Collect one flat list of results across all (host, scope) pairs.
    all_results = []
    for host_config, scope in configured_hosts:
        print(f"\nRefreshing {host_config.name} ({scope} scope)...")
        all_results.extend(
            refresh_artifacts(host_config, scope, cwd, force=force, dry_run=dry_run)
        )

    failed = [res for res in all_results if not res.success]
    if failed:
        print("\nWarning: Some artifacts failed to update:")
        for res in failed:
            print(f" {res.path}: {res.error}")

    # Exit code for every path below that does not hit an index error.
    artifact_exit_code = EXIT_PARTIAL if failed else EXIT_SUCCESS

    # Imported here, after the artifact refresh, exactly where the index
    # step begins.
    from java_codebase_rag.config import (
        discover_project_root,
        index_dir_has_existing_artifacts,
        resolve_operator_config,
    )
    from java_codebase_rag.pipeline import run_cocoindex_update

    project_root = discover_project_root(cwd)
    if project_root is None:
        print("\nNo project configuration found (.java-codebase-rag.yml).")
        print("Skipping index update.")
        return artifact_exit_code

    try:
        cfg = resolve_operator_config(source_root=project_root, cli_index_dir=None)
        index_dir = cfg.index_dir
    except Exception as exc:
        print(f"\nWarning: Failed to resolve configuration: {exc}")
        print("Skipping index update.")
        return artifact_exit_code

    index_exists, _ = index_dir_has_existing_artifacts(index_dir)
    if not index_exists:
        print("\nNo index found.")
        print("Run `java-codebase-rag install` to create one.")
        return artifact_exit_code

    if dry_run:
        print("\nWould run incremental index update.")
    else:
        print("\nUpdating index (incremental LanceDB update)...")
        cfg.apply_to_os_environ()
        env = cfg.subprocess_env()

        coco = run_cocoindex_update(env, full_reprocess=False, quiet=True)
        if coco.returncode != 0:
            print(f"Error: Index update failed with code {coco.returncode}")
            return EXIT_PARTIAL

        # Incremental updates print the CLI's standing warning lines.
        from java_codebase_rag.cli import _INCREMENT_WARNING_LINES
        print("\n" + "\n".join(_INCREMENT_WARNING_LINES))

    print("\nUpdate complete.")
    succeeded_count = len([res for res in all_results if res.success])
    print(f"Updated {succeeded_count} artifact(s).")

    return artifact_exit_code
1233
+
1234
+
1235
def run_install(
    *,
    non_interactive: bool,
    agents: list[str] | None,
    scope: str | None,
    model: str | None,
    source_root: Path | None = None,
    quiet: bool = False,
) -> int:
    """Run the install pipeline. Returns exit code.

    Stages, in order: re-run detection, source-root confirmation, Java
    directory detection, embedding-model resolution, host and scope
    selection, artifact deployment, YAML config generation, ``.gitignore``
    update, and index initialisation.

    Args:
        non_interactive: If True, skip all prompts
        agents: List of agent names from CLI flags
        scope: Scope from CLI flag
        model: Model from CLI flag
        source_root: Source root path (defaults to cwd if None)
        quiet: If True, suppress the "Configuration written" message

    Returns:
        Exit code (0=success, 1=partial, 2=fatal). When a stage aborts by
        raising ``SystemExit``, its code is returned as an int. A run in
        which some non-critical artifacts failed to deploy completes the
        remaining stages and returns ``EXIT_PARTIAL``.
    """

    def _exit_code_of(exc: SystemExit) -> int:
        """Turn a SystemExit payload into an int, as the interpreter would."""
        code = exc.code
        if code is None:
            # A bare sys.exit() means a zero exit status.
            return EXIT_SUCCESS
        if isinstance(code, int):
            return code
        # Non-integer payloads are messages: the interpreter prints them to
        # stderr and exits with status 1.
        print(code, file=sys.stderr)
        return 1

    # Stage 0: Determine source root
    cwd = Path.cwd() if source_root is None else source_root
    cwd = cwd.resolve()

    # Stage 0.5: Check for existing config (re-run detection). The result is
    # handed to generate_yaml_config below as existing_yaml.
    existing_config = handle_rerun(cwd, non_interactive=non_interactive)

    # Stage 1: Java source detection (with confirmation in interactive mode)
    source_root = confirm_source_root(cwd, non_interactive=non_interactive)

    try:
        java_dirs = detect_java_directories(source_root)
    except SystemExit as e:
        return _exit_code_of(e)

    # Stage 2: Embedding model
    resolved_model = resolve_model(model, non_interactive=non_interactive)

    # Stage 3-5a: Agent host + scope selection and MCP command lookup.
    # resolve_mcp_command exits when the binary is missing, so it is guarded
    # like the selection helpers and its code is returned, not propagated.
    try:
        hosts = select_hosts(non_interactive=non_interactive, cli_agents=agents)
        selected_scope = select_scope(non_interactive=non_interactive, cli_scope=scope)
        mcp_command = resolve_mcp_command(non_interactive=non_interactive)
    except SystemExit as e:
        return _exit_code_of(e)

    # Stage 5: Artifact deployment
    results = deploy_artifacts(
        hosts,
        selected_scope,
        source_root,
        non_interactive=non_interactive,
        mcp_command=mcp_command,
    )

    # Check for partial failures
    partial_failures = [r for r in results if not r.success]
    if partial_failures:
        print("Warning: Some artifacts failed to deploy:")
        for r in partial_failures:
            print(f" {r.path}: {r.error}")

        # Config-type artifacts (JSON/YAML) are the critical ones; a failure
        # among them stops the install before any project config is written.
        config_suffixes = (".json", ".yml", ".yaml")
        if any(r.path.suffix in config_suffixes for r in partial_failures):
            # Same exit status (1) as before, for existing callers.
            return EXIT_PARTIAL

        print("Continuing (MCP configs deployed successfully)...")

    # Stage 6: Index + finish
    yaml_content = generate_yaml_config(
        source_root,
        resolved_model,
        # Only list roots explicitly when more than one was detected.
        microservice_roots=[str(d) for d in java_dirs] if len(java_dirs) > 1 else None,
        existing_yaml=existing_config,
    )

    config_path = source_root / ".java-codebase-rag.yml"
    config_path.write_text(yaml_content, encoding="utf-8")

    update_gitignore(source_root)

    if not quiet:
        print("Configuration written to", config_path)

    index_dir = (source_root / ".java-codebase-rag").resolve()
    run_init_if_needed(
        source_root,
        index_dir,
        resolved_model,
        non_interactive=non_interactive,
        quiet=quiet,
    )

    # Report non-critical deployment failures through the exit code, as the
    # documented contract and run_update both do.
    return EXIT_PARTIAL if partial_failures else EXIT_SUCCESS