java-codebase-rag 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graph_enrich.py CHANGED
@@ -1565,6 +1565,38 @@ def microservice_for_path(
1565
1565
  return ""
1566
1566
 
1567
1567
 
1568
def detect_microservice_from_path(cwd: Path, source_root: Path) -> str | None:
    """Work out which microservice *cwd* belongs to, for query-time auto-scope.

    Both paths are resolved before comparison.

    Args:
        cwd: Directory the query is being issued from.
        source_root: Root of the indexed source tree.

    Returns:
        The microservice name, or ``None`` when *cwd* lies outside
        *source_root*, when *cwd* is *source_root* itself (system level, no
        specific scope), or when no microservice is detected.
    """
    here = cwd.resolve()
    root = source_root.resolve()

    try:
        inside = here.relative_to(root)
    except ValueError:
        # cwd is not located under the source root at all.
        return None

    # An empty relative path means cwd *is* the source root: no specific scope.
    if not inside.parts:
        return None

    # A directory whose own name appears in the loaded overrides is taken as
    # the microservice directly.
    known = load_microservice_overrides(root)
    if known and here.name in known:
        return here.name

    # microservice_for_path walks _bounded_parents, which excludes the path
    # itself. Probing with a synthetic child makes cwd show up as a parent in
    # the build-marker scan.
    probe = here / "__scope_probe__"
    detected = microservice_for_path(str(probe), root)
    return detected or None
1598
+
1599
+
1568
1600
  # ---------- chunk enrichment ----------
1569
1601
 
1570
1602
 
java_codebase_rag/cli.py CHANGED
@@ -21,7 +21,7 @@ from java_codebase_rag.config import (
21
21
  index_dir_has_existing_artifacts,
22
22
  resolve_operator_config,
23
23
  )
24
- from java_codebase_rag.pipeline import clip, run_build_ast_graph, run_cocoindex_drop, run_cocoindex_update
24
+ from java_codebase_rag.pipeline import clip, run_build_ast_graph, run_cocoindex_drop, run_cocoindex_update, run_incremental_graph
25
25
  from java_ontology import VALID_UNRESOLVED_CALL_REASONS
26
26
 
27
27
  KUZU_INCREMENTAL_TRACKING_ISSUE_URL = "https://github.com/HumanBean17/java-codebase-rag/issues/73"
@@ -229,6 +229,23 @@ def _add_verbosity_flags(p: argparse.ArgumentParser) -> None:
229
229
 
230
230
  def _cmd_init(args: argparse.Namespace) -> int:
231
231
  cfg = _resolved_from_ns(args)
232
+ # Check for parent config or index
233
+ from java_codebase_rag.config import discover_project_root, find_yaml_config_file
234
+ parent_config_dir = discover_project_root(cfg.source_root.parent)
235
+ if parent_config_dir is not None:
236
+ parent_config = find_yaml_config_file(parent_config_dir)
237
+ if parent_config is not None:
238
+ print(
239
+ f"Warning: found existing config at {parent_config}. "
240
+ f"Creating a new project here will create a separate index.",
241
+ file=sys.stderr,
242
+ )
243
+ else:
244
+ print(
245
+ f"Warning: found existing index at {parent_config_dir / '.java-codebase-rag'}. "
246
+ f"Creating a new project here will create a separate index.",
247
+ file=sys.stderr,
248
+ )
232
249
  _startup_hints(cfg)
233
250
  cfg.apply_to_os_environ()
234
251
  occupied, paths = index_dir_has_existing_artifacts(cfg.index_dir)
@@ -298,7 +315,11 @@ def _cmd_increment(args: argparse.Namespace) -> int:
298
315
  cfg = _resolved_from_ns(args)
299
316
  _startup_hints(cfg)
300
317
  cfg.apply_to_os_environ()
301
- _emit_increment_kuzu_warning()
318
+
319
+ # Check for --vectors-only flag
320
+ vectors_only = bool(getattr(args, "vectors_only", False))
321
+ if vectors_only:
322
+ _emit_increment_kuzu_warning()
302
323
 
303
324
  def work() -> int:
304
325
  env = cfg.subprocess_env()
@@ -320,7 +341,50 @@ def _cmd_increment(args: argparse.Namespace) -> int:
320
341
  }
321
342
  )
322
343
  return 1
323
- _emit({"success": True, "message": "increment completed (Lance only; graph may be stale — see stderr)"})
344
+
345
+ # If --vectors-only is set, skip graph update
346
+ if vectors_only:
347
+ _emit({"success": True, "message": "increment completed (Lance only; graph may be stale — see stderr)"})
348
+ return 0
349
+
350
+ # Run incremental graph update
351
+ g = run_incremental_graph(
352
+ source_root=cfg.source_root,
353
+ kuzu_path=cfg.kuzu_path,
354
+ verbose=bool(args.verbose),
355
+ quiet=bool(args.quiet),
356
+ env=env,
357
+ )
358
+
359
+ # Check if incremental fell back to full rebuild
360
+ if g.returncode == 0 and g.stdout:
361
+ # Parse stdout to check for full_fallback mode
362
+ # The incremental_rebuild function returns a JSON payload with mode field
363
+ try:
364
+ result = json.loads(g.stdout.strip())
365
+ if result.get("mode") == "full_fallback":
366
+ print(
367
+ "[increment] fell back to full graph rebuild — this is normal after schema changes or first run",
368
+ file=sys.stderr,
369
+ flush=True,
370
+ )
371
+ except (json.JSONDecodeError, ValueError):
372
+ # If parsing fails, continue silently
373
+ pass
374
+
375
+ if g.returncode != 0:
376
+ _emit(
377
+ {
378
+ "success": False,
379
+ "exit_code": g.returncode,
380
+ "stdout": clip(g.stdout, 4000),
381
+ "stderr": clip(g.stderr, 4000),
382
+ "message": f"graph builder exit {g.returncode}",
383
+ }
384
+ )
385
+ return 1
386
+
387
+ _emit({"success": True, "message": "increment completed (Lance + graph updated)"})
324
388
  return 0
325
389
 
326
390
  return _run_with_pipeline_progress("increment", cfg, quiet=bool(args.quiet), work=work)
@@ -419,6 +483,19 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
419
483
  return _run_with_pipeline_progress("reprocess", cfg, quiet=bool(args.quiet), work=work)
420
484
 
421
485
 
486
def _cmd_install(args: argparse.Namespace) -> int:
    """Handle the ``install`` subcommand by delegating to the installer.

    The installer module is imported lazily, inside the handler.

    Args:
        args: Parsed CLI namespace; reads ``non_interactive``, ``agent``,
            ``scope``, ``model`` and ``quiet``.

    Returns:
        Whatever ``run_install`` returns (used as the process exit code).
    """
    from java_codebase_rag.installer import run_install

    options = {
        "non_interactive": bool(args.non_interactive),
        # list of str (may be empty)
        "agents": args.agent,
        "scope": args.scope,
        "model": args.model,
        # None means cwd; installer confirms interactively
        "source_root": None,
        "quiet": bool(args.quiet),
    }
    return run_install(**options)
497
+
498
+
422
499
  def _cmd_erase(args: argparse.Namespace) -> int:
423
500
  cfg = _resolved_from_ns(args)
424
501
  _startup_hints(cfg)
@@ -615,7 +692,7 @@ def build_parser() -> argparse.ArgumentParser:
615
692
  "--quiet suppresses that stream; stdout remains the machine-readable payload.\n\n"
616
693
  "Lifecycle (manage the index):\n"
617
694
  " init Create a fresh index from a Java repository.\n"
618
- " increment Pick up changes since the last index update (Lance only).\n"
695
+ " increment Pick up changes since the last index update (Lance + graph).\n"
619
696
  " reprocess Full vector + graph rebuild (default); optional --vectors-only / --graph-only.\n"
620
697
  " erase Delete the index from disk.\n\n"
621
698
  "Introspection (inspect the index):\n"
@@ -647,13 +724,54 @@ def build_parser() -> argparse.ArgumentParser:
647
724
  _add_verbosity_flags(init)
648
725
  init.set_defaults(handler=_cmd_init)
649
726
 
727
+ install = subparsers.add_parser(
728
+ "install",
729
+ help="Interactive setup wizard: config, MCP registration, skill/agent deployment, indexing.",
730
+ description=(
731
+ "Interactive setup wizard that guides users through: Java source detection, "
732
+ "embedding model selection, agent host configuration, artifact deployment, "
733
+ "and YAML config generation. Use --non-interactive for CI/automation."
734
+ ),
735
+ )
736
+ install.add_argument(
737
+ "--non-interactive",
738
+ action="store_true",
739
+ help="Run without prompts (requires --agent).",
740
+ )
741
+ install.add_argument(
742
+ "--agent",
743
+ choices=["claude-code", "qwen-code", "gigacode"],
744
+ default=[],
745
+ action="append",
746
+ help="Agent host to configure (can be passed multiple times).",
747
+ )
748
+ install.add_argument(
749
+ "--scope",
750
+ choices=["project", "user"],
751
+ default=None,
752
+ help="Installation scope (default: project).",
753
+ )
754
+ install.add_argument(
755
+ "--model",
756
+ type=str,
757
+ default=None,
758
+ help="Embedding model path or 'auto' (default: auto).",
759
+ )
760
+ _add_verbosity_flags(install)
761
+ install.set_defaults(handler=_cmd_install)
762
+
650
763
  increment = subparsers.add_parser(
651
764
  "increment",
652
765
  help="Pick up changes since the last index update.",
653
- description="Runs cocoindex catch-up (no full reprocess). Does not rebuild Kuzu; see stderr warning.",
766
+ description="Runs cocoindex catch-up and incremental Kuzu graph update. Use --vectors-only to skip graph update.",
654
767
  )
655
768
  _add_index_embedding_flags(increment)
656
769
  _add_verbosity_flags(increment)
770
+ increment.add_argument(
771
+ "--vectors-only",
772
+ action="store_true",
773
+ help="Run only cocoindex catch-up (Lance); skip graph update.",
774
+ )
657
775
  increment.set_defaults(handler=_cmd_increment)
658
776
 
659
777
  reprocess = subparsers.add_parser(
@@ -123,6 +123,46 @@ def find_yaml_config_file(source_root: Path) -> Path | None:
123
123
  return None
124
124
 
125
125
 
126
def _has_index_dir(directory: Path) -> bool:
    """True if *directory* contains a non-empty ``.java-codebase-rag/`` index directory.

    A marker that cannot be inspected (for example because of missing
    permissions, or because it disappears between the two checks) is treated
    as absent instead of raising, so one unreadable directory does not abort
    a caller that is probing several candidate directories.

    Args:
        directory: Candidate project directory to probe.

    Returns:
        ``True`` only when ``.java-codebase-rag`` exists under *directory*,
        is a directory, and holds at least one entry.
    """
    idx = directory / ".java-codebase-rag"
    try:
        if not idx.is_dir():
            return False
        # Only the existence of a first entry matters; stop after one.
        return next(iter(idx.iterdir()), None) is not None
    except OSError:
        # Unreadable or vanished marker: treat it as "no index here".
        return False
130
+
131
+
132
def discover_project_root(start: Path) -> Path | None:
    """Find the nearest ancestor of *start* (inclusive) that marks a project.

    A directory counts as a project boundary when it holds a YAML config file
    (``.java-codebase-rag.yml`` / ``.java-codebase-rag.yaml``, the preferred
    anchor) or a ``.java-codebase-rag/`` index directory (secondary anchor,
    which supports indexes created without a config file).

    The closest match to *start* wins; at the same level the config file is
    checked before the index directory. ``$HOME`` is itself inspected, but the
    search never continues above it. Outside ``$HOME`` the search ends at the
    filesystem root.

    Args:
        start: Directory to begin the upward search from.

    Returns:
        The directory containing the marker, or ``None`` if none is found.
    """
    start = start.resolve()
    home = Path.home().resolve()

    # ``start`` first, then every ancestor up to the filesystem root.
    for candidate in (start, *start.parents):
        if find_yaml_config_file(candidate) is not None:
            return candidate
        if _has_index_dir(candidate):
            return candidate
        if candidate == home:
            # $HOME has been checked; do not walk past it.
            return None

    # Reached the filesystem root without finding a marker.
    return None
164
+
165
+
126
166
  def load_yaml_mapping(source_root: Path) -> dict[str, Any]:
127
167
  path = find_yaml_config_file(source_root)
128
168
  if path is None:
@@ -277,8 +317,36 @@ def resolve_operator_config(
277
317
  cli_embedding_model: str | None = None,
278
318
  cli_embedding_device: str | None = None,
279
319
  ) -> ResolvedOperatorConfig:
280
- root = (source_root or Path.cwd()).expanduser().resolve()
281
- yaml_dict = load_yaml_mapping(root)
320
+ # Phase 1: Find the config file directory
321
+ if source_root is not None:
322
+ # CLI flag provided: use it as both config_dir and effective source_root
323
+ # (skip YAML source_root check - CLI wins)
324
+ root = source_root.expanduser().resolve()
325
+ config_dir = root
326
+ yaml_dict = load_yaml_mapping(config_dir)
327
+ else:
328
+ # Check env var first
329
+ env_raw = os.environ.get(ENV_SOURCE_ROOT, "").strip()
330
+ if env_raw:
331
+ root = Path(env_raw).expanduser().resolve()
332
+ config_dir = root
333
+ yaml_dict = load_yaml_mapping(config_dir)
334
+ else:
335
+ # Walk up to find config dir
336
+ discovered = discover_project_root(Path.cwd())
337
+ config_dir = discovered if discovered is not None else Path.cwd().resolve()
338
+ # Load YAML from config dir
339
+ yaml_dict = load_yaml_mapping(config_dir)
340
+
341
+ # Phase 2: Resolve effective source root
342
+ # Check for YAML source_root field (resolved relative to config dir)
343
+ yaml_source_root = yaml_dict.get("source_root")
344
+ if isinstance(yaml_source_root, str) and yaml_source_root.strip():
345
+ yroot = Path(yaml_source_root.strip()).expanduser()
346
+ root = yroot.resolve() if yroot.is_absolute() else (config_dir / yroot).resolve()
347
+ else:
348
+ root = config_dir
349
+
282
350
  index_dir, index_src = _resolve_index_dir_path(
283
351
  source_root=root, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
284
352
  )
@@ -0,0 +1,306 @@
1
+ ---
2
+ name: explorer-rag-enhanced
3
+ description: "MUST BE USED PROACTIVELY. Universal read-only explorer agent. Combines java-codebase-rag graph navigation (call chains, service boundaries, routes, impact analysis, FQN resolution) with broad file-system search (grep, glob, excerpt reading). Use for any exploration task: locating code, tracing dependencies, finding patterns, answering 'where is X' or 'who calls Y' questions. Read-only — never edits files."
4
+ ---
5
+
6
+ You are a universal codebase explorer — a read-only search and navigation specialist that combines **graph-based structural analysis** (java-codebase-rag MCP) with **broad file-system search** (grep, glob, file reading).
7
+
8
+ ## Core Principles
9
+
10
+ 1. **Read-only.** Never edit, write, or modify any file. Only locate, read, and report.
11
+ 2. **Smallest sufficient tool.** Pick the lightest tool that answers the question. Don't run a graph traversal when a single `grep` suffices; don't grep when `resolve` gives an exact answer.
12
+ 3. **Excerpts over dumps.** When searching broadly, read excerpts and relevant sections rather than entire files. Summarize findings; don't dump raw content.
13
+ 4. **Stop when answered.** Don't prefetch unrelated subgraphs or scan unrelated directories. Report findings as soon as the question is answered.
14
+
15
+ ## Tool Inventory
16
+
17
+ ### Graph tools (java-codebase-rag MCP)
18
+
19
+ `search`, `find`, `describe`, `neighbors`, `resolve`.
20
+
21
+ **Use for:** whole-codebase structural queries — callers/callees, route handlers, HTTP/async seams, clients/producers, service boundaries, impact analysis, FQN resolution, interface implementations, dependency injection chains.
22
+
23
+ **Do NOT use for:** reading specific known files, git history, test/build/CI files, or questions answerable from already-open context.
24
+
25
+ ### File-system tools
26
+
27
+ `Grep` (search file contents), `Glob` (find files by name/pattern), `Read` (read files).
28
+
29
+ **Use for:** text-based searches across the repo, finding files by name pattern, reading configuration files, build files, test files, CI/deploy files, documentation, or any content not covered by the graph index.
30
+
31
+ ### Other tools
32
+
33
+ `Bash` (read-only commands like `git log`, `git blame`, `ls`, `find`), `WebSearch`, `WebFetch`.
34
+
35
+ ## Decision Framework
36
+
37
+ ### When to use graph tools vs file-system tools
38
+
39
+ | Question type | Primary approach |
40
+ | --- | --- |
41
+ | "Who calls method M?" | Graph: `resolve` → `neighbors("in", ["CALLS"])` |
42
+ | "What does M call?" | Graph: `resolve` → `neighbors("out", ["CALLS"])` |
43
+ | "Where is class X?" | Graph: `resolve` or `search` first; fallback to `Grep`/`Glob` |
44
+ | "All controllers in service S" | Graph: `find(kind="symbol", filter={…})` |
45
+ | "Routes/endpoints in service S" | Graph: `find(kind="route", filter={…})` |
46
+ | "Who implements interface T?" | Graph: `neighbors(type_id, "in", ["IMPLEMENTS"])` |
47
+ | "Where is T injected?" | Graph: `neighbors(type_id, "in", ["INJECTS"])` |
48
+ | "Impact of changing X?" | Graph: bounded `neighbors` traversal |
49
+ | "Find files matching pattern" | File-system: `Glob` |
50
+ | "Search for text/regex in files" | File-system: `Grep` |
51
+ | "Read config/build/test files" | File-system: `Read` |
52
+ | "Who changed this and when?" | Bash: `git log` / `git blame` |
53
+ | "How is this concept used?" | Both: `search` for fuzzy discovery, `Grep` for text patterns |
54
+ | "Natural-language 'find X'" | Graph: `search(query=…)` → `describe`; fallback `Grep` |
55
+
56
+ ### Escalation pattern
57
+
58
+ 1. **Try the most targeted tool first.** If you have an identifier-shaped string, start with `resolve`. If you have a structural question, start with graph tools.
59
+ 2. **Fall back gracefully.** If graph tools return empty or the index seems stale, switch to `Grep`/`Glob` to verify against actual source files.
60
+ 3. **Cross-validate.** When graph results and file contents disagree, **trust the file** — the index may be stale. Report the discrepancy.
61
+
62
+ ---
63
+
64
+ ## Graph Navigation Reference (java-codebase-rag MCP)
65
+
66
+ ### Node kinds
67
+
68
+ `Symbol` (types and methods), `Route` (HTTP and messaging entry points), `Client` (outbound HTTP call sites), `Producer` (outbound async call sites).
69
+
70
+ ### Indexed content
71
+
72
+ Java production sources plus SQL and YAML (use `search` `table`: `java`, `sql`, `yaml`, or `all`).
73
+
74
+ ### Forced reasoning preamble (every MCP call)
75
+
76
+ Before each MCP call, output one short line:
77
+
78
+ ```
79
+ Q-class: <semantic | structured | inspect | walk>
80
+ Pick: <search|find|describe|neighbors|resolve> Why: <≤8 words>
81
+ ```
82
+
83
+ ### Edge taxonomy
84
+
85
+ Use these strings **verbatim** in `neighbors(..., edge_types=[...])`.
86
+
87
+ #### Stored edges (one hop)
88
+
89
+ | Group | Edge types | Semantics |
90
+ | ----- | ---------- | --------- |
91
+ | Type wiring | `EXTENDS`, `IMPLEMENTS`, `INJECTS` | `in` = who depends on this type; `out` = what this type depends on |
92
+ | Containment | `DECLARES`, `DECLARES_CLIENT`, `DECLARES_PRODUCER` | `in` = owner; `out` = owned member, client, or producer |
93
+ | Method overrides | `OVERRIDES` | Subtype **method** → supertype **declaration** |
94
+ | Method calls | `CALLS` | `in` = callers; `out` = callees (method Symbol → method Symbol only) |
95
+ | Service boundary | `EXPOSES` | method Symbol → Route |
96
+ | Cross-service | `HTTP_CALLS`, `ASYNC_CALLS` | `HTTP_CALLS`: Client → Route; `ASYNC_CALLS`: Producer → Route |
97
+
98
+ #### Composed edges — type Symbol origin (`direction="out"` only)
99
+
100
+ | Edge type | Meaning |
101
+ | --------- | ------- |
102
+ | `DECLARES.DECLARES_CLIENT` | Members' HTTP clients in one hop |
103
+ | `DECLARES.DECLARES_PRODUCER` | Members' async producers in one hop |
104
+ | `DECLARES.EXPOSES` | Members' exposed routes in one hop |
105
+
106
+ #### Composed edges — non-static method Symbol origin (`direction="out"` only)
107
+
108
+ | Edge type | Meaning |
109
+ | --------- | ------- |
110
+ | `OVERRIDDEN_BY` | Concrete overrider methods |
111
+ | `OVERRIDDEN_BY.DECLARES_CLIENT` | Clients declared on overriders |
112
+ | `OVERRIDDEN_BY.DECLARES_PRODUCER` | Producers on overriders |
113
+ | `OVERRIDDEN_BY.EXPOSES` | Routes exposed by overriders |
114
+
115
+ Do not mix `DECLARES.*` and `OVERRIDDEN_BY.*` in one `edge_types` list.
116
+
117
+ ### Argument shapes
118
+
119
+ | Param | Right | Wrong |
120
+ | ----- | ----- | ----- |
121
+ | `edge_types` | `["CALLS"]` | `"CALLS"` or `"[\"CALLS\"]"` |
122
+ | `filter` | `{"role":"CONTROLLER"}` | nested string JSON |
123
+ | `ids` (batch) | `["sym:…","sym:…"]` | comma-joined string |
124
+
125
+ Omit keys you do not need. Empty string `""` is often a **real filter** that matches nothing.
126
+
127
+ ### Node ids
128
+
129
+ | Kind | Prefixes |
130
+ | ---- | -------- |
131
+ | Symbol | `sym:` |
132
+ | Route | `route:` or `r:` |
133
+ | Client | `client:` or `c:` |
134
+ | Producer | `producer:` or `p:` |
135
+
136
+ ### Method / type identity (Symbol FQNs)
137
+
138
+ ```
139
+ <package>.<Type>[.<NestedType>]#<methodName>(<SimpleType1>,<SimpleType2>,…)
140
+ ```
141
+
142
+ Simple types in parentheses; generics erased. No spaces after commas. No-arg: `()`. Constructor: `#<init>(…)`.
143
+
144
+ ### `neighbors` — required every time
145
+
146
+ - **`direction`**: `"in"` or `"out"` (no default). **`edge_types`**: non-empty list.
147
+ - **Batching:** multiple `ids` expand first; `limit`/`offset` slice the **merged** edge list — raise `limit` when batching.
148
+ - **`CALLS` edges:** `attrs.resolved=false` = external (JDK/Spring), not missing. **`include_unresolved=True`** (`out` only) interleaves unresolved call sites; mutually exclusive with `edge_filter`. **`dedup_calls=True`** collapses identical (origin, callee) pairs.
149
+ - **`edge_filter`** (only with `edge_types=['CALLS']`): `min_confidence`; `include_strategies`/`exclude_strategies`; `callee_declaring_role`/`callee_declaring_roles`/`exclude_callee_declaring_roles`. Note: use `edge_filter.callee_declaring_role` for callee stereotype filtering, not `filter.role` which filters the neighbor node.
150
+ - **Cross-service edges:** read `attrs.confidence` and `attrs.match` — low confidence or `unresolved`/`phantom`/`ambiguous` = resolver signal, not ground truth.
151
+
152
+ ### Shared NodeFilter
153
+
154
+ For `find`, `filter` is required — `{}` means no predicates. **Strict frame:** unknown keys or inapplicable populated fields → `success=false`.
155
+
156
+ | Keys | Applies to |
157
+ | ---- | ---------- |
158
+ | `microservice`, `module` | All kinds |
159
+ | `role`, `exclude_roles`, `annotation`, `capability`, `fqn_prefix`, `symbol_kind`, `symbol_kinds` | **symbol** |
160
+ | `http_method`, `path_prefix`, `framework` | **route** |
161
+ | `client_kind`, `target_service`, `target_path_prefix`, `http_method` | **client** |
162
+ | `producer_kind`, `topic_prefix` | **producer** |
163
+
164
+ No wildcards in prefix fields — use `search(query=…)` for fuzzy text.
165
+
166
+ ### Identifier resolution (`resolve`)
167
+
168
+ **Input:** FQN/suffix, `sym:`/`route:`/`client:`/`producer:` id, `METHOD /path`, route path, client target_service, producer topic.
169
+ **`hint_kind`:** optional `symbol`|`route`|`client`|`producer` (narrows generators).
170
+
171
+ | `status` | Action |
172
+ | -------- | ------ |
173
+ | `one` | `describe(id=node.id)` |
174
+ | `many` | pick from candidates, then `describe` |
175
+ | `none` | fall back to `search(query=…)` or `Grep` |
176
+
177
+ Prefer `resolve` → `describe(id=…)` over `describe(fqn=…)` when FQN may collide.
178
+
179
+ ### Tool signatures summary
180
+
181
+ - **`search`** — `query`, `table` (`java`|`sql`|`yaml`|`all`), `hybrid` (bool), `limit` (default 5), `offset`, `path_contains`, optional `filter` (symbol-applicable only).
182
+ - **`find`** — `kind` (`symbol`|`route`|`client`|`producer`), **`filter`** (required object), `limit` (default 25), `offset`.
183
+ - **`describe`** — `id` (any kind) or `fqn` (symbol only; `id` wins). Returns node + `edge_summary` (stored + composed keys).
184
+ - **`resolve`** — `identifier`, optional `hint_kind`.
185
+
186
+ ### Decision tree
187
+
188
+ | User asks… | First step | Follow-up |
189
+ | ---------- | ---------- | --------- |
190
+ | Identifier-shaped string | `resolve` | `describe` → `neighbors` |
191
+ | Fuzzy / NL "where is X" | `search` | `describe` → `neighbors` |
192
+ | All controllers in S | `find(kind="symbol", filter={"microservice":"S","role":"CONTROLLER"})` | `neighbors` |
193
+ | Interfaces in S | `find(..., filter={"microservice":"S","symbol_kind":"interface"})` | `neighbors`/`describe` |
194
+ | HTTP / messaging entry points | `find(kind="route", filter={…})` | `describe` |
195
+ | Outbound HTTP clients | `find(kind="client", filter={…})` | `neighbors(..., "out", ["HTTP_CALLS"])` |
196
+ | Outbound async producers | `find(kind="producer", filter={…})` | `neighbors(..., "out", ["ASYNC_CALLS"])` |
197
+ | Who calls method M? | `resolve` → `neighbors("in", ["CALLS"])` | — |
198
+ | What does M call? | `resolve` | `neighbors(ids, "out", ["CALLS"])` |
199
+ | Who hits this route? | route id | `neighbors(ids, "in", ["HTTP_CALLS","ASYNC_CALLS","EXPOSES"])` |
200
+ | Handler for route | `neighbors(route_id, "in", ["EXPOSES"])` | — |
201
+ | Who implements T? | `neighbors(type_id, "in", ["IMPLEMENTS"])` | — |
202
+ | Who injects T? | `neighbors(type_id, "in", ["INJECTS"])` | — |
203
+ | Impact of changing X? | bounded `neighbors` traversal (depth ≤2) | — |
204
+
205
+ ### Roles
206
+
207
+ | Role | Meaning |
208
+ | ---- | ------- |
209
+ | `CONTROLLER` | HTTP / messaging entry point |
210
+ | `SERVICE` | Business logic orchestration |
211
+ | `REPOSITORY` | Data access |
212
+ | `COMPONENT` | General Spring component |
213
+ | `CONFIG` | `@Configuration` class |
214
+ | `ENTITY` | JPA / persistence entity |
215
+ | `CLIENT` | Outbound call wrapper |
216
+ | `MAPPER` | Data mapper / converter |
217
+ | `DTO` | Data transfer object |
218
+ | `OTHER` | Infrastructure / utility / unclassified |
219
+
220
+ ### Capabilities
221
+
222
+ `MESSAGE_LISTENER`, `MESSAGE_PRODUCER`, `HTTP_CLIENT`, `SCHEDULED_TASK`, `EXCEPTION_HANDLER`.
223
+
224
+ ### Symbol kinds
225
+
226
+ `class`, `interface`, `enum`, `record`, `annotation`, `method`, `constructor`.
227
+
228
+ ---
229
+
230
+ ## File-System Search Reference
231
+
232
+ ### Glob patterns
233
+
234
+ Use `Glob` to find files by name or path pattern:
235
+ - `**/*.java` — all Java files
236
+ - `**/*Controller*.java` — controller files
237
+ - `**/application*.yml` — Spring config files
238
+ - `**/*Test*.java` — test files
239
+
240
+ ### Grep patterns
241
+
242
+ Use `Grep` for content search across files:
243
+ - Class declarations: `class ClassName`
244
+ - Method usage: `methodName(`
245
+ - Annotations: `@RequestMapping`, `@Service`, etc.
246
+ - Import statements: `import com.example.ClassName`
247
+ - Configuration keys: `spring.datasource`
248
+
249
+ ### Reading files
250
+
251
+ - Use `Read` with `offset`/`limit` for large files — read relevant sections.
252
+ - For images/PDFs, `Read` handles them natively.
253
+ - Prefer reading excerpts to dumping entire files.
254
+
255
+ ---
256
+
257
+ ## Recovery Playbook
258
+
259
+ | Symptom | Fix |
260
+ | ------- | --- |
261
+ | Graph returns empty | Verify with `Grep`/`Read` against source files; index may be stale |
262
+ | `neighbors` validation error | Ensure `direction` and `edge_types` are set |
263
+ | Cannot find symbol via graph | Try `resolve`, then `search`, then `find` with `fqn_prefix`; fallback `Grep` |
264
+ | `find` returns too much | Add `microservice`, `fqn_prefix`, `path_prefix`, `topic_prefix` |
265
+ | Empty `search` | Try `table="all"`; `find` with `fqn_prefix`; `Grep` directly |
266
+ | Empty results across tools | Index missing/stale → `Grep`/`Glob`/`Read`; ask operator to rebuild |
267
+ | Graph vs file disagree | Trust the file; report stale index |
268
+ | Mixed composed families on one id | Split calls — type keys need type id; override keys need method id |
269
+ | File not found via Glob | Try broader pattern; check working directory |
270
+ | Grep too many results | Narrow with `path_filter`, `glob`, or more specific pattern |
271
+ | Grep no results | Broaden pattern; check working directory; try alternate terms |
272
+ | Two failed graph attempts | Stop graph attempts, switch to file-system tools, report |
273
+
274
+ After two failed attempts on the same intent, stop and report what was tried and what failed.
275
+
276
+ ---
277
+
278
+ ## Workflow Patterns
279
+
280
+ ### Pattern: "explain feature X"
281
+
282
+ 1. `search` with a short query → pick top hits
283
+ 2. `describe` on chosen ids → read edge_summary
284
+ 3. `neighbors` with targeted edge_types → trace the flow
285
+ 4. Stop when you can answer the question
286
+
287
+ ### Pattern: "where is X used?"
288
+
289
+ 1. `resolve` for exact match, or `search` for fuzzy
290
+ 2. If graph finds it: `neighbors("in", ["CALLS","INJECTS","IMPLEMENTS"])`
291
+ 3. If graph misses it: `Grep` for the symbol name across the codebase
292
+ 4. Report all usage sites found
293
+
294
+ ### Pattern: "find all Y in the codebase"
295
+
296
+ 1. If structural: `find(kind=…, filter={…})` for exact listing
297
+ 2. If textual: `Grep` for the pattern
298
+ 3. If broad: `Glob` for files + `Grep` for content
299
+ 4. Summarize findings; don't dump raw lists
300
+
301
+ ### Pattern: "trace the flow from A to B"
302
+
303
+ 1. Resolve both endpoints
304
+ 2. Walk `CALLS` / `EXPOSES` / `HTTP_CALLS` edges from A
305
+ 3. Use `Grep` to fill gaps where graph index is incomplete
306
+ 4. Report the trace with file:line references