synapse-orch-ai 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/backend/core/models.py +1 -1
  2. package/backend/core/routes/repos.py +12 -4
  3. package/backend/core/routes/tools.py +16 -1
  4. package/backend/core/tools.py +21 -3
  5. package/backend/services/code_indexer.py +55 -7
  6. package/backend/tools/code_search.py +299 -5
  7. package/frontend-build/.next/BUILD_ID +1 -1
  8. package/frontend-build/.next/build-manifest.json +3 -3
  9. package/frontend-build/.next/prerender-manifest.json +3 -3
  10. package/frontend-build/.next/server/app/_global-error.html +1 -1
  11. package/frontend-build/.next/server/app/_global-error.rsc +1 -1
  12. package/frontend-build/.next/server/app/_global-error.segments/__PAGE__.segment.rsc +1 -1
  13. package/frontend-build/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
  14. package/frontend-build/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
  15. package/frontend-build/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
  16. package/frontend-build/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
  17. package/frontend-build/.next/server/app/_not-found.html +1 -1
  18. package/frontend-build/.next/server/app/_not-found.rsc +1 -1
  19. package/frontend-build/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
  20. package/frontend-build/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
  21. package/frontend-build/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
  22. package/frontend-build/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
  23. package/frontend-build/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
  24. package/frontend-build/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
  25. package/frontend-build/.next/server/app/index.html +1 -1
  26. package/frontend-build/.next/server/app/index.rsc +1 -1
  27. package/frontend-build/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
  28. package/frontend-build/.next/server/app/index.segments/_full.segment.rsc +1 -1
  29. package/frontend-build/.next/server/app/index.segments/_head.segment.rsc +1 -1
  30. package/frontend-build/.next/server/app/index.segments/_index.segment.rsc +1 -1
  31. package/frontend-build/.next/server/app/index.segments/_tree.segment.rsc +1 -1
  32. package/frontend-build/.next/server/app/login.html +1 -1
  33. package/frontend-build/.next/server/app/login.rsc +1 -1
  34. package/frontend-build/.next/server/app/login.segments/_full.segment.rsc +1 -1
  35. package/frontend-build/.next/server/app/login.segments/_head.segment.rsc +1 -1
  36. package/frontend-build/.next/server/app/login.segments/_index.segment.rsc +1 -1
  37. package/frontend-build/.next/server/app/login.segments/_tree.segment.rsc +1 -1
  38. package/frontend-build/.next/server/app/login.segments/login/__PAGE__.segment.rsc +1 -1
  39. package/frontend-build/.next/server/app/login.segments/login.segment.rsc +1 -1
  40. package/frontend-build/.next/server/app/settings/[tab]/page_client-reference-manifest.js +1 -1
  41. package/frontend-build/.next/server/chunks/ssr/_0b~n.nn._.js +2 -2
  42. package/frontend-build/.next/server/middleware-build-manifest.js +3 -3
  43. package/frontend-build/.next/server/middleware-manifest.json +5 -5
  44. package/frontend-build/.next/server/pages/404.html +1 -1
  45. package/frontend-build/.next/server/pages/500.html +1 -1
  46. package/frontend-build/.next/server/server-reference-manifest.js +1 -1
  47. package/frontend-build/.next/server/server-reference-manifest.json +1 -1
  48. package/frontend-build/.next/static/chunks/{15z7zp13idekl.js → 0mw2uexyx0fyf.js} +2 -2
  49. package/package.json +1 -1
  50. /package/frontend-build/.next/static/{47MXNuH5FrJAfrk7sJf9l → yEEl_TTPPEzHJlgKg21Go}/_buildManifest.js +0 -0
  51. /package/frontend-build/.next/static/{47MXNuH5FrJAfrk7sJf9l → yEEl_TTPPEzHJlgKg21Go}/_clientMiddlewareManifest.js +0 -0
  52. /package/frontend-build/.next/static/{47MXNuH5FrJAfrk7sJf9l → yEEl_TTPPEzHJlgKg21Go}/_ssgManifest.js +0 -0
@@ -59,7 +59,7 @@ class Repo(BaseModel):
59
59
  name: str
60
60
  path: str
61
61
  description: str = ""
62
- included_patterns: list[str] = ["*.py", "*.ts", "*.tsx", "*.js", "*.jsx", "*.rs", "*.go", "*.java", "*.md", "*.html", "*.vue", "*.css", "*.scss", "*.cpp", "*.c"]
62
+ included_patterns: list[str] = ["*.py", "*.ts", "*.tsx", "*.js", "*.jsx", "*.rs", "*.go", "*.java", "*.md", "*.html", "*.vue", "*.svelte", "*.css", "*.scss", "*.cpp", "*.c"]
63
63
  excluded_patterns: list[str] = [".*", "node_modules", "__pycache__", "venv", ".git", "*.pyc"]
64
64
  last_indexed: str | None = None
65
65
  status: str = "pending" # pending | indexing | indexed | error
@@ -3,11 +3,16 @@ Repo management endpoints (CRUD + reindex).
3
3
  """
4
4
  import os
5
5
  import json
6
- from fastapi import APIRouter, HTTPException, BackgroundTasks
6
+ from fastapi import APIRouter, HTTPException, BackgroundTasks, Body
7
+ from pydantic import BaseModel
7
8
  from core.models import Repo
8
9
  from core.config import DATA_DIR, load_settings
9
10
  from core.json_store import JsonStore
10
11
 
12
+
13
+ class ReindexOptions(BaseModel):
14
+ full_reindex: bool = False
15
+
11
16
  router = APIRouter()
12
17
 
13
18
  _repos_store = JsonStore(os.path.join(DATA_DIR, "repos.json"))
@@ -147,7 +152,8 @@ async def delete_repo(repo_id: str):
147
152
  return {"status": "success"}
148
153
 
149
154
  @router.post("/api/repos/{repo_id}/reindex")
150
- async def reindex_repo(repo_id: str, background_tasks: BackgroundTasks):
155
+ async def reindex_repo(repo_id: str, background_tasks: BackgroundTasks,
156
+ opts: ReindexOptions = Body(default=ReindexOptions())):
151
157
  repos = load_repos()
152
158
  repo = next((r for r in repos if r["id"] == repo_id), None)
153
159
  if not repo:
@@ -174,14 +180,16 @@ async def reindex_repo(repo_id: str, background_tasks: BackgroundTasks):
174
180
  # Run in background
175
181
  try:
176
182
  from services.code_indexer import run_index
177
- background_tasks.add_task(run_index, repo_id, real_path, repo["included_patterns"], repo["excluded_patterns"])
183
+ background_tasks.add_task(run_index, repo_id, real_path,
184
+ repo["included_patterns"], repo["excluded_patterns"],
185
+ opts.full_reindex)
178
186
  except ImportError as e:
179
187
  print("Indexer unavailable:", e)
180
188
  repo["status"] = "error"
181
189
  save_repos(repos)
182
190
  raise HTTPException(status_code=500, detail="Indexer service not available")
183
191
 
184
- return {"status": "indexing_started"}
192
+ return {"status": "indexing_started", "full_reindex": opts.full_reindex}
185
193
 
186
194
 
187
195
  @router.post("/api/repos/{repo_id}/stop-index")
@@ -35,6 +35,21 @@ async def get_custom_tools():
35
35
  return load_custom_tools()
36
36
 
37
37
 
38
+ _NATIVE_SERVER_LABELS: dict[str, str] = {
39
+ "code_vault_search": "Code Search",
40
+ "time": "Time & Date",
41
+ "sql": "SQL Database",
42
+ "personal_details": "Personal Details",
43
+ "collect_data": "Collect Data",
44
+ "pdf_parser": "PDF Parser",
45
+ "xlsx_parser": "Excel / XLSX",
46
+ "vault_sandbox": "Vault Sandbox",
47
+ "web_scraper": "Web Scraper",
48
+ "bash": "Bash",
49
+ "Filesystem": "Filesystem",
50
+ }
51
+
52
+
38
53
  @router.get("/api/tools/available")
39
54
  async def get_available_tools():
40
55
  """List all available tools from all sources (Native Agents, External MCP, Custom HTTP)"""
@@ -54,7 +69,7 @@ async def get_available_tools():
54
69
  cfg = _server.mcp_manager.get_server_config(server_name)
55
70
  display_label = (cfg.get("label") or server_name) if cfg else server_name
56
71
  else:
57
- display_label = server_name
72
+ display_label = _NATIVE_SERVER_LABELS.get(server_name, server_name)
58
73
 
59
74
  result = await session.list_tools()
60
75
  for t in result.tools:
@@ -29,6 +29,10 @@ DEFAULT_TOOLS_BY_TYPE = {
29
29
  },
30
30
  "code": {
31
31
  "search_codebase",
32
+ "multi_repo_search",
33
+ "find_similar_code",
34
+ "list_indexed_files",
35
+ "get_file_chunks",
32
36
  },
33
37
  "orchestrator": set(), # orchestrator agents delegate to sub-agents; no extra tools needed
34
38
  "delegate": set(), # delegate agents route to sub-agents via synthetic delegate_to_agent tool
@@ -90,10 +94,12 @@ async def aggregate_all_tools(agent_sessions, active_agent, custom_tools_list):
90
94
  if tool_name not in allowed_tools:
91
95
  allowed_tools.append(tool_name)
92
96
 
93
- # Remove search_codebase if embed_code is disabled
97
+ # Remove embedding tools if embed_code is disabled
94
98
  settings = load_settings()
95
99
  if not settings.get("embed_code", False):
96
- allowed_tools = [t for t in allowed_tools if t != "search_codebase"]
100
+ _embed_tools = {"search_codebase", "multi_repo_search", "find_similar_code",
101
+ "list_indexed_files", "get_file_chunks"}
102
+ allowed_tools = [t for t in allowed_tools if t not in _embed_tools]
97
103
 
98
104
  # Standard MCP Tools
99
105
  for session_name, session in agent_sessions.items():
@@ -207,6 +213,18 @@ def build_system_prompt(agent_system_template, tools_json, session_id, session_s
207
213
  """
208
214
  # Determine if code embedding is enabled (for conditional tool description)
209
215
  _embed_code = load_settings().get("embed_code", False)
216
+ _embed_tools_desc = (
217
+ "- **`search_codebase`** - semantic search within specific repos. Requires `repo_ids`."
218
+ " Add `file_filter` (e.g. `.py`, `components`) to narrow results.\n"
219
+ "- **`multi_repo_search`** - like search_codebase but `repo_ids` is optional;"
220
+ " omit to search ALL indexed repos at once.\n"
221
+ "- **`find_similar_code`** - pass a code snippet to find similar patterns across repos"
222
+ " (vs. natural language in search_codebase).\n"
223
+ "- **`list_indexed_files`** - list every file in the embedding index with chunk counts."
224
+ " Use before searching to understand coverage.\n"
225
+ "- **`get_file_chunks`** - retrieve all indexed chunks for a specific file"
226
+ " (the semantic outline). Use after finding a relevant file."
227
+ ) if _embed_code else ""
210
228
 
211
229
  # Get current date/time for context injection
212
230
  now = datetime.datetime.now(zoneinfo.ZoneInfo("UTC"))
@@ -279,7 +297,7 @@ You have access to the following tools:
279
297
  {tools_json}
280
298
 
281
299
  **CODE & FILE NAVIGATION:**
282
- {"- **`search_codebase`** — semantic search across indexed repos. Requires `repo_ids`. Best for broad symbol or concept search." if _embed_code else ""}
300
+ {_embed_tools_desc}
283
301
  - **`grep`** — search for a pattern inside a file or across all files in a folder. Pass a file path to search that file, or a folder path to search all files within it. Use `file_pattern` to filter by extension (e.g. `*.py`, `*.ts`).
284
302
  - **`glob`** — discover file paths by pattern (e.g. `**/*.py`, `src/**/*.ts`).
285
303
  - **`read_file`** — read an entire file. Use when you already know the path and the file is small. For large files, prefer `read_file_by_lines` or `grep`.
@@ -356,6 +356,31 @@ def get_index_status(repo_id: str) -> dict:
356
356
  return {"status": "error", "message": str(e), "count": 0}
357
357
 
358
358
 
359
+ def _get_current_vector_dim(repo_id: str) -> int | None:
360
+ """Return the vector column dimension currently in the DB, or None if absent."""
361
+ if not COCOINDEX_AVAILABLE:
362
+ return None
363
+ table_name = get_table_name(repo_id)
364
+ db_url = _get_db_url()
365
+ if not db_url:
366
+ return None
367
+ try:
368
+ with psycopg.connect(db_url) as conn:
369
+ with conn.cursor() as cur:
370
+ cur.execute("""
371
+ SELECT atttypmod
372
+ FROM pg_attribute
373
+ JOIN pg_class ON pg_class.oid = pg_attribute.attrelid
374
+ WHERE pg_class.relname = %s
375
+ AND pg_attribute.attname = 'embedding'
376
+ AND atttypmod > 0
377
+ """, (table_name,))
378
+ row = cur.fetchone()
379
+ return row[0] if row else None
380
+ except Exception:
381
+ return None
382
+
383
+
359
384
  def drop_index(repo_id: str):
360
385
  """Drop all tables and CocoIndex metadata for a repo — ensures clean rebuild."""
361
386
  if not COCOINDEX_AVAILABLE:
@@ -399,7 +424,7 @@ def _update_repo_status(repo_id: str, **fields):
399
424
  json.dump(repos, f, indent=4)
400
425
 
401
426
 
402
- def run_index_task(repo_id: str, repo_path: str, included_patterns: list[str], excluded_patterns: list[str]):
427
+ def run_index_task(repo_id: str, repo_path: str, included_patterns: list[str], excluded_patterns: list[str], full_reindex: bool = True):
403
428
  if not COCOINDEX_AVAILABLE:
404
429
  msg = (
405
430
  "CocoIndex not installed in the backend venv — indexing skipped.\n"
@@ -416,11 +441,31 @@ def run_index_task(repo_id: str, repo_path: str, included_patterns: list[str], e
416
441
  stop = _stop_events.setdefault(repo_id, threading.Event())
417
442
  stop.clear() # reset from any previous stop request
418
443
 
419
- print(f"Starting index builder for {repo_id}...")
444
+ print(f"Starting index builder for {repo_id} (full_reindex={full_reindex})...")
420
445
  _update_repo_status(repo_id, status="indexing", error_message=None)
421
446
  try:
422
- print("[index] Step 0: drop stale tables + CocoIndex metadata")
423
- drop_index(repo_id)
447
+ if full_reindex:
448
+ print("[index] Step 0: drop stale tables + CocoIndex metadata")
449
+ drop_index(repo_id)
450
+ else:
451
+ # Schema guard: if the embedding dim stored in the DB differs from the currently
452
+ # configured model's dim, a full rebuild is unavoidable (vector columns are fixed-width).
453
+ try:
454
+ model = get_configured_embedding_model()
455
+ settings = load_settings()
456
+ dim = probe_embedding_dim(model, settings)
457
+ current_dim = _get_current_vector_dim(repo_id)
458
+ if current_dim is not None and current_dim != dim:
459
+ print(f"[index] Embedding dim changed ({current_dim} → {dim}), promoting to full reindex")
460
+ drop_index(repo_id)
461
+ full_reindex = True
462
+ else:
463
+ print("[index] Incremental reindex — tracking table preserved, only changed files will be processed")
464
+ except Exception as e:
465
+ print(f"[index] Schema check failed ({e}), falling back to full reindex")
466
+ drop_index(repo_id)
467
+ full_reindex = True
468
+
424
469
  if stop.is_set():
425
470
  print(f"[index] Stop requested after step 0 — aborting {repo_id}")
426
471
  _update_repo_status(repo_id, status="stopped", error_message=None)
@@ -451,7 +496,10 @@ def run_index_task(repo_id: str, repo_path: str, included_patterns: list[str], e
451
496
  # update() is a long-running Rust call — we can't interrupt it mid-way,
452
497
  # but we check the stop flag immediately after it returns.
453
498
  print("[index] Step 4: repo_flow.update()")
454
- repo_flow.update(full_reprocess=True)
499
+ if full_reindex:
500
+ repo_flow.update(full_reprocess=True)
501
+ else:
502
+ repo_flow.update() # incremental — CocoIndex processes only changed/new files
455
503
 
456
504
  if stop.is_set():
457
505
  print(f"[index] Stop requested — marking {repo_id} as stopped")
@@ -493,10 +541,10 @@ def stop_index(repo_id: str) -> bool:
493
541
  return False
494
542
 
495
543
 
496
- def run_index(repo_id: str, repo_path: str, included_patterns: list[str], excluded_patterns: list[str]):
544
+ def run_index(repo_id: str, repo_path: str, included_patterns: list[str], excluded_patterns: list[str], full_reindex: bool = True):
497
545
  t = threading.Thread(
498
546
  target=run_index_task,
499
- args=(repo_id, repo_path, included_patterns, excluded_patterns),
547
+ args=(repo_id, repo_path, included_patterns, excluded_patterns, full_reindex),
500
548
  daemon=True,
501
549
  )
502
550
  _active_threads[repo_id] = t
@@ -108,7 +108,20 @@ def _get_table_name(repo_id: str) -> str:
108
108
  return f"ci_{repo_id}__emb"
109
109
 
110
110
 
111
- async def _search(query: str, repo_ids: list[str], top_k: int = 10) -> list[dict]:
111
+ def _load_indexed_repo_ids() -> list[str]:
112
+ """Return repo IDs where status == 'indexed' from repos.json."""
113
+ try:
114
+ from core.config import DATA_DIR
115
+ repos_file = os.path.join(DATA_DIR, "repos.json")
116
+ with open(repos_file) as f:
117
+ repos = json.load(f)
118
+ return [r["id"] for r in repos if r.get("status") == "indexed" and r.get("id")]
119
+ except Exception:
120
+ return []
121
+
122
+
123
+ async def _search(query: str, repo_ids: list[str], top_k: int = 10,
124
+ file_filter: str | None = None, min_score: float = 0.0) -> list[dict]:
112
125
  """Search indexed repos using cosine similarity."""
113
126
  try:
114
127
  pool = _get_pool()
@@ -143,13 +156,17 @@ async def _search(query: str, repo_ids: list[str], top_k: int = 10) -> list[dict
143
156
 
144
157
  repo_root = repo_path_map.get(repo_id, "")
145
158
  for row in cur.fetchall():
146
- # location may be a psycopg Range object — convert to string
147
159
  loc = row[2]
148
160
  if hasattr(loc, 'lower') and hasattr(loc, 'upper'):
149
161
  loc = f"{loc.lower}-{loc.upper}"
150
162
  else:
151
163
  loc = str(loc) if loc is not None else ""
152
164
  filename = row[0].lstrip("/")
165
+ score = round(1.0 - row[3], 5)
166
+ if score < min_score:
167
+ continue
168
+ if file_filter and file_filter not in filename:
169
+ continue
153
170
  full_path = f"{repo_root}/{filename}" if repo_root else filename
154
171
  all_results.append({
155
172
  "repo_id": repo_id,
@@ -157,7 +174,7 @@ async def _search(query: str, repo_ids: list[str], top_k: int = 10) -> list[dict
157
174
  "full_path": full_path,
158
175
  "code": row[1],
159
176
  "location": loc,
160
- "score": round(1.0 - row[3], 5)
177
+ "score": score,
161
178
  })
162
179
  except Exception as e:
163
180
  err_text = str(e)
@@ -175,6 +192,98 @@ async def _search(query: str, repo_ids: list[str], top_k: int = 10) -> list[dict
175
192
  return all_results[:top_k]
176
193
 
177
194
 
195
+ def _list_files_in_index(repo_id: str, file_filter: str | None = None) -> dict:
196
+ """Query the embedding table for distinct filenames and chunk counts."""
197
+ if not _VALID_REPO_ID.match(repo_id):
198
+ return {"error": f"Invalid repo_id format: {repo_id}"}
199
+ try:
200
+ pool = _get_pool()
201
+ except Exception as e:
202
+ return {"error": f"Database connection failed: {e}"}
203
+
204
+ repo_path_map = _load_repo_paths()
205
+ repo_root = repo_path_map.get(repo_id, "")
206
+ table_name = _get_table_name(repo_id)
207
+
208
+ try:
209
+ with pool.connection() as conn:
210
+ with conn.cursor() as cur:
211
+ if file_filter:
212
+ cur.execute(f"""
213
+ SELECT filename, COUNT(*) AS chunk_count
214
+ FROM "{table_name}"
215
+ WHERE filename LIKE %s
216
+ GROUP BY filename
217
+ ORDER BY filename
218
+ """, (f"%{file_filter}%",))
219
+ else:
220
+ cur.execute(f"""
221
+ SELECT filename, COUNT(*) AS chunk_count
222
+ FROM "{table_name}"
223
+ GROUP BY filename
224
+ ORDER BY filename
225
+ """)
226
+ rows = cur.fetchall()
227
+ except Exception as e:
228
+ return {"error": f"Query failed for repo '{repo_id}': {e}"}
229
+
230
+ files = []
231
+ total_chunks = 0
232
+ for row in rows:
233
+ filename = row[0].lstrip("/")
234
+ chunk_count = row[1]
235
+ total_chunks += chunk_count
236
+ full_path = f"{repo_root}/{filename}" if repo_root else filename
237
+ files.append({"filename": filename, "chunk_count": chunk_count, "full_path": full_path})
238
+
239
+ return {"repo_id": repo_id, "files": files, "total_files": len(files), "total_chunks": total_chunks}
240
+
241
+
242
+ def _get_chunks_for_file(repo_id: str, filename: str) -> dict:
243
+ """Retrieve all indexed chunks for a specific file."""
244
+ if not _VALID_REPO_ID.match(repo_id):
245
+ return {"error": f"Invalid repo_id format: {repo_id}"}
246
+ try:
247
+ pool = _get_pool()
248
+ except Exception as e:
249
+ return {"error": f"Database connection failed: {e}"}
250
+
251
+ repo_path_map = _load_repo_paths()
252
+ repo_root = repo_path_map.get(repo_id, "")
253
+ table_name = _get_table_name(repo_id)
254
+ clean_filename = filename.lstrip("/")
255
+
256
+ try:
257
+ with pool.connection() as conn:
258
+ with conn.cursor() as cur:
259
+ # Try both with and without leading slash to be robust
260
+ cur.execute(f"""
261
+ SELECT location, code
262
+ FROM "{table_name}"
263
+ WHERE filename = %s OR filename = %s
264
+ ORDER BY location
265
+ """, (clean_filename, "/" + clean_filename))
266
+ rows = cur.fetchall()
267
+ except Exception as e:
268
+ return {"error": f"Query failed for '{filename}' in repo '{repo_id}': {e}"}
269
+
270
+ if not rows:
271
+ return {"error": f"No chunks found for '{filename}' in repo '{repo_id}'. Use list_indexed_files to see available files."}
272
+
273
+ chunks = []
274
+ for row in rows:
275
+ loc = row[0]
276
+ if hasattr(loc, 'lower') and hasattr(loc, 'upper'):
277
+ loc = f"{loc.lower}-{loc.upper}"
278
+ else:
279
+ loc = str(loc) if loc is not None else ""
280
+ chunks.append({"location": loc, "code": row[1]})
281
+
282
+ full_path = f"{repo_root}/{clean_filename}" if repo_root else clean_filename
283
+ return {"repo_id": repo_id, "filename": clean_filename, "full_path": full_path,
284
+ "chunks": chunks, "total_chunks": len(chunks)}
285
+
286
+
178
287
  def _grep_file(
179
288
  file_path: str,
180
289
  pattern: str,
@@ -397,7 +506,9 @@ async def list_tools() -> list[types.Tool]:
397
506
  description=(
398
507
  "Search indexed code repositories for relevant code snippets using semantic vector search. "
399
508
  "Returns matching code with filename, location, and relevance score. "
400
- "You MUST provide repo_ids — check the LINKED CODE REPOSITORIES section in your system prompt for available repo IDs."
509
+ "You MUST provide repo_ids — check the LINKED CODE REPOSITORIES section in your system prompt for available repo IDs. "
510
+ "Use file_filter to narrow results to a specific path or file type (e.g. 'components', '.py'). "
511
+ "Use min_score (0–1) to discard low-confidence matches."
401
512
  ),
402
513
  inputSchema={
403
514
  "type": "object",
@@ -415,11 +526,140 @@ async def list_tools() -> list[types.Tool]:
415
526
  "type": "integer",
416
527
  "description": "Number of results to return (default 10)",
417
528
  "default": 10
529
+ },
530
+ "file_filter": {
531
+ "type": "string",
532
+ "description": "Only return results from files whose path contains this string (e.g. 'src/api', '.py', 'components')"
533
+ },
534
+ "min_score": {
535
+ "type": "number",
536
+ "description": "Minimum relevance score 0–1 (default 0.0 — return all). Higher values (e.g. 0.7) return only strong matches.",
537
+ "default": 0.0
418
538
  }
419
539
  },
420
540
  "required": ["query", "repo_ids"]
421
541
  },
422
542
  ),
543
+ types.Tool(
544
+ name="multi_repo_search",
545
+ description=(
546
+ "Search across multiple indexed repos simultaneously and return globally ranked results. "
547
+ "Unlike search_codebase, repo_ids is optional — if omitted, ALL indexed repos are searched automatically. "
548
+ "Use this when you don't know which repo contains the code you're looking for, or want the best match anywhere. "
549
+ "Use file_filter to narrow to a specific path or file type."
550
+ ),
551
+ inputSchema={
552
+ "type": "object",
553
+ "properties": {
554
+ "query": {
555
+ "type": "string",
556
+ "description": "Natural language or code search query"
557
+ },
558
+ "repo_ids": {
559
+ "type": "array",
560
+ "items": {"type": "string"},
561
+ "description": "Repos to search. Omit or pass an empty list to search ALL indexed repos."
562
+ },
563
+ "top_k": {
564
+ "type": "integer",
565
+ "description": "Total results to return across all repos (default 10)",
566
+ "default": 10
567
+ },
568
+ "file_filter": {
569
+ "type": "string",
570
+ "description": "Only return results from files whose path contains this string"
571
+ },
572
+ "min_score": {
573
+ "type": "number",
574
+ "description": "Minimum relevance score 0–1 (default 0.0)",
575
+ "default": 0.0
576
+ }
577
+ },
578
+ "required": ["query"]
579
+ },
580
+ ),
581
+ types.Tool(
582
+ name="find_similar_code",
583
+ description=(
584
+ "Given a code snippet, find semantically similar code patterns across one or more repos. "
585
+ "Unlike search_codebase (which takes natural language), this embeds actual code — "
586
+ "use it when you have a piece of code and want to find similar implementations or usages elsewhere. "
587
+ "repo_ids is optional; omit to search all indexed repos."
588
+ ),
589
+ inputSchema={
590
+ "type": "object",
591
+ "properties": {
592
+ "code": {
593
+ "type": "string",
594
+ "description": "A code snippet to find similar patterns for"
595
+ },
596
+ "repo_ids": {
597
+ "type": "array",
598
+ "items": {"type": "string"},
599
+ "description": "Repos to search. Omit to search all indexed repos."
600
+ },
601
+ "top_k": {
602
+ "type": "integer",
603
+ "description": "Number of results to return (default 10)",
604
+ "default": 10
605
+ },
606
+ "file_filter": {
607
+ "type": "string",
608
+ "description": "Only return results from files whose path contains this string"
609
+ },
610
+ "min_score": {
611
+ "type": "number",
612
+ "description": "Minimum relevance score 0–1 (default 0.0)",
613
+ "default": 0.0
614
+ }
615
+ },
616
+ "required": ["code"]
617
+ },
618
+ ),
619
+ types.Tool(
620
+ name="list_indexed_files",
621
+ description=(
622
+ "List all files that have been embedded in the vector index for a repo, with their chunk counts. "
623
+ "Use this to discover what's covered by the index before searching, or to verify a specific file was indexed. "
624
+ "Optionally filter by file path substring."
625
+ ),
626
+ inputSchema={
627
+ "type": "object",
628
+ "properties": {
629
+ "repo_id": {
630
+ "type": "string",
631
+ "description": "The repo ID to list indexed files for"
632
+ },
633
+ "file_filter": {
634
+ "type": "string",
635
+ "description": "Only show files whose path contains this string (e.g. 'src/', '.py')"
636
+ }
637
+ },
638
+ "required": ["repo_id"]
639
+ },
640
+ ),
641
+ types.Tool(
642
+ name="get_file_chunks",
643
+ description=(
644
+ "Get all embedded chunks for a specific file — the semantic view of that file as seen by the index. "
645
+ "After finding a relevant file via search_codebase, use this to retrieve all its indexed segments "
646
+ "and understand the full scope of what's embedded, without reading the raw file."
647
+ ),
648
+ inputSchema={
649
+ "type": "object",
650
+ "properties": {
651
+ "repo_id": {
652
+ "type": "string",
653
+ "description": "The repo ID the file belongs to"
654
+ },
655
+ "filename": {
656
+ "type": "string",
657
+ "description": "Relative file path as returned by search_codebase (e.g. 'src/auth/login.py')"
658
+ }
659
+ },
660
+ "required": ["repo_id", "filename"]
661
+ },
662
+ ),
423
663
  types.Tool(
424
664
  name="grep",
425
665
  description=(
@@ -496,13 +736,67 @@ async def call_tool(
496
736
  query = arguments.get("query", "")
497
737
  repo_ids = arguments.get("repo_ids", [])
498
738
  top_k = arguments.get("top_k", 10)
739
+ file_filter = arguments.get("file_filter") or None
740
+ min_score = float(arguments.get("min_score", 0.0))
499
741
 
500
742
  if not query or not repo_ids:
501
743
  return [types.TextContent(type="text", text=json.dumps({"error": "Both 'query' and 'repo_ids' are required."}))]
502
744
 
503
- results = await _search(query, repo_ids, top_k)
745
+ results = await _search(query, repo_ids, top_k, file_filter=file_filter, min_score=min_score)
504
746
  return [types.TextContent(type="text", text=json.dumps({"results": results}, ensure_ascii=False))]
505
747
 
748
+ if name == "multi_repo_search":
749
+ query = arguments.get("query", "")
750
+ if not query:
751
+ return [types.TextContent(type="text", text=json.dumps({"error": "'query' is required."}))]
752
+
753
+ repo_ids = arguments.get("repo_ids") or []
754
+ if not repo_ids:
755
+ repo_ids = _load_indexed_repo_ids()
756
+ if not repo_ids:
757
+ return [types.TextContent(type="text", text=json.dumps({"error": "No indexed repos found. Index a repo first."}))]
758
+
759
+ top_k = arguments.get("top_k", 10)
760
+ file_filter = arguments.get("file_filter") or None
761
+ min_score = float(arguments.get("min_score", 0.0))
762
+
763
+ results = await _search(query, repo_ids, top_k, file_filter=file_filter, min_score=min_score)
764
+ return [types.TextContent(type="text", text=json.dumps({"results": results, "repos_searched": repo_ids}, ensure_ascii=False))]
765
+
766
+ if name == "find_similar_code":
767
+ code = arguments.get("code", "")
768
+ if not code:
769
+ return [types.TextContent(type="text", text=json.dumps({"error": "'code' is required."}))]
770
+
771
+ repo_ids = arguments.get("repo_ids") or []
772
+ if not repo_ids:
773
+ repo_ids = _load_indexed_repo_ids()
774
+ if not repo_ids:
775
+ return [types.TextContent(type="text", text=json.dumps({"error": "No indexed repos found. Index a repo first."}))]
776
+
777
+ top_k = arguments.get("top_k", 10)
778
+ file_filter = arguments.get("file_filter") or None
779
+ min_score = float(arguments.get("min_score", 0.0))
780
+
781
+ results = await _search(code, repo_ids, top_k, file_filter=file_filter, min_score=min_score)
782
+ return [types.TextContent(type="text", text=json.dumps({"results": results, "repos_searched": repo_ids}, ensure_ascii=False))]
783
+
784
+ if name == "list_indexed_files":
785
+ repo_id = arguments.get("repo_id", "")
786
+ if not repo_id:
787
+ return [types.TextContent(type="text", text=json.dumps({"error": "'repo_id' is required."}))]
788
+ file_filter = arguments.get("file_filter") or None
789
+ result = _list_files_in_index(repo_id, file_filter=file_filter)
790
+ return [types.TextContent(type="text", text=json.dumps(result, ensure_ascii=False))]
791
+
792
+ if name == "get_file_chunks":
793
+ repo_id = arguments.get("repo_id", "")
794
+ filename = arguments.get("filename", "")
795
+ if not repo_id or not filename:
796
+ return [types.TextContent(type="text", text=json.dumps({"error": "Both 'repo_id' and 'filename' are required."}))]
797
+ result = _get_chunks_for_file(repo_id, filename)
798
+ return [types.TextContent(type="text", text=json.dumps(result, ensure_ascii=False))]
799
+
506
800
  if name == "grep":
507
801
  # Accept both new `path` and legacy `file_path` for backward compatibility
508
802
  path = arguments.get("path") or arguments.get("file_path")
@@ -1 +1 @@
1
- 47MXNuH5FrJAfrk7sJf9l
1
+ yEEl_TTPPEzHJlgKg21Go
@@ -7,9 +7,9 @@
7
7
  "static/chunks/03~yq9q893hmn.js"
8
8
  ],
9
9
  "lowPriorityFiles": [
10
- "static/47MXNuH5FrJAfrk7sJf9l/_buildManifest.js",
11
- "static/47MXNuH5FrJAfrk7sJf9l/_ssgManifest.js",
12
- "static/47MXNuH5FrJAfrk7sJf9l/_clientMiddlewareManifest.js"
10
+ "static/yEEl_TTPPEzHJlgKg21Go/_buildManifest.js",
11
+ "static/yEEl_TTPPEzHJlgKg21Go/_ssgManifest.js",
12
+ "static/yEEl_TTPPEzHJlgKg21Go/_clientMiddlewareManifest.js"
13
13
  ],
14
14
  "rootMainFiles": [
15
15
  "static/chunks/0qmh881w772ef.js",
@@ -131,8 +131,8 @@
131
131
  "dynamicRoutes": {},
132
132
  "notFoundRoutes": [],
133
133
  "preview": {
134
- "previewModeId": "a0c0e723631d4645cff9560c0e471540",
135
- "previewModeSigningKey": "afd1ef141acca6335fa57541b914fbf175e0791f4e33dc44ea89dcae8517e371",
136
- "previewModeEncryptionKey": "bb4cfb0c917fd8eb580d0e14c92f71ac59af18f950aa75fbd4ea97c0e98ecf14"
134
+ "previewModeId": "bc368afe07caae8bf169c8234849e882",
135
+ "previewModeSigningKey": "190f16e9b88e7771194d70cdf5e69441824646affcdc450555fd563eed084faa",
136
+ "previewModeEncryptionKey": "a2688049a98d936f285726f270987cb712966062a8078f5f90eb21aa28b36b66"
137
137
  }
138
138
  }