synapse-orch-ai 1.5.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend/core/models.py +1 -1
- package/backend/core/routes/repos.py +12 -4
- package/backend/core/routes/tools.py +16 -1
- package/backend/core/tools.py +21 -3
- package/backend/services/code_indexer.py +55 -7
- package/backend/tools/code_search.py +299 -5
- package/frontend-build/.next/BUILD_ID +1 -1
- package/frontend-build/.next/build-manifest.json +3 -3
- package/frontend-build/.next/prerender-manifest.json +3 -3
- package/frontend-build/.next/server/app/_global-error.html +1 -1
- package/frontend-build/.next/server/app/_global-error.rsc +1 -1
- package/frontend-build/.next/server/app/_global-error.segments/__PAGE__.segment.rsc +1 -1
- package/frontend-build/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- package/frontend-build/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- package/frontend-build/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- package/frontend-build/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- package/frontend-build/.next/server/app/_not-found.html +1 -1
- package/frontend-build/.next/server/app/_not-found.rsc +1 -1
- package/frontend-build/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
- package/frontend-build/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- package/frontend-build/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
- package/frontend-build/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- package/frontend-build/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- package/frontend-build/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
- package/frontend-build/.next/server/app/index.html +1 -1
- package/frontend-build/.next/server/app/index.rsc +1 -1
- package/frontend-build/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
- package/frontend-build/.next/server/app/index.segments/_full.segment.rsc +1 -1
- package/frontend-build/.next/server/app/index.segments/_head.segment.rsc +1 -1
- package/frontend-build/.next/server/app/index.segments/_index.segment.rsc +1 -1
- package/frontend-build/.next/server/app/index.segments/_tree.segment.rsc +1 -1
- package/frontend-build/.next/server/app/login.html +1 -1
- package/frontend-build/.next/server/app/login.rsc +1 -1
- package/frontend-build/.next/server/app/login.segments/_full.segment.rsc +1 -1
- package/frontend-build/.next/server/app/login.segments/_head.segment.rsc +1 -1
- package/frontend-build/.next/server/app/login.segments/_index.segment.rsc +1 -1
- package/frontend-build/.next/server/app/login.segments/_tree.segment.rsc +1 -1
- package/frontend-build/.next/server/app/login.segments/login/__PAGE__.segment.rsc +1 -1
- package/frontend-build/.next/server/app/login.segments/login.segment.rsc +1 -1
- package/frontend-build/.next/server/app/settings/[tab]/page_client-reference-manifest.js +1 -1
- package/frontend-build/.next/server/chunks/ssr/_0b~n.nn._.js +2 -2
- package/frontend-build/.next/server/middleware-build-manifest.js +3 -3
- package/frontend-build/.next/server/middleware-manifest.json +5 -5
- package/frontend-build/.next/server/pages/404.html +1 -1
- package/frontend-build/.next/server/pages/500.html +1 -1
- package/frontend-build/.next/server/server-reference-manifest.js +1 -1
- package/frontend-build/.next/server/server-reference-manifest.json +1 -1
- package/frontend-build/.next/static/chunks/{15z7zp13idekl.js → 0mw2uexyx0fyf.js} +2 -2
- package/package.json +1 -1
- /package/frontend-build/.next/static/{47MXNuH5FrJAfrk7sJf9l → yEEl_TTPPEzHJlgKg21Go}/_buildManifest.js +0 -0
- /package/frontend-build/.next/static/{47MXNuH5FrJAfrk7sJf9l → yEEl_TTPPEzHJlgKg21Go}/_clientMiddlewareManifest.js +0 -0
- /package/frontend-build/.next/static/{47MXNuH5FrJAfrk7sJf9l → yEEl_TTPPEzHJlgKg21Go}/_ssgManifest.js +0 -0
package/backend/core/models.py
CHANGED
|
@@ -59,7 +59,7 @@ class Repo(BaseModel):
|
|
|
59
59
|
name: str
|
|
60
60
|
path: str
|
|
61
61
|
description: str = ""
|
|
62
|
-
included_patterns: list[str] = ["*.py", "*.ts", "*.tsx", "*.js", "*.jsx", "*.rs", "*.go", "*.java", "*.md", "*.html", "*.vue", "*.css", "*.scss", "*.cpp", "*.c"]
|
|
62
|
+
included_patterns: list[str] = ["*.py", "*.ts", "*.tsx", "*.js", "*.jsx", "*.rs", "*.go", "*.java", "*.md", "*.html", "*.vue", "*.svelte", "*.css", "*.scss", "*.cpp", "*.c"]
|
|
63
63
|
excluded_patterns: list[str] = [".*", "node_modules", "__pycache__", "venv", ".git", "*.pyc"]
|
|
64
64
|
last_indexed: str | None = None
|
|
65
65
|
status: str = "pending" # pending | indexing | indexed | error
|
|
@@ -3,11 +3,16 @@ Repo management endpoints (CRUD + reindex).
|
|
|
3
3
|
"""
|
|
4
4
|
import os
|
|
5
5
|
import json
|
|
6
|
-
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
|
6
|
+
from fastapi import APIRouter, HTTPException, BackgroundTasks, Body
|
|
7
|
+
from pydantic import BaseModel
|
|
7
8
|
from core.models import Repo
|
|
8
9
|
from core.config import DATA_DIR, load_settings
|
|
9
10
|
from core.json_store import JsonStore
|
|
10
11
|
|
|
12
|
+
|
|
13
|
+
# Optional JSON body accepted by the POST /api/repos/{repo_id}/reindex endpoint.
class ReindexOptions(BaseModel):
    # False (the default) leaves the choice of an incremental run to the indexer;
    # True is forwarded to run_index to request a full rebuild.
    full_reindex: bool = False
|
|
15
|
+
|
|
11
16
|
router = APIRouter()
|
|
12
17
|
|
|
13
18
|
_repos_store = JsonStore(os.path.join(DATA_DIR, "repos.json"))
|
|
@@ -147,7 +152,8 @@ async def delete_repo(repo_id: str):
|
|
|
147
152
|
return {"status": "success"}
|
|
148
153
|
|
|
149
154
|
@router.post("/api/repos/{repo_id}/reindex")
|
|
150
|
-
async def reindex_repo(repo_id: str, background_tasks: BackgroundTasks
|
|
155
|
+
async def reindex_repo(repo_id: str, background_tasks: BackgroundTasks,
|
|
156
|
+
opts: ReindexOptions = Body(default=ReindexOptions())):
|
|
151
157
|
repos = load_repos()
|
|
152
158
|
repo = next((r for r in repos if r["id"] == repo_id), None)
|
|
153
159
|
if not repo:
|
|
@@ -174,14 +180,16 @@ async def reindex_repo(repo_id: str, background_tasks: BackgroundTasks):
|
|
|
174
180
|
# Run in background
|
|
175
181
|
try:
|
|
176
182
|
from services.code_indexer import run_index
|
|
177
|
-
background_tasks.add_task(run_index, repo_id, real_path,
|
|
183
|
+
background_tasks.add_task(run_index, repo_id, real_path,
|
|
184
|
+
repo["included_patterns"], repo["excluded_patterns"],
|
|
185
|
+
opts.full_reindex)
|
|
178
186
|
except ImportError as e:
|
|
179
187
|
print("Indexer unavailable:", e)
|
|
180
188
|
repo["status"] = "error"
|
|
181
189
|
save_repos(repos)
|
|
182
190
|
raise HTTPException(status_code=500, detail="Indexer service not available")
|
|
183
191
|
|
|
184
|
-
return {"status": "indexing_started"}
|
|
192
|
+
return {"status": "indexing_started", "full_reindex": opts.full_reindex}
|
|
185
193
|
|
|
186
194
|
|
|
187
195
|
@router.post("/api/repos/{repo_id}/stop-index")
|
|
@@ -35,6 +35,21 @@ async def get_custom_tools():
|
|
|
35
35
|
return load_custom_tools()
|
|
36
36
|
|
|
37
37
|
|
|
38
|
+
_NATIVE_SERVER_LABELS: dict[str, str] = {
|
|
39
|
+
"code_vault_search": "Code Search",
|
|
40
|
+
"time": "Time & Date",
|
|
41
|
+
"sql": "SQL Database",
|
|
42
|
+
"personal_details": "Personal Details",
|
|
43
|
+
"collect_data": "Collect Data",
|
|
44
|
+
"pdf_parser": "PDF Parser",
|
|
45
|
+
"xlsx_parser": "Excel / XLSX",
|
|
46
|
+
"vault_sandbox": "Vault Sandbox",
|
|
47
|
+
"web_scraper": "Web Scraper",
|
|
48
|
+
"bash": "Bash",
|
|
49
|
+
"Filesystem": "Filesystem",
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
|
|
38
53
|
@router.get("/api/tools/available")
|
|
39
54
|
async def get_available_tools():
|
|
40
55
|
"""List all available tools from all sources (Native Agents, External MCP, Custom HTTP)"""
|
|
@@ -54,7 +69,7 @@ async def get_available_tools():
|
|
|
54
69
|
cfg = _server.mcp_manager.get_server_config(server_name)
|
|
55
70
|
display_label = (cfg.get("label") or server_name) if cfg else server_name
|
|
56
71
|
else:
|
|
57
|
-
display_label = server_name
|
|
72
|
+
display_label = _NATIVE_SERVER_LABELS.get(server_name, server_name)
|
|
58
73
|
|
|
59
74
|
result = await session.list_tools()
|
|
60
75
|
for t in result.tools:
|
package/backend/core/tools.py
CHANGED
|
@@ -29,6 +29,10 @@ DEFAULT_TOOLS_BY_TYPE = {
|
|
|
29
29
|
},
|
|
30
30
|
"code": {
|
|
31
31
|
"search_codebase",
|
|
32
|
+
"multi_repo_search",
|
|
33
|
+
"find_similar_code",
|
|
34
|
+
"list_indexed_files",
|
|
35
|
+
"get_file_chunks",
|
|
32
36
|
},
|
|
33
37
|
"orchestrator": set(), # orchestrator agents delegate to sub-agents; no extra tools needed
|
|
34
38
|
"delegate": set(), # delegate agents route to sub-agents via synthetic delegate_to_agent tool
|
|
@@ -90,10 +94,12 @@ async def aggregate_all_tools(agent_sessions, active_agent, custom_tools_list):
|
|
|
90
94
|
if tool_name not in allowed_tools:
|
|
91
95
|
allowed_tools.append(tool_name)
|
|
92
96
|
|
|
93
|
-
# Remove
|
|
97
|
+
# Remove embedding tools if embed_code is disabled
|
|
94
98
|
settings = load_settings()
|
|
95
99
|
if not settings.get("embed_code", False):
|
|
96
|
-
|
|
100
|
+
_embed_tools = {"search_codebase", "multi_repo_search", "find_similar_code",
|
|
101
|
+
"list_indexed_files", "get_file_chunks"}
|
|
102
|
+
allowed_tools = [t for t in allowed_tools if t not in _embed_tools]
|
|
97
103
|
|
|
98
104
|
# Standard MCP Tools
|
|
99
105
|
for session_name, session in agent_sessions.items():
|
|
@@ -207,6 +213,18 @@ def build_system_prompt(agent_system_template, tools_json, session_id, session_s
|
|
|
207
213
|
"""
|
|
208
214
|
# Determine if code embedding is enabled (for conditional tool description)
|
|
209
215
|
_embed_code = load_settings().get("embed_code", False)
|
|
216
|
+
_embed_tools_desc = (
|
|
217
|
+
"- **`search_codebase`** - semantic search within specific repos. Requires `repo_ids`."
|
|
218
|
+
" Add `file_filter` (e.g. `.py`, `components`) to narrow results.\n"
|
|
219
|
+
"- **`multi_repo_search`** - like search_codebase but `repo_ids` is optional;"
|
|
220
|
+
" omit to search ALL indexed repos at once.\n"
|
|
221
|
+
"- **`find_similar_code`** - pass a code snippet to find similar patterns across repos"
|
|
222
|
+
" (vs. natural language in search_codebase).\n"
|
|
223
|
+
"- **`list_indexed_files`** - list every file in the embedding index with chunk counts."
|
|
224
|
+
" Use before searching to understand coverage.\n"
|
|
225
|
+
"- **`get_file_chunks`** - retrieve all indexed chunks for a specific file"
|
|
226
|
+
" (the semantic outline). Use after finding a relevant file."
|
|
227
|
+
) if _embed_code else ""
|
|
210
228
|
|
|
211
229
|
# Get current date/time for context injection
|
|
212
230
|
now = datetime.datetime.now(zoneinfo.ZoneInfo("UTC"))
|
|
@@ -279,7 +297,7 @@ You have access to the following tools:
|
|
|
279
297
|
{tools_json}
|
|
280
298
|
|
|
281
299
|
**CODE & FILE NAVIGATION:**
|
|
282
|
-
{
|
|
300
|
+
{_embed_tools_desc}
|
|
283
301
|
- **`grep`** — search for a pattern inside a file or across all files in a folder. Pass a file path to search that file, or a folder path to search all files within it. Use `file_pattern` to filter by extension (e.g. `*.py`, `*.ts`).
|
|
284
302
|
- **`glob`** — discover file paths by pattern (e.g. `**/*.py`, `src/**/*.ts`).
|
|
285
303
|
- **`read_file`** — read an entire file. Use when you already know the path and the file is small. For large files, prefer `read_file_by_lines` or `grep`.
|
|
@@ -356,6 +356,31 @@ def get_index_status(repo_id: str) -> dict:
|
|
|
356
356
|
return {"status": "error", "message": str(e), "count": 0}
|
|
357
357
|
|
|
358
358
|
|
|
359
|
+
def _get_current_vector_dim(repo_id: str) -> int | None:
    """Return the vector column dimension currently in the DB, or None if absent.

    Reads ``atttypmod`` of the ``embedding`` column on the repo's table from
    the PostgreSQL catalog (``pg_attribute`` joined to ``pg_class``).

    Args:
        repo_id: Repo identifier; mapped to a table name via ``get_table_name``.

    Returns:
        The positive ``atttypmod`` value, or ``None`` when CocoIndex is
        unavailable, no DB URL is configured, no matching column exists, or
        the lookup fails.
    """
    if not COCOINDEX_AVAILABLE:
        return None
    table_name = get_table_name(repo_id)
    db_url = _get_db_url()
    if not db_url:
        return None
    try:
        with psycopg.connect(db_url) as conn:
            with conn.cursor() as cur:
                # NOTE(review): relname is matched without a schema filter, so
                # same-named tables in other schemas could also match — confirm
                # this is acceptable for the deployment.
                cur.execute("""
                    SELECT atttypmod
                    FROM pg_attribute
                    JOIN pg_class ON pg_class.oid = pg_attribute.attrelid
                    WHERE pg_class.relname = %s
                      AND pg_attribute.attname = 'embedding'
                      AND atttypmod > 0
                """, (table_name,))
                row = cur.fetchone()
                return row[0] if row else None
    except Exception as e:
        # Still best-effort (callers treat None as "unknown"), but surface the
        # failure so a broken DB connection is not mistaken for a missing table.
        print(f"[index] Could not read embedding dimension for {repo_id}: {e}")
        return None
|
|
382
|
+
|
|
383
|
+
|
|
359
384
|
def drop_index(repo_id: str):
|
|
360
385
|
"""Drop all tables and CocoIndex metadata for a repo — ensures clean rebuild."""
|
|
361
386
|
if not COCOINDEX_AVAILABLE:
|
|
@@ -399,7 +424,7 @@ def _update_repo_status(repo_id: str, **fields):
|
|
|
399
424
|
json.dump(repos, f, indent=4)
|
|
400
425
|
|
|
401
426
|
|
|
402
|
-
def run_index_task(repo_id: str, repo_path: str, included_patterns: list[str], excluded_patterns: list[str]):
|
|
427
|
+
def run_index_task(repo_id: str, repo_path: str, included_patterns: list[str], excluded_patterns: list[str], full_reindex: bool = True):
|
|
403
428
|
if not COCOINDEX_AVAILABLE:
|
|
404
429
|
msg = (
|
|
405
430
|
"CocoIndex not installed in the backend venv — indexing skipped.\n"
|
|
@@ -416,11 +441,31 @@ def run_index_task(repo_id: str, repo_path: str, included_patterns: list[str], e
|
|
|
416
441
|
stop = _stop_events.setdefault(repo_id, threading.Event())
|
|
417
442
|
stop.clear() # reset from any previous stop request
|
|
418
443
|
|
|
419
|
-
print(f"Starting index builder for {repo_id}...")
|
|
444
|
+
print(f"Starting index builder for {repo_id} (full_reindex={full_reindex})...")
|
|
420
445
|
_update_repo_status(repo_id, status="indexing", error_message=None)
|
|
421
446
|
try:
|
|
422
|
-
|
|
423
|
-
|
|
447
|
+
if full_reindex:
|
|
448
|
+
print("[index] Step 0: drop stale tables + CocoIndex metadata")
|
|
449
|
+
drop_index(repo_id)
|
|
450
|
+
else:
|
|
451
|
+
# Schema guard: if the embedding dim stored in the DB differs from the currently
|
|
452
|
+
# configured model's dim, a full rebuild is unavoidable (vector columns are fixed-width).
|
|
453
|
+
try:
|
|
454
|
+
model = get_configured_embedding_model()
|
|
455
|
+
settings = load_settings()
|
|
456
|
+
dim = probe_embedding_dim(model, settings)
|
|
457
|
+
current_dim = _get_current_vector_dim(repo_id)
|
|
458
|
+
if current_dim is not None and current_dim != dim:
|
|
459
|
+
print(f"[index] Embedding dim changed ({current_dim} → {dim}), promoting to full reindex")
|
|
460
|
+
drop_index(repo_id)
|
|
461
|
+
full_reindex = True
|
|
462
|
+
else:
|
|
463
|
+
print("[index] Incremental reindex — tracking table preserved, only changed files will be processed")
|
|
464
|
+
except Exception as e:
|
|
465
|
+
print(f"[index] Schema check failed ({e}), falling back to full reindex")
|
|
466
|
+
drop_index(repo_id)
|
|
467
|
+
full_reindex = True
|
|
468
|
+
|
|
424
469
|
if stop.is_set():
|
|
425
470
|
print(f"[index] Stop requested after step 0 — aborting {repo_id}")
|
|
426
471
|
_update_repo_status(repo_id, status="stopped", error_message=None)
|
|
@@ -451,7 +496,10 @@ def run_index_task(repo_id: str, repo_path: str, included_patterns: list[str], e
|
|
|
451
496
|
# update() is a long-running Rust call — we can't interrupt it mid-way,
|
|
452
497
|
# but we check the stop flag immediately after it returns.
|
|
453
498
|
print("[index] Step 4: repo_flow.update()")
|
|
454
|
-
|
|
499
|
+
if full_reindex:
|
|
500
|
+
repo_flow.update(full_reprocess=True)
|
|
501
|
+
else:
|
|
502
|
+
repo_flow.update() # incremental — CocoIndex processes only changed/new files
|
|
455
503
|
|
|
456
504
|
if stop.is_set():
|
|
457
505
|
print(f"[index] Stop requested — marking {repo_id} as stopped")
|
|
@@ -493,10 +541,10 @@ def stop_index(repo_id: str) -> bool:
|
|
|
493
541
|
return False
|
|
494
542
|
|
|
495
543
|
|
|
496
|
-
def run_index(repo_id: str, repo_path: str, included_patterns: list[str], excluded_patterns: list[str]):
|
|
544
|
+
def run_index(repo_id: str, repo_path: str, included_patterns: list[str], excluded_patterns: list[str], full_reindex: bool = True):
|
|
497
545
|
t = threading.Thread(
|
|
498
546
|
target=run_index_task,
|
|
499
|
-
args=(repo_id, repo_path, included_patterns, excluded_patterns),
|
|
547
|
+
args=(repo_id, repo_path, included_patterns, excluded_patterns, full_reindex),
|
|
500
548
|
daemon=True,
|
|
501
549
|
)
|
|
502
550
|
_active_threads[repo_id] = t
|
|
@@ -108,7 +108,20 @@ def _get_table_name(repo_id: str) -> str:
|
|
|
108
108
|
return f"ci_{repo_id}__emb"
|
|
109
109
|
|
|
110
110
|
|
|
111
|
-
|
|
111
|
+
def _load_indexed_repo_ids() -> list[str]:
    """Collect the IDs of every repo marked 'indexed' in repos.json.

    Best-effort: any failure (config import, missing file, invalid JSON,
    unexpected entry shape) results in an empty list.
    """
    indexed_ids: list[str] = []
    try:
        from core.config import DATA_DIR
        with open(os.path.join(DATA_DIR, "repos.json")) as handle:
            entries = json.load(handle)
        for entry in entries:
            if entry.get("status") != "indexed":
                continue
            repo_id = entry.get("id")
            # Skip entries with a missing or empty id.
            if repo_id:
                indexed_ids.append(repo_id)
    except Exception:
        return []
    return indexed_ids
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
async def _search(query: str, repo_ids: list[str], top_k: int = 10,
|
|
124
|
+
file_filter: str | None = None, min_score: float = 0.0) -> list[dict]:
|
|
112
125
|
"""Search indexed repos using cosine similarity."""
|
|
113
126
|
try:
|
|
114
127
|
pool = _get_pool()
|
|
@@ -143,13 +156,17 @@ async def _search(query: str, repo_ids: list[str], top_k: int = 10) -> list[dict
|
|
|
143
156
|
|
|
144
157
|
repo_root = repo_path_map.get(repo_id, "")
|
|
145
158
|
for row in cur.fetchall():
|
|
146
|
-
# location may be a psycopg Range object — convert to string
|
|
147
159
|
loc = row[2]
|
|
148
160
|
if hasattr(loc, 'lower') and hasattr(loc, 'upper'):
|
|
149
161
|
loc = f"{loc.lower}-{loc.upper}"
|
|
150
162
|
else:
|
|
151
163
|
loc = str(loc) if loc is not None else ""
|
|
152
164
|
filename = row[0].lstrip("/")
|
|
165
|
+
score = round(1.0 - row[3], 5)
|
|
166
|
+
if score < min_score:
|
|
167
|
+
continue
|
|
168
|
+
if file_filter and file_filter not in filename:
|
|
169
|
+
continue
|
|
153
170
|
full_path = f"{repo_root}/{filename}" if repo_root else filename
|
|
154
171
|
all_results.append({
|
|
155
172
|
"repo_id": repo_id,
|
|
@@ -157,7 +174,7 @@ async def _search(query: str, repo_ids: list[str], top_k: int = 10) -> list[dict
|
|
|
157
174
|
"full_path": full_path,
|
|
158
175
|
"code": row[1],
|
|
159
176
|
"location": loc,
|
|
160
|
-
"score":
|
|
177
|
+
"score": score,
|
|
161
178
|
})
|
|
162
179
|
except Exception as e:
|
|
163
180
|
err_text = str(e)
|
|
@@ -175,6 +192,98 @@ async def _search(query: str, repo_ids: list[str], top_k: int = 10) -> list[dict
|
|
|
175
192
|
return all_results[:top_k]
|
|
176
193
|
|
|
177
194
|
|
|
195
|
+
def _list_files_in_index(repo_id: str, file_filter: str | None = None) -> dict:
    """Query the embedding table for distinct filenames and chunk counts.

    Args:
        repo_id: Repo whose embedding table is inspected; must match
            ``_VALID_REPO_ID`` because it is interpolated into the table name.
        file_filter: Optional literal substring; only filenames containing it
            are returned.

    Returns:
        ``{"repo_id", "files", "total_files", "total_chunks"}`` on success,
        where each file entry has ``filename``, ``chunk_count`` and
        ``full_path``; or ``{"error": message}`` on invalid input, connection
        failure or query failure.
    """
    if not _VALID_REPO_ID.match(repo_id):
        return {"error": f"Invalid repo_id format: {repo_id}"}
    try:
        pool = _get_pool()
    except Exception as e:
        return {"error": f"Database connection failed: {e}"}

    repo_path_map = _load_repo_paths()
    repo_root = repo_path_map.get(repo_id, "")
    table_name = _get_table_name(repo_id)

    # strpos() performs a literal substring match. The previous LIKE '%…%'
    # treated '_' and '%' inside the filter as wildcards, so a filter such as
    # 'code_search' could match unrelated paths.
    if file_filter:
        where_clause = "WHERE strpos(filename, %s) > 0"
        params = (file_filter,)
    else:
        where_clause = ""
        params = None
    query = f"""
        SELECT filename, COUNT(*) AS chunk_count
        FROM "{table_name}"
        {where_clause}
        GROUP BY filename
        ORDER BY filename
    """

    try:
        with pool.connection() as conn:
            with conn.cursor() as cur:
                cur.execute(query, params)
                rows = cur.fetchall()
    except Exception as e:
        return {"error": f"Query failed for repo '{repo_id}': {e}"}

    files = []
    total_chunks = 0
    for row in rows:
        filename = row[0].lstrip("/")
        chunk_count = row[1]
        total_chunks += chunk_count
        full_path = f"{repo_root}/{filename}" if repo_root else filename
        files.append({"filename": filename, "chunk_count": chunk_count, "full_path": full_path})

    return {"repo_id": repo_id, "files": files, "total_files": len(files), "total_chunks": total_chunks}
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _get_chunks_for_file(repo_id: str, filename: str) -> dict:
    """Retrieve all indexed chunks for a specific file.

    Args:
        repo_id: Repo the file belongs to; must match ``_VALID_REPO_ID``
            because it is interpolated into the table name.
        filename: File path to look up; matched both with and without a
            leading slash.

    Returns:
        ``{"repo_id", "filename", "full_path", "chunks", "total_chunks"}`` on
        success, where each chunk has ``location`` and ``code``; or
        ``{"error": message}`` on invalid input, connection/query failure, or
        when no chunks exist for the file.
    """
    if not _VALID_REPO_ID.match(repo_id):
        return {"error": f"Invalid repo_id format: {repo_id}"}
    try:
        pool = _get_pool()
    except Exception as e:
        return {"error": f"Database connection failed: {e}"}

    repo_path_map = _load_repo_paths()
    repo_root = repo_path_map.get(repo_id, "")
    table_name = _get_table_name(repo_id)
    clean_filename = filename.lstrip("/")

    try:
        with pool.connection() as conn:
            with conn.cursor() as cur:
                # Try both with and without leading slash to be robust
                cur.execute(f"""
                    SELECT location, code
                    FROM "{table_name}"
                    WHERE filename = %s OR filename = %s
                    ORDER BY location
                """, (clean_filename, "/" + clean_filename))
                rows = cur.fetchall()
    except Exception as e:
        return {"error": f"Query failed for '{filename}' in repo '{repo_id}': {e}"}

    if not rows:
        return {"error": f"No chunks found for '{filename}' in repo '{repo_id}'. Use list_indexed_files to see available files."}

    chunks = []
    for row in rows:
        loc = row[0]
        if loc is None:
            loc = ""
        elif not isinstance(loc, str) and hasattr(loc, 'lower') and hasattr(loc, 'upper'):
            # Range-like object exposing lower/upper bounds. Strings are
            # excluded explicitly because str also has .lower/.upper (as
            # methods), which would otherwise be rendered as method reprs.
            loc = f"{loc.lower}-{loc.upper}"
        else:
            loc = str(loc)
        chunks.append({"location": loc, "code": row[1]})

    full_path = f"{repo_root}/{clean_filename}" if repo_root else clean_filename
    return {"repo_id": repo_id, "filename": clean_filename, "full_path": full_path,
            "chunks": chunks, "total_chunks": len(chunks)}
|
|
285
|
+
|
|
286
|
+
|
|
178
287
|
def _grep_file(
|
|
179
288
|
file_path: str,
|
|
180
289
|
pattern: str,
|
|
@@ -397,7 +506,9 @@ async def list_tools() -> list[types.Tool]:
|
|
|
397
506
|
description=(
|
|
398
507
|
"Search indexed code repositories for relevant code snippets using semantic vector search. "
|
|
399
508
|
"Returns matching code with filename, location, and relevance score. "
|
|
400
|
-
"You MUST provide repo_ids — check the LINKED CODE REPOSITORIES section in your system prompt for available repo IDs."
|
|
509
|
+
"You MUST provide repo_ids — check the LINKED CODE REPOSITORIES section in your system prompt for available repo IDs. "
|
|
510
|
+
"Use file_filter to narrow results to a specific path or file type (e.g. 'components', '.py'). "
|
|
511
|
+
"Use min_score (0–1) to discard low-confidence matches."
|
|
401
512
|
),
|
|
402
513
|
inputSchema={
|
|
403
514
|
"type": "object",
|
|
@@ -415,11 +526,140 @@ async def list_tools() -> list[types.Tool]:
|
|
|
415
526
|
"type": "integer",
|
|
416
527
|
"description": "Number of results to return (default 10)",
|
|
417
528
|
"default": 10
|
|
529
|
+
},
|
|
530
|
+
"file_filter": {
|
|
531
|
+
"type": "string",
|
|
532
|
+
"description": "Only return results from files whose path contains this string (e.g. 'src/api', '.py', 'components')"
|
|
533
|
+
},
|
|
534
|
+
"min_score": {
|
|
535
|
+
"type": "number",
|
|
536
|
+
"description": "Minimum relevance score 0–1 (default 0.0 — return all). Higher values (e.g. 0.7) return only strong matches.",
|
|
537
|
+
"default": 0.0
|
|
418
538
|
}
|
|
419
539
|
},
|
|
420
540
|
"required": ["query", "repo_ids"]
|
|
421
541
|
},
|
|
422
542
|
),
|
|
543
|
+
types.Tool(
|
|
544
|
+
name="multi_repo_search",
|
|
545
|
+
description=(
|
|
546
|
+
"Search across multiple indexed repos simultaneously and return globally ranked results. "
|
|
547
|
+
"Unlike search_codebase, repo_ids is optional — if omitted, ALL indexed repos are searched automatically. "
|
|
548
|
+
"Use this when you don't know which repo contains the code you're looking for, or want the best match anywhere. "
|
|
549
|
+
"Use file_filter to narrow to a specific path or file type."
|
|
550
|
+
),
|
|
551
|
+
inputSchema={
|
|
552
|
+
"type": "object",
|
|
553
|
+
"properties": {
|
|
554
|
+
"query": {
|
|
555
|
+
"type": "string",
|
|
556
|
+
"description": "Natural language or code search query"
|
|
557
|
+
},
|
|
558
|
+
"repo_ids": {
|
|
559
|
+
"type": "array",
|
|
560
|
+
"items": {"type": "string"},
|
|
561
|
+
"description": "Repos to search. Omit or pass an empty list to search ALL indexed repos."
|
|
562
|
+
},
|
|
563
|
+
"top_k": {
|
|
564
|
+
"type": "integer",
|
|
565
|
+
"description": "Total results to return across all repos (default 10)",
|
|
566
|
+
"default": 10
|
|
567
|
+
},
|
|
568
|
+
"file_filter": {
|
|
569
|
+
"type": "string",
|
|
570
|
+
"description": "Only return results from files whose path contains this string"
|
|
571
|
+
},
|
|
572
|
+
"min_score": {
|
|
573
|
+
"type": "number",
|
|
574
|
+
"description": "Minimum relevance score 0–1 (default 0.0)",
|
|
575
|
+
"default": 0.0
|
|
576
|
+
}
|
|
577
|
+
},
|
|
578
|
+
"required": ["query"]
|
|
579
|
+
},
|
|
580
|
+
),
|
|
581
|
+
types.Tool(
|
|
582
|
+
name="find_similar_code",
|
|
583
|
+
description=(
|
|
584
|
+
"Given a code snippet, find semantically similar code patterns across one or more repos. "
|
|
585
|
+
"Unlike search_codebase (which takes natural language), this embeds actual code — "
|
|
586
|
+
"use it when you have a piece of code and want to find similar implementations or usages elsewhere. "
|
|
587
|
+
"repo_ids is optional; omit to search all indexed repos."
|
|
588
|
+
),
|
|
589
|
+
inputSchema={
|
|
590
|
+
"type": "object",
|
|
591
|
+
"properties": {
|
|
592
|
+
"code": {
|
|
593
|
+
"type": "string",
|
|
594
|
+
"description": "A code snippet to find similar patterns for"
|
|
595
|
+
},
|
|
596
|
+
"repo_ids": {
|
|
597
|
+
"type": "array",
|
|
598
|
+
"items": {"type": "string"},
|
|
599
|
+
"description": "Repos to search. Omit to search all indexed repos."
|
|
600
|
+
},
|
|
601
|
+
"top_k": {
|
|
602
|
+
"type": "integer",
|
|
603
|
+
"description": "Number of results to return (default 10)",
|
|
604
|
+
"default": 10
|
|
605
|
+
},
|
|
606
|
+
"file_filter": {
|
|
607
|
+
"type": "string",
|
|
608
|
+
"description": "Only return results from files whose path contains this string"
|
|
609
|
+
},
|
|
610
|
+
"min_score": {
|
|
611
|
+
"type": "number",
|
|
612
|
+
"description": "Minimum relevance score 0–1 (default 0.0)",
|
|
613
|
+
"default": 0.0
|
|
614
|
+
}
|
|
615
|
+
},
|
|
616
|
+
"required": ["code"]
|
|
617
|
+
},
|
|
618
|
+
),
|
|
619
|
+
types.Tool(
|
|
620
|
+
name="list_indexed_files",
|
|
621
|
+
description=(
|
|
622
|
+
"List all files that have been embedded in the vector index for a repo, with their chunk counts. "
|
|
623
|
+
"Use this to discover what's covered by the index before searching, or to verify a specific file was indexed. "
|
|
624
|
+
"Optionally filter by file path substring."
|
|
625
|
+
),
|
|
626
|
+
inputSchema={
|
|
627
|
+
"type": "object",
|
|
628
|
+
"properties": {
|
|
629
|
+
"repo_id": {
|
|
630
|
+
"type": "string",
|
|
631
|
+
"description": "The repo ID to list indexed files for"
|
|
632
|
+
},
|
|
633
|
+
"file_filter": {
|
|
634
|
+
"type": "string",
|
|
635
|
+
"description": "Only show files whose path contains this string (e.g. 'src/', '.py')"
|
|
636
|
+
}
|
|
637
|
+
},
|
|
638
|
+
"required": ["repo_id"]
|
|
639
|
+
},
|
|
640
|
+
),
|
|
641
|
+
types.Tool(
|
|
642
|
+
name="get_file_chunks",
|
|
643
|
+
description=(
|
|
644
|
+
"Get all embedded chunks for a specific file — the semantic view of that file as seen by the index. "
|
|
645
|
+
"After finding a relevant file via search_codebase, use this to retrieve all its indexed segments "
|
|
646
|
+
"and understand the full scope of what's embedded, without reading the raw file."
|
|
647
|
+
),
|
|
648
|
+
inputSchema={
|
|
649
|
+
"type": "object",
|
|
650
|
+
"properties": {
|
|
651
|
+
"repo_id": {
|
|
652
|
+
"type": "string",
|
|
653
|
+
"description": "The repo ID the file belongs to"
|
|
654
|
+
},
|
|
655
|
+
"filename": {
|
|
656
|
+
"type": "string",
|
|
657
|
+
"description": "Relative file path as returned by search_codebase (e.g. 'src/auth/login.py')"
|
|
658
|
+
}
|
|
659
|
+
},
|
|
660
|
+
"required": ["repo_id", "filename"]
|
|
661
|
+
},
|
|
662
|
+
),
|
|
423
663
|
types.Tool(
|
|
424
664
|
name="grep",
|
|
425
665
|
description=(
|
|
@@ -496,13 +736,67 @@ async def call_tool(
|
|
|
496
736
|
query = arguments.get("query", "")
|
|
497
737
|
repo_ids = arguments.get("repo_ids", [])
|
|
498
738
|
top_k = arguments.get("top_k", 10)
|
|
739
|
+
file_filter = arguments.get("file_filter") or None
|
|
740
|
+
min_score = float(arguments.get("min_score", 0.0))
|
|
499
741
|
|
|
500
742
|
if not query or not repo_ids:
|
|
501
743
|
return [types.TextContent(type="text", text=json.dumps({"error": "Both 'query' and 'repo_ids' are required."}))]
|
|
502
744
|
|
|
503
|
-
results = await _search(query, repo_ids, top_k)
|
|
745
|
+
results = await _search(query, repo_ids, top_k, file_filter=file_filter, min_score=min_score)
|
|
504
746
|
return [types.TextContent(type="text", text=json.dumps({"results": results}, ensure_ascii=False))]
|
|
505
747
|
|
|
748
|
+
if name == "multi_repo_search":
|
|
749
|
+
query = arguments.get("query", "")
|
|
750
|
+
if not query:
|
|
751
|
+
return [types.TextContent(type="text", text=json.dumps({"error": "'query' is required."}))]
|
|
752
|
+
|
|
753
|
+
repo_ids = arguments.get("repo_ids") or []
|
|
754
|
+
if not repo_ids:
|
|
755
|
+
repo_ids = _load_indexed_repo_ids()
|
|
756
|
+
if not repo_ids:
|
|
757
|
+
return [types.TextContent(type="text", text=json.dumps({"error": "No indexed repos found. Index a repo first."}))]
|
|
758
|
+
|
|
759
|
+
top_k = arguments.get("top_k", 10)
|
|
760
|
+
file_filter = arguments.get("file_filter") or None
|
|
761
|
+
min_score = float(arguments.get("min_score", 0.0))
|
|
762
|
+
|
|
763
|
+
results = await _search(query, repo_ids, top_k, file_filter=file_filter, min_score=min_score)
|
|
764
|
+
return [types.TextContent(type="text", text=json.dumps({"results": results, "repos_searched": repo_ids}, ensure_ascii=False))]
|
|
765
|
+
|
|
766
|
+
if name == "find_similar_code":
|
|
767
|
+
code = arguments.get("code", "")
|
|
768
|
+
if not code:
|
|
769
|
+
return [types.TextContent(type="text", text=json.dumps({"error": "'code' is required."}))]
|
|
770
|
+
|
|
771
|
+
repo_ids = arguments.get("repo_ids") or []
|
|
772
|
+
if not repo_ids:
|
|
773
|
+
repo_ids = _load_indexed_repo_ids()
|
|
774
|
+
if not repo_ids:
|
|
775
|
+
return [types.TextContent(type="text", text=json.dumps({"error": "No indexed repos found. Index a repo first."}))]
|
|
776
|
+
|
|
777
|
+
top_k = arguments.get("top_k", 10)
|
|
778
|
+
file_filter = arguments.get("file_filter") or None
|
|
779
|
+
min_score = float(arguments.get("min_score", 0.0))
|
|
780
|
+
|
|
781
|
+
results = await _search(code, repo_ids, top_k, file_filter=file_filter, min_score=min_score)
|
|
782
|
+
return [types.TextContent(type="text", text=json.dumps({"results": results, "repos_searched": repo_ids}, ensure_ascii=False))]
|
|
783
|
+
|
|
784
|
+
if name == "list_indexed_files":
|
|
785
|
+
repo_id = arguments.get("repo_id", "")
|
|
786
|
+
if not repo_id:
|
|
787
|
+
return [types.TextContent(type="text", text=json.dumps({"error": "'repo_id' is required."}))]
|
|
788
|
+
file_filter = arguments.get("file_filter") or None
|
|
789
|
+
result = _list_files_in_index(repo_id, file_filter=file_filter)
|
|
790
|
+
return [types.TextContent(type="text", text=json.dumps(result, ensure_ascii=False))]
|
|
791
|
+
|
|
792
|
+
if name == "get_file_chunks":
|
|
793
|
+
repo_id = arguments.get("repo_id", "")
|
|
794
|
+
filename = arguments.get("filename", "")
|
|
795
|
+
if not repo_id or not filename:
|
|
796
|
+
return [types.TextContent(type="text", text=json.dumps({"error": "Both 'repo_id' and 'filename' are required."}))]
|
|
797
|
+
result = _get_chunks_for_file(repo_id, filename)
|
|
798
|
+
return [types.TextContent(type="text", text=json.dumps(result, ensure_ascii=False))]
|
|
799
|
+
|
|
506
800
|
if name == "grep":
|
|
507
801
|
# Accept both new `path` and legacy `file_path` for backward compatibility
|
|
508
802
|
path = arguments.get("path") or arguments.get("file_path")
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
yEEl_TTPPEzHJlgKg21Go
|
|
@@ -7,9 +7,9 @@
|
|
|
7
7
|
"static/chunks/03~yq9q893hmn.js"
|
|
8
8
|
],
|
|
9
9
|
"lowPriorityFiles": [
|
|
10
|
-
"static/
|
|
11
|
-
"static/
|
|
12
|
-
"static/
|
|
10
|
+
"static/yEEl_TTPPEzHJlgKg21Go/_buildManifest.js",
|
|
11
|
+
"static/yEEl_TTPPEzHJlgKg21Go/_ssgManifest.js",
|
|
12
|
+
"static/yEEl_TTPPEzHJlgKg21Go/_clientMiddlewareManifest.js"
|
|
13
13
|
],
|
|
14
14
|
"rootMainFiles": [
|
|
15
15
|
"static/chunks/0qmh881w772ef.js",
|
|
@@ -131,8 +131,8 @@
|
|
|
131
131
|
"dynamicRoutes": {},
|
|
132
132
|
"notFoundRoutes": [],
|
|
133
133
|
"preview": {
|
|
134
|
-
"previewModeId": "
|
|
135
|
-
"previewModeSigningKey": "
|
|
136
|
-
"previewModeEncryptionKey": "
|
|
134
|
+
"previewModeId": "bc368afe07caae8bf169c8234849e882",
|
|
135
|
+
"previewModeSigningKey": "190f16e9b88e7771194d70cdf5e69441824646affcdc450555fd563eed084faa",
|
|
136
|
+
"previewModeEncryptionKey": "a2688049a98d936f285726f270987cb712966062a8078f5f90eb21aa28b36b66"
|
|
137
137
|
}
|
|
138
138
|
}
|