sari 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. app/__init__.py +1 -0
  2. app/config.py +240 -0
  3. app/db.py +932 -0
  4. app/dedup_queue.py +77 -0
  5. app/engine_registry.py +56 -0
  6. app/engine_runtime.py +472 -0
  7. app/http_server.py +204 -0
  8. app/indexer.py +1532 -0
  9. app/main.py +147 -0
  10. app/models.py +39 -0
  11. app/queue_pipeline.py +65 -0
  12. app/ranking.py +144 -0
  13. app/registry.py +172 -0
  14. app/search_engine.py +572 -0
  15. app/watcher.py +124 -0
  16. app/workspace.py +286 -0
  17. deckard/__init__.py +3 -0
  18. deckard/__main__.py +4 -0
  19. deckard/main.py +345 -0
  20. deckard/version.py +1 -0
  21. mcp/__init__.py +1 -0
  22. mcp/__main__.py +19 -0
  23. mcp/cli.py +485 -0
  24. mcp/daemon.py +149 -0
  25. mcp/proxy.py +304 -0
  26. mcp/registry.py +218 -0
  27. mcp/server.py +519 -0
  28. mcp/session.py +234 -0
  29. mcp/telemetry.py +112 -0
  30. mcp/test_cli.py +89 -0
  31. mcp/test_daemon.py +124 -0
  32. mcp/test_server.py +197 -0
  33. mcp/tools/__init__.py +14 -0
  34. mcp/tools/_util.py +244 -0
  35. mcp/tools/deckard_guide.py +32 -0
  36. mcp/tools/doctor.py +208 -0
  37. mcp/tools/get_callers.py +60 -0
  38. mcp/tools/get_implementations.py +60 -0
  39. mcp/tools/index_file.py +75 -0
  40. mcp/tools/list_files.py +138 -0
  41. mcp/tools/read_file.py +48 -0
  42. mcp/tools/read_symbol.py +99 -0
  43. mcp/tools/registry.py +212 -0
  44. mcp/tools/repo_candidates.py +89 -0
  45. mcp/tools/rescan.py +46 -0
  46. mcp/tools/scan_once.py +54 -0
  47. mcp/tools/search.py +208 -0
  48. mcp/tools/search_api_endpoints.py +72 -0
  49. mcp/tools/search_symbols.py +63 -0
  50. mcp/tools/status.py +135 -0
  51. sari/__init__.py +1 -0
  52. sari/__main__.py +4 -0
  53. sari-0.0.1.dist-info/METADATA +521 -0
  54. sari-0.0.1.dist-info/RECORD +58 -0
  55. sari-0.0.1.dist-info/WHEEL +5 -0
  56. sari-0.0.1.dist-info/entry_points.txt +2 -0
  57. sari-0.0.1.dist-info/licenses/LICENSE +21 -0
  58. sari-0.0.1.dist-info/top_level.txt +4 -0
mcp/tools/registry.py ADDED
@@ -0,0 +1,212 @@
+ from dataclasses import dataclass
+ from typing import Any, Callable, Dict, List
+
+ import mcp.tools.search as search_tool
+ import mcp.tools.status as status_tool
+ import mcp.tools.repo_candidates as repo_candidates_tool
+ import mcp.tools.list_files as list_files_tool
+ import mcp.tools.read_file as read_file_tool
+ import mcp.tools.search_symbols as search_symbols_tool
+ import mcp.tools.read_symbol as read_symbol_tool
+ import mcp.tools.doctor as doctor_tool
+ import mcp.tools.search_api_endpoints as search_api_endpoints_tool
+ import mcp.tools.index_file as index_file_tool
+ import mcp.tools.rescan as rescan_tool
+ import mcp.tools.scan_once as scan_once_tool
+ import mcp.tools.get_callers as get_callers_tool
+ import mcp.tools.get_implementations as get_implementations_tool
+ import mcp.tools.deckard_guide as deckard_guide_tool
+
+
+ @dataclass
+ class ToolContext:
+     db: Any
+     engine: Any
+     indexer: Any
+     roots: List[str]
+     cfg: Any
+     logger: Any
+     workspace_root: str
+     server_version: str
+
+
+ @dataclass
+ class Tool:
+     name: str
+     description: str
+     input_schema: Dict[str, Any]
+     handler: Callable[[ToolContext, Dict[str, Any]], Dict[str, Any]]
+
+
+ class ToolRegistry:
+     def __init__(self) -> None:
+         self._tools: Dict[str, Tool] = {}
+
+     def register(self, tool: Tool) -> None:
+         self._tools[tool.name] = tool
+
+     def list_tools(self) -> List[Dict[str, Any]]:
+         return [
+             {"name": t.name, "description": t.description, "inputSchema": t.input_schema}
+             for t in self._tools.values()
+         ]
+
+     def execute(self, name: str, ctx: ToolContext, args: Dict[str, Any]) -> Dict[str, Any]:
+         if name not in self._tools:
+             raise ValueError(f"Unknown tool: {name}")
+         return self._tools[name].handler(ctx, args)
+
+
+ def build_default_registry() -> ToolRegistry:
+     reg = ToolRegistry()
+
+     reg.register(Tool(
+         name="sari_guide",
+         description="Usage guide. Call this if unsure; it enforces search-first workflow.",
+         input_schema={"type": "object", "properties": {}},
+         handler=lambda ctx, args: deckard_guide_tool.execute_deckard_guide(args),
+     ))
+
+     reg.register(Tool(
+         name="search",
+         description="SEARCH FIRST. Use before opening files to locate relevant paths/symbols.",
+         input_schema={
+             "type": "object",
+             "properties": {
+                 "query": {"type": "string", "description": "Search query (keywords, function names, regex)"},
+                 "repo": {"type": "string", "description": "Limit search to specific repository"},
+                 "limit": {"type": "integer", "description": "Maximum results (default: 10, max: 50)", "default": 10},
+                 "offset": {"type": "integer", "description": "Pagination offset (default: 0)", "default": 0},
+                 "file_types": {"type": "array", "items": {"type": "string"}, "description": "Filter by file extensions"},
+                 "path_pattern": {"type": "string", "description": "Glob pattern for path matching"},
+                 "exclude_patterns": {"type": "array", "items": {"type": "string"}, "description": "Patterns to exclude"},
+                 "recency_boost": {"type": "boolean", "description": "Boost recently modified files", "default": False},
+                 "use_regex": {"type": "boolean", "description": "Treat query as regex pattern", "default": False},
+                 "case_sensitive": {"type": "boolean", "description": "Case-sensitive search", "default": False},
+                 "context_lines": {"type": "integer", "description": "Number of context lines in snippet", "default": 5},
+                 "total_mode": {"type": "string", "enum": ["exact", "approx"], "description": "Total count mode"},
+                 "root_ids": {"type": "array", "items": {"type": "string"}, "description": "Limit search to specific root_ids"},
+                 "scope": {"type": "string", "description": "Alias for 'repo'"},
+                 "type": {"type": "string", "enum": ["docs", "code"], "description": "Filter by type: 'docs' or 'code'"},
+             },
+             "required": ["query"],
+         },
+         handler=lambda ctx, args: search_tool.execute_search(args, ctx.db, ctx.logger, ctx.roots, engine=ctx.engine),
+     ))
+
+     reg.register(Tool(
+         name="status",
+         description="Get indexer status. Use details=true for per-repo stats.",
+         input_schema={"type": "object", "properties": {"details": {"type": "boolean", "default": False}}},
+         handler=lambda ctx, args: status_tool.execute_status(args, ctx.indexer, ctx.db, ctx.cfg, ctx.workspace_root, ctx.server_version, ctx.logger),
+     ))
+
+     reg.register(Tool(
+         name="rescan",
+         description="Trigger an async rescan of the workspace index.",
+         input_schema={"type": "object", "properties": {}},
+         handler=lambda ctx, args: rescan_tool.execute_rescan(args, ctx.indexer),
+     ))
+
+     reg.register(Tool(
+         name="scan_once",
+         description="Run a synchronous scan once (blocking).",
+         input_schema={"type": "object", "properties": {}},
+         handler=lambda ctx, args: scan_once_tool.execute_scan_once(args, ctx.indexer),
+     ))
+
+     reg.register(Tool(
+         name="repo_candidates",
+         description="Suggest top repos for a query. Use before search if repo is unknown.",
+         input_schema={"type": "object", "properties": {"query": {"type": "string"}, "limit": {"type": "integer", "default": 3}}, "required": ["query"]},
+         handler=lambda ctx, args: repo_candidates_tool.execute_repo_candidates(args, ctx.db, ctx.logger, ctx.roots),
+     ))
+
+     reg.register(Tool(
+         name="list_files",
+         description="List indexed files with filters. If repo is omitted, returns repo summary only.",
+         input_schema={
+             "type": "object",
+             "properties": {
+                 "repo": {"type": "string"},
+                 "path_pattern": {"type": "string"},
+                 "file_types": {"type": "array", "items": {"type": "string"}},
+                 "include_hidden": {"type": "boolean", "default": False},
+                 "summary": {"type": "boolean", "default": False},
+                 "limit": {"type": "integer", "default": 100},
+                 "offset": {"type": "integer", "default": 0},
+             },
+         },
+         handler=lambda ctx, args: list_files_tool.execute_list_files(args, ctx.db, ctx.logger, ctx.roots),
+     ))
+
+     reg.register(Tool(
+         name="read_file",
+         description="Read full file content by path. Use only after search narrows candidates.",
+         input_schema={"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]},
+         handler=lambda ctx, args: read_file_tool.execute_read_file(args, ctx.db, ctx.roots),
+     ))
+
+     reg.register(Tool(
+         name="search_symbols",
+         description="Search for symbols by name. Prefer this to scanning files.",
+         input_schema={"type": "object", "properties": {"query": {"type": "string"}, "limit": {"type": "integer", "default": 20}}, "required": ["query"]},
+         handler=lambda ctx, args: search_symbols_tool.execute_search_symbols(args, ctx.db, ctx.roots),
+     ))
+
+     reg.register(Tool(
+         name="read_symbol",
+         description="Read symbol definition block by name/path. Use after search_symbols.",
+         input_schema={"type": "object", "properties": {"path": {"type": "string"}, "name": {"type": "string"}}, "required": ["path", "name"]},
+         handler=lambda ctx, args: read_symbol_tool.execute_read_symbol(args, ctx.db, ctx.logger, ctx.roots),
+     ))
+
+     reg.register(Tool(
+         name="doctor",
+         description="Run health checks and return structured diagnostics.",
+         input_schema={
+             "type": "object",
+             "properties": {
+                 "include_network": {"type": "boolean", "default": True},
+                 "include_port": {"type": "boolean", "default": True},
+                 "include_db": {"type": "boolean", "default": True},
+                 "include_disk": {"type": "boolean", "default": True},
+                 "include_daemon": {"type": "boolean", "default": True},
+                 "include_venv": {"type": "boolean", "default": True},
+                 "include_marker": {"type": "boolean", "default": False},
+                 "port": {"type": "integer", "default": 47800},
+                 "min_disk_gb": {"type": "number", "default": 1.0},
+             },
+         },
+         handler=lambda ctx, args: doctor_tool.execute_doctor(args),
+     ))
+
+     reg.register(Tool(
+         name="search_api_endpoints",
+         description="Search API endpoints by path pattern (search-first for APIs).",
+         input_schema={"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]},
+         handler=lambda ctx, args: search_api_endpoints_tool.execute_search_api_endpoints(args, ctx.db, ctx.roots),
+     ))
+
+     reg.register(Tool(
+         name="index_file",
+         description="Force immediate re-indexing for a file path. Use when content seems stale.",
+         input_schema={"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]},
+         handler=lambda ctx, args: index_file_tool.execute_index_file(args, ctx.indexer, ctx.roots),
+     ))
+
+     reg.register(Tool(
+         name="get_callers",
+         description="Find callers of a symbol (use after search_symbols).",
+         input_schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]},
+         handler=lambda ctx, args: get_callers_tool.execute_get_callers(args, ctx.db, ctx.roots),
+     ))
+
+     reg.register(Tool(
+         name="get_implementations",
+         description="Find implementations of a symbol (use after search_symbols).",
+         input_schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]},
+         handler=lambda ctx, args: get_implementations_tool.execute_get_implementations(args, ctx.db, ctx.roots),
+     ))
+
+     return reg
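
For orientation, here is a minimal usage sketch, not part of the package: how a host server might wire the registry together. The stub ToolContext values below are hypothetical placeholders for the live db/engine/indexer components a real server would pass in.

    from mcp.tools.registry import ToolContext, build_default_registry

    registry = build_default_registry()
    # Advertise tools over MCP: each entry carries name, description, inputSchema.
    print([t["name"] for t in registry.list_tools()])

    ctx = ToolContext(
        db=None, engine=None, indexer=None, roots=[], cfg=None, logger=None,
        workspace_root="/tmp/ws", server_version="0.0.1",
    )
    # Dispatch by tool name; unknown names raise ValueError.
    result = registry.execute("rescan", ctx, {})  # error payload here: no indexer attached
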
mcp/tools/repo_candidates.py ADDED
@@ -0,0 +1,89 @@
+ #!/usr/bin/env python3
+ """
+ Repo candidates tool for Sari MCP Server.
+ """
+ import json
+ from typing import Any, Dict, List
+ from mcp.tools._util import mcp_response, pack_header, pack_line, pack_encode_id, pack_encode_text, pack_error, ErrorCode, resolve_root_ids
+
+ try:
+     from app.db import LocalSearchDB
+     from mcp.telemetry import TelemetryLogger
+ except ImportError:
+     # Fallback for direct script execution
+     import sys
+     from pathlib import Path
+     sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+     from app.db import LocalSearchDB
+     from mcp.telemetry import TelemetryLogger
+
+
+ def execute_repo_candidates(args: Dict[str, Any], db: LocalSearchDB, logger: TelemetryLogger = None, roots: List[str] = None) -> Dict[str, Any]:
+     """Execute repo_candidates tool."""
+     query = args.get("query", "")
+     try:
+         limit_arg = min(int(args.get("limit", 3)), 5)
+     except (ValueError, TypeError):
+         limit_arg = 3
+
+     if not query.strip():
+         return mcp_response(
+             "repo_candidates",
+             lambda: pack_error("repo_candidates", ErrorCode.INVALID_ARGS, "query is required"),
+             lambda: {"error": {"code": ErrorCode.INVALID_ARGS.value, "message": "query is required"}, "isError": True},
+         )
+
+     def get_candidates():
+         root_ids = resolve_root_ids(list(roots or []))
+         candidates = db.repo_candidates(q=query, limit=limit_arg, root_ids=root_ids)
+         for candidate in candidates:
+             score = candidate.get("score", 0)
+             if score >= 10:
+                 reason = f"High match ({score} files contain '{query}')"
+             elif score >= 5:
+                 reason = f"Moderate match ({score} files)"
+             else:
+                 reason = f"Low match ({score} files)"
+             candidate["reason"] = reason
+         return candidates
+
+     # --- JSON Builder ---
+     def build_json() -> Dict[str, Any]:
+         candidates = get_candidates()
+         return {
+             "query": query,
+             "candidates": candidates,
+             "hint": "Use 'repo' parameter in search to narrow down scope after selection",
+         }
+
+     # --- PACK1 Builder ---
+     def build_pack() -> str:
+         candidates = get_candidates()
+
+         # Header
+         kv = {"q": pack_encode_text(query), "limit": limit_arg}
+         lines = [
+             pack_header("repo_candidates", kv, returned=len(candidates))
+         ]
+
+         # Records
+         for c in candidates:
+             # r:repo=<repo> score=<score> reason=<reason>
+             kv_line = {
+                 "repo": pack_encode_id(c["repo"]),
+                 "score": str(c["score"]),
+                 "reason": pack_encode_text(c["reason"])
+             }
+             lines.append(pack_line("r", kv_line))
+
+         return "\n".join(lines)
+
+     if logger:
+         # Candidate counts are only known inside the builders, and running the
+         # query a second time just for telemetry would be wasteful. Keep it
+         # simple and log the query intent only; capturing result counts would
+         # require hooking into the builders or inspecting the mcp_response result.
+         logger.log_telemetry(f"tool=repo_candidates query='{query}' limit={limit_arg}")
+
+     return mcp_response("repo_candidates", build_pack, build_json)
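
As a quick illustration of the score-to-reason mapping above, a hypothetical stub standing in for LocalSearchDB (not part of the package) shows what the builders receive:

    class StubDB:
        def repo_candidates(self, q, limit, root_ids):
            # Pretend two repos matched the query with different file counts.
            return [{"repo": "app", "score": 12}, {"repo": "docs", "score": 2}]

    resp = execute_repo_candidates({"query": "indexer", "limit": 3}, StubDB())
    # score 12 -> reason "High match (12 files contain 'indexer')"
    # score 2  -> reason "Low match (2 files)"
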
mcp/tools/rescan.py ADDED
@@ -0,0 +1,46 @@
+ #!/usr/bin/env python3
+ """
+ Rescan tool for Local Search MCP Server.
+ """
+ from typing import Any, Dict
+
+ from mcp.tools._util import mcp_response, pack_header, pack_line, pack_error, ErrorCode
+
+ try:
+     from app.indexer import Indexer
+ except ImportError:
+     import sys
+     from pathlib import Path
+     sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+     from app.indexer import Indexer
+
+
+ def execute_rescan(args: Dict[str, Any], indexer: Indexer) -> Dict[str, Any]:
+     """Trigger async rescan on indexer."""
+     if not indexer:
+         return mcp_response(
+             "rescan",
+             lambda: pack_error("rescan", ErrorCode.INTERNAL, "indexer not available"),
+             lambda: {"error": {"code": ErrorCode.INTERNAL.value, "message": "indexer not available"}, "isError": True},
+         )
+
+     if not getattr(indexer, "indexing_enabled", True):
+         mode = getattr(indexer, "indexer_mode", "off")
+         code = ErrorCode.ERR_INDEXER_DISABLED if mode == "off" else ErrorCode.ERR_INDEXER_FOLLOWER
+         return mcp_response(
+             "rescan",
+             lambda: pack_error("rescan", code, "Indexer is not available in follower/off mode", fields={"mode": mode}),
+             lambda: {"error": {"code": code.value, "message": "Indexer is not available in follower/off mode", "data": {"mode": mode}}, "isError": True},
+         )
+
+     indexer.request_rescan()
+
+     def build_json() -> Dict[str, Any]:
+         return {"requested": True}
+
+     def build_pack() -> str:
+         lines = [pack_header("rescan", {}, returned=1)]
+         lines.append(pack_line("m", kv={"requested": "true"}))
+         return "\n".join(lines)
+
+     return mcp_response("rescan", build_pack, build_json)
mcp/tools/scan_once.py ADDED
@@ -0,0 +1,54 @@
+ #!/usr/bin/env python3
+ """
+ Scan-once tool for Local Search MCP Server.
+ """
+ from typing import Any, Dict
+
+ from mcp.tools._util import mcp_response, pack_header, pack_line, pack_error, ErrorCode
+
+ try:
+     from app.indexer import Indexer
+ except ImportError:
+     import sys
+     from pathlib import Path
+     sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+     from app.indexer import Indexer
+
+
+ def execute_scan_once(args: Dict[str, Any], indexer: Indexer) -> Dict[str, Any]:
+     """Run a synchronous scan once."""
+     if not indexer:
+         return mcp_response(
+             "scan_once",
+             lambda: pack_error("scan_once", ErrorCode.INTERNAL, "indexer not available"),
+             lambda: {"error": {"code": ErrorCode.INTERNAL.value, "message": "indexer not available"}, "isError": True},
+         )
+
+     if not getattr(indexer, "indexing_enabled", True):
+         mode = getattr(indexer, "indexer_mode", "off")
+         code = ErrorCode.ERR_INDEXER_DISABLED if mode == "off" else ErrorCode.ERR_INDEXER_FOLLOWER
+         return mcp_response(
+             "scan_once",
+             lambda: pack_error("scan_once", code, "Indexer is not available in follower/off mode", fields={"mode": mode}),
+             lambda: {"error": {"code": code.value, "message": "Indexer is not available in follower/off mode", "data": {"mode": mode}}, "isError": True},
+         )
+
+     indexer.scan_once()
+     try:
+         scanned = indexer.status.scanned_files
+         indexed = indexer.status.indexed_files
+     except Exception:
+         scanned = 0
+         indexed = 0
+
+     def build_json() -> Dict[str, Any]:
+         return {"ok": True, "scanned_files": scanned, "indexed_files": indexed}
+
+     def build_pack() -> str:
+         lines = [pack_header("scan_once", {}, returned=1)]
+         lines.append(pack_line("m", kv={"ok": "true"}))
+         lines.append(pack_line("m", kv={"scanned_files": str(scanned)}))
+         lines.append(pack_line("m", kv={"indexed_files": str(indexed)}))
+         return "\n".join(lines)
+
+     return mcp_response("scan_once", build_pack, build_json)
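
Both rescan and scan_once share the same mode guard: an indexer reporting indexing_enabled=False is rejected before any work happens. A hypothetical stub (not part of the package) makes the behavior easy to check:

    class FollowerIndexer:
        indexing_enabled = False
        indexer_mode = "follower"

    resp = execute_scan_once({}, FollowerIndexer())
    # -> error response with code ERR_INDEXER_FOLLOWER and data {"mode": "follower"}
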
mcp/tools/search.py ADDED
@@ -0,0 +1,208 @@
+ #!/usr/bin/env python3
+ """
+ Search tool for Local Search MCP Server (SSOT).
+ """
+ import time
+ from typing import Any, Dict, List
+
+ from mcp.tools._util import (
+     mcp_response,
+     pack_header,
+     pack_line,
+     pack_truncated,
+     pack_encode_id,
+     pack_encode_text,
+     resolve_root_ids,
+     pack_error,
+     ErrorCode,
+ )
+
+ try:
+     from app.db import LocalSearchDB, SearchOptions
+     from app.engine_runtime import EngineError
+     from mcp.telemetry import TelemetryLogger
+ except ImportError:
+     import sys
+     from pathlib import Path
+     sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+     from app.db import LocalSearchDB, SearchOptions
+     from app.engine_runtime import EngineError
+     from mcp.telemetry import TelemetryLogger
+
+
+ def execute_search(
+     args: Dict[str, Any],
+     db: LocalSearchDB,
+     logger: TelemetryLogger,
+     roots: List[str],
+     engine: Any = None,
+ ) -> Dict[str, Any]:
+     start_ts = time.time()
+     engine = engine or getattr(db, "engine", None)
+
+     root_ids = resolve_root_ids(roots)
+     req_root_ids = args.get("root_ids")
+     if isinstance(req_root_ids, list):
+         req_root_ids = [str(r) for r in req_root_ids if r]
+         if root_ids:
+             root_ids = [r for r in root_ids if r in req_root_ids]
+         else:
+             root_ids = list(req_root_ids)
+         if req_root_ids and not root_ids:
+             if db and db.has_legacy_paths():
+                 root_ids = []
+             else:
+                 return mcp_response(
+                     "search",
+                     lambda: pack_error("search", ErrorCode.ERR_ROOT_OUT_OF_SCOPE, "root_ids out of scope", hints=["outside final_roots"]),
+                     lambda: {"error": {"code": ErrorCode.ERR_ROOT_OUT_OF_SCOPE.value, "message": "root_ids out of scope"}, "isError": True},
+                 )
+
+     query = (args.get("query") or "").strip()
+     if not query:
+         return mcp_response(
+             "search",
+             lambda: pack_error("search", ErrorCode.INVALID_ARGS, "query is required"),
+             lambda: {"error": {"code": ErrorCode.INVALID_ARGS.value, "message": "query is required"}, "isError": True},
+         )
+
+     repo = args.get("scope") or args.get("repo")
+     if repo == "workspace":
+         repo = None
+
+     file_types = list(args.get("file_types", []))
+     search_type = args.get("type")
+     if search_type == "docs":
+         doc_exts = ["md", "txt", "pdf", "docx", "rst"]
+         file_types.extend([e for e in doc_exts if e not in file_types])
+
+     try:
+         limit = int(args.get("limit", 8))
+     except (ValueError, TypeError):
+         limit = 8
+     limit = max(1, min(limit, 50))
+
+     try:
+         offset = max(int(args.get("offset", 0)), 0)
+     except (ValueError, TypeError):
+         offset = 0
+
+     try:
+         raw_lines = int(args.get("context_lines", 5))
+         snippet_lines = min(max(raw_lines, 1), 20)
+     except (ValueError, TypeError):
+         snippet_lines = 5
+
+     total_mode = str(args.get("total_mode") or "").strip().lower()
+     if total_mode not in {"exact", "approx"}:
+         total_mode = "exact"
+
+     engine_mode = "sqlite"
+     index_version = ""
+     if engine and hasattr(engine, "status"):
+         st = engine.status()
+         engine_mode = st.engine_mode
+         index_version = st.index_version
+         if engine_mode == "embedded" and not st.engine_ready:
+             return mcp_response(
+                 "search",
+                 lambda: pack_error("search", ErrorCode.ERR_ENGINE_UNAVAILABLE, f"engine_ready=false reason={st.reason}", hints=[st.hint] if st.hint else None),
+                 lambda: {
+                     "error": {"code": ErrorCode.ERR_ENGINE_UNAVAILABLE.value, "message": f"engine_ready=false reason={st.reason}", "hint": st.hint},
+                     "isError": True,
+                 },
+             )
+
+     opts = SearchOptions(
+         query=query,
+         repo=repo,
+         limit=limit,
+         offset=offset,
+         snippet_lines=snippet_lines,
+         file_types=file_types,
+         path_pattern=args.get("path_pattern"),
+         exclude_patterns=args.get("exclude_patterns", []),
+         recency_boost=bool(args.get("recency_boost", False)),
+         use_regex=bool(args.get("use_regex", False)),
+         case_sensitive=bool(args.get("case_sensitive", False)),
+         total_mode=total_mode,
+         root_ids=root_ids,
+     )
+
+     try:
+         hits, meta = engine.search_v2(opts) if engine else ([], {})
+     except EngineError as exc:
+         code = getattr(ErrorCode, exc.code, ErrorCode.ERR_ENGINE_QUERY)
+         return mcp_response(
+             "search",
+             lambda: pack_error("search", code, exc.message, hints=[exc.hint] if exc.hint else None),
+             lambda: {"error": {"code": code.value, "message": exc.message, "hint": exc.hint}, "isError": True},
+         )
+     except Exception as exc:
+         return mcp_response(
+             "search",
+             lambda: pack_error("search", ErrorCode.ERR_ENGINE_QUERY, f"engine query failed: {exc}"),
+             lambda: {"error": {"code": ErrorCode.ERR_ENGINE_QUERY.value, "message": f"engine query failed: {exc}"}, "isError": True},
+         )
+
+     latency_ms = int((time.time() - start_ts) * 1000)
+     total = meta.get("total", -1)
+     total_mode = meta.get("total_mode", total_mode)
+
+     def build_json() -> Dict[str, Any]:
+         results: List[Dict[str, Any]] = []
+         for hit in hits:
+             results.append({
+                 "doc_id": hit.path,
+                 "repo": hit.repo,
+                 "path": hit.path,
+                 "score": hit.score,
+                 "snippet": hit.snippet,
+                 "mtime": hit.mtime,
+                 "size": hit.size,
+                 "match_count": hit.match_count,
+                 "file_type": hit.file_type,
+                 "hit_reason": hit.hit_reason,
+                 "context_symbol": hit.context_symbol,
+                 "docstring": hit.docstring,
+                 "metadata": hit.metadata,
+             })
+         return {
+             "query": query,
+             "limit": limit,
+             "offset": offset,
+             "results": results,
+             "meta": {
+                 "total": total,
+                 "total_mode": total_mode,
+                 "engine": engine_mode,
+                 "latency_ms": latency_ms,
+                 "index_version": index_version,
+             },
+         }
+
+     def build_pack() -> str:
+         returned = len(hits)
+         header = pack_header("search", {"q": pack_encode_text(query)}, returned=returned)
+         lines = [header]
+         lines.append(pack_line("m", {"total": str(total)}))
+         lines.append(pack_line("m", {"total_mode": total_mode}))
+         lines.append(pack_line("m", {"engine": engine_mode}))
+         lines.append(pack_line("m", {"latency_ms": str(latency_ms)}))
+         if index_version:
+             lines.append(pack_line("m", {"index_version": pack_encode_id(index_version)}))
+         for h in hits:
+             lines.append(pack_line("r", {
+                 "path": pack_encode_id(h.path),
+                 "repo": pack_encode_id(h.repo),
+                 "score": f"{h.score:.3f}",
+                 "mtime": str(h.mtime),
+                 "size": str(h.size),
+                 "file_type": pack_encode_id(h.file_type),
+                 "snippet": pack_encode_text(h.snippet),
+             }))
+         if returned >= limit:
+             lines.append(pack_truncated(offset + limit, limit, "maybe"))
+         return "\n".join(lines)
+
+     return mcp_response("search", build_pack, build_json)
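
The argument normalization above can be exercised even without an engine attached: with engine=None the tool falls back to an empty hit list, so the clamped values surface directly in the response. A hedged sketch, assuming resolve_root_ids accepts an empty roots list:

    args = {"query": "EngineError", "limit": 100, "context_lines": 0}
    resp = execute_search(args, db=None, logger=None, roots=[])
    # limit is clamped to 50, context_lines to 1, total defaults to -1,
    # and meta reports engine="sqlite" since no engine status is available.
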