sari 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. app/__init__.py +1 -0
  2. app/config.py +240 -0
  3. app/db.py +932 -0
  4. app/dedup_queue.py +77 -0
  5. app/engine_registry.py +56 -0
  6. app/engine_runtime.py +472 -0
  7. app/http_server.py +204 -0
  8. app/indexer.py +1532 -0
  9. app/main.py +147 -0
  10. app/models.py +39 -0
  11. app/queue_pipeline.py +65 -0
  12. app/ranking.py +144 -0
  13. app/registry.py +172 -0
  14. app/search_engine.py +572 -0
  15. app/watcher.py +124 -0
  16. app/workspace.py +286 -0
  17. deckard/__init__.py +3 -0
  18. deckard/__main__.py +4 -0
  19. deckard/main.py +345 -0
  20. deckard/version.py +1 -0
  21. mcp/__init__.py +1 -0
  22. mcp/__main__.py +19 -0
  23. mcp/cli.py +485 -0
  24. mcp/daemon.py +149 -0
  25. mcp/proxy.py +304 -0
  26. mcp/registry.py +218 -0
  27. mcp/server.py +519 -0
  28. mcp/session.py +234 -0
  29. mcp/telemetry.py +112 -0
  30. mcp/test_cli.py +89 -0
  31. mcp/test_daemon.py +124 -0
  32. mcp/test_server.py +197 -0
  33. mcp/tools/__init__.py +14 -0
  34. mcp/tools/_util.py +244 -0
  35. mcp/tools/deckard_guide.py +32 -0
  36. mcp/tools/doctor.py +208 -0
  37. mcp/tools/get_callers.py +60 -0
  38. mcp/tools/get_implementations.py +60 -0
  39. mcp/tools/index_file.py +75 -0
  40. mcp/tools/list_files.py +138 -0
  41. mcp/tools/read_file.py +48 -0
  42. mcp/tools/read_symbol.py +99 -0
  43. mcp/tools/registry.py +212 -0
  44. mcp/tools/repo_candidates.py +89 -0
  45. mcp/tools/rescan.py +46 -0
  46. mcp/tools/scan_once.py +54 -0
  47. mcp/tools/search.py +208 -0
  48. mcp/tools/search_api_endpoints.py +72 -0
  49. mcp/tools/search_symbols.py +63 -0
  50. mcp/tools/status.py +135 -0
  51. sari/__init__.py +1 -0
  52. sari/__main__.py +4 -0
  53. sari-0.0.1.dist-info/METADATA +521 -0
  54. sari-0.0.1.dist-info/RECORD +58 -0
  55. sari-0.0.1.dist-info/WHEEL +5 -0
  56. sari-0.0.1.dist-info/entry_points.txt +2 -0
  57. sari-0.0.1.dist-info/licenses/LICENSE +21 -0
  58. sari-0.0.1.dist-info/top_level.txt +4 -0
mcp/server.py ADDED
@@ -0,0 +1,519 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ MCP Server for Local Search (STDIO mode)
4
+ Follows Model Context Protocol specification: https://modelcontextprotocol.io/specification/2025-11-25
5
+
6
+ v2.5.0 enhancements:
7
+ - Search pagination (offset, total, has_more)
8
+ - Detailed status stats (repo_stats)
9
+ - Improved UX (root display, fallback reasons)
10
+
11
+ Usage:
12
+ python3 .codex/tools/sari/mcp/server.py
13
+
14
+ Environment:
15
+ LOCAL_SEARCH_WORKSPACE_ROOT - Workspace root directory (default: cwd)
16
+ """
17
+ import json
18
+ import os
19
+ import sys
20
+ import threading
21
+ import time
22
+ from pathlib import Path
23
+ from typing import Any, Dict, Optional
24
+
25
+ # Add project root to sys.path for absolute imports
26
+ SCRIPT_DIR = Path(__file__).parent
27
+ REPO_ROOT = SCRIPT_DIR.parent
28
+ if str(REPO_ROOT) not in sys.path:
29
+ sys.path.insert(0, str(REPO_ROOT))
30
+
31
+ from app.config import Config
32
+ from app.db import LocalSearchDB, SearchOptions
33
+ from app.indexer import Indexer
34
+ from app.workspace import WorkspaceManager
35
+ from mcp.telemetry import TelemetryLogger
36
+
37
+ # Import tools using absolute paths
38
+ import mcp.tools.search as search_tool
39
+ import mcp.tools.status as status_tool
40
+ import mcp.tools.repo_candidates as repo_candidates_tool
41
+ import mcp.tools.list_files as list_files_tool
42
+ import mcp.tools.read_file as read_file_tool
43
+ import mcp.tools.search_symbols as search_symbols_tool
44
+ import mcp.tools.read_symbol as read_symbol_tool
45
+ import mcp.tools.doctor as doctor_tool
46
+ import mcp.tools.search_api_endpoints as search_api_endpoints_tool
47
+ import mcp.tools.index_file as index_file_tool
48
+ import mcp.tools.rescan as rescan_tool
49
+ import mcp.tools.scan_once as scan_once_tool
50
+ import mcp.tools.get_callers as get_callers_tool
51
+ import mcp.tools.get_implementations as get_implementations_tool
52
+ import mcp.tools.deckard_guide as deckard_guide_tool
53
+ from mcp.tools.registry import ToolContext, build_default_registry
54
+
55
+
56
+ class LocalSearchMCPServer:
57
+ """MCP Server for Local Search - STDIO mode."""
58
+
59
+ PROTOCOL_VERSION = "2025-11-25"
60
+ SERVER_NAME = "sari"
61
+ # Version comes from the DECKARD_VERSION environment variable (set by the bootstrapper), falling back to the VERSION file and finally to "dev"
62
+ @staticmethod
63
+ def _resolve_version() -> str:
64
+ v = (os.environ.get("DECKARD_VERSION") or "").strip()
65
+ if v:
66
+ return v
67
+ ver_path = REPO_ROOT / "VERSION"
68
+ if ver_path.exists():
69
+ try:
70
+ return ver_path.read_text(encoding="utf-8").strip() or "dev"
71
+ except Exception:
72
+ pass
73
+ return "dev"
74
+
75
+ SERVER_VERSION = _resolve_version.__func__()
76
+
77
def __init__(self, workspace_root: str):
    """Create a server bound to ``workspace_root``.

    Only cheap state is set up here: the config, database and indexer stay
    ``None`` until ``_ensure_initialized`` builds them.

    Args:
        workspace_root: Directory the server treats as the workspace root.
    """
    # Workspace identity; ``_root_uri`` is filled in by ``handle_initialize``.
    self.workspace_root = workspace_root
    self._root_uri: Optional[str] = None

    # Heavy resources, created lazily.
    self.cfg: Optional[Config] = None
    self.db: Optional[LocalSearchDB] = None
    self.indexer: Optional[Indexer] = None
    self._indexer_thread: Optional[threading.Thread] = None

    # Initialization state and the lock guarding it.
    self._initialized = False
    self._init_lock = threading.Lock()

    # Search-first policy mode plus the counters it relies on.
    self._search_first_mode = self._resolve_search_first_policy()
    usage_counters = {
        "search": 0,
        "search_symbols": 0,
        "last_search_ts": None,
        "last_search_symbols_ts": None,
        "read_without_search": 0,
    }
    self._search_usage = usage_counters

    self._tool_registry = build_default_registry()

    # Telemetry logger writing to the global log directory.
    log_dir = WorkspaceManager.get_global_log_dir()
    self.logger = TelemetryLogger(log_dir)
98
+
99
+ @staticmethod
100
+ def _resolve_search_first_policy() -> str:
101
+ raw_mode = (os.environ.get("DECKARD_SEARCH_FIRST_MODE") or "").strip().lower()
102
+ if raw_mode in {"off", "warn", "enforce"}:
103
+ return raw_mode
104
+ raw_enforce = (os.environ.get("DECKARD_ENFORCE_SEARCH_FIRST") or "").strip().lower()
105
+ if raw_enforce:
106
+ return "off" if raw_enforce in {"0", "false", "no", "off"} else "enforce"
107
+ return "warn"
108
+
109
+ def _mark_search(self, kind: str) -> None:
110
+ now = time.time()
111
+ if kind == "search":
112
+ self._search_usage["search"] += 1
113
+ self._search_usage["last_search_ts"] = now
114
+ elif kind == "search_symbols":
115
+ self._search_usage["search_symbols"] += 1
116
+ self._search_usage["last_search_symbols_ts"] = now
117
+
118
+ def _has_search_context(self) -> bool:
119
+ return (self._search_usage.get("search", 0) > 0 or
120
+ self._search_usage.get("search_symbols", 0) > 0)
121
+
122
def _search_first_error(self) -> Dict[str, Any]:
    """Build the error response returned when the search-first policy is enforced.

    Counts the violation, emits a best-effort telemetry line, and returns an
    ``INVALID_ARGS`` error produced through ``mcp_response``.
    """
    self._search_usage["read_without_search"] += 1
    try:
        self.logger.log_telemetry(
            f"policy=search_first mode={self._search_first_mode} action=enforce violations={self._search_usage['read_without_search']}"
        )
    except Exception:
        # Telemetry is best-effort; a logging failure must not mask the policy error.
        pass

    from mcp.tools._util import mcp_response, pack_error, ErrorCode

    message = "search-first policy active. Call search/search_symbols before read_file/read_symbol."

    def _packed_form():
        return pack_error("search_first", ErrorCode.INVALID_ARGS, message)

    def _json_form():
        return {
            "error": {"code": ErrorCode.INVALID_ARGS.value, "message": message},
            "isError": True,
        }

    return mcp_response("search_first", _packed_form, _json_form)
136
+
137
+ def _search_first_warning(self, result: Dict[str, Any]) -> Dict[str, Any]:
138
+ self._search_usage["read_without_search"] += 1
139
+ try:
140
+ self.logger.log_telemetry(
141
+ f"policy=search_first mode={self._search_first_mode} action=warn violations={self._search_usage['read_without_search']}"
142
+ )
143
+ except Exception:
144
+ pass
145
+ warnings = list(result.get("warnings", []))
146
+ warnings.append("Search-first policy (advisory): call search/search_symbols before read_file/read_symbol.")
147
+ result["warnings"] = warnings
148
+ return result
149
+
150
+ def _ensure_initialized(self) -> None:
151
+ """Lazy initialization of database and indexer."""
152
+ if self._initialized:
153
+ return
154
+
155
+ with self._init_lock:
156
+ # Double-check after acquiring lock
157
+ if self._initialized:
158
+ return
159
+
160
+ try:
161
+ config_path = WorkspaceManager.resolve_config_path(self.workspace_root)
162
+ self.cfg = Config.load(str(config_path), workspace_root_override=self.workspace_root, root_uri=self._root_uri)
163
+
164
+ db_path = Path(self.cfg.db_path)
165
+
166
+ db_path.parent.mkdir(parents=True, exist_ok=True)
167
+ self.db = LocalSearchDB(str(db_path))
168
+ try:
169
+ from app.engine_registry import get_default_engine
170
+ self.db.set_engine(get_default_engine(self.db, self.cfg, self.cfg.workspace_roots))
171
+ except Exception as e:
172
+ self.logger.log_error(f"engine init failed: {e}")
173
+ self.logger.log_info(f"DB path: {db_path}")
174
+
175
+ from app.indexer import resolve_indexer_settings
176
+ mode, enabled, startup_enabled, lock_handle = resolve_indexer_settings(str(db_path))
177
+ self.indexer = Indexer(self.cfg, self.db, self.logger, indexer_mode=mode, indexing_enabled=enabled, startup_index_enabled=startup_enabled, lock_handle=lock_handle)
178
+
179
+ self._indexer_thread = threading.Thread(target=self.indexer.run_forever, daemon=True)
180
+ self._indexer_thread.start()
181
+
182
+ init_timeout = float(os.environ.get("DECKARD_INIT_TIMEOUT") or os.environ.get("LOCAL_SEARCH_INIT_TIMEOUT") or "5")
183
+ if init_timeout > 0:
184
+ wait_iterations = int(init_timeout * 10)
185
+ for _ in range(wait_iterations):
186
+ if self.indexer.status.index_ready:
187
+ break
188
+ time.sleep(0.1)
189
+
190
+ self._initialized = True
191
+ except Exception as e:
192
+ self.logger.log_error(f"Initialization failed: {e}")
193
+ raise
194
+
195
+
196
def handle_initialize(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Handle the ``initialize`` request.

    Logs the full params payload, resolves the workspace from the client's
    ``rootUri``/``rootPath`` (or the fallback detection), and when the
    workspace differs from the current one switches to it and clears
    ``_initialized`` so the next ``_ensure_initialized`` rebuilds state.

    Returns:
        The protocol version, server name/version and tool capabilities.
    """
    # Trace the whole payload so we can see exactly what clients send.
    try:
        serialized_params = json.dumps(params, ensure_ascii=False)
        self.logger.log_info("Initialize params (full): " + serialized_params)
    except Exception as e:
        self.logger.log_error(f"Initialize params log failed: {e}")

    # Prefer the client's root; otherwise let WorkspaceManager detect one.
    root_uri = params.get("rootUri") or params.get("rootPath")
    roots = WorkspaceManager.resolve_workspace_roots(root_uri=root_uri)
    if roots:
        new_workspace = roots[0]
    else:
        new_workspace = WorkspaceManager.resolve_workspace_root()

    # Switch workspaces under the init lock so it cannot race lazy initialization.
    with self._init_lock:
        if new_workspace != self.workspace_root:
            self.workspace_root = new_workspace
            self._root_uri = root_uri
            self._initialized = False  # Force re-initialization with new workspace
            self.logger.log_info(f"Workspace set to: {self.workspace_root}")

    server_info = {
        "name": self.SERVER_NAME,
        "version": self.SERVER_VERSION,
    }
    capabilities = {
        "tools": {},
    }
    return {
        "protocolVersion": self.PROTOCOL_VERSION,
        "serverInfo": server_info,
        "capabilities": capabilities,
    }
229
+
230
def handle_initialized(self, params: Dict[str, Any]) -> None:
    """Handle the ``initialized`` notification by running lazy initialization now."""
    self._ensure_initialized()
    return None
232
+
233
def handle_tools_list(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Handle ``tools/list``: return the registry's tool descriptions under ``"tools"``."""
    tools = self._tool_registry.list_tools()
    return {"tools": tools}
236
+
237
def handle_tools_call(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Handle ``tools/call``: run the named tool with the given arguments.

    Ensures the server is initialized, builds a ``ToolContext``, then routes
    the tools that interact with the search-first policy (``search``,
    ``read_file``, ``search_symbols``, ``read_symbol``, ``doctor``) to the
    local ``_tool_*`` wrappers. Every other tool name goes to the registry.
    """
    self._ensure_initialized()

    tool_name = params.get("name")
    args = params.get("arguments", {})
    ctx = ToolContext(
        db=self.db,
        engine=self.db.engine if self.db else None,
        indexer=self.indexer,
        roots=self.cfg.workspace_roots if self.cfg else [],
        cfg=self.cfg,
        logger=self.logger,
        workspace_root=self.workspace_root,
        server_version=self.SERVER_VERSION,
    )

    # Tools handled locally because they read or update search-usage state.
    policy_aware_tools = (
        ("search", "_tool_search"),
        ("read_file", "_tool_read_file"),
        ("search_symbols", "_tool_search_symbols"),
        ("read_symbol", "_tool_read_symbol"),
        ("doctor", "_tool_doctor"),
    )
    for name, wrapper_name in policy_aware_tools:
        if tool_name == name:
            return getattr(self, wrapper_name)(args)

    return self._tool_registry.execute(tool_name, ctx, args)
265
+
266
def _tool_search(self, args: Dict[str, Any]) -> Dict[str, Any]:
    """Run the search tool and record the search when it did not error."""
    outcome = search_tool.execute_search(
        args,
        self.db,
        self.logger,
        self.cfg.workspace_roots,
        engine=self.db.engine,
    )
    if outcome.get("isError"):
        return outcome
    # Only successful searches count towards the search-first policy.
    self._mark_search("search")
    return outcome
272
+
273
def _tool_status(self, args: Dict[str, Any]) -> Dict[str, Any]:
    """Run the status tool with this server's indexer, database, config and version."""
    status = status_tool.execute_status(
        args,
        self.indexer,
        self.db,
        self.cfg,
        self.workspace_root,
        self.SERVER_VERSION,
    )
    return status
275
+
276
def _tool_repo_candidates(self, args: Dict[str, Any]) -> Dict[str, Any]:
    """Run the repo_candidates tool against the database and workspace roots."""
    candidates = repo_candidates_tool.execute_repo_candidates(
        args,
        self.db,
        self.logger,
        self.cfg.workspace_roots,
    )
    return candidates
278
+
279
def _tool_list_files(self, args: Dict[str, Any]) -> Dict[str, Any]:
    """Run the list_files tool against the database and workspace roots."""
    listing = list_files_tool.execute_list_files(
        args,
        self.db,
        self.logger,
        self.cfg.workspace_roots,
    )
    return listing
281
+
282
+ def _tool_read_file(self, args: Dict[str, Any]) -> Dict[str, Any]:
283
+ if self._search_first_mode != "off" and not self._has_search_context():
284
+ if self._search_first_mode == "enforce":
285
+ return self._search_first_error()
286
+ result = read_file_tool.execute_read_file(args, self.db, self.cfg.workspace_roots)
287
+ return self._search_first_warning(result)
288
+ return read_file_tool.execute_read_file(args, self.db, self.cfg.workspace_roots)
289
+
290
def _tool_search_symbols(self, args: Dict[str, Any]) -> Dict[str, Any]:
    """Run the search_symbols tool and record the search when it did not error."""
    outcome = search_symbols_tool.execute_search_symbols(args, self.db, self.cfg.workspace_roots)
    if outcome.get("isError"):
        return outcome
    # Only successful symbol searches count towards the search-first policy.
    self._mark_search("search_symbols")
    return outcome
295
+
296
+ def _tool_read_symbol(self, args: Dict[str, Any]) -> Dict[str, Any]:
297
+ if self._search_first_mode != "off" and not self._has_search_context():
298
+ if self._search_first_mode == "enforce":
299
+ return self._search_first_error()
300
+ result = read_symbol_tool.execute_read_symbol(args, self.db, self.logger, self.cfg.workspace_roots)
301
+ return self._search_first_warning(result)
302
+ return read_symbol_tool.execute_read_symbol(args, self.db, self.logger, self.cfg.workspace_roots)
303
+
304
def _tool_doctor(self, args: Dict[str, Any]) -> Dict[str, Any]:
    """Run the doctor tool with search-usage diagnostics added to the arguments.

    The caller's ``args`` are copied, not mutated; ``search_usage`` (a
    snapshot of the counters) and ``search_first_mode`` are added to the copy.
    """
    payload = dict(args)
    payload.update(
        search_usage=dict(self._search_usage),
        search_first_mode=self._search_first_mode,
    )
    return doctor_tool.execute_doctor(payload)
309
+
310
def handle_request(self, request: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Dispatch one decoded JSON-RPC message and build its response.

    Args:
        request: The decoded JSON-RPC message. Anything that is not a JSON
            object is rejected with ``-32600 Invalid Request`` instead of
            raising, so a malformed payload cannot take down the read loop.

    Returns:
        A JSON-RPC response dict, or ``None`` for notifications (messages
        without an ``id``), which never get a reply.

    Error codes used: ``-32600`` invalid request, ``-32601`` unknown method,
    ``-32000`` for any exception raised by a handler (logged first).
    """
    if not isinstance(request, dict):
        return {
            "jsonrpc": "2.0",
            "id": None,
            "error": {
                "code": -32600,
                "message": "Invalid Request",
            },
        }

    method = request.get("method")
    params = request.get("params")
    if params is None:
        # Treat a missing or explicit-null params member as "no parameters".
        params = {}
    msg_id = request.get("id")

    # JSON-RPC notifications carry no id and must not be answered.
    is_notification = msg_id is None

    try:
        if method == "initialize":
            result = self.handle_initialize(params)
        elif method in ("initialized", "notifications/initialized"):
            # "notifications/initialized" is the MCP lifecycle name; the bare
            # "initialized" form is kept for existing clients.
            self.handle_initialized(params)
            return None
        elif method == "tools/list":
            result = self.handle_tools_list(params)
        elif method == "tools/call":
            result = self.handle_tools_call(params)
        elif method == "ping":
            result = {}
        else:
            if is_notification:
                return None
            return {
                "jsonrpc": "2.0",
                "id": msg_id,
                "error": {
                    "code": -32601,
                    "message": f"Method not found: {method}",
                },
            }

        if is_notification:
            return None

        return {
            "jsonrpc": "2.0",
            "id": msg_id,
            "result": result,
        }
    except Exception as e:
        # Top-level boundary: log and turn the failure into a JSON-RPC error.
        self.logger.log_error(f"Error handling {method}: {e}")
        if is_notification:
            return None
        return {
            "jsonrpc": "2.0",
            "id": msg_id,
            "error": {
                "code": -32000,
                "message": str(e),
            },
        }
361
+
362
def shutdown(self) -> None:
    """Stop the indexer and close the database.

    The database is closed even when stopping the indexer raises, so a
    failing indexer cannot leak the database handle. Any exception from
    either step still propagates to the caller.
    """
    self.logger.log_info(f"Shutting down server for workspace: {self.workspace_root}")
    try:
        if self.indexer:
            self.indexer.stop()
    finally:
        if self.db:
            self.db.close()
369
+
370
def run(self) -> None:
    """Serve MCP requests from stdin until EOF or Ctrl-C.

    Two wire formats are accepted per message and answered in kind:

      * ``jsonl``  - one JSON document per line;
      * ``framed`` - header lines with ``Content-Length``, a blank line, then
        the body.

    Binary stdio (``sys.stdin.buffer`` / ``sys.stdout.buffer``) is used when
    both exist; otherwise the text streams are used. A body that is not valid
    UTF-8 or not valid JSON is answered with a ``-32700 Parse error`` and the
    loop continues. On exit the indexer is stopped and the database closed.
    """
    self.logger.log_info(f"Starting MCP server (workspace: {self.workspace_root})")
    use_text_io = not hasattr(sys.stdin, "buffer") or not hasattr(sys.stdout, "buffer")

    # The reader logic is the same for str and bytes; only the literals differ.
    if use_text_io:
        blank_lines = ("\n", "\r\n")
        json_openers = ("{", "[")
        line_endings = "\r\n"
    else:
        blank_lines = (b"\n", b"\r\n")
        json_openers = (b"{", b"[")
        line_endings = b"\r\n"

    def _read_mcp_message(stdin):
        """Return ``(body, mode)`` for the next message, or ``(None, None)`` to stop."""
        line = stdin.readline()
        if not line:
            return None, None
        # Skip blank separator lines between messages.
        while line in blank_lines:
            line = stdin.readline()
            if not line:
                return None, None

        # A line starting with "{" or "[" is a complete JSON document.
        if line.lstrip().startswith(json_openers):
            return line.rstrip(line_endings), "jsonl"

        # Otherwise it is the first header of a framed message; collect
        # headers up to the blank line that ends the header section.
        headers = [line]
        while True:
            h = stdin.readline()
            if not h:
                return None, None
            if h in blank_lines:
                break
            headers.append(h)

        content_length = None
        for h in headers:
            header_text = h if use_text_io else h.decode("utf-8", errors="ignore")
            parts = header_text.split(":", 1)
            if len(parts) != 2:
                continue
            key = parts[0].strip().lower()
            if key == "content-length":
                try:
                    content_length = int(parts[1].strip())
                except ValueError:
                    pass
                # Only the first Content-Length header is considered.
                break

        if content_length is None or content_length <= 0:
            return None, None

        body = stdin.read(content_length)
        if not body:
            return None, None
        return body, "framed"

    def _write_response(resp, mode):
        """Write ``resp`` using the same wire format the request arrived in."""
        if resp is None:
            return
        if use_text_io:
            payload = json.dumps(resp)
            if mode == "jsonl":
                sys.stdout.write(payload + "\n")
                sys.stdout.flush()
            else:
                header = f"Content-Length: {len(payload)}\r\n\r\n"
                sys.stdout.write(header + payload)
                sys.stdout.flush()
        else:
            payload = json.dumps(resp).encode("utf-8")
            if mode == "jsonl":
                sys.stdout.buffer.write(payload + b"\n")
                sys.stdout.buffer.flush()
            else:
                header = f"Content-Length: {len(payload)}\r\n\r\n".encode("ascii")
                sys.stdout.buffer.write(header + payload)
                sys.stdout.buffer.flush()

    try:
        stdin = sys.stdin if use_text_io else sys.stdin.buffer
        while True:
            body, mode = _read_mcp_message(stdin)
            if body is None:
                break
            try:
                if use_text_io:
                    request = json.loads(body)
                else:
                    request = json.loads(body.decode("utf-8"))
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # Invalid UTF-8 is as much a parse error as invalid JSON;
                # answer it instead of letting it end the server.
                self.logger.log_error(f"JSON decode error: {e}")
                error_response = {
                    "jsonrpc": "2.0",
                    "id": None,
                    "error": {
                        "code": -32700,
                        "message": "Parse error",
                    },
                }
                _write_response(error_response, mode)
                continue
            response = self.handle_request(request)
            _write_response(response, mode)
    except KeyboardInterrupt:
        self.logger.log_info("Shutting down...")
    finally:
        if self.indexer:
            self.indexer.stop()
        if self.db:
            self.db.close()
508
+
509
+
510
def main() -> None:
    """Entry point: detect the workspace root and serve MCP over stdio."""
    # WorkspaceManager owns workspace detection.
    detected_root = WorkspaceManager.resolve_workspace_root()
    LocalSearchMCPServer(detected_root).run()


if __name__ == "__main__":
    main()