sari 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. app/__init__.py +1 -0
  2. app/config.py +240 -0
  3. app/db.py +932 -0
  4. app/dedup_queue.py +77 -0
  5. app/engine_registry.py +56 -0
  6. app/engine_runtime.py +472 -0
  7. app/http_server.py +204 -0
  8. app/indexer.py +1532 -0
  9. app/main.py +147 -0
  10. app/models.py +39 -0
  11. app/queue_pipeline.py +65 -0
  12. app/ranking.py +144 -0
  13. app/registry.py +172 -0
  14. app/search_engine.py +572 -0
  15. app/watcher.py +124 -0
  16. app/workspace.py +286 -0
  17. deckard/__init__.py +3 -0
  18. deckard/__main__.py +4 -0
  19. deckard/main.py +345 -0
  20. deckard/version.py +1 -0
  21. mcp/__init__.py +1 -0
  22. mcp/__main__.py +19 -0
  23. mcp/cli.py +485 -0
  24. mcp/daemon.py +149 -0
  25. mcp/proxy.py +304 -0
  26. mcp/registry.py +218 -0
  27. mcp/server.py +519 -0
  28. mcp/session.py +234 -0
  29. mcp/telemetry.py +112 -0
  30. mcp/test_cli.py +89 -0
  31. mcp/test_daemon.py +124 -0
  32. mcp/test_server.py +197 -0
  33. mcp/tools/__init__.py +14 -0
  34. mcp/tools/_util.py +244 -0
  35. mcp/tools/deckard_guide.py +32 -0
  36. mcp/tools/doctor.py +208 -0
  37. mcp/tools/get_callers.py +60 -0
  38. mcp/tools/get_implementations.py +60 -0
  39. mcp/tools/index_file.py +75 -0
  40. mcp/tools/list_files.py +138 -0
  41. mcp/tools/read_file.py +48 -0
  42. mcp/tools/read_symbol.py +99 -0
  43. mcp/tools/registry.py +212 -0
  44. mcp/tools/repo_candidates.py +89 -0
  45. mcp/tools/rescan.py +46 -0
  46. mcp/tools/scan_once.py +54 -0
  47. mcp/tools/search.py +208 -0
  48. mcp/tools/search_api_endpoints.py +72 -0
  49. mcp/tools/search_symbols.py +63 -0
  50. mcp/tools/status.py +135 -0
  51. sari/__init__.py +1 -0
  52. sari/__main__.py +4 -0
  53. sari-0.0.1.dist-info/METADATA +521 -0
  54. sari-0.0.1.dist-info/RECORD +58 -0
  55. sari-0.0.1.dist-info/WHEEL +5 -0
  56. sari-0.0.1.dist-info/entry_points.txt +2 -0
  57. sari-0.0.1.dist-info/licenses/LICENSE +21 -0
  58. sari-0.0.1.dist-info/top_level.txt +4 -0
mcp/session.py ADDED
@@ -0,0 +1,234 @@
1
+ import json
2
+ import logging
3
+ import asyncio
4
+ import inspect
5
+ from typing import Dict, Any, Optional
6
+ from .registry import Registry, SharedState
7
+ from app.workspace import WorkspaceManager
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class Session:
    """
    Handles a single client connection.

    Parses Content-Length-framed JSON-RPC messages (LSP-style framing)
    and manages workspace binding via Registry, so connections for the
    same workspace share one server instance. JSONL (newline-delimited
    JSON) is explicitly rejected.
    """
    def __init__(self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter):
        self.reader = reader
        self.writer = writer
        # Workspace path this session is bound to; set during 'initialize'.
        self.workspace_root: Optional[str] = None
        # Per-workspace shared state (holds the server instance); None until bound.
        self.shared_state: Optional[SharedState] = None
        self.registry = Registry.get_instance()
        # Main-loop flag; cleared on 'exit', EOF, or unrecoverable framing errors.
        self.running = True

    async def handle_connection(self):
        """Read and dispatch framed messages until the client disconnects.

        Any framing violation (JSONL input, malformed header, missing or
        invalid Content-Length) is answered with a -32700 error and then
        terminates the session, since message boundaries can no longer be
        trusted. The finally block always releases the workspace binding
        and closes the writer.
        """
        try:
            while self.running:
                # Read Headers
                headers = {}
                line_count = 0
                while True:
                    line = await self.reader.readline()
                    if not line:
                        # EOF: client went away.
                        self.running = False
                        break

                    line_str = line.decode("utf-8").strip()
                    line_count += 1

                    if not line_str:
                        # Blank line terminates the header section.
                        break

                    # Protocol Check: First line must be Content-Length
                    if line_count == 1:
                        if line_str.startswith("{"):
                            logger.error("Received JSONL instead of HTTP-style framed message")
                            await self.send_error(None, -32700, "JSONL not supported. Use Content-Length framing.")
                            self.running = False
                            break

                        if not line_str.lower().startswith("content-length:"):
                            logger.error(f"First header must be Content-Length, got: {line_str!r}")
                            await self.send_error(None, -32700, "Invalid protocol framing: Content-Length header required first")
                            self.running = False
                            break

                    if ":" in line_str:
                        k, v = line_str.split(":", 1)
                        # Header names are case-insensitive; normalize to lowercase.
                        headers[k.strip().lower()] = v.strip()
                    else:
                        # Malformed header or missing Content-Length
                        logger.error(f"Malformed header line: {line_str!r}")
                        await self.send_error(None, -32700, "Invalid protocol framing")
                        self.running = False
                        break

                if not self.running:
                    break

                try:
                    content_length = int(headers.get("content-length", 0))
                except (ValueError, TypeError):
                    logger.error(f"Invalid Content-Length value: {headers.get('content-length')!r}")
                    await self.send_error(None, -32700, "Invalid Content-Length value")
                    self.running = False
                    break

                if content_length <= 0:
                    logger.error("Received message without Content-Length (JSONL is not supported)")
                    await self.send_error(None, -32700, "Content-Length header required (JSONL is not supported)")
                    # Since protocol framing is broken, we must terminate
                    self.running = False
                    break

                body = await self.reader.readexactly(content_length)
                if not body:
                    break

                try:
                    request_str = body.decode("utf-8")
                    request = json.loads(request_str)
                    await self.process_request(request)
                except json.JSONDecodeError:
                    logger.error(f"Invalid JSON received: {body[:100]!r}")
                    # Try to extract ID manually for better correlation if possible
                    msg_id = None
                    try:
                        # Simple regex for "id": 123 or "id": "abc"
                        import re
                        match = re.search(r'"id"\s*:\s*("(?:\\"|[^"])*"|\d+|null)', request_str)
                        if match:
                            msg_id = json.loads(match.group(1))
                    except Exception:
                        pass
                    await self.send_error(msg_id, -32700, "Parse error")
                except Exception as e:
                    logger.error(f"Error processing request: {e}", exc_info=True)
                    # We might have parsed the ID already if it's not a Parse error
                    msg_id = None
                    try:
                        msg_id = json.loads(body.decode("utf-8")).get("id")
                    except Exception:
                        pass
                    await self.send_error(msg_id, -32603, str(e))

        except (asyncio.IncompleteReadError, ConnectionResetError):
            logger.info("Connection closed by client")
        finally:
            self.cleanup()
            # writer.close() may return an awaitable on some stream shims;
            # await it only when it actually is one.
            try:
                res = self.writer.close()
                if inspect.isawaitable(res):
                    await res
            except Exception:
                pass
            try:
                await self.writer.wait_closed()
            except Exception:
                pass

    async def process_request(self, request: Dict[str, Any]):
        """Dispatch a single parsed JSON-RPC request.

        Lifecycle methods (initialize/initialized/shutdown/exit) are handled
        inline; all other methods are forwarded to the workspace-bound server,
        which runs in a thread-pool executor because it is synchronous.
        """
        method = request.get("method")
        params = request.get("params", {})
        msg_id = request.get("id")

        if method == "initialize":
            await self.handle_initialize(request)
        elif method == "initialized":
            # Just forward to server if bound
            if self.shared_state:
                # NOTE(review): asyncio.get_event_loop() inside a coroutine is
                # deprecated since Python 3.10; get_running_loop() is preferred.
                loop = asyncio.get_event_loop()
                await loop.run_in_executor(
                    None,
                    self.shared_state.server.handle_initialized,
                    params
                )
        elif method == "shutdown":
            # Respond to shutdown but keep connection open for exit
            response = {"jsonrpc": "2.0", "id": msg_id, "result": None}
            await self.send_json(response)
        elif method == "exit":
            self.running = False
        else:
            # Forward other requests to the bound server
            if not self.shared_state:
                await self.send_error(msg_id, -32002, "Server not initialized. Send 'initialize' first.")
                return

            # Execute in thread pool to not block async loop
            # Since LocalSearchMCPServer is synchronous
            loop = asyncio.get_event_loop()
            response = await loop.run_in_executor(
                None,
                self.shared_state.server.handle_request,
                request
            )

            # Notifications produce no response object; send only when present.
            if response:
                await self.send_json(response)

    async def handle_initialize(self, request: Dict[str, Any]):
        """Bind this session to a workspace and run server initialization.

        Resolves the workspace root from rootUri/rootPath (with a local
        fallback when both are absent), swaps registry bindings when the
        session re-initializes against a different workspace, and rolls
        the binding back if the server's initialize handler raises.
        """
        params = request.get("params", {})
        msg_id = request.get("id")

        root_uri = params.get("rootUri") or params.get("rootPath")
        if not root_uri:
            # Fallback for clients that omit rootUri/rootPath
            root_uri = WorkspaceManager.resolve_workspace_root()

        # Handle file:// prefix
        if root_uri.startswith("file://"):
            workspace_root = root_uri[7:]
        else:
            workspace_root = root_uri

        # If already bound to a different workspace, release it
        if self.workspace_root and self.workspace_root != workspace_root:
            self.registry.release(self.workspace_root)
            self.shared_state = None

        self.workspace_root = workspace_root
        self.shared_state = self.registry.get_or_create(self.workspace_root)

        # Delegate specific initialize logic to the server instance
        # We need to construct the result based on server's response
        # LocalSearchMCPServer.handle_initialize returns the result dict directly
        try:
            result = self.shared_state.server.handle_initialize(params)
            response = {
                "jsonrpc": "2.0",
                "id": msg_id,
                "result": result
            }
            await self.send_json(response)
        except Exception as e:
            # Rollback: release the workspace if initialization failed
            self.registry.release(self.workspace_root)
            self.workspace_root = None
            self.shared_state = None
            await self.send_error(msg_id, -32000, str(e))

    async def send_json(self, data: Dict[str, Any]):
        """Serialize *data* and write it with Content-Length framing."""
        body = json.dumps(data).encode("utf-8")
        header = f"Content-Length: {len(body)}\r\n\r\n".encode("ascii")
        res = self.writer.write(header + body)
        # write() may return an awaitable on non-asyncio writer shims.
        if inspect.isawaitable(res):
            await res
        await self.writer.drain()

    async def send_error(self, msg_id: Any, code: int, message: str):
        """Send a JSON-RPC error response with the given code and message."""
        response = {
            "jsonrpc": "2.0",
            "id": msg_id,
            "error": {
                "code": code,
                "message": message
            }
        }
        await self.send_json(response)

    def cleanup(self):
        """Release the registry binding for this session's workspace, if any."""
        if self.workspace_root:
            self.registry.release(self.workspace_root)
            self.workspace_root = None
            self.shared_state = None
mcp/telemetry.py ADDED
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Telemetry and logging for Local Search MCP Server.
4
+ """
5
+ import sys
6
+ import queue
7
+ import threading
8
+ import time
9
+ from pathlib import Path
10
+ from datetime import datetime
11
+ from typing import Optional
12
# Secret-redaction helper: prefer the real implementation from the indexer.
try:
    from app.indexer import _redact
except ImportError:
    # Fallback if imports fail (e.g. running script standalone without path);
    # usually 'app' is importable. NOTE: the identity fallback performs NO
    # redaction, so log output is written unscrubbed in that case.
    def _redact(t): return t
18
+
19
class TelemetryLogger:
    """Queue-backed logger and telemetry sink for the MCP server.

    Messages go to stderr immediately; when a log directory is configured,
    a background daemon thread also appends them to ``<log_dir>/sari.log``.
    When the backlog exceeds a fixed limit, new messages are dropped and
    counted rather than queued.
    """

    def __init__(self, log_dir: Optional[Path] = None):
        """Initialize telemetry logger.

        Args:
            log_dir: Directory for log files. If None, file logging is
                disabled entirely and only stderr output is produced.
        """
        self._stop_event = threading.Event()
        self._drop_count = 0
        self._backlog_limit = 1000
        self._queue: Optional[queue.Queue] = None
        self._writer_thread: Optional[threading.Thread] = None
        self.log_dir = Path(log_dir) if log_dir else None

        # Start the writer thread only when file logging is enabled.
        if self.log_dir:
            self._queue = queue.Queue()
            worker = threading.Thread(target=self._writer_loop, daemon=True)
            self._writer_thread = worker
            worker.start()

    def log_error(self, message: str) -> None:
        """Emit an error line to stderr and queue it for the log file."""
        print(f"[sari] ERROR: {message}", file=sys.stderr, flush=True)
        self._enqueue(f"[ERROR] {message}")

    def log_info(self, message: str) -> None:
        """Emit an info line to stderr and queue it for the log file."""
        print(f"[sari] INFO: {message}", file=sys.stderr, flush=True)
        self._enqueue(f"[INFO] {message}")

    def log_telemetry(self, message: str) -> None:
        """Queue a raw telemetry line for the log file only.

        Args:
            message: Telemetry message to log.
        """
        self._enqueue(message)

    def _enqueue(self, message: str) -> None:
        # No-op when file logging is disabled.
        if self._queue is None:
            return
        # Shed load instead of letting the backlog grow without bound.
        if self._queue.qsize() > self._backlog_limit:
            self._drop_count += 1
        else:
            self._queue.put(message)

    def _writer_loop(self) -> None:
        # Drain messages until stop is requested AND the backlog is empty.
        if self.log_dir is None:
            return
        while True:
            backlog_left = self._queue is not None and not self._queue.empty()
            if self._stop_event.is_set() and not backlog_left:
                break
            if self._queue is None:
                msg = None
            else:
                try:
                    msg = self._queue.get(timeout=0.2)
                except queue.Empty:
                    continue
            if msg is None:
                continue
            self._write_to_file(msg)
            if self._queue is not None:
                self._queue.task_done()

    def _write_to_file(self, message: str) -> None:
        """Append *message*, timestamped and redacted, to sari.log."""
        if not self.log_dir:
            return

        # Scrub secrets before anything touches disk.
        sanitized = _redact(message)

        try:
            self.log_dir.mkdir(parents=True, exist_ok=True)
            target = self.log_dir / "sari.log"

            stamp = datetime.now().astimezone().isoformat()
            with open(target, "a", encoding="utf-8") as fh:
                fh.write(f"[{stamp}] {sanitized}\n")
        except Exception as e:
            print(f"[sari] ERROR: Failed to log to file: {e}", file=sys.stderr, flush=True)

    def stop(self, timeout: float = 2.0) -> None:
        """Signal the writer thread to stop and wait up to *timeout* seconds."""
        if self._queue is None or self._writer_thread is None:
            return
        self._stop_event.set()
        self._writer_thread.join(timeout=timeout)

    def get_queue_depth(self) -> int:
        """Number of queued-but-unwritten messages (0 when disabled)."""
        return self._queue.qsize() if self._queue is not None else 0

    def get_drop_count(self) -> int:
        """Number of messages dropped due to backlog pressure."""
        return self._drop_count
mcp/test_cli.py ADDED
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unit tests for Sari CLI HTTP helpers.
4
+ """
5
+ import io
6
+ import json
7
+ import os
8
+ import sys
9
+ import tempfile
10
+ from contextlib import redirect_stdout
11
+ from pathlib import Path
12
+ from unittest.mock import patch
13
+
14
+ SCRIPT_DIR = Path(__file__).parent
15
+ sys.path.insert(0, str(SCRIPT_DIR))
16
+
17
+ from cli import _get_http_host_port, cmd_search, cmd_status
18
+
19
+
20
+ def _set_env(key: str, value: str):
21
+ if value is None:
22
+ os.environ.pop(key, None)
23
+ else:
24
+ os.environ[key] = value
25
+
26
+
27
def test_get_http_host_port_prefers_server_json():
    """A server.json under the workspace data dir should supply host/port."""
    with tempfile.TemporaryDirectory() as workspace:
        state_dir = Path(workspace) / ".codex" / "tools" / "sari" / "data"
        state_dir.mkdir(parents=True, exist_ok=True)
        (state_dir / "server.json").write_text(
            json.dumps({"host": "127.0.0.1", "port": 47788})
        )

        saved = os.environ.get("DECKARD_WORKSPACE_ROOT")
        _set_env("DECKARD_WORKSPACE_ROOT", workspace)
        try:
            host, port = _get_http_host_port()
            assert (host, port) == ("127.0.0.1", 47788)
        finally:
            # Restore the caller's environment regardless of assertion outcome.
            _set_env("DECKARD_WORKSPACE_ROOT", saved)
42
+
43
+
44
def test_cmd_status_prints_json():
    """cmd_status should hit /status once and pretty-print the payload."""
    with patch("cli._request_http", return_value={"ok": True}) as fake_http:
        captured = io.StringIO()
        with redirect_stdout(captured):
            exit_code = cmd_status(None)
        assert exit_code == 0
        fake_http.assert_called_once_with("/status", {})
        printed = captured.getvalue().strip()
        assert printed == json.dumps({"ok": True}, ensure_ascii=False, indent=2)
53
+
54
+
55
def test_cmd_search_prints_json():
    """cmd_search should forward query/repo/limit and print the response."""
    args = type("Args", (), {"query": "AuthService", "repo": "demo", "limit": 7})
    payload = {"ok": True, "q": "AuthService"}
    with patch("cli._request_http", return_value=payload) as fake_http:
        captured = io.StringIO()
        with redirect_stdout(captured):
            exit_code = cmd_search(args)
        assert exit_code == 0
        fake_http.assert_called_once_with(
            "/search", {"q": "AuthService", "limit": 7, "repo": "demo"}
        )
        printed = captured.getvalue().strip()
        assert printed == json.dumps(payload, ensure_ascii=False, indent=2)
65
+
66
+
67
def run_tests():
    """Run every CLI test, print a summary, and return True when all pass.

    Fix: the original re-raised inside the except handler, which aborted the
    run on the first failure — the ``failed`` counter and the summary line
    were unreachable dead code. Failures are now reported (with traceback)
    and the remaining tests still run, so the summary is accurate.
    """
    tests = [
        test_get_http_host_port_prefers_server_json,
        test_cmd_status_prints_json,
        test_cmd_search_prints_json,
    ]
    passed = 0
    failed = 0
    for test in tests:
        try:
            test()
            print(f"✓ {test.__name__}")
            passed += 1
        except Exception:
            failed += 1
            print(f"✗ {test.__name__}")
            # Local import keeps the module's top-level imports unchanged.
            import traceback
            traceback.print_exc()
    print(f"\n{passed} passed, {failed} failed")
    return failed == 0
85
+
86
+
87
# Allow running this test module directly as a script; exit code reflects
# whether all tests passed.
if __name__ == "__main__":
    success = run_tests()
    sys.exit(0 if success else 1)
mcp/test_daemon.py ADDED
@@ -0,0 +1,124 @@
1
+ import asyncio
2
+ import json
3
+ import socket
4
+ import subprocess
5
+ import sys
6
+ import time
7
+ import os
8
+ import signal
9
+ from pathlib import Path
10
+
11
# TCP endpoint the daemon under test is told to listen on (passed to the
# subprocess via the DECKARD_DAEMON_PORT environment variable below).
DAEMON_PORT = 47780
DAEMON_HOST = "127.0.0.1"
13
+
14
def wait_for_port(port, timeout=5):
    """Poll until a TCP connection to DAEMON_HOST:port succeeds.

    Returns True once the port accepts a connection, False when *timeout*
    seconds elapse first.

    Fixes: uses time.monotonic() for the deadline (immune to wall-clock
    adjustments during the wait) and catches only OSError, since
    ConnectionRefusedError is an OSError subclass — the original two-type
    tuple was redundant.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # Context manager ensures the probe socket is closed either way.
            with socket.create_connection((DAEMON_HOST, port), timeout=0.1):
                return True
        except OSError:
            time.sleep(0.1)
    return False
23
+
24
def send_rpc(sock, method, params=None, msg_id=1):
    """Send one Content-Length-framed JSON-RPC request on *sock* and return
    the parsed reply.

    Args:
        sock: Connected socket speaking LSP-style framing.
        method: JSON-RPC method name.
        params: Optional params dict (defaults to {}).
        msg_id: JSON-RPC request id.

    Returns:
        The decoded response dict, or None when the reply has no body.

    Fix: the buffered reader from sock.makefile('rb') was never closed in
    the original; it is now managed with a context manager so the file
    object is released deterministically (the underlying socket stays open).
    """
    req = {
        "jsonrpc": "2.0",
        "method": method,
        "id": msg_id,
        "params": params or {}
    }
    body = json.dumps(req).encode('utf-8')
    header = f"Content-Length: {len(body)}\r\n\r\n".encode('ascii')
    sock.sendall(header + body)

    # Read the framed response: headers up to the blank line, then the body.
    with sock.makefile('rb') as f:
        headers = {}
        while True:
            line = f.readline()
            if not line or line == b"\r\n":
                break
            line_str = line.decode('utf-8').strip()
            if ":" in line_str:
                k, v = line_str.split(":", 1)
                # Header names are case-insensitive; normalize to lowercase.
                headers[k.strip().lower()] = v.strip()

        content_length = int(headers.get("content-length", 0))
        if content_length > 0:
            return json.loads(f.read(content_length).decode('utf-8'))
    return None
52
+
53
def test_daemon():
    """End-to-end daemon test.

    Starts the daemon as a subprocess, connects three clients — two sharing
    one workspace root and one on a different root — verifies that each
    'initialize' returns a result and that 'tools/list' yields a non-empty
    tool set, then disconnects. The daemon subprocess is always terminated
    in the finally block.
    """
    print("Starting daemon...")
    env = os.environ.copy()
    # Tell the daemon which port to bind (see DAEMON_PORT above).
    env["DECKARD_DAEMON_PORT"] = str(DAEMON_PORT)

    # Run as module from repo root
    repo_root = Path(__file__).parent.parent

    proc = subprocess.Popen(
        [sys.executable, "-m", "mcp.daemon"],
        cwd=str(repo_root),
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )

    try:
        if not wait_for_port(DAEMON_PORT):
            print("Daemon failed to start")
            print(proc.stderr.read())
            sys.exit(1)

        print("Daemon started.")

        # Client 1: WS1
        s1 = socket.create_connection((DAEMON_HOST, DAEMON_PORT))
        print("Client 1 connected")
        res1 = send_rpc(s1, "initialize", {"rootUri": "file:///tmp/test_ws1"})
        print(f"Client 1 init result: {res1}")
        assert "result" in res1

        # Client 2: WS1 (Should share indexer)
        s2 = socket.create_connection((DAEMON_HOST, DAEMON_PORT))
        print("Client 2 connected")
        res2 = send_rpc(s2, "initialize", {"rootUri": "file:///tmp/test_ws1"})
        print(f"Client 2 init result: {res2}")
        assert "result" in res2

        # Client 3: WS2 (New indexer)
        s3 = socket.create_connection((DAEMON_HOST, DAEMON_PORT))
        print("Client 3 connected")
        res3 = send_rpc(s3, "initialize", {"rootUri": "file:///tmp/test_ws2"})
        print(f"Client 3 init result: {res3}")
        assert "result" in res3

        # Verify functionality - e.g. tools/list
        res_list = send_rpc(s1, "tools/list", {}, msg_id=2)
        assert len(res_list["result"]["tools"]) > 0
        print("Client 1 tools list OK")

        # Clean up
        s1.close()
        s2.close()
        s3.close()
        print("Clients disconnected")

        time.sleep(1)  # Allow daemon to log disconnects

    finally:
        print("Stopping daemon...")
        proc.terminate()
        try:
            outs, errs = proc.communicate(timeout=2)
            print("Daemon stdout:", outs)
            print("Daemon stderr:", errs)
        except subprocess.TimeoutExpired:
            # Escalate to SIGKILL when the daemon ignores terminate().
            proc.kill()
            print("Daemon killed")
123
# Run the end-to-end daemon test when executed directly as a script.
if __name__ == "__main__":
    test_daemon()