sari-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. app/__init__.py +1 -0
  2. app/config.py +240 -0
  3. app/db.py +932 -0
  4. app/dedup_queue.py +77 -0
  5. app/engine_registry.py +56 -0
  6. app/engine_runtime.py +472 -0
  7. app/http_server.py +204 -0
  8. app/indexer.py +1532 -0
  9. app/main.py +147 -0
  10. app/models.py +39 -0
  11. app/queue_pipeline.py +65 -0
  12. app/ranking.py +144 -0
  13. app/registry.py +172 -0
  14. app/search_engine.py +572 -0
  15. app/watcher.py +124 -0
  16. app/workspace.py +286 -0
  17. deckard/__init__.py +3 -0
  18. deckard/__main__.py +4 -0
  19. deckard/main.py +345 -0
  20. deckard/version.py +1 -0
  21. mcp/__init__.py +1 -0
  22. mcp/__main__.py +19 -0
  23. mcp/cli.py +485 -0
  24. mcp/daemon.py +149 -0
  25. mcp/proxy.py +304 -0
  26. mcp/registry.py +218 -0
  27. mcp/server.py +519 -0
  28. mcp/session.py +234 -0
  29. mcp/telemetry.py +112 -0
  30. mcp/test_cli.py +89 -0
  31. mcp/test_daemon.py +124 -0
  32. mcp/test_server.py +197 -0
  33. mcp/tools/__init__.py +14 -0
  34. mcp/tools/_util.py +244 -0
  35. mcp/tools/deckard_guide.py +32 -0
  36. mcp/tools/doctor.py +208 -0
  37. mcp/tools/get_callers.py +60 -0
  38. mcp/tools/get_implementations.py +60 -0
  39. mcp/tools/index_file.py +75 -0
  40. mcp/tools/list_files.py +138 -0
  41. mcp/tools/read_file.py +48 -0
  42. mcp/tools/read_symbol.py +99 -0
  43. mcp/tools/registry.py +212 -0
  44. mcp/tools/repo_candidates.py +89 -0
  45. mcp/tools/rescan.py +46 -0
  46. mcp/tools/scan_once.py +54 -0
  47. mcp/tools/search.py +208 -0
  48. mcp/tools/search_api_endpoints.py +72 -0
  49. mcp/tools/search_symbols.py +63 -0
  50. mcp/tools/status.py +135 -0
  51. sari/__init__.py +1 -0
  52. sari/__main__.py +4 -0
  53. sari-0.0.1.dist-info/METADATA +521 -0
  54. sari-0.0.1.dist-info/RECORD +58 -0
  55. sari-0.0.1.dist-info/WHEEL +5 -0
  56. sari-0.0.1.dist-info/entry_points.txt +2 -0
  57. sari-0.0.1.dist-info/licenses/LICENSE +21 -0
  58. sari-0.0.1.dist-info/top_level.txt +4 -0
mcp/proxy.py ADDED
@@ -0,0 +1,304 @@
+ import sys
+ import json
+ import socket
+ import threading
+ import os
+ import time
+ import subprocess
+ import logging
+ import sys
+ import tempfile
+ from pathlib import Path
+
+ # Add project root to sys.path for absolute imports
+ SCRIPT_DIR = Path(__file__).parent
+ REPO_ROOT = SCRIPT_DIR.parent
+ if str(REPO_ROOT) not in sys.path:
+     sys.path.insert(0, str(REPO_ROOT))
+
+ from mcp.telemetry import TelemetryLogger
+ from app.workspace import WorkspaceManager
+
+ try:
+     import fcntl  # type: ignore
+ except Exception:
+     fcntl = None
+
+ # Configure logging to stderr so it doesn't interfere with MCP STDIO
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+     stream=sys.stderr
+ )
+ logger = logging.getLogger("mcp-proxy")
+ telemetry = TelemetryLogger(WorkspaceManager.get_global_log_dir())
+
+
+ def _log_info(message: str) -> None:
+     logger.info(message)
+     try:
+         telemetry.log_info(message)
+     except Exception:
+         pass
+
+
+ def _log_error(message: str) -> None:
+     logger.error(message)
+     try:
+         telemetry.log_error(message)
+     except Exception:
+         pass
+
+ DEFAULT_HOST = "127.0.0.1"
+ DEFAULT_PORT = 47779
+ _HEADER_SEP = b"\r\n\r\n"
+ _MODE_FRAMED = "framed"
+ _MODE_JSONL = "jsonl"
+
+ def _lock_file(lock_file) -> None:
+     if fcntl is not None:
+         fcntl.flock(lock_file, fcntl.LOCK_EX)
+         return
+     try:
+         import msvcrt
+         lock_file.seek(0)
+         msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
+     except Exception:
+         pass
+
+ def _unlock_file(lock_file) -> None:
+     if fcntl is not None:
+         fcntl.flock(lock_file, fcntl.LOCK_UN)
+         return
+     try:
+         import msvcrt
+         lock_file.seek(0)
+         msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
+     except Exception:
+         pass
+
+ def start_daemon_if_needed(host, port):
+     """Checks if daemon is running, if not starts it."""
+     try:
+         with socket.create_connection((host, port), timeout=0.1):
+             return True
+     except (ConnectionRefusedError, OSError):
+         pass
+
+     lock_path = os.path.join(tempfile.gettempdir(), f"sari-daemon-{host}-{port}.lock")
+     with open(lock_path, "w") as lock_file:
+         try:
+             # Acquire exclusive lock (blocking)
+             _lock_file(lock_file)
+
+             # Double-check if daemon started while waiting for lock
+             try:
+                 with socket.create_connection((host, port), timeout=0.1):
+                     return True
+             except (ConnectionRefusedError, OSError):
+                 pass
+
+             _log_info("Daemon not running, starting...")
+
+             # Assume we are in mcp/proxy.py, so parent of parent is repo root
+             repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+             env = os.environ.copy()
+             # Do not infer workspace root from marker; rely on explicit roots/env/config.
+
+             # Detach process
+             subprocess.Popen(
+                 [sys.executable, "-m", "mcp.daemon"],
+                 cwd=repo_root,
+                 env=env,
+                 start_new_session=True,
+                 stdout=subprocess.DEVNULL,
+                 stderr=subprocess.DEVNULL
+             )
+
+             # Wait for it to come up
+             for _ in range(20):
+                 try:
+                     with socket.create_connection((host, port), timeout=0.1):
+                         _log_info("Daemon started successfully.")
+                         return True
+                 except (ConnectionRefusedError, OSError):
+                     time.sleep(0.1)
+
+             _log_error("Failed to start daemon.")
+             return False
+
+         finally:
+             _unlock_file(lock_file)
+
+ def forward_socket_to_stdout(sock, mode_holder):
+     try:
+         f = sock.makefile("rb")
+         while True:
+             # Read Headers
+             headers = {}
+             while True:
+                 line = f.readline()
+                 if not line:
+                     break
+                 line_str = line.decode("utf-8").strip()
+                 if not line_str:
+                     break
+                 if ":" in line_str:
+                     k, v = line_str.split(":", 1)
+                     headers[k.strip().lower()] = v.strip()
+
+             if not headers and not line:
+                 break
+
+             content_length = int(headers.get("content-length", 0))
+             if content_length <= 0:
+                 continue
+
+             body = f.read(content_length)
+             if not body:
+                 break
+
+             mode = mode_holder.get("mode") or _MODE_FRAMED
+             if mode == _MODE_JSONL:
+                 sys.stdout.buffer.write(body + b"\n")
+                 sys.stdout.buffer.flush()
+             else:
+                 header = f"Content-Length: {len(body)}\r\n\r\n".encode("ascii")
+                 sys.stdout.buffer.write(header + body)
+                 sys.stdout.buffer.flush()
+     except Exception as e:
+         _log_error(f"Error forwarding socket to stdout: {e}")
+     finally:
+         # If socket closes, we should probably exit
+         os._exit(0)
+
+ def _read_mcp_message(stdin):
+     """Read one MCP framed message (Content-Length) or JSONL fallback."""
+     line = stdin.readline()
+     if not line:
+         return None
+     while line in (b"\n", b"\r\n"):
+         line = stdin.readline()
+         if not line:
+             return None
+
+     if line.lstrip().startswith((b"{", b"[")):
+         return line.rstrip(b"\r\n"), _MODE_JSONL
+
+     headers = [line]
+     while True:
+         h = stdin.readline()
+         if not h:
+             return None
+         if h in (b"\n", b"\r\n"):
+             break
+         headers.append(h)
+
+     content_length = None
+     for h in headers:
+         parts = h.decode("utf-8", errors="ignore").split(":", 1)
+         if len(parts) != 2:
+             continue
+         key = parts[0].strip().lower()
+         if key == "content-length":
+             try:
+                 content_length = int(parts[1].strip())
+             except ValueError:
+                 pass
+             break
+
+     if content_length is None:
+         return None
+
+     body = stdin.read(content_length)
+     if not body:
+         return None
+     return body, _MODE_FRAMED
+
+
+ def forward_stdin_to_socket(sock, mode_holder):
+     try:
+         stdin = sys.stdin.buffer
+         while True:
+             res = _read_mcp_message(stdin)
+             if res is None:
+                 break
+             msg, mode = res
+             # Inject rootUri for initialize when client omits it (per-connection workspace)
+             try:
+                 req = json.loads(msg.decode("utf-8"))
+
+                 def _inject(obj):
+                     if not isinstance(obj, dict) or obj.get("method") != "initialize":
+                         return obj, False
+                     params = obj.get("params") or {}
+                     if params.get("rootUri") or params.get("rootPath"):
+                         return obj, False
+                     ws = os.environ.get("DECKARD_WORKSPACE_ROOT") or os.environ.get("LOCAL_SEARCH_WORKSPACE_ROOT")
+                     if not ws:
+                         return obj, False
+                     params = dict(params)
+                     params["rootUri"] = f"file://{ws}"
+                     obj = dict(obj)
+                     obj["params"] = params
+                     _log_info(f"Injected rootUri for initialize: {ws}")
+                     return obj, True
+
+                 injected = False
+                 if isinstance(req, dict):
+                     req, injected = _inject(req)
+                 elif isinstance(req, list):
+                     new_list = []
+                     for item in req:
+                         item2, did = _inject(item)
+                         injected = injected or did
+                         new_list.append(item2)
+                     req = new_list
+
+                 if injected:
+                     msg = json.dumps(req).encode("utf-8")
+             except Exception:
+                 pass
+             if mode_holder.get("mode") is None:
+                 mode_holder["mode"] = mode
+
+             header = f"Content-Length: {len(msg)}\r\n\r\n".encode("ascii")
+             sock.sendall(header + msg)
+     except Exception as e:
+         _log_error(f"Error forwarding stdin to socket: {e}")
+         sock.close()
+         sys.exit(1)
+
+ def main():
+     # Log startup context for diagnostics
+     _log_info(
+         "Proxy startup: cwd=%s argv=%s env.DECKARD_WORKSPACE_ROOT=%s env.LOCAL_SEARCH_WORKSPACE_ROOT=%s"
+         % (
+             os.getcwd(),
+             sys.argv,
+             os.environ.get("DECKARD_WORKSPACE_ROOT"),
+             os.environ.get("LOCAL_SEARCH_WORKSPACE_ROOT"),
+         )
+     )
+     host = os.environ.get("DECKARD_DAEMON_HOST", DEFAULT_HOST)
+     port = int(os.environ.get("DECKARD_DAEMON_PORT", DEFAULT_PORT))
+
+     if not start_daemon_if_needed(host, port):
+         sys.exit(1)
+
+     try:
+         sock = socket.create_connection((host, port))
+     except Exception as e:
+         _log_error(f"Could not connect to daemon: {e}")
+         sys.exit(1)
+
+     # Start threads for bidirectional forwarding
+     mode_holder = {"mode": None}
+     t1 = threading.Thread(target=forward_socket_to_stdout, args=(sock, mode_holder), daemon=True)
+     t1.start()
+
+     forward_stdin_to_socket(sock, mode_holder)
+
+ if __name__ == "__main__":
+     main()
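
Note: the proxy above bridges an MCP client on stdio (either Content-Length-framed messages or newline-delimited JSON) to the TCP daemon, which always uses the framed form. The following is a minimal sketch, not part of the package, of what that framing looks like if a client spoke to the daemon socket directly; the host/port are the proxy's defaults above, and the initialize request body is a hypothetical example.

import json
import socket

# Assumed endpoint: the proxy's DEFAULT_HOST / DEFAULT_PORT.
HOST, PORT = "127.0.0.1", 47779

def send_framed(sock, payload: dict) -> None:
    # Same framing forward_stdin_to_socket produces: Content-Length header + JSON body.
    body = json.dumps(payload).encode("utf-8")
    sock.sendall(f"Content-Length: {len(body)}\r\n\r\n".encode("ascii") + body)

def recv_framed(sock):
    # Mirror of the header parsing in forward_socket_to_stdout: read headers
    # until a blank line, then read exactly Content-Length bytes of JSON.
    f = sock.makefile("rb")
    content_length = 0
    while True:
        line = f.readline()
        if not line or line in (b"\r\n", b"\n"):
            break
        key, _, value = line.decode("utf-8").partition(":")
        if key.strip().lower() == "content-length":
            content_length = int(value.strip())
    body = f.read(content_length)
    return json.loads(body) if body else None

if __name__ == "__main__":
    with socket.create_connection((HOST, PORT)) as sock:
        # Hypothetical request; real params depend on the MCP client.
        send_framed(sock, {"jsonrpc": "2.0", "id": 1, "method": "initialize", "params": {}})
        print(recv_framed(sock))
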
mcp/registry.py ADDED
@@ -0,0 +1,218 @@
+ #!/usr/bin/env python3
+ """
+ Workspace Registry for Sari Daemon.
+
+ Manages shared state (server instance) per workspace with refcount-based lifecycle.
+ When all clients disconnect from a workspace (refcount=0), resources are cleaned up.
+ """
+ import logging
+ import threading
+ from pathlib import Path
+ from typing import Dict, Optional
+
+ from app.http_server import serve_forever
+ from app.registry import ServerRegistry
+
+ logger = logging.getLogger("sari.registry")
+
+
+ class SharedState:
+     """Holds the server instance and reference count for a workspace.
+
+     Multiple clients connected to the same workspace share this state,
+     avoiding duplicate indexing and DB connections.
+
+     Also manages the dedicated HTTP server for this workspace.
+     """
+
+     def __init__(self, workspace_root: str):
+         from .server import LocalSearchMCPServer
+         self.workspace_root = workspace_root
+         self.server = LocalSearchMCPServer(workspace_root)
+         self.ref_count = 0
+         self.lock = threading.Lock()
+
+         # HTTP Server State
+         self.httpd = None
+         self.http_port = 0
+         self.http_thread = None
+
+         # Initialize Core Server (Loads Config)
+         try:
+             self.server._ensure_initialized()
+             cfg = self.server.cfg
+
+             # Start HTTP Server
+             self.httpd, self.http_port = serve_forever(
+                 host=cfg.server_host,
+                 port=cfg.server_port,
+                 db=self.server.db,
+                 indexer=self.server.indexer,
+                 version=self.server.SERVER_VERSION,
+                 workspace_root=self.workspace_root
+             )
+             logger.info(f"Started HTTP Server for {workspace_root} on port {self.http_port}")
+
+         except Exception as e:
+             logger.error(f"Failed to start server components for {workspace_root}: {e}")
+             # We don't raise here to allow partial functionality (MCP might work without HTTP?)
+             # But usually if init fails, MCP fails too.
+             pass
+
+         logger.info(f"Created SharedState for workspace: {workspace_root}")
+
+     def acquire(self) -> int:
+         """Increment refcount (client connected)."""
+         with self.lock:
+             self.ref_count += 1
+             logger.debug(f"Acquired {self.workspace_root} (refcount={self.ref_count})")
+             return self.ref_count
+
+     def release(self) -> int:
+         """Decrement refcount (client disconnected)."""
+         with self.lock:
+             self.ref_count -= 1
+             logger.debug(f"Released {self.workspace_root} (refcount={self.ref_count})")
+             return self.ref_count
+
+     def shutdown(self) -> None:
+         """Stop indexer, close DB, and shutdown HTTP server."""
+         logger.info(f"Shutting down SharedState for {self.workspace_root}")
+
+         # Unregister from Global Registry
+         try:
+             ServerRegistry().unregister(self.workspace_root)
+         except Exception as e:
+             logger.error(f"Failed to unregister workspace: {e}")
+
+         # Shutdown HTTP Server
+         if self.httpd:
+             logger.info("Shutting down HTTP server...")
+             self.httpd.shutdown()
+             self.httpd.server_close()
+
+         self.server.shutdown()
+
+
+ class Registry:
+     """Singleton registry to manage shared server instances.
+
+     Provides refcount-based lifecycle management:
+     - get_or_create(): Get or create shared state, refcount++
+     - release(): refcount--, cleanup if refcount==0
+     """
+     _instance: Optional["Registry"] = None
+     _lock = threading.Lock()
+
+     def __init__(self):
+         self._workspaces: Dict[str, SharedState] = {}
+         self._registry_lock = threading.Lock()
+
+     @classmethod
+     def get_instance(cls) -> "Registry":
+         """Get the singleton Registry instance."""
+         with cls._lock:
+             if cls._instance is None:
+                 cls._instance = Registry()
+                 logger.info("Registry singleton created")
+             return cls._instance
+
+     @classmethod
+     def reset_instance(cls) -> None:
+         """Reset singleton for testing purposes."""
+         with cls._lock:
+             if cls._instance is not None:
+                 cls._instance.shutdown_all()
+                 cls._instance = None
+
+     def get_or_create(self, workspace_root: str) -> SharedState:
+         """Get existing or create new SharedState for a workspace.
+
+         Automatically increments refcount.
+
+         Args:
+             workspace_root: Absolute path to workspace root
+
+         Returns:
+             SharedState for the workspace
+         """
+         resolved_root = str(Path(workspace_root).resolve())
+
+         with self._registry_lock:
+             if resolved_root not in self._workspaces:
+                 self._workspaces[resolved_root] = SharedState(resolved_root)
+                 logger.info(f"Registered new workspace: {resolved_root}")
+
+             state = self._workspaces[resolved_root]
+             state.acquire()
+             return state
+
+     def release(self, workspace_root: str) -> None:
+         """Release SharedState for a workspace.
+
+         Decrements refcount. If refcount reaches 0, cleans up resources.
+
+         Args:
+             workspace_root: Absolute path to workspace root
+         """
+         resolved_root = str(Path(workspace_root).resolve())
+
+         with self._registry_lock:
+             if resolved_root not in self._workspaces:
+                 logger.warning(f"Attempted to release unknown workspace: {resolved_root}")
+                 return
+
+             state = self._workspaces[resolved_root]
+             remaining = state.release()
+
+             if remaining <= 0:
+                 state.shutdown()
+                 del self._workspaces[resolved_root]
+                 logger.info(f"Unregistered workspace: {resolved_root}")
+
+     def get(self, workspace_root: str) -> Optional[SharedState]:
+         """Get SharedState without modifying refcount.
+
+         Args:
+             workspace_root: Absolute path to workspace root
+
+         Returns:
+             SharedState if exists, None otherwise
+         """
+         resolved_root = str(Path(workspace_root).resolve())
+         with self._registry_lock:
+             return self._workspaces.get(resolved_root)
+
+     def list_workspaces(self) -> Dict[str, int]:
+         """List all active workspaces with their refcounts.
+
+         Returns:
+             Dict mapping workspace_root to refcount
+         """
+         with self._registry_lock:
+             return {ws: state.ref_count for ws, state in self._workspaces.items()}
+
+     def active_count(self) -> int:
+         """Get number of active workspaces.
+
+         Returns:
+             Number of workspaces with refcount > 0
+         """
+         with self._registry_lock:
+             return len(self._workspaces)
+
+     def shutdown_all(self) -> None:
+         """Shutdown all workspaces (for daemon stop)."""
+         with self._registry_lock:
+             workspace_count = len(self._workspaces)
+             for workspace_root, state in list(self._workspaces.items()):
+                 logger.info(f"Shutting down workspace: {workspace_root}")
+                 state.shutdown()
+             self._workspaces.clear()
+             logger.info(f"Registry shutdown complete ({workspace_count} workspaces)")
+
+
+ # Convenience function for getting the registry
+ def get_registry() -> Registry:
+     """Get the global Registry singleton."""
+     return Registry.get_instance()
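
Note: the registry's contract is that each connected client acquires its workspace once and releases it exactly once; when the last client releases, the workspace's HTTP server, indexer, and database are torn down. Below is a minimal sketch of that lifecycle, assuming the package is importable; the workspace path and the handler body are placeholders, not the daemon's actual connection handling in mcp/daemon.py.

from mcp.registry import get_registry

def handle_client_connection(workspace_root: str) -> None:
    registry = get_registry()
    # First client for this workspace creates the SharedState (MCP server plus
    # per-workspace HTTP server); later clients reuse it and bump the refcount.
    state = registry.get_or_create(workspace_root)
    try:
        server = state.server  # shared LocalSearchMCPServer for this workspace
        # ... serve MCP requests for this client using `server` ...
        pass
    finally:
        # Releasing the last reference shuts down the HTTP server, indexer,
        # and DB for the workspace and removes it from the registry.
        registry.release(workspace_root)
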