sari 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +1 -0
- app/config.py +240 -0
- app/db.py +932 -0
- app/dedup_queue.py +77 -0
- app/engine_registry.py +56 -0
- app/engine_runtime.py +472 -0
- app/http_server.py +204 -0
- app/indexer.py +1532 -0
- app/main.py +147 -0
- app/models.py +39 -0
- app/queue_pipeline.py +65 -0
- app/ranking.py +144 -0
- app/registry.py +172 -0
- app/search_engine.py +572 -0
- app/watcher.py +124 -0
- app/workspace.py +286 -0
- deckard/__init__.py +3 -0
- deckard/__main__.py +4 -0
- deckard/main.py +345 -0
- deckard/version.py +1 -0
- mcp/__init__.py +1 -0
- mcp/__main__.py +19 -0
- mcp/cli.py +485 -0
- mcp/daemon.py +149 -0
- mcp/proxy.py +304 -0
- mcp/registry.py +218 -0
- mcp/server.py +519 -0
- mcp/session.py +234 -0
- mcp/telemetry.py +112 -0
- mcp/test_cli.py +89 -0
- mcp/test_daemon.py +124 -0
- mcp/test_server.py +197 -0
- mcp/tools/__init__.py +14 -0
- mcp/tools/_util.py +244 -0
- mcp/tools/deckard_guide.py +32 -0
- mcp/tools/doctor.py +208 -0
- mcp/tools/get_callers.py +60 -0
- mcp/tools/get_implementations.py +60 -0
- mcp/tools/index_file.py +75 -0
- mcp/tools/list_files.py +138 -0
- mcp/tools/read_file.py +48 -0
- mcp/tools/read_symbol.py +99 -0
- mcp/tools/registry.py +212 -0
- mcp/tools/repo_candidates.py +89 -0
- mcp/tools/rescan.py +46 -0
- mcp/tools/scan_once.py +54 -0
- mcp/tools/search.py +208 -0
- mcp/tools/search_api_endpoints.py +72 -0
- mcp/tools/search_symbols.py +63 -0
- mcp/tools/status.py +135 -0
- sari/__init__.py +1 -0
- sari/__main__.py +4 -0
- sari-0.0.1.dist-info/METADATA +521 -0
- sari-0.0.1.dist-info/RECORD +58 -0
- sari-0.0.1.dist-info/WHEEL +5 -0
- sari-0.0.1.dist-info/entry_points.txt +2 -0
- sari-0.0.1.dist-info/licenses/LICENSE +21 -0
- sari-0.0.1.dist-info/top_level.txt +4 -0
app/main.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import signal
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
import ipaddress
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
|
|
10
|
+
# Support both `python3 app/main.py` (script mode) and package mode.
|
|
11
|
+
try:
|
|
12
|
+
from .config import Config, resolve_config_path # type: ignore
|
|
13
|
+
from . import config as config_mod # type: ignore
|
|
14
|
+
from .db import LocalSearchDB # type: ignore
|
|
15
|
+
from .http_server import serve_forever # type: ignore
|
|
16
|
+
from .indexer import Indexer # type: ignore
|
|
17
|
+
from .workspace import WorkspaceManager # type: ignore
|
|
18
|
+
except ImportError: # script mode
|
|
19
|
+
from config import Config, resolve_config_path # type: ignore
|
|
20
|
+
import config as config_mod # type: ignore
|
|
21
|
+
from db import LocalSearchDB # type: ignore
|
|
22
|
+
from http_server import serve_forever # type: ignore
|
|
23
|
+
from indexer import Indexer # type: ignore
|
|
24
|
+
from workspace import WorkspaceManager # type: ignore
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _repo_root() -> str:
|
|
28
|
+
# Fallback to current working directory if not running from a nested structure
|
|
29
|
+
return str(Path.cwd())
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def main() -> int:
    """Run the sari daemon: load config, enforce loopback binding, start the
    HTTP API plus the background indexer, then block until SIGINT/SIGTERM.

    Returns:
        0 on clean shutdown.

    Raises:
        SystemExit: when the configured host is not loopback and the
            LOCAL_SEARCH_ALLOW_NON_LOOPBACK=1 override is not set.
    """
    # v2.3.2: Auto-detect workspace root for HTTP fallback
    workspace_root = WorkspaceManager.resolve_workspace_root()

    # Set env var so Config can pick it up
    os.environ["LOCAL_SEARCH_WORKSPACE_ROOT"] = workspace_root

    cfg_path = resolve_config_path(workspace_root)

    # Graceful config loading (Global Install Support)
    if os.path.exists(cfg_path):
        cfg = Config.load(cfg_path)
    else:
        # Use safe defaults if config.json is missing.
        print(f"[sari] Config not found in workspace ({cfg_path}), using defaults.")
        defaults = config_mod.Config.get_defaults(workspace_root)
        cfg = Config(**defaults)

    # Security hardening: loopback-only by default.
    # Allow opt-in override only when explicitly requested.
    allow_non_loopback = os.environ.get("LOCAL_SEARCH_ALLOW_NON_LOOPBACK") == "1"
    host = (cfg.http_api_host or "127.0.0.1").strip()
    try:
        is_loopback = host.lower() == "localhost" or ipaddress.ip_address(host).is_loopback
    except ValueError:
        # Non-IP hostnames are only allowed if they resolve to localhost explicitly.
        is_loopback = host.lower() == "localhost"

    if (not is_loopback) and (not allow_non_loopback):
        raise SystemExit(
            f"sari refused to start: server_host must be loopback only (127.0.0.1/localhost/::1). got={host}. "
            "Set LOCAL_SEARCH_ALLOW_NON_LOOPBACK=1 to override (NOT recommended)."
        )

    # v2.4.1: Workspace-local DB path enforcement (multi-workspace support)
    # DB path is now determined by Config.load
    db_path = cfg.db_path
    Path(db_path).parent.mkdir(parents=True, exist_ok=True)

    print(f"[sari] DB path: {db_path}")

    db = LocalSearchDB(db_path)
    try:
        # BUGFIX: this module explicitly supports script mode via the
        # module-level try/except import block, but the deferred imports
        # here used bare `app.` package paths, which fail when run as
        # `python3 app/main.py`. Mirror the same fallback pattern.
        try:
            from app.engine_registry import get_default_engine  # type: ignore
        except ImportError:  # script mode
            from engine_registry import get_default_engine  # type: ignore
        db.set_engine(get_default_engine(db, cfg, cfg.workspace_roots))
    except Exception as e:
        # Engine selection is best-effort; the daemon still serves without it.
        print(f"[sari] engine init failed: {e}")

    try:
        from app.indexer import resolve_indexer_settings  # type: ignore
    except ImportError:  # script mode (see BUGFIX above); was previously fatal
        from indexer import resolve_indexer_settings  # type: ignore
    mode, enabled, startup_enabled, lock_handle = resolve_indexer_settings(str(db_path))
    indexer = Indexer(cfg, db, indexer_mode=mode, indexing_enabled=enabled,
                      startup_index_enabled=startup_enabled, lock_handle=lock_handle)

    # Start HTTP immediately so health checks don't block on initial indexing.
    # v2.3.3: serve_forever returns (httpd, actual_port) for fallback tracking
    version = os.environ.get("DECKARD_VERSION", "dev")
    httpd, actual_port = serve_forever(host, cfg.http_api_port, db, indexer,
                                       version=version, workspace_root=workspace_root)

    # Write server.json with actual binding info (single source of truth for port tracking)
    data_dir = Path(workspace_root) / ".codex" / "tools" / "sari" / "data"
    data_dir.mkdir(parents=True, exist_ok=True)
    server_json = data_dir / "server.json"
    server_info = {
        "host": host,
        "port": actual_port,  # v2.3.3: use actual bound port, not config port
        "config_port": cfg.http_api_port,  # original requested port for reference
        "pid": os.getpid(),
        "started_at": datetime.now().isoformat(),
    }
    server_json.write_text(json.dumps(server_info, indent=2), encoding="utf-8")

    if actual_port != cfg.http_api_port:
        print(f"[sari] server.json updated with fallback port {actual_port}")

    # Best-effort sidecar port file next to the DB for external discovery.
    try:
        port_file = Path(db_path + ".http_api.port")
        port_file.write_text(str(actual_port) + "\n", encoding="utf-8")
    except Exception:
        pass

    stop_evt = threading.Event()

    def _shutdown(*_):
        # Idempotent: may be invoked by both signal handlers and the
        # `finally` block below; each teardown step is best-effort.
        if stop_evt.is_set():
            return
        stop_evt.set()
        try:
            indexer.stop()
        except Exception:
            pass
        try:
            httpd.shutdown()
        except Exception:
            pass
        try:
            db.close()
        except Exception:
            pass

    signal.signal(signal.SIGINT, _shutdown)
    signal.signal(signal.SIGTERM, _shutdown)

    # Index in background.
    idx_thread = threading.Thread(target=indexer.run_forever, daemon=True)
    idx_thread.start()

    try:
        # Main thread just parks; signal handlers flip stop_evt.
        while not stop_evt.is_set():
            time.sleep(0.2)
    finally:
        _shutdown()

    return 0
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# Script entry point: propagate main()'s return code as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
app/models.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import Optional, List
|
|
3
|
+
|
|
4
|
+
@dataclass
class SearchHit:
    """Enhanced search result with metadata.

    Only repo/path/score/snippet are required; the remaining fields are
    optional enrichment added across versions (see per-field notes).
    """
    repo: str      # repository name the hit belongs to
    path: str      # file path of the hit
    score: float   # ranking score (higher is better)
    snippet: str   # rendered snippet text around the match
    # v2.3.1: Added metadata
    mtime: int = 0        # file modification time (presumably epoch seconds; 0 = unknown)
    size: int = 0         # file size (presumably bytes; 0 = unknown)
    match_count: int = 0  # number of query matches counted in the file
    file_type: str = ""   # file extension/type label ("" = unknown)
    hit_reason: str = ""  # v2.4.3: Added hit reason
    context_symbol: str = ""  # v2.6.0: Enclosing symbol context
    docstring: str = ""  # v2.9.0: Docstring/Javadoc
    metadata: str = "{}"  # v2.9.0: Raw metadata JSON
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class SearchOptions:
    """Search configuration options (v2.5.1)."""
    query: str = ""             # raw query string
    repo: Optional[str] = None  # restrict to a single repo when set
    limit: int = 20             # max hits per page
    offset: int = 0             # pagination offset
    snippet_lines: int = 5      # lines of context per snippet
    # Filtering
    file_types: List[str] = field(default_factory=list)  # e.g., ["py", "ts"]
    path_pattern: Optional[str] = None  # e.g., "src/**/*.ts"
    exclude_patterns: List[str] = field(default_factory=list)  # e.g., ["node_modules", "build"]
    recency_boost: bool = False   # boost recently-modified files in ranking
    use_regex: bool = False       # treat query as a regular expression
    case_sensitive: bool = False  # case-sensitive matching
    root_ids: List[str] = field(default_factory=list)  # restrict to these workspace roots — TODO confirm semantics
    # Pagination & Performance (v2.5.1)
    total_mode: str = "exact"  # "exact" | "approx"
|
app/queue_pipeline.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Iterable, List, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FsEventKind(str, Enum):
    """Kinds of filesystem change events handled by the indexing pipeline."""
    CREATED = "CREATED"
    MODIFIED = "MODIFIED"
    DELETED = "DELETED"
    MOVED = "MOVED"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class FsEvent:
    """Immutable filesystem event (frozen so instances are hashable/deduplicable)."""
    kind: FsEventKind
    path: str
    # Destination path; only meaningful for MOVED events.
    dest_path: Optional[str] = None
    # Event timestamp (0.0 when not supplied).
    ts: float = 0.0
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TaskAction(str, Enum):
    """Actions the pipeline can take for a path."""
    INDEX = "INDEX"
    DELETE = "DELETE"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class CoalesceTask:
    """A pending per-path task; repeated events for one path coalesce into one task."""
    action: TaskAction
    path: str
    # Number of processing attempts so far.
    attempts: int = 0
    # When the task was first enqueued (0.0 when unset).
    enqueue_ts: float = 0.0
    # When the most recent event for this path was observed (0.0 when unset).
    last_seen: float = 0.0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class DbTask:
    """A unit of work for the DB writer.

    Which optional fields are populated depends on `kind`; unused fields
    stay None.
    """
    kind: str
    rows: Optional[List[tuple]] = None         # row tuples to upsert
    path: Optional[str] = None                 # single affected path
    paths: Optional[List[str]] = None          # batch of affected paths
    ts: Optional[int] = None                   # timestamp associated with the task
    repo_meta: Optional[dict] = None           # repository metadata payload
    engine_docs: Optional[List[dict]] = None   # documents to feed the search engine
    engine_deletes: Optional[List[str]] = None # document ids/paths to delete from the engine
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def coalesce_action(existing: Optional[TaskAction], incoming: TaskAction) -> TaskAction:
    """Merge a new action into an existing one for the same path.

    DELETE is sticky: if either side is DELETE the coalesced result stays
    DELETE; otherwise the path is (re)indexed.
    """
    if TaskAction.DELETE in (existing, incoming):
        return TaskAction.DELETE
    return TaskAction.INDEX
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def split_moved_event(event: FsEvent) -> List[Tuple[TaskAction, str]]:
    """Expand a MOVED event into DELETE-source + INDEX-destination tasks.

    Non-MOVED events produce an empty list; a missing source or destination
    path is simply skipped.
    """
    if event.kind != FsEventKind.MOVED:
        return []
    candidates = (
        (TaskAction.DELETE, event.path),
        (TaskAction.INDEX, event.dest_path),
    )
    return [(action, target) for action, target in candidates if target]
|
app/ranking.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import time
|
|
3
|
+
import fnmatch
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import List, Optional, Any
|
|
6
|
+
|
|
7
|
+
def glob_to_like(pattern: str) -> str:
    """Convert a glob-style pattern into a SQL LIKE pattern for 1st-pass filtering."""
    if not pattern:
        return "%"

    # v2.5.4: map glob wildcards onto LIKE wildcards.
    like = (
        pattern
        .replace("**", "%")
        .replace("*", "%")
        .replace("?", "_")
    )

    # A bare term without wildcards becomes a substring match.
    if "%" not in like and "_" not in like:
        like = f"%{like}%"

    # Directory-recursive suffix keeps exactly one trailing wildcard.
    if pattern.endswith("/**"):
        like = like.rstrip("%") + "%"

    # Collapse runs of consecutive % wildcards.
    while "%%" in like:
        like = like.replace("%%", "%")
    return like
|
|
25
|
+
|
|
26
|
+
def get_file_extension(path: str) -> str:
    """Return the lowercased extension of *path* without the dot ("" when none)."""
    suffix = Path(path).suffix
    if not suffix:
        return ""
    return suffix[1:].lower()
|
|
29
|
+
|
|
30
|
+
def calculate_recency_score(mtime: int, base_score: float) -> float:
    """Boost *base_score* for recently-modified files.

    Tiers: < 1 day -> x1.5, < 7 days -> x1.3, < 30 days -> x1.1, else x1.0.
    """
    age_days = (time.time() - mtime) / 86400
    for limit, factor in ((1, 1.5), (7, 1.3), (30, 1.1)):
        if age_days < limit:
            break
    else:
        factor = 1.0

    # v2.5.4: the +0.1 bias keeps the boost effective even when base_score is 0.
    return (base_score + 0.1) * factor
|
|
44
|
+
|
|
45
|
+
def extract_terms(q: str) -> List[str]:
    """Split a query into bare search terms.

    Honors double/single-quoted phrases, drops boolean operators
    (AND/OR/NOT), and strips short (<= 10 chars) "field:" prefixes.
    """
    terms: List[str] = []
    # v2.5.4: each match is (double_quoted, single_quoted, bare_word).
    for dq, sq, bare in re.findall(r'"([^"]*)"|\'([^\']*)\'|(\S+)', q or ""):
        term = (dq or sq or bare).strip()
        if not term or term in ("AND", "OR", "NOT"):
            continue
        prefix, sep, remainder = term.partition(":")
        if sep and len(prefix) <= 10:
            term = remainder.strip()
        if term:
            terms.append(term)
    return terms
|
|
61
|
+
|
|
62
|
+
def count_matches(content: str, query: str, use_regex: bool, case_sensitive: bool) -> int:
    """Count occurrences of *query* in *content* (0 for empty/invalid queries)."""
    if not query:
        return 0

    if use_regex:
        try:
            flags = 0 if case_sensitive else re.IGNORECASE
            return len(re.findall(query, content, flags))
        except re.error:
            # Invalid user-supplied pattern counts as no matches.
            return 0

    if case_sensitive:
        return content.count(query)

    # v2.7.0: case-insensitive literal count via regex for better unicode handling.
    try:
        return len(re.findall(re.escape(query), content, re.IGNORECASE))
    except Exception:
        # Fallback to a plain lowercase count if regex fails for any reason.
        return content.lower().count(query.lower())
|
|
79
|
+
|
|
80
|
+
def snippet_around(content: str, terms: List[str], max_lines: int,
                   highlight: bool = True) -> str:
    """Render the max_lines window of *content* that best covers *terms*.

    Lines are scored (+1 per term substring, +5 extra when a matching line
    looks like a definition), the best-scoring contiguous window is chosen
    via a sliding-window pass, and matches are optionally wrapped in
    >>>...<<< markers. Falls back to the head of the file when nothing
    matches. Lines are prefixed "L<n>: " (1-based).
    """
    if max_lines <= 0:
        return ""
    lines = content.splitlines()
    if not lines:
        return ""

    needles = [t.lower() for t in terms if t.strip()]
    if not needles:
        return "\n".join(f"L{n}: {text}" for n, text in enumerate(lines[:max_lines], start=1))

    # Score per line: +1 per matching term, +5 when a matching line is a definition.
    definition_re = re.compile(r"\b(class|def|function|struct|interface|type)\s+", re.IGNORECASE)
    scores: List[int] = []
    matched = False
    for raw in lines:
        low = raw.lower()
        hits = sum(1 for needle in needles if needle in low)
        if hits:
            matched = True
            if definition_re.search(low):
                hits += 5
        scores.append(hits)

    if not matched:
        return "\n".join(f"L{n}: {text}" for n, text in enumerate(lines[:max_lines], start=1))

    # Sliding window over the scores; ties keep the earliest window.
    size = min(len(lines), max_lines)
    window = sum(scores[:size])
    best_score, best_at = window, 0
    for start in range(1, len(lines) - size + 1):
        window += scores[start + size - 1] - scores[start - 1]
        if window > best_score:
            best_score, best_at = window, start

    markers = [re.compile(re.escape(t), re.IGNORECASE) for t in terms if t.strip()]
    rendered: List[str] = []
    for idx in range(best_at, best_at + size):
        text = lines[idx]
        if highlight:
            for marker in markers:
                # \g<0> backreference keeps the original casing of the match.
                text = marker.sub(r">>>\g<0><<<", text)
        rendered.append(f"L{idx + 1}: {text}")

    return "\n".join(rendered)
|
app/registry.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import time
|
|
5
|
+
import socket
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, Optional, Any
|
|
8
|
+
|
|
9
|
+
# Cross-platform file locking
IS_WINDOWS = os.name == 'nt'
if not IS_WINDOWS:
    # fcntl is POSIX-only; on Windows the flock calls below are skipped.
    import fcntl

# Local Standard Path
# DECKARD_REGISTRY_FILE overrides the registry location when set (e.g. for tests).
if os.environ.get("DECKARD_REGISTRY_FILE"):
    REGISTRY_FILE = Path(os.environ["DECKARD_REGISTRY_FILE"]).resolve()
    REGISTRY_DIR = REGISTRY_FILE.parent
else:
    # Default: per-user XDG-style data directory.
    REGISTRY_DIR = Path.home() / ".local" / "share" / "sari"
    REGISTRY_FILE = REGISTRY_DIR / "server.json"
|
|
21
|
+
|
|
22
|
+
class ServerRegistry:
    """
    Manages the 'server.json' registry for Sari Daemons.

    Maps Workspace Root Paths -> {Port, PID, Status}.
    Thread/Process safe via fcntl locking (locking is a no-op on Windows).
    """

    def __init__(self):
        # Ensure the registry file exists so later "r+" opens don't fail.
        REGISTRY_DIR.mkdir(parents=True, exist_ok=True)
        if not REGISTRY_FILE.exists():
            self._write_empty()

    def _write_empty(self):
        """Create a fresh, empty registry file."""
        with open(REGISTRY_FILE, "w") as f:
            json.dump({"version": "1.0", "instances": {}}, f)

    def _load(self) -> Dict[str, Any]:
        """Load the registry under a shared lock; empty registry on corruption."""
        try:
            with open(REGISTRY_FILE, "r+") as f:
                if not IS_WINDOWS:
                    fcntl.flock(f, fcntl.LOCK_SH)
                try:
                    return json.load(f)
                except json.JSONDecodeError:
                    # Corrupt/partial file: behave as if the registry were empty.
                    return {"version": "1.0", "instances": {}}
                finally:
                    if not IS_WINDOWS:
                        fcntl.flock(f, fcntl.LOCK_UN)
        except FileNotFoundError:
            return {"version": "1.0", "instances": {}}

    def _save(self, data: Dict[str, Any]):
        """Save the registry under an exclusive lock."""
        with open(REGISTRY_FILE, "w") as f:
            if not IS_WINDOWS:
                fcntl.flock(f, fcntl.LOCK_EX)
            try:
                json.dump(data, f, indent=2)
            finally:
                if not IS_WINDOWS:
                    fcntl.flock(f, fcntl.LOCK_UN)

    def register(self, workspace_root: str, port: int, pid: int) -> None:
        """Register a running daemon for *workspace_root*."""
        # Normalize path
        workspace_root = str(Path(workspace_root).resolve())

        # Read-modify-write under one exclusive lock: open "r+" with LOCK_EX,
        # read, seek(0), write, truncate.
        with open(REGISTRY_FILE, "r+") as f:
            if not IS_WINDOWS:
                fcntl.flock(f, fcntl.LOCK_EX)
            try:
                try:
                    data = json.load(f)
                except ValueError:
                    # BUGFIX: was a bare `except:` (would swallow SystemExit /
                    # KeyboardInterrupt); only recover from unparsable JSON.
                    data = {"version": "1.0", "instances": {}}

                instances = data.get("instances", {})
                instances[workspace_root] = {
                    "port": port,
                    "pid": pid,
                    "start_ts": time.time(),
                    "status": "active"
                }
                data["instances"] = instances

                f.seek(0)
                json.dump(data, f, indent=2)
                f.truncate()
            finally:
                if not IS_WINDOWS:
                    fcntl.flock(f, fcntl.LOCK_UN)

    def unregister(self, workspace_root: str) -> None:
        """Remove a daemon entry (on shutdown). No-op if the file is corrupt."""
        workspace_root = str(Path(workspace_root).resolve())

        with open(REGISTRY_FILE, "r+") as f:
            if not IS_WINDOWS:
                fcntl.flock(f, fcntl.LOCK_EX)
            try:
                try:
                    data = json.load(f)
                except ValueError:
                    # BUGFIX: was a bare `except:`; leave a corrupt file untouched.
                    return

                instances = data.get("instances", {})
                if workspace_root in instances:
                    del instances[workspace_root]
                data["instances"] = instances

                f.seek(0)
                json.dump(data, f, indent=2)
                f.truncate()
            finally:
                if not IS_WINDOWS:
                    fcntl.flock(f, fcntl.LOCK_UN)

    def get_instance(self, workspace_root: str) -> Optional[Dict[str, Any]]:
        """Get info for a workspace daemon. Checks liveness.

        Returns None for unknown workspaces or dead PIDs. Dead entries are
        not removed here (we only hold a read lock); cleanup happens lazily
        on the next write or a dedicated gc.
        """
        workspace_root = str(Path(workspace_root).resolve())
        data = self._load()
        inst = data.get("instances", {}).get(workspace_root)

        if not inst:
            return None

        # Check if the registered process is actually alive.
        if not self._is_process_alive(inst.get("pid")):
            return None

        return inst

    def _is_process_alive(self, pid: int) -> bool:
        """Best-effort liveness probe using signal 0."""
        if not pid:
            return False
        try:
            os.kill(pid, 0)  # Signal 0 checks existence without signaling
        except PermissionError:
            # BUGFIX: EPERM means the process exists but belongs to another
            # user — it is alive; the old code reported it dead.
            return True
        except OSError:
            return False
        return True

    def find_free_port(self, start_port: int = 47777, max_port: int = 65535) -> int:
        """Find a port not in use by other registered instances AND the OS.

        Raises:
            RuntimeError: when no port in [start_port, max_port] is available.
        """
        # 1. Ports claimed by still-alive registered instances.
        data = self._load()
        used_ports = {
            info["port"] for info in data.get("instances", {}).values()
            if self._is_process_alive(info.get("pid"))
        }

        for port in range(start_port, max_port + 1):
            if port in used_ports:
                continue

            # 2. Confirm the OS will actually let us bind it.
            try:
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                    s.bind(("127.0.0.1", port))
                    return port
            except OSError:
                continue

        raise RuntimeError("No free ports available")
|