PyPI - mikoshi - Versions diffs - 0.1.9__py3-none-any.whl - Mend

mikoshi 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

mikoshi/__init__.py +3 -0
mikoshi/auth.py +265 -0
mikoshi/chunking.py +44 -0
mikoshi/cli.py +295 -0
mikoshi/config.py +125 -0
mikoshi/entitlements.py +32 -0
mikoshi/hashing.py +11 -0
mikoshi/ignore.py +139 -0
mikoshi/indexing/__init__.py +9 -0
mikoshi/indexing/file_scanner.py +60 -0
mikoshi/indexing/index_store.py +87 -0
mikoshi/indexing/indexer.py +237 -0
mikoshi/mcp_server/__init__.py +3 -0
mikoshi/mcp_server/server.py +135 -0
mikoshi/retrieval/__init__.py +17 -0
mikoshi/retrieval/hybrid.py +109 -0
mikoshi/retrieval/lexical.py +68 -0
mikoshi/retrieval/rerank.py +27 -0
mikoshi/retrieval/semantic.py +175 -0
mikoshi/utils/__init__.py +11 -0
mikoshi/utils/timer.py +18 -0
mikoshi/utils/types.py +111 -0
mikoshi-0.1.9.dist-info/METADATA +52 -0
mikoshi-0.1.9.dist-info/RECORD +26 -0
mikoshi-0.1.9.dist-info/WHEEL +5 -0
mikoshi-0.1.9.dist-info/top_level.txt +1 -0

mikoshi/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+# Only the version string is exported as the package's public API.
+__all__ = ["__version__"]
+__version__ = "0.1.9"

mikoshi/auth.py ADDED Viewed

@@ -0,0 +1,265 @@
+from __future__ import annotations
+import base64
+import hashlib
+import json
+import os
+import secrets
+import webbrowser
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlencode
+import httpx
+from mikoshi.entitlements import DEFAULT_FEATURES, DEFAULT_PLAN
+# File names of the persisted login state and broker settings; both are joined onto the index root.
+AUTH_FILENAME = "auth.json"
+CONFIG_FILENAME = "config.json"
+# Base URL used whenever no api_base_url has been configured.
+DEFAULT_API_BASE_URL = "https://neet.gg"
+class AuthError(RuntimeError):
+    """Raised when a step of the login flow fails (bad paste, state mismatch, failed exchange)."""
+@dataclass(frozen=True)
+class AuthState:
+    """Locally persisted session: access token, its expiry, and the account's plan and features."""
+    access_token: str
+    # Expiry kept as the string received at login; is_expired() parses it as an ISO timestamp.
+    expires_at: str
+    plan: str
+    features: list[str]
+    def to_dict(self) -> dict[str, Any]:
+        """Return a JSON-serializable mapping of this state (features copied into a new list)."""
+        return {
+            "access_token": self.access_token,
+            "expires_at": self.expires_at,
+            "plan": self.plan,
+            "features": list(self.features),
+        }
+    @staticmethod
+    def from_dict(data: dict[str, Any]) -> "AuthState":
+        """Build a state from a decoded JSON object, tolerating missing or null fields.
+        Missing or null ``access_token``/``expires_at`` become ``""`` (so the state reads as
+        expired), a missing, null or empty ``plan`` becomes ``DEFAULT_PLAN``, and a single
+        string in ``features`` is wrapped into a one-element list.
+        """
+        def _text(key: str) -> str:
+            # JSON null must count as missing; str(None) would yield the literal string "None".
+            value = data.get(key)
+            return "" if value is None else str(value)
+        features = data.get("features") or []
+        if isinstance(features, str):
+            features = [features]
+        return AuthState(
+            access_token=_text("access_token"),
+            expires_at=_text("expires_at"),
+            # Same fallback rule that login() applies to the server response.
+            plan=str(data.get("plan") or DEFAULT_PLAN),
+            features=[str(item) for item in features],
+        )
+@dataclass(frozen=True)
+class BrokerConfig:
+    """Immutable settings for reaching the auth service."""
+    # Base URL that the login and code-exchange paths are appended to.
+    api_base_url: str
+def _index_root() -> Path:
+    """Return the Mikoshi data directory: ``$MIKOSHI_INDEX_ROOT`` if set, else ``~/.mikoshi``, with ``~`` expanded."""
+    configured = os.environ.get("MIKOSHI_INDEX_ROOT", "~/.mikoshi")
+    return Path(configured).expanduser()
+def auth_path() -> Path:
+    """Return the path of the saved auth state file inside the index root."""
+    return _index_root().joinpath(AUTH_FILENAME)
+def config_path() -> Path:
+    """Return the path of the broker config file inside the index root."""
+    return _index_root().joinpath(CONFIG_FILENAME)
+def load_broker_config() -> BrokerConfig:
+    """Load broker settings from the config file, falling back to the default API base URL.
+    The default is used when the file is missing, unreadable, not a JSON object, or when
+    ``api_base_url`` is absent, null, not a string, or blank. Surrounding whitespace and
+    trailing slashes are stripped from a configured URL.
+    """
+    default = BrokerConfig(api_base_url=DEFAULT_API_BASE_URL)
+    try:
+        # Read directly rather than checking exists() first; a missing file is just another error here.
+        data = json.loads(config_path().read_text(encoding="utf-8"))
+    except Exception:
+        # Deliberately best-effort: any read or parse problem means "use the default".
+        return default
+    if not isinstance(data, dict):
+        return default
+    raw_url = data.get("api_base_url")
+    # Accept real strings only; str(None) would otherwise become the bogus URL "None".
+    api_base_url = raw_url.strip().rstrip("/") if isinstance(raw_url, str) else ""
+    if not api_base_url:
+        return default
+    return BrokerConfig(api_base_url=api_base_url)
+def save_broker_config(api_base_url: str) -> None:
+    """Write the broker config file with ``api_base_url``, using a temp file plus rename.
+    The URL is stripped of whitespace and trailing slashes; a blank value is replaced by
+    ``DEFAULT_API_BASE_URL``. If writing fails, the temp file is removed before the error
+    propagates so no stray ``.json.tmp`` file is left behind.
+    Raises:
+        OSError: if the directory or file cannot be created, written or replaced.
+    """
+    clean_url = api_base_url.strip().rstrip("/") or DEFAULT_API_BASE_URL
+    path = config_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    payload = json.dumps({"api_base_url": clean_url}, indent=2, sort_keys=True)
+    temp_path = path.with_suffix(".json.tmp")
+    fd = os.open(temp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as handle:
+            # The mode given to os.open is ignored when a stale temp file already exists,
+            # so tighten its permissions before writing.
+            os.chmod(temp_path, 0o600)
+            handle.write(payload)
+            handle.write("\n")
+        os.replace(temp_path, path)
+    except BaseException:
+        # Do not leave a partial temp file behind; the original error is re-raised below.
+        try:
+            os.unlink(temp_path)
+        except OSError:
+            pass
+        raise
+    os.chmod(path, 0o600)
+def load_auth_state() -> AuthState | None:
+    """Return the saved auth state, or None if the file is missing, unreadable, or not a JSON object."""
+    state_file = auth_path()
+    if not state_file.exists():
+        return None
+    try:
+        raw = state_file.read_text(encoding="utf-8")
+        parsed = json.loads(raw)
+    except Exception:
+        # Best-effort: a corrupt or unreadable file is treated the same as "not logged in".
+        return None
+    return AuthState.from_dict(parsed) if isinstance(parsed, dict) else None
+def save_auth_state(state: AuthState) -> None:
+    """Persist ``state`` to the auth file with owner-only permissions, using a temp file plus rename.
+    The JSON is written to a sibling ``.json.tmp`` file which then replaces the real file.
+    If writing fails, the temp file (which may hold part of the access token) is removed
+    before the error propagates.
+    Raises:
+        OSError: if the directory or file cannot be created, written or replaced.
+    """
+    path = auth_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    payload = json.dumps(state.to_dict(), indent=2, sort_keys=True)
+    temp_path = path.with_suffix(".json.tmp")
+    fd = os.open(temp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as handle:
+            # The mode given to os.open is ignored when a stale temp file already exists,
+            # so tighten its permissions before any secret bytes are written.
+            os.chmod(temp_path, 0o600)
+            handle.write(payload)
+            handle.write("\n")
+        os.replace(temp_path, path)
+    except BaseException:
+        # Never leave a partially written token file behind; the original error is re-raised below.
+        try:
+            os.unlink(temp_path)
+        except OSError:
+            pass
+        raise
+    os.chmod(path, 0o600)
+def clear_auth_state() -> None:
+    """Delete the saved auth state file; a file that does not exist is not an error."""
+    # missing_ok avoids the race between an exists() check and the unlink itself.
+    auth_path().unlink(missing_ok=True)
+def _parse_expires_at(value: str) -> datetime | None:
+    """Parse an ISO-format timestamp into a timezone-aware datetime, or return None if unusable.
+    A trailing ``Z`` is accepted as UTC. A timestamp without any offset is assumed to be
+    UTC, so the result can always be compared against an aware "now".
+    """
+    if not value:
+        return None
+    try:
+        if value.endswith("Z"):
+            value = value[:-1] + "+00:00"
+        parsed = datetime.fromisoformat(value)
+    except (AttributeError, TypeError, ValueError):
+        return None
+    if parsed.tzinfo is None:
+        # Comparing a naive datetime with an aware one raises TypeError, so pin it to UTC.
+        parsed = parsed.replace(tzinfo=timezone.utc)
+    return parsed
+def is_expired(state: AuthState) -> bool:
+    """Return True when the state's expiry is missing, unparseable, or not in the future."""
+    expires_at = _parse_expires_at(state.expires_at)
+    if expires_at is None:
+        return True
+    return datetime.now(timezone.utc) >= expires_at
+def email_from_token(access_token: str) -> str | None:
+    """Best-effort lookup of an email-like claim in the token's second dot-separated segment.
+    The segment is decoded as URL-safe base64 JSON and the first non-empty value among
+    ``email``, ``user_email`` and ``preferred_username`` is returned as a string. No
+    signature is checked. Any decoding problem yields None.
+    """
+    try:
+        segments = access_token.split(".")
+        if len(segments) < 2:
+            return None
+        body = segments[1]
+        # Restore the base64 padding that compact tokens omit.
+        padded = body + "=" * (-len(body) % 4)
+        raw = base64.urlsafe_b64decode(padded.encode("utf-8"))
+        claims = json.loads(raw.decode("utf-8"))
+        candidates = ("email", "user_email", "preferred_username")
+        return next((str(claims[name]) for name in candidates if claims.get(name)), None)
+    except Exception:
+        return None
+def _generate_state() -> str:
+    """Return a fresh random URL-safe ``state`` value built from 32 random bytes."""
+    token = secrets.token_urlsafe(32)
+    return token.rstrip("=")
+def _generate_code_verifier() -> str:
+    """Return a fresh random URL-safe code verifier built from 64 random bytes."""
+    token = secrets.token_urlsafe(64)
+    return token.rstrip("=")
+def _code_challenge(code_verifier: str) -> str:
+    """Return the S256 challenge for ``code_verifier``: unpadded URL-safe base64 of its SHA-256 digest."""
+    hashed = hashlib.sha256(code_verifier.encode("utf-8"))
+    encoded = base64.urlsafe_b64encode(hashed.digest())
+    return encoded.decode("utf-8").rstrip("=")
+def _login_url(api_base_url: str, state: str, challenge: str) -> str:
+    """Build the ``/login`` URL carrying the authorization-code request parameters."""
+    base = api_base_url.rstrip("/")
+    params = urlencode(
+        [
+            ("response_type", "code"),
+            ("client_id", "mikoshi"),
+            ("code_challenge", challenge),
+            ("code_challenge_method", "S256"),
+            ("state", state),
+        ]
+    )
+    return f"{base}/login?{params}"
+def _parse_paste_payload(text: str) -> tuple[str, str]:
+    """Parse the pasted JSON object and return its whitespace-stripped ``(code, state)`` pair.
+    Raises:
+        AuthError: if ``text`` is not a JSON object, or ``code``/``state`` is missing, null or blank.
+    """
+    try:
+        data = json.loads(text)
+    except Exception as exc:
+        raise AuthError("Invalid JSON response.") from exc
+    if not isinstance(data, dict):
+        raise AuthError("Invalid JSON response.")
+    raw_code = data.get("code")
+    raw_state = data.get("state")
+    # Treat JSON null as missing; str(None) would produce the truthy string "None".
+    code = "" if raw_code is None else str(raw_code).strip()
+    state = "" if raw_state is None else str(raw_state).strip()
+    if not code or not state:
+        raise AuthError("Invalid JSON response.")
+    return code, state
+def _ensure_state_match(expected: str, actual: str) -> None:
+    """Raise AuthError unless the returned state equals the one generated for this login attempt."""
+    if expected == actual:
+        return
+    raise AuthError("State mismatch. Please retry login.")
+def _exchange_code(
+    api_base_url: str, code: str, code_verifier: str, state: str
+) -> dict[str, Any]:
+    """POST the code, code verifier and state to ``/cli/exchange`` and return the JSON object.
+    Raises:
+        AuthError: if the request cannot be completed, the server answers with a status
+            of 400 or above, or the response body is not a JSON object.
+    """
+    payload = {"code": code, "code_verifier": code_verifier, "state": state}
+    url = f"{api_base_url.rstrip('/')}/cli/exchange"
+    try:
+        with httpx.Client(timeout=10.0) as client:
+            response = client.post(url, json=payload)
+    except (httpx.HTTPError, httpx.InvalidURL) as exc:
+        # Connection, timeout and malformed-URL failures would otherwise escape as raw
+        # httpx exceptions, which callers that only handle AuthError do not catch.
+        raise AuthError("Login failed. Could not reach the authentication server.") from exc
+    if response.status_code >= 400:
+        raise AuthError("Login failed. Please try again.")
+    try:
+        data = response.json()
+    except ValueError as exc:
+        # A body that is not valid JSON is reported like any other malformed response.
+        raise AuthError("Login failed. Invalid response.") from exc
+    if not isinstance(data, dict):
+        raise AuthError("Login failed. Invalid response.")
+    return data
+def login() -> str:
+    """Run the interactive browser login, save the session, and return the account email.
+    Steps: open the broker's login page with a fresh state and S256 code challenge, read
+    the JSON the user pastes back, check the state, exchange the code for a token, and
+    persist the resulting AuthState.
+    Raises:
+        AuthError: if the browser cannot be opened, the pasted JSON is invalid, the state
+            does not match, the exchange fails, or the response lacks a usable
+            ``access_token``, ``expires_at`` or ``email``.
+    """
+    def _text(payload: dict[str, Any], key: str) -> str:
+        # JSON null must count as missing; str(None) would give the truthy string "None"
+        # and slip past the emptiness check below.
+        value = payload.get(key)
+        return "" if value is None else str(value).strip()
+    config = load_broker_config()
+    api_base_url = config.api_base_url
+    state = _generate_state()
+    code_verifier = _generate_code_verifier()
+    challenge = _code_challenge(code_verifier)
+    login_url = _login_url(api_base_url, state, challenge)
+    print("🔐 Starting authentication...")
+    print("🌐 Opening authentication page in your browser...")
+    opened = webbrowser.open(login_url, new=1, autoraise=True)
+    if not opened:
+        raise AuthError("Unable to open authentication page.")
+    raw = input("Paste the JSON response here: ").strip()
+    code, returned_state = _parse_paste_payload(raw)
+    _ensure_state_match(state, returned_state)
+    data = _exchange_code(api_base_url, code, code_verifier, returned_state)
+    access_token = _text(data, "access_token")
+    expires_at = _text(data, "expires_at")
+    email = _text(data, "email")
+    if not access_token or not expires_at or not email:
+        raise AuthError("Login failed. Invalid response.")
+    plan = str(data.get("plan") or DEFAULT_PLAN)
+    features = data.get("features") or list(DEFAULT_FEATURES)
+    if isinstance(features, str):
+        features = [features]
+    state_obj = AuthState(
+        access_token=access_token,
+        expires_at=expires_at,
+        plan=plan,
+        features=[str(item) for item in features],
+    )
+    save_auth_state(state_obj)
+    return email

mikoshi/chunking.py ADDED Viewed

@@ -0,0 +1,44 @@
+from __future__ import annotations
+from dataclasses import dataclass
+@dataclass(frozen=True)
+class ChunkSpan:
+    """A contiguous run of lines from a text, with 1-based inclusive line bounds."""
+    start_line: int
+    end_line: int
+    text: str
+def chunk_text(text: str, max_lines: int, overlap: int) -> list[ChunkSpan]:
+    """Split ``text`` into windows of at most ``max_lines`` lines that share ``overlap`` lines.
+    Consecutive windows start ``max_lines - overlap`` lines apart, and the last window
+    ends on the final line. Empty text yields an empty list.
+    Raises:
+        ValueError: if ``max_lines <= 0``, ``overlap < 0`` or ``overlap >= max_lines``.
+    """
+    if max_lines <= 0:
+        raise ValueError("max_lines must be > 0")
+    if overlap < 0:
+        raise ValueError("overlap must be >= 0")
+    if overlap >= max_lines:
+        raise ValueError("overlap must be smaller than max_lines")
+    lines = text.splitlines()
+    total = len(lines)
+    stride = max_lines - overlap
+    spans: list[ChunkSpan] = []
+    for begin in range(0, total, stride):
+        stop = min(begin + max_lines, total)
+        window = "\n".join(lines[begin:stop])
+        spans.append(ChunkSpan(start_line=begin + 1, end_line=stop, text=window))
+        if stop == total:
+            # This window already reaches the last line; a further one would only repeat its tail.
+            break
+    return spans

mikoshi/cli.py ADDED Viewed

@@ -0,0 +1,295 @@
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+from mikoshi.config import ConfigError, configure_external_libs, load_config
+from mikoshi.auth import (
+    AuthError,
+    DEFAULT_API_BASE_URL,
+    clear_auth_state,
+    email_from_token,
+    is_expired,
+    load_auth_state,
+    login,
+    save_broker_config,
+)
+from mikoshi.indexing.index_store import IndexStore
+from mikoshi.utils.types import SearchResult
+def _format_index_root(path: Path) -> str:
+    """Return ``path`` resolved for display, with the user's home directory shortened to ``~``.
+    Paths outside the home directory are returned fully resolved. If resolution fails for
+    any reason, the path is returned as given.
+    """
+    try:
+        home = Path.home().resolve()
+        resolved = path.expanduser().resolve()
+        if resolved == home:
+            return "~"
+        try:
+            # relative_to compares path components, so it works with any path separator
+            # (a hard-coded "/" prefix test never matches on Windows).
+            relative = resolved.relative_to(home)
+        except ValueError:
+            return str(resolved)
+        return str(Path("~") / relative)
+    except Exception:
+        # Display helper only: never let a formatting problem break the caller's report.
+        return str(path)
+def _search(repo_path: str, query: str, k: int) -> list[SearchResult]:
+    """Forward to ``search_repo(repo_path, query, k)`` from the hybrid retrieval module."""
+    # Imported inside the function, so the retrieval module is loaded only when a search runs.
+    from mikoshi.retrieval.hybrid import search_repo
+    return search_repo(repo_path, query, k)
+def cmd_index(args: argparse.Namespace) -> int:
+    """Index the repository at ``args.path`` and print a JSON summary.
+    Returns 0 on success, or 2 after printing the message when a ConfigError is raised.
+    """
+    try:
+        from mikoshi.indexing.indexer import index_repo
+        outcome = index_repo(args.path)
+    except ConfigError as exc:
+        print(f"Config error: {exc}", file=sys.stderr)
+        return 2
+    summary = {
+        "repo_id": outcome.repo_id,
+        "chunks_indexed": outcome.chunks_indexed,
+        "took_ms": outcome.took_ms,
+    }
+    print(json.dumps(summary, indent=2))
+    return 0
+def cmd_search(args: argparse.Namespace) -> int:
+    """Search the repository at ``args.path`` and print each hit's location, score and snippet.
+    Returns 0 on success, 2 on ConfigError, and 1 on any other RuntimeError; in both
+    failure cases the message goes to stderr.
+    """
+    try:
+        hits = _search(args.path, args.query, args.k)
+    except ConfigError as exc:
+        print(f"Config error: {exc}", file=sys.stderr)
+        return 2
+    except RuntimeError as exc:
+        print(str(exc), file=sys.stderr)
+        return 1
+    for hit in hits:
+        header = f"{hit.relpath}:{hit.start_line}-{hit.end_line} ({hit.score:.3f})"
+        print(header)
+        print(hit.snippet)
+        # Blank line between results.
+        print()
+    return 0
+def cmd_doctor(args: argparse.Namespace) -> int:
+    """Print a checklist of setup checks and return 0 if none failed, else 1.
+    Checks, in order: Python version, config loading, embeddings settings, the local model
+    cache (only for the "local" provider) and, when ``args.path`` is given, whether that
+    repository has index metadata.
+    """
+    exit_code = 0
+    major, minor = sys.version_info[:2]
+    version_label = f"{major}.{minor}.x"
+    if (major, minor) >= (3, 11):
+        print(f"✅ Python: {version_label}")
+    else:
+        print(f"❌ Python: {version_label} (requires 3.11+)")
+        exit_code = 1
+    # The remaining checks all read the config, so a config failure ends the run here.
+    try:
+        config = load_config()
+    except ConfigError as exc:
+        print(f"❌ Config: {exc}")
+        return 1
+    print(f"✅ Mikoshi index root: {_format_index_root(config.index_root)}")
+    print(
+        f"✅ Embeddings: provider={config.embeddings.provider} "
+        f"model={config.embeddings.model}"
+    )
+    # Only re-evaluated (and only consulted) for the "local" provider.
+    model_cached = True
+    if config.embeddings.provider == "local":
+        try:
+            from huggingface_hub import snapshot_download
+            try:
+                # Any failure of this local_files_only lookup is treated as "not cached".
+                snapshot_download(
+                    config.embeddings.model,
+                    local_files_only=True,
+                )
+                model_cached = True
+            except Exception:
+                model_cached = False
+        except Exception:
+            # Reached only when the import itself fails; lookup errors are handled above.
+            print("❌ Dependencies: huggingface_hub missing")
+            return 1
+    # MIKOSHI_OFFLINE counts as enabled for 1/true/yes/on, case-insensitively.
+    offline = os.getenv("MIKOSHI_OFFLINE", "").strip().lower() in {
+        "1",
+        "true",
+        "yes",
+        "on",
+    }
+    if config.embeddings.provider == "local":
+        # An uncached model is only a failure in offline mode; otherwise it is just reported.
+        if not model_cached and offline:
+            print("❌ Model cached: no (offline)")
+            exit_code = 1
+        else:
+            print(f"✅ Model cached: {'yes' if model_cached else 'no'}")
+    else:
+        print("✅ Model cached: yes")
+    if args.path:
+        repo_root = Path(args.path).expanduser().resolve()
+        store = IndexStore(repo_root, config.index_root)
+        meta = store.load_meta()
+        # An unindexed repository is reported but does not change the exit code.
+        if meta:
+            print(
+                "✅ Repo indexed: yes "
+                f"(chunks={meta.chunks}, last_index_time={meta.updated_at})"
+            )
+        else:
+            print("✅ Repo indexed: no")
+    return exit_code
+def cmd_login(args: argparse.Namespace) -> int:
+    """Run the login flow, asking for confirmation first when a valid session already exists.
+    Returns 0 on success or when the user declines to re-authenticate, and 2 when login
+    raises AuthError (the message is printed to stderr).
+    """
+    existing = load_auth_state()
+    if existing and not is_expired(existing):
+        prompt = "⚠️ You are already logged in. Re-authenticating will replace your current session. Continue? (y/N):"
+        reply = input(prompt).strip().lower()
+        if reply not in ("y", "yes"):
+            return 0
+    try:
+        email = login()
+    except AuthError as exc:
+        print(str(exc), file=sys.stderr)
+        return 2
+    else:
+        print(f"✅ Logged in as {email}")
+        return 0
+def cmd_logout(args: argparse.Namespace) -> int:
+    """Remove the locally saved auth state and confirm; always returns 0."""
+    clear_auth_state()
+    print("✅ Logged out")
+    return 0
+def cmd_whoami(args: argparse.Namespace) -> int:
+    """Print the signed-in account's email and plan; return 1 if there is no unexpired session."""
+    state = load_auth_state()
+    if state and not is_expired(state):
+        # The email is read from the token itself and shown as "unknown" if it cannot be found.
+        email = email_from_token(state.access_token) or "unknown"
+        plan = state.plan.title() if state.plan else "Free"
+        print(f"✅ {email} ({plan})")
+        return 0
+    print("🔒 Not signed in")
+    return 1
+def cmd_auth_configure(args: argparse.Namespace) -> int:
+    """Prompt for the API base URL and save it; an empty answer keeps the default.
+    Returns 0 on success, or 2 after printing the error to stderr when saving fails.
+    """
+    prompt = f"API base URL [{DEFAULT_API_BASE_URL}]: "
+    api_base_url = input(prompt).strip() or DEFAULT_API_BASE_URL
+    try:
+        save_broker_config(api_base_url)
+    except (AuthError, OSError) as exc:
+        # save_broker_config does file I/O, so its realistic failure is OSError
+        # (unwritable directory, full disk); AuthError is kept for compatibility.
+        print(str(exc), file=sys.stderr)
+        return 2
+    print("✅ Auth configured")
+    return 0
+def cmd_status(args: argparse.Namespace) -> int:
+    """Print the index status of the repository at ``args.path`` as JSON.
+    The output has the keys ``indexed``, ``chunks``, ``last_index_time`` and ``model``.
+    Returns 0 whether or not the repository is indexed, or 2 after printing the message
+    to stderr when the configuration cannot be loaded.
+    """
+    try:
+        config = load_config()
+    except ConfigError as exc:
+        # Same handling as the index and search commands, instead of an uncaught traceback.
+        print(f"Config error: {exc}", file=sys.stderr)
+        return 2
+    repo_root = Path(args.path).expanduser().resolve()
+    store = IndexStore(repo_root, config.index_root)
+    meta = store.load_meta()
+    if meta:
+        report = {
+            "indexed": True,
+            "chunks": meta.chunks,
+            "last_index_time": meta.updated_at,
+            "model": meta.model,
+        }
+    else:
+        report = {
+            "indexed": False,
+            "chunks": 0,
+            "last_index_time": None,
+            "model": None,
+        }
+    print(json.dumps(report, indent=2))
+    return 0
+def cmd_clear(args: argparse.Namespace) -> int:
+    """Clear the stored index data for the repository at ``args.path`` and print ``{"ok": true}``.
+    Returns 0 on success, or 2 after printing the message to stderr when the configuration
+    cannot be loaded.
+    """
+    try:
+        config = load_config()
+    except ConfigError as exc:
+        # Same handling as the index and search commands, instead of an uncaught traceback.
+        print(f"Config error: {exc}", file=sys.stderr)
+        return 2
+    repo_root = Path(args.path).expanduser().resolve()
+    store = IndexStore(repo_root, config.index_root)
+    store.clear()
+    print(json.dumps({"ok": True}))
+    return 0
+def build_parser() -> argparse.ArgumentParser:
+    """Build the ``mikoshi`` argument parser with all subcommands attached.
+    Each subcommand stores its handler in ``func`` via ``set_defaults``; ``main`` calls it.
+    """
+    parser = argparse.ArgumentParser(prog="mikoshi")
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Enable verbose external library output",
+    )
+    # A subcommand is mandatory; running the program with none is a usage error.
+    sub = parser.add_subparsers(dest="command", required=True)
+    # Index commands.
+    index_parser = sub.add_parser("index", help="Index a repository")
+    index_parser.add_argument("path", help="Path to repository")
+    index_parser.set_defaults(func=cmd_index)
+    search_parser = sub.add_parser("search", help="Search an indexed repository")
+    search_parser.add_argument("path", help="Path to repository")
+    search_parser.add_argument("query", help="Search query")
+    search_parser.add_argument("--k", type=int, default=8, help="Number of results")
+    search_parser.set_defaults(func=cmd_search)
+    doctor_parser = sub.add_parser("doctor", help="Check Mikoshi setup")
+    # The repo path is optional for doctor only (nargs="?").
+    doctor_parser.add_argument("path", nargs="?", help="Optional repo path")
+    doctor_parser.set_defaults(func=cmd_doctor)
+    status_parser = sub.add_parser("status", help="Show index status")
+    status_parser.add_argument("path", help="Path to repository")
+    status_parser.set_defaults(func=cmd_status)
+    clear_parser = sub.add_parser("clear", help="Clear index data")
+    clear_parser.add_argument("path", help="Path to repository")
+    clear_parser.set_defaults(func=cmd_clear)
+    # Account commands; none of them take arguments.
+    login_parser = sub.add_parser("login", help="Sign in")
+    login_parser.set_defaults(func=cmd_login)
+    logout_parser = sub.add_parser("logout", help="Clear local auth state")
+    logout_parser.set_defaults(func=cmd_logout)
+    whoami_parser = sub.add_parser("whoami", help="Show current auth status")
+    whoami_parser.set_defaults(func=cmd_whoami)
+    # "auth" is a command group with its own required subcommand ("configure").
+    auth_parser = sub.add_parser("auth", help="Auth configuration")
+    auth_sub = auth_parser.add_subparsers(dest="auth_command", required=True)
+    auth_configure = auth_sub.add_parser("configure", help="Set auth config")
+    auth_configure.set_defaults(func=cmd_auth_configure)
+    return parser
+def main(argv: list[str] | None = None) -> int:
+    """Parse ``argv``, apply the verbosity setting, run the chosen subcommand and return its exit code."""
+    namespace = build_parser().parse_args(argv)
+    suppress_output = not namespace.verbose
+    # The setting is both exported through the environment and passed on directly.
+    os.environ["MIKOSHI_QUIET_EXTERNAL_LIBS"] = "0" if namespace.verbose else "1"
+    configure_external_libs(suppress_output)
+    handler = namespace.func
+    return int(handler(namespace))
+if __name__ == "__main__":
+    # Exit the process with the status code returned by the selected command.
+    sys.exit(main())