bithub 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bithub/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """bithub — Ollama for 1-bit LLMs."""
2
+
3
+ __version__ = "0.1.0"
bithub/api.py ADDED
@@ -0,0 +1,286 @@
1
+ """
2
+ OpenAI-compatible API layer for bithub.
3
+
4
+ Wraps the bitnet.cpp inference engine behind a FastAPI server that
5
+ speaks the OpenAI Chat Completions protocol. Any app that works with
6
+ OpenAI (Open WebUI, Cursor, custom scripts) can connect directly.
7
+
8
+ Endpoints:
9
+ GET /v1/models — list available/loaded models
10
+ POST /v1/chat/completions — chat completion (streaming + non-streaming)
11
+ GET /health — server health check
12
+ """
13
+
14
+ import json
15
+ import sys
16
+ from pathlib import Path
17
+ from typing import List, Optional, Union
18
+
19
+ import httpx
20
+ from fastapi import FastAPI, HTTPException
21
+ from fastapi.responses import StreamingResponse, JSONResponse
22
+ from pydantic import BaseModel, Field, validator
23
+ from rich.console import Console
24
+
25
+ from bithub.downloader import get_downloaded_models
26
+ from bithub.model_manager import ModelManager
27
+
28
+ console = Console()
29
+
30
+
31
+ # ──────────────────────────────────────────────────────────────
32
+ # Pydantic models for OpenAI-compatible request/response
33
+ # ──────────────────────────────────────────────────────────────
34
+
35
+
36
class ChatMessage(BaseModel):
    """One chat turn in the OpenAI Chat Completions wire format."""

    # Typically "system", "user", or "assistant"; not validated here —
    # the backend is trusted to reject unknown roles if it cares.
    role: str
    content: str
39
+
40
+
41
class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible subset).

    Field bounds mirror the OpenAI API: temperature in [0, 2],
    top_p in [0, 1], max_tokens strictly positive.
    """

    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0)
    top_p: Optional[float] = Field(default=0.9, ge=0.0, le=1.0)
    max_tokens: Optional[int] = Field(default=512, gt=0)
    stream: Optional[bool] = False
    # OpenAI accepts either a single stop string or a list of them.
    stop: Optional[Union[List[str], str]] = None

    @validator("messages")
    def messages_must_not_be_empty(cls, v):
        """Reject an empty messages list (surfaces as a 422 to the client)."""
        # Idiomatic truthiness test instead of len(v) == 0.
        if not v:
            raise ValueError("messages must not be empty")
        return v
55
+
56
+
57
class ChatCompletionChoice(BaseModel):
    """A single completion choice within a chat completion response."""

    # This server produces one choice per response, so index stays 0.
    index: int = 0
    message: ChatMessage
    # Defaults to "stop"; presumably overwritten when the backend reports
    # a different reason (e.g. token limit) — TODO confirm against backend.
    finish_reason: str = "stop"
61
+
62
+
63
class UsageInfo(BaseModel):
    """Token-accounting block of a chat completion response.

    All counts default to 0 when the backend does not report usage.
    """

    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
67
+
68
+
69
class ChatCompletionResponse(BaseModel):
    """Top-level OpenAI-compatible envelope for /v1/chat/completions."""

    id: str = "chatcmpl-bitnet"
    object: str = "chat.completion"
    created: int = 0
    model: str = ""
    # default_factory is the documented pydantic form for mutable defaults,
    # and matches the style already used for `usage` below.
    choices: List[ChatCompletionChoice] = Field(default_factory=list)
    usage: UsageInfo = Field(default_factory=UsageInfo)
76
+
77
+
78
+ # ──────────────────────────────────────────────────────────────
79
+ # FastAPI app
80
+ # ──────────────────────────────────────────────────────────────
81
+
82
+
83
def create_app(
    model_name: str,
    gguf_path: Path,
    threads: int = 2,
    context_size: int = 2048,
    backend_port: int = 8081,
    manager: Optional[ModelManager] = None,
) -> FastAPI:
    """
    Create the FastAPI app with model backend(s).

    In single-model mode (no manager provided), creates a ModelManager
    with a single registered model for backwards compatibility.

    In multi-model mode, uses the provided ModelManager which may have
    multiple models registered.

    Args:
        model_name: Name of the model to serve (single-model mode)
        gguf_path: Path to the GGUF file (single-model mode)
        threads: CPU threads for inference
        context_size: Context window size
        backend_port: Internal port for the llama-server backend
        manager: Optional ModelManager for multi-model mode

    Returns:
        Configured FastAPI app
    """
    # Single-model mode: wrap the one model in a manager so both modes
    # share the same request-handling code below.
    if manager is None:
        manager = ModelManager(base_port=backend_port)
        manager.register(model_name, gguf_path, threads=threads, context_size=context_size)

    app = FastAPI(
        title="bithub API",
        description="OpenAI-compatible API for BitNet 1-bit LLMs",
        version="0.1.0",
    )

    # Imported lazily — presumably to avoid a circular import at module
    # load time (dashboard_api likely imports from this package); confirm.
    from bithub.dashboard_api import init_dashboard
    dashboard_router = init_dashboard(manager)
    app.include_router(dashboard_router)

    # Serve the bundled dashboard UI only when its static assets exist.
    from fastapi.staticfiles import StaticFiles
    from fastapi.responses import FileResponse
    static_dir = Path(__file__).parent / "static"
    if static_dir.exists():
        @app.get("/")
        async def dashboard_root():
            return FileResponse(static_dir / "index.html")
        app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")

    @app.on_event("startup")
    async def startup():
        # Launch the backend process for every registered model.
        console.print("\n[bold]Starting model backends...[/bold]")
        manager.start_all()
        loaded = [m["name"] for m in manager.list_models() if m["loaded"]]
        if loaded:
            console.print(f"[green]Ready! Models: {', '.join(loaded)}[/green]\n")

    @app.on_event("shutdown")
    async def shutdown():
        console.print("\n[yellow]Shutting down backends...[/yellow]")
        manager.stop_all()

    # ── Health ──────────────────────────────────────────────

    @app.get("/health")
    async def health():
        """Liveness probe: reports how many model backends are up."""
        loaded = [m for m in manager.list_models() if m["loaded"]]
        return {
            "status": "ok" if loaded else "no_models_loaded",
            "models_loaded": len(loaded),
        }

    # ── Models ──────────────────────────────────────────────

    @app.get("/v1/models")
    async def list_models_endpoint():
        """List available models (OpenAI-compatible)."""
        models = []
        for m in manager.list_models():
            models.append({
                "id": m["name"],
                "object": "model",
                "created": 0,
                "owned_by": "bithub",
                "status": "loaded" if m["loaded"] else "available",
            })
        # Also advertise downloaded-but-unregistered models as "available".
        downloaded = get_downloaded_models()
        registered_names = {m["name"] for m in manager.list_models()}
        for d in downloaded:
            if d["name"] not in registered_names:
                models.append({
                    "id": d["name"],
                    "object": "model",
                    "created": 0,
                    "owned_by": "bithub",
                    "status": "available",
                })
        return {"object": "list", "data": models}

    # ── Chat Completions ────────────────────────────────────

    @app.post("/v1/chat/completions")
    async def chat_completions(request: ChatCompletionRequest):
        """OpenAI-compatible chat completion endpoint."""
        model_name = request.model  # intentionally shadows the outer arg
        # Lazy-load: a registered-but-stopped model is started on demand;
        # a completely unknown model is a 404.
        if not manager.is_loaded(model_name):
            if model_name in manager.models:
                if not manager.ensure_loaded(model_name):
                    raise HTTPException(
                        status_code=503,
                        detail=f"Failed to start backend for {model_name}",
                    )
            else:
                available = [m["name"] for m in manager.list_models()]
                raise HTTPException(
                    status_code=404,
                    detail=f"Model '{model_name}' not found. Available: {available}",
                )

        backend_url = manager.get_backend_url(model_name)
        if not backend_url:
            raise HTTPException(status_code=503, detail="Backend not available")

        url = f"{backend_url}/v1/chat/completions"
        payload = {
            "model": request.model,
            "messages": [{"role": m.role, "content": m.content} for m in request.messages],
            "temperature": request.temperature,
            "top_p": request.top_p,
            "max_tokens": request.max_tokens,
            "stream": request.stream,
        }

        # Only forward `stop` when the client supplied one.
        if request.stop:
            payload["stop"] = request.stop

        if request.stream:
            return await _stream_response(url, payload, model_name)
        else:
            return await _non_stream_response(url, payload, model_name)

    async def _non_stream_response(
        backend_url: str, payload: dict, model_name: str
    ) -> JSONResponse:
        """Forward a non-streaming request to the backend."""
        try:
            async with httpx.AsyncClient(timeout=300) as client:
                resp = await client.post(backend_url, json=payload)

            if resp.status_code != 200:
                raise HTTPException(
                    status_code=resp.status_code,
                    detail=f"Backend error: {resp.text[:500]}",
                )

            data = resp.json()
            # Ensure the model name matches what the user expects
            data["model"] = model_name
            return JSONResponse(content=data)

        except httpx.ConnectError:
            raise HTTPException(status_code=503, detail="Backend not reachable")
        except httpx.ReadTimeout:
            raise HTTPException(status_code=504, detail="Backend timed out")

    async def _stream_response(
        backend_url: str, payload: dict, model_name: str
    ) -> StreamingResponse:
        """Forward a streaming request to the backend as server-sent events."""
        async def generate():
            try:
                async with httpx.AsyncClient(timeout=300) as client:
                    async with client.stream(
                        "POST", backend_url, json=payload
                    ) as resp:
                        # FIX: surface backend HTTP errors as an SSE error
                        # event instead of silently streaming an empty body.
                        if resp.status_code != 200:
                            body = await resp.aread()
                            error = {
                                "error": {
                                    "message": f"Backend error: {body.decode(errors='replace')[:500]}",
                                    "type": "server_error",
                                }
                            }
                            yield f"data: {json.dumps(error)}\n\n"
                            yield "data: [DONE]\n\n"
                            return
                        async for line in resp.aiter_lines():
                            if line.startswith("data: "):
                                chunk_str = line[6:]
                                if chunk_str.strip() == "[DONE]":
                                    yield "data: [DONE]\n\n"
                                    break
                                try:
                                    chunk = json.loads(chunk_str)
                                    # Rewrite the model field so clients see
                                    # the name they asked for.
                                    chunk["model"] = model_name
                                    yield f"data: {json.dumps(chunk)}\n\n"
                                except json.JSONDecodeError:
                                    # Pass non-JSON payload lines through.
                                    yield f"data: {chunk_str}\n\n"
            except httpx.ConnectError:
                error = {"error": {"message": "Backend not reachable", "type": "server_error"}}
                yield f"data: {json.dumps(error)}\n\n"
                yield "data: [DONE]\n\n"
            # FIX: a read timeout previously escaped the generator and killed
            # the stream mid-flight; report it and close the stream cleanly.
            except httpx.ReadTimeout:
                error = {"error": {"message": "Backend timed out", "type": "server_error"}}
                yield f"data: {json.dumps(error)}\n\n"
                yield "data: [DONE]\n\n"

        return StreamingResponse(
            generate(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
            },
        )

    return app
bithub/builder.py ADDED
@@ -0,0 +1,235 @@
1
+ """
2
+ Model builder — clone and build bitnet.cpp for local inference.
3
+
4
+ bitnet.cpp is Microsoft's inference engine optimized for 1-bit LLMs.
5
+ This module handles cloning the repo and running the build process,
6
+ so the user doesn't have to do it manually.
7
+ """
8
+
9
+ import subprocess
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import List, Optional
13
+
14
+ from rich.console import Console
15
+
16
+ from bithub.config import BITNET_CPP_DIR, PREBUILT_DIR, ensure_dirs
17
+
18
+ console = Console()
19
+
20
+ BITNET_CPP_REPO = "https://github.com/microsoft/BitNet.git"
21
+
22
+
23
def is_bitnet_cpp_built() -> bool:
    """Return True when a usable bitnet.cpp binary exists (prebuilt or compiled)."""
    # A prebuilt server or CLI binary is sufficient on its own.
    if any((PREBUILT_DIR / name).exists() for name in ("llama-server", "llama-cli")):
        return True
    # Otherwise a compiled build tree must exist and contain a binary.
    if not (BITNET_CPP_DIR / "build").exists():
        return False
    return _find_inference_binary() is not None
32
+
33
+
34
def _find_inference_binary() -> Optional[Path]:
    """Locate the compiled bitnet.cpp inference binary, if present."""
    bin_dir = BITNET_CPP_DIR / "build" / "bin"
    # Checked in preference order: llama-cli, then the legacy name main,
    # then llama-server as a last resort.
    names = ("llama-cli", "main", "llama-server")
    return next((bin_dir / name for name in names if (bin_dir / name).exists()), None)
45
+
46
+
47
def _find_server_binary() -> Optional[Path]:
    """Locate the compiled bitnet.cpp HTTP server binary, if present."""
    bin_dir = BITNET_CPP_DIR / "build" / "bin"
    # llama-server is the current name; server is the legacy one.
    for name in ("llama-server", "server"):
        candidate = bin_dir / name
        if candidate.exists():
            return candidate
    return None
57
+
58
+
59
def get_inference_binary() -> Optional[Path]:
    """Return the preferred inference binary: prebuilt first, then compiled."""
    prebuilt = PREBUILT_DIR / "llama-cli"
    return prebuilt if prebuilt.exists() else _find_inference_binary()
65
+
66
+
67
def get_server_binary() -> Optional[Path]:
    """Return the preferred server binary: prebuilt first, then compiled."""
    prebuilt = PREBUILT_DIR / "llama-server"
    return prebuilt if prebuilt.exists() else _find_server_binary()
73
+
74
+
75
def _run_command(cmd: List[str], cwd: Optional[Path] = None, desc: str = "") -> bool:
    """Run a shell command with live output.

    Args:
        cmd: Command and arguments, passed to subprocess without a shell.
        cwd: Working directory for the command, or None for the current one.
        desc: Optional short description echoed before running.

    Returns:
        True when the command exits 0; False on non-zero exit or when the
        executable is not found on PATH.
    """
    if desc:
        console.print(f" [dim]{desc}[/dim]")

    try:
        # capture_output=False streams build output straight to the user's
        # terminal; check=True raises CalledProcessError on non-zero exit.
        # FIX: dropped the unused `result =` binding — the return value of
        # subprocess.run was never read.
        subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=False,
            text=True,
            check=True,
        )
        return True
    except subprocess.CalledProcessError as e:
        console.print(f"[red]Command failed (exit code {e.returncode}): {' '.join(cmd)}[/red]")
        return False
    except FileNotFoundError:
        console.print(f"[red]Command not found: {cmd[0]}[/red]")
        console.print("Make sure required build tools are installed (cmake, clang, git).")
        return False
96
+
97
+
98
def _check_prerequisites() -> List[str]:
    """Return the required build tools that cannot be invoked on this system."""

    def runnable(tool: str) -> bool:
        # A tool counts as present only if `<tool> --version` exits cleanly.
        try:
            subprocess.run(
                [tool, "--version"],
                capture_output=True,
                check=True,
            )
            return True
        except (subprocess.CalledProcessError, FileNotFoundError):
            return False

    return [tool for tool in ("git", "cmake", "python3") if not runnable(tool)]
111
+
112
+
113
def clone_bitnet_cpp(force: bool = False) -> bool:
    """
    Clone the bitnet.cpp repository.

    Args:
        force: If True, remove any existing clone and re-clone from scratch.

    Returns:
        True if the repository is present (already cloned or freshly cloned).
    """
    ensure_dirs()

    # Fast path: reuse an existing clone unless the caller forces a refresh.
    if BITNET_CPP_DIR.exists() and not force:
        console.print("[green]bitnet.cpp already cloned.[/green]")
        return True

    if BITNET_CPP_DIR.exists() and force:
        import shutil  # local import keeps the module's top-level deps minimal
        console.print("[yellow]Removing existing bitnet.cpp clone...[/yellow]")
        shutil.rmtree(BITNET_CPP_DIR)

    # FIX: dropped the extraneous f-prefix — this message has no placeholders
    # (ruff F541); the string itself is unchanged.
    console.print("\n[bold]Cloning bitnet.cpp[/bold]")
    console.print(f" From: {BITNET_CPP_REPO}")
    console.print(f" To: {BITNET_CPP_DIR}\n")

    # --recursive also fetches the repository's submodules.
    return _run_command(
        ["git", "clone", "--recursive", BITNET_CPP_REPO, str(BITNET_CPP_DIR)],
        desc="git clone --recursive ...",
    )
142
+
143
+
144
def build_bitnet_cpp() -> bool:
    """
    Build bitnet.cpp, preferring its bundled setup_env.py over raw cmake.

    The setup_env.py script in bitnet.cpp handles:
    - Installing Python dependencies
    - Running cmake configure
    - Building the project

    Returns:
        True if the build command(s) completed successfully.
    """
    if not BITNET_CPP_DIR.exists():
        console.print("[red]bitnet.cpp not cloned. Run clone first.[/red]")
        return False

    setup_script = BITNET_CPP_DIR / "setup_env.py"

    # Preferred path: upstream's own helper drives the whole build.
    if setup_script.exists():
        console.print("\n[bold]Building bitnet.cpp via setup_env.py[/bold]\n")
        return _run_command(
            [sys.executable, str(setup_script), "-md", "dummy"],
            cwd=BITNET_CPP_DIR,
            desc="python setup_env.py ...",
        )

    # Fallback path: plain two-step cmake configure + build.
    console.print("\n[bold]Building bitnet.cpp via cmake[/bold]\n")
    build_dir = BITNET_CPP_DIR / "build"
    build_dir.mkdir(exist_ok=True)

    configured = _run_command(
        ["cmake", "..", "-DCMAKE_BUILD_TYPE=Release"],
        cwd=build_dir,
        desc="cmake configure ...",
    )
    # Short-circuits exactly like the original early return on failure.
    return configured and _run_command(
        ["cmake", "--build", ".", "--config", "Release", "-j"],
        cwd=build_dir,
        desc="cmake build ...",
    )
188
+
189
+
190
def setup_bitnet_cpp(force: bool = False) -> bool:
    """
    Full setup: check prerequisites, clone, and build bitnet.cpp.

    This is the main entry point — it does everything needed to get
    the inference engine ready.

    Args:
        force: If True, re-clone and rebuild even when binaries already exist.

    Returns:
        True if the engine is ready to use.
    """
    # Skip all work when a usable binary is already present.
    if not force and is_bitnet_cpp_built():
        binary = get_inference_binary()
        console.print(f"[green]bitnet.cpp already built:[/green] {binary}")
        return True

    # Fail fast with install hints when build tools are missing.
    console.print("[bold]Checking build prerequisites...[/bold]")
    missing = _check_prerequisites()
    if missing:
        console.print(f"[red]Missing required tools: {', '.join(missing)}[/red]")
        console.print("\nInstall them first:")
        console.print(" macOS: brew install cmake llvm git")
        console.print(" Ubuntu: sudo apt install cmake clang git")
        return False
    console.print("[green]All prerequisites found.[/green]")

    # Clone
    if not clone_bitnet_cpp(force=force):
        return False

    # Build
    if not build_bitnet_cpp():
        return False

    # Verify the build actually produced a binary we can find.
    binary = get_inference_binary()
    if binary:
        # FIX: dropped the extraneous f-prefix — no placeholders in this
        # message (ruff F541); the text itself is unchanged.
        console.print("\n[green]bitnet.cpp built successfully![/green]")
        console.print(f" Binary: {binary}")
        return True
    else:
        console.print("[red]Build completed but inference binary not found.[/red]")
        console.print("You may need to build manually. See:")
        console.print(f" {BITNET_CPP_DIR}/README.md")
        return False