PyPI - bithub - Versions diffs - 0.1.0__py3-none-any.whl - Mend

bithub 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

bithub/__init__.py +3 -0
bithub/api.py +286 -0
bithub/builder.py +235 -0
bithub/cli.py +401 -0
bithub/config.py +102 -0
bithub/dashboard_api.py +50 -0
bithub/downloader.py +362 -0
bithub/logging_setup.py +42 -0
bithub/model_manager.py +206 -0
bithub/registry.json +68 -0
bithub/registry.py +55 -0
bithub/repl.py +203 -0
bithub/server.py +226 -0
bithub/static/app.js +200 -0
bithub/static/index.html +51 -0
bithub/static/style.css +72 -0
bithub-0.1.0.dist-info/METADATA +175 -0
bithub-0.1.0.dist-info/RECORD +22 -0
bithub-0.1.0.dist-info/WHEEL +5 -0
bithub-0.1.0.dist-info/entry_points.txt +2 -0
bithub-0.1.0.dist-info/licenses/LICENSE +21 -0
bithub-0.1.0.dist-info/top_level.txt +1 -0

bithub/registry.py ADDED Viewed

@@ -0,0 +1,55 @@
+"""Model registry — loads and queries the curated model catalog."""
+import json
+from pathlib import Path
+from typing import Optional
+from bithub.config import BITHUB_HOME
+REGISTRY_PATH = Path(__file__).parent / "registry.json"
+CUSTOM_MODELS_PATH = BITHUB_HOME / "custom_models.json"
+def load_registry() -> dict:
+    """Load the curated model registry from REGISTRY_PATH.
+    Returns:
+        The parsed registry document, a dict that contains a "models" key.
+    Raises:
+        OSError: if the registry file cannot be opened.
+        json.JSONDecodeError: if the file is not valid JSON.
+        ValueError: if the document is not an object with a "models" key.
+    """
+    # Read as UTF-8 explicitly instead of relying on the platform default.
+    with open(REGISTRY_PATH, encoding="utf-8") as f:
+        data = json.load(f)
+    # A top-level list or scalar must fail with the same clear error.
+    if not isinstance(data, dict) or "models" not in data:
+        raise ValueError(f"Registry {REGISTRY_PATH} missing 'models' key")
+    return data
+def get_model_info(model_name: str) -> Optional[dict]:
+    """Look up *model_name* in the curated registry, then in custom models.
+    A missing or falsy registry entry falls through to the custom-model
+    lookup, whose result (None when absent) is returned as-is.
+    """
+    curated_entry = load_registry()["models"].get(model_name)
+    if not curated_entry:
+        return load_custom_models().get(model_name)
+    return curated_entry
+def load_custom_models() -> dict:
+    """Load the user's custom (directly-pulled) models from CUSTOM_MODELS_PATH.
+    Best-effort: a missing, unreadable, malformed, or non-object file yields
+    an empty dict instead of raising.
+    """
+    try:
+        # A missing file raises FileNotFoundError (an OSError), so a separate
+        # exists() check — and its check-then-open race — is unnecessary.
+        with open(CUSTOM_MODELS_PATH, encoding="utf-8") as f:
+            models = json.load(f)
+    except (ValueError, OSError):
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+        return {}
+    # Callers use .get() and item assignment, so only a dict is usable.
+    return models if isinstance(models, dict) else {}
+def save_custom_model(name: str, info: dict) -> None:
+    """Add or overwrite one entry in custom_models.json.
+    Args:
+        name: Key under which the model is stored.
+        info: JSON-serializable description of the model.
+    Raises:
+        OSError: if the directory or file cannot be written.
+        TypeError: if *info* is not JSON-serializable.
+    """
+    models = load_custom_models()
+    models[name] = info
+    BITHUB_HOME.mkdir(parents=True, exist_ok=True)
+    # Write a sibling temp file and rename it over the target, so an
+    # interrupted write cannot leave a truncated file that the loader would
+    # silently treat as "no custom models".
+    tmp_path = CUSTOM_MODELS_PATH.with_name(CUSTOM_MODELS_PATH.name + ".tmp")
+    with open(tmp_path, "w", encoding="utf-8") as f:
+        json.dump(models, f, indent=2)
+    tmp_path.replace(CUSTOM_MODELS_PATH)
+def list_available_models() -> dict:
+    """Return the "models" mapping of the curated registry."""
+    return load_registry()["models"]

bithub/repl.py ADDED Viewed

@@ -0,0 +1,203 @@
+"""Interactive chat REPL for bithub."""
+import json
+import sys
+import time
+from typing import List, Optional, Tuple
+import httpx
+from rich.console import Console
+from rich.markdown import Markdown
+console = Console()
+def is_slash_command(text: str) -> bool:
+    """Return True when *text* is non-empty and begins with "/"."""
+    if not text:
+        return False
+    return text.startswith("/")
+def parse_slash_command(text: str) -> Tuple[str, str]:
+    """Split slash-command input into (command, argument).
+    The first character is dropped, then the rest is split once on
+    whitespace; a missing part is returned as "".
+    """
+    pieces = text[1:].split(None, 1)
+    if not pieces:
+        return "", ""
+    if len(pieces) == 1:
+        return pieces[0], ""
+    return pieces[0], pieces[1]
+class ChatSession:
+    """Conversation state for the REPL plus the streaming chat request.
+    Attributes:
+        model: Model name sent in every request payload.
+        api_url: API base URL with any trailing "/" removed.
+        messages: Conversation turns as {"role", "content"} dicts.
+        system_prompt: Optional system message prepended to each request.
+        total_tokens: Counter reset by clear().
+    """
+    # NOTE(review): nothing in this class increments total_tokens — confirm
+    # whether it is updated elsewhere.
+    def __init__(self, model: str, api_url: str) -> None:
+        self.model = model
+        self.api_url = api_url.rstrip("/")
+        self.messages: List[dict] = []
+        self.system_prompt: Optional[str] = None
+        self.total_tokens = 0
+    def add_message(self, role: str, content: str) -> None:
+        """Append one turn to the conversation history."""
+        self.messages.append({"role": role, "content": content})
+    def clear(self) -> None:
+        """Forget all turns and reset the token counter; the system prompt is kept."""
+        self.messages.clear()
+        self.total_tokens = 0
+    def set_system_prompt(self, prompt: str) -> None:
+        """Set the system prompt used by subsequent requests."""
+        self.system_prompt = prompt
+    def build_payload(self) -> dict:
+        """Build the streaming chat-completions request body."""
+        msgs: List[dict] = []
+        if self.system_prompt:
+            msgs.append({"role": "system", "content": self.system_prompt})
+        msgs.extend(self.messages)
+        return {
+            "model": self.model,
+            "messages": msgs,
+            "stream": True,
+        }
+    def export(self) -> str:
+        """Render the history as "role: content" paragraphs separated by blank lines."""
+        return "\n\n".join(f"{msg['role']}: {msg['content']}" for msg in self.messages)
+    @staticmethod
+    def _extract_delta(data: str) -> str:
+        """Return the text delta carried by one SSE JSON payload, or ""."""
+        try:
+            chunk = json.loads(data)
+        except json.JSONDecodeError:
+            return ""
+        if not isinstance(chunk, dict):
+            return ""
+        # "choices" may be present but empty; indexing it directly would
+        # raise IndexError and abort the stream.
+        choices = chunk.get("choices") or [{}]
+        delta = choices[0].get("delta") or {}
+        return delta.get("content") or ""
+    def send_and_stream(self) -> str:
+        """Send the current conversation and echo the streamed reply to stdout.
+        Returns:
+            The full assistant text received; "" on a non-200 status, and
+            whatever arrived before the failure on a transport error.
+        """
+        payload = self.build_payload()
+        url = f"{self.api_url}/v1/chat/completions"
+        pieces: List[str] = []
+        try:
+            with httpx.stream("POST", url, json=payload, timeout=120.0) as response:
+                if response.status_code != 200:
+                    console.print(f"[red]API error: {response.status_code}[/red]")
+                    return ""
+                for line in response.iter_lines():
+                    if not line or not line.startswith("data: "):
+                        continue
+                    data = line[6:]
+                    if data == "[DONE]":
+                        break
+                    content = self._extract_delta(data)
+                    if content:
+                        sys.stdout.write(content)
+                        sys.stdout.flush()
+                        pieces.append(content)
+        except httpx.ConnectError:
+            console.print("[red]Cannot connect to API server.[/red]")
+            console.print("Is the server running? Start with: bithub serve <model>")
+        except httpx.ReadTimeout:
+            console.print("\n[yellow]Response timed out.[/yellow]")
+        except httpx.HTTPError as exc:
+            # Any other transport/protocol failure must not crash the REPL.
+            console.print(f"\n[red]Request failed: {type(exc).__name__}[/red]")
+        sys.stdout.write("\n")
+        return "".join(pieces)
+HELP_TEXT = """[bold]Available commands:[/bold]
+  /help              Show this help
+  /clear             Clear conversation history
+  /system <prompt>   Set system prompt
+  /model             Show current model info
+  /export            Save conversation to file
+  /quit              Exit chat
+"""
+def handle_slash_command(cmd: str, arg: str, session: ChatSession) -> Optional[str]:
+    """Execute one parsed slash command against *session*.
+    Args:
+        cmd: Command name without the leading "/".
+        arg: Remainder of the input line ("" when absent).
+        session: Chat session to inspect or mutate.
+    Returns:
+        "quit" when the REPL should exit, otherwise None.
+    """
+    if cmd == "help":
+        console.print(HELP_TEXT)
+    elif cmd == "clear":
+        session.clear()
+        console.print("[dim]Conversation cleared.[/dim]")
+    elif cmd == "system":
+        if not arg:
+            # Without an argument the command only reports the current prompt.
+            if session.system_prompt:
+                console.print(f"[dim]Current system prompt: {session.system_prompt}[/dim]")
+            else:
+                console.print("[dim]No system prompt set. Usage: /system <prompt>[/dim]")
+        else:
+            session.set_system_prompt(arg)
+            console.print("[dim]System prompt set.[/dim]")
+    elif cmd == "model":
+        console.print(f"[bold]Model:[/bold] {session.model}")
+        console.print(f"[bold]API:[/bold] {session.api_url}")
+        console.print(f"[bold]Messages:[/bold] {len(session.messages)}")
+        console.print(f"[bold]Tokens:[/bold] ~{session.total_tokens}")
+    elif cmd == "export":
+        # The argument, when given, is the destination file name.
+        filename = arg if arg else f"chat-{int(time.time())}.txt"
+        content = session.export()
+        if not content:
+            console.print("[yellow]Nothing to export.[/yellow]")
+        else:
+            try:
+                with open(filename, "w", encoding="utf-8") as f:
+                    f.write(content)
+            except OSError as exc:
+                # A bad path or permission problem must not end the chat.
+                console.print(f"[red]Could not write {filename}: {exc.strerror or exc}[/red]")
+            else:
+                console.print(f"[green]Saved to {filename}[/green]")
+    elif cmd in ("quit", "exit", "q"):
+        return "quit"
+    else:
+        console.print(f"[yellow]Unknown command: /{cmd}[/yellow]")
+        console.print("Type [bold]/help[/bold] for available commands.")
+    return None
+def start_repl(model: str, api_url: str) -> None:
+    """Run the interactive chat loop until the user quits.
+    Uses prompt_toolkit with a history file under BITHUB_HOME when that
+    import succeeds, and falls back to input() otherwise.
+    Args:
+        model: Model name shown in the prompt and sent with each request.
+        api_url: Base URL of the chat API.
+    """
+    try:
+        from prompt_toolkit import PromptSession
+        from prompt_toolkit.history import FileHistory
+        from bithub.config import BITHUB_HOME
+        history_file = BITHUB_HOME / "repl_history"
+        BITHUB_HOME.mkdir(parents=True, exist_ok=True)
+        prompt_session: Optional[PromptSession] = PromptSession(
+            history=FileHistory(str(history_file))
+        )
+    except ImportError:
+        prompt_session = None
+    session = ChatSession(model=model, api_url=api_url)
+    console.print(f"[bold green]Chat with {model}[/bold green]")
+    console.print(f"[dim]API: {api_url} | Type /help for commands | Ctrl+D to exit[/dim]\n")
+    while True:
+        try:
+            if prompt_session:
+                user_input = prompt_session.prompt(f"[{model}] > ")
+            else:
+                user_input = input(f"[{model}] > ")
+        except (EOFError, KeyboardInterrupt):
+            console.print("\n[green]Goodbye![/green]")
+            break
+        user_input = user_input.strip()
+        if not user_input:
+            continue
+        if is_slash_command(user_input):
+            cmd, arg = parse_slash_command(user_input)
+            result = handle_slash_command(cmd, arg, session)
+            if result == "quit":
+                console.print("[green]Goodbye![/green]")
+                break
+            continue
+        # Send message to API
+        session.add_message("user", user_input)
+        console.print()
+        try:
+            response = session.send_and_stream()
+        except KeyboardInterrupt:
+            # Ctrl+C while streaming cancels this reply, not the whole REPL.
+            console.print("\n[yellow]Interrupted.[/yellow]")
+            response = ""
+        if response:
+            session.add_message("assistant", response)
+        else:
+            # No reply arrived: drop the unanswered user turn so the next
+            # request does not carry two consecutive user messages.
+            session.messages.pop()
+        console.print()

bithub/server.py ADDED Viewed

@@ -0,0 +1,226 @@
+"""
+Server — start bithub with an OpenAI-compatible API.
+Two modes:
+  1. `serve` — starts a FastAPI server that proxies to the bitnet.cpp
+     backend, providing /v1/chat/completions and /v1/models.
+  2. `run` — interactive terminal chat via llama-cli.
+"""
+import signal
+import subprocess
+import sys
+import threading
+from pathlib import Path
+from typing import List, Optional
+import httpx
+from rich.console import Console
+from bithub.builder import get_server_binary, get_inference_binary, is_bitnet_cpp_built
+from bithub.config import DEFAULT_HOST, DEFAULT_PORT
+from bithub.downloader import get_model_gguf_path, is_model_downloaded
+from bithub.registry import get_model_info
+console = Console()
+def _preflight_check(model_name: str) -> Path:
+    """
+    Verify that the engine is built and the model is available locally.
+    Returns the model's GGUF path; prints an explanation and raises
+    SystemExit(1) on the first failed check.
+    """
+    def _abort(*lines: str) -> None:
+        # Print each explanatory line, then terminate with exit status 1.
+        for text in lines:
+            console.print(text)
+        raise SystemExit(1)
+    if not is_bitnet_cpp_built():
+        _abort(
+            "[red]bitnet.cpp is not built yet.[/red]",
+            "Run [bold]bithub setup[/bold] first to clone and build the engine.",
+        )
+    if not is_model_downloaded(model_name):
+        _abort(
+            f"[red]Model {model_name} is not downloaded.[/red]",
+            f"Run [bold]bithub pull {model_name}[/bold] first.",
+        )
+    located = get_model_gguf_path(model_name)
+    if not located:
+        _abort(f"[red]Could not find GGUF file for {model_name}.[/red]")
+    return located
+def start_server(
+    model_names: Optional[List[str]] = None,
+    model_name: Optional[str] = None,  # backwards compat
+    host: str = DEFAULT_HOST,
+    port: int = DEFAULT_PORT,
+    threads: int = 2,
+    context_size: int = 2048,
+    lazy: bool = False,
+) -> None:
+    """
+    Start the bithub API server with one or more models.
+    This provides OpenAI-compatible endpoints:
+        GET  /v1/models
+        POST /v1/chat/completions (streaming + non-streaming)
+        GET  /health
+    Args:
+        model_names: List of short names from registry
+        model_name: Single model name (backwards compat); used only when
+            model_names is None or empty
+        host: Address to bind to
+        port: Port to listen on; the model manager is given port + 1 as
+            its base port
+        threads: Number of CPU threads per model
+        context_size: Context window size in tokens
+        lazy: If True, only load models on first request. In this function
+            the flag only affects the startup banner.
+    Raises:
+        SystemExit: if no model is given or a preflight check fails.
+    """
+    # Handle both old and new calling conventions. An empty list is treated
+    # like None; otherwise model_names[0] below would raise IndexError.
+    if not model_names:
+        if model_name:
+            model_names = [model_name]
+        else:
+            console.print("[red]No models specified.[/red]")
+            raise SystemExit(1)
+    from bithub.model_manager import ModelManager
+    from bithub.api import create_app
+    import uvicorn
+    backend_base_port = port + 1
+    manager = ModelManager(base_port=backend_base_port, max_models=len(model_names))
+    gguf_paths: List[Path] = []
+    for name in model_names:
+        gguf_path = _preflight_check(name)
+        gguf_paths.append(gguf_path)
+        manager.register(name, gguf_path, threads=threads, context_size=context_size)
+    console.print("\n[bold green]Starting bithub server[/bold green]")
+    for name in model_names:
+        info = get_model_info(name)
+        display_name = info["name"] if info else name
+        console.print(f"  Model:    {display_name}")
+    console.print(f"  Address:  http://{host}:{port}")
+    console.print(f"  Threads:  {threads} per model")
+    if len(model_names) > 1:
+        console.print(f"  Mode:     {'lazy' if lazy else 'eager'} loading")
+    console.print()
+    console.print("[dim]Press Ctrl+C to stop the server[/dim]\n")
+    # Reuse the path resolved above instead of preflighting the first model twice.
+    app = create_app(
+        model_name=model_names[0],
+        gguf_path=gguf_paths[0],
+        manager=manager,
+    )
+    try:
+        uvicorn.run(app, host=host, port=port, log_level="warning")
+    except KeyboardInterrupt:
+        console.print("\n[green]Server stopped.[/green]")
+def start_background_server(
+    model_name: str,
+    host: str = "127.0.0.1",
+    port: int = 8081,
+    threads: int = 2,
+    context_size: int = 2048,
+) -> threading.Thread:
+    """Run the API app under uvicorn on a daemon thread and return that thread.
+    The model is preflight-checked first; the app is created with
+    backend_port set to port + 1.
+    """
+    gguf_path = _preflight_check(model_name)
+    from bithub.api import create_app
+    import uvicorn
+    app = create_app(
+        model_name=model_name,
+        gguf_path=gguf_path,
+        threads=threads,
+        context_size=context_size,
+        backend_port=port + 1,
+    )
+    uvicorn_options = dict(app=app, host=host, port=port, log_level="error")
+    worker = threading.Thread(target=uvicorn.run, kwargs=uvicorn_options, daemon=True)
+    worker.start()
+    return worker
+def wait_for_server(url: str, timeout: float = 30.0) -> bool:
+    """Poll ``{url}/health`` until it answers 200 or *timeout* seconds pass.
+    Args:
+        url: Base URL of the API server (no trailing slash expected).
+        timeout: Maximum total time to wait, in seconds.
+    Returns:
+        True once a 200 response is received, False if the deadline passes.
+    """
+    import time
+    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        try:
+            if httpx.get(f"{url}/health", timeout=2.0).status_code == 200:
+                return True
+        except httpx.TransportError:
+            # Covers refused connections and every timeout flavour, so a
+            # server that is still starting is retried rather than fatal.
+            pass
+        time.sleep(0.5)
+    return False
+def run_interactive(
+    model_name: str,
+    threads: int = 2,
+    context_size: int = 2048,
+) -> None:
+    """
+    Run interactive chat with a model in the terminal.
+    Launches the inference binary with --interactive and waits for it.
+    Args:
+        model_name: Short name from registry
+        threads: Number of CPU threads
+        context_size: Context window size
+    Raises:
+        SystemExit: if preflight fails or the inference binary is missing.
+    """
+    gguf_path = _preflight_check(model_name)
+    cli_bin = get_inference_binary()
+    if not cli_bin:
+        console.print("[red]No inference binary found.[/red]")
+        raise SystemExit(1)
+    info = get_model_info(model_name)
+    display_name = info["name"] if info else model_name
+    console.print(f"\n[bold green]Chat with {display_name}[/bold green]")
+    console.print(f"  Using: {gguf_path.name}")
+    console.print(f"  Threads: {threads}")
+    console.print("[dim]Press Ctrl+C to exit[/dim]\n")
+    cmd = [
+        str(cli_bin),
+        "-m", str(gguf_path),
+        "-t", str(threads),
+        "-c", str(context_size),
+        "--interactive",
+        "--color",
+    ]
+    # Spawn separately from the wait so the interrupt handler below can
+    # never see an unbound `process`.
+    try:
+        process = subprocess.Popen(cmd)
+    except FileNotFoundError:
+        console.print(
+            "[red]Inference binary not found.[/red] "
+            "Run [bold]bithub setup[/bold] to rebuild."
+        )
+        raise SystemExit(1)
+    try:
+        process.wait()
+    except KeyboardInterrupt:
+        console.print("\n[green]Chat ended.[/green]")
+        process.send_signal(signal.SIGTERM)
+        try:
+            process.wait(timeout=5)
+        except subprocess.TimeoutExpired:
+            process.kill()
+            # Reap the killed child so it does not linger as a zombie.
+            process.wait()
+        return
+    if process.returncode != 0:
+        console.print(
+            f"\n[red]Process exited with code {process.returncode}.[/red] "
+            f"Run [bold]bithub status[/bold] to check your setup."
+        )

bithub/static/app.js ADDED Viewed

@@ -0,0 +1,200 @@
+/* bithub dashboard — single-page app */
+const API_BASE = '';
+/**
+ * Show one dashboard page and mark its nav link active, then refresh the
+ * data that page displays.
+ * @param {string} page - Page key taken from the URL hash (e.g. "chat").
+ */
+function navigateTo(page) {
+    document.querySelectorAll('.page').forEach(p => p.classList.remove('active'));
+    document.querySelectorAll('.nav-link').forEach(l => l.classList.remove('active'));
+    const pageEl = document.getElementById('page-' + page);
+    // Compare data-page values instead of building a selector from the hash:
+    // a hash containing a quote would make querySelector throw.
+    const linkEl = Array.from(document.querySelectorAll('[data-page]'))
+        .find(el => el.dataset.page === page);
+    if (pageEl) pageEl.classList.add('active');
+    if (linkEl) linkEl.classList.add('active');
+    if (page === 'models') loadModels();
+    if (page === 'server') loadStats();
+    if (page === 'settings') loadSettings();
+    if (page === 'chat') loadModelSelect();
+}
+/** Route on every hash change and once immediately; an empty hash means "chat". */
+function initRouter() {
+    const showCurrentPage = () => {
+        navigateTo(location.hash.replace('#/', '') || 'chat');
+    };
+    window.addEventListener('hashchange', showCurrentPage);
+    showCurrentPage();
+}
+const chatMessages = [];
+let streaming = false;
+/**
+ * Refill the #model-select dropdown from /v1/models, keeping the current
+ * selection when one exists. Fetch or parse failures are ignored.
+ */
+function loadModelSelect() {
+    fetch(API_BASE + '/v1/models')
+        .then(resp => resp.json())
+        .then(payload => {
+            const dropdown = document.getElementById('model-select');
+            if (!dropdown) return;
+            const previous = dropdown.value;
+            dropdown.innerHTML = '';
+            for (const entry of (payload.data || [])) {
+                const option = document.createElement('option');
+                option.value = entry.id;
+                const suffix = entry.status === 'loaded' ? ' (loaded)' : '';
+                option.textContent = entry.id + suffix;
+                dropdown.appendChild(option);
+            }
+            if (previous) dropdown.value = previous;
+        })
+        .catch(() => {});
+}
+/** Append a message to the chat history and redraw the transcript. */
+function addChatMessage(role, content) {
+    const entry = { role: role, content: content };
+    chatMessages.push(entry);
+    renderChat();
+}
+/** Rebuild #chat-messages from chatMessages and scroll to the bottom. */
+function renderChat() {
+    const transcript = document.getElementById('chat-messages');
+    if (!transcript) return;
+    transcript.innerHTML = '';
+    for (const entry of chatMessages) {
+        const bubble = document.createElement('div');
+        bubble.className = `message ${entry.role}`;
+        // Only the content is escaped; the role is inserted as-is.
+        bubble.innerHTML =
+            `<span class="message-role">${entry.role}</span>` +
+            `<div class="message-content">${escapeHtml(entry.content)}</div>`;
+        transcript.appendChild(bubble);
+    }
+    transcript.scrollTop = transcript.scrollHeight;
+}
+/** Escape text for use as HTML element content, using a detached element. */
+function escapeHtml(str) {
+    const scratch = Object.assign(document.createElement('div'), { textContent: str });
+    return scratch.innerHTML;
+}
+/**
+ * Send the chat input to /v1/chat/completions and stream the reply into
+ * the transcript. Does nothing while a previous reply is still streaming.
+ */
+async function sendMessage() {
+    const input = document.getElementById('chat-input');
+    const model = document.getElementById('model-select');
+    if (!input || !model || streaming) return;
+    const text = input.value.trim();
+    if (!text) return;
+    input.value = '';
+    addChatMessage('user', text);
+    streaming = true;
+    const messages = chatMessages.map(m => ({ role: m.role, content: m.content }));
+    try {
+        const response = await fetch(API_BASE + '/v1/chat/completions', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ model: model.value, messages, stream: true }),
+        });
+        if (!response.ok) { addChatMessage('assistant', 'Error: ' + response.statusText); return; }
+        const reader = response.body.getReader();
+        const decoder = new TextDecoder();
+        // Keep a reference to the reply so updates do not depend on it
+        // still being the last array element.
+        const reply = { role: 'assistant', content: '' };
+        chatMessages.push(reply);
+        // Network chunks can end mid-line; carry the unfinished tail over
+        // to the next read instead of parsing (and dropping) half a line.
+        let pending = '';
+        let finished = false;
+        while (!finished) {
+            const { done, value } = await reader.read();
+            pending += done ? decoder.decode() : decoder.decode(value, { stream: true });
+            const lines = pending.split('\n');
+            pending = done ? '' : lines.pop();
+            for (const rawLine of lines) {
+                const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
+                if (!line.startsWith('data: ')) continue;
+                const data = line.slice(6);
+                if (data === '[DONE]') { finished = true; break; }
+                try {
+                    const parsed = JSON.parse(data);
+                    const delta = parsed.choices?.[0]?.delta?.content || '';
+                    if (delta) {
+                        reply.content += delta;
+                        renderChat();
+                    }
+                } catch (e) { /* skip malformed event payloads */ }
+            }
+            if (done) {
+                finished = true;
+            } else if (finished) {
+                // [DONE] seen before the stream closed: release the connection.
+                reader.cancel().catch(() => {});
+            }
+        }
+    } catch (err) {
+        addChatMessage('assistant', 'Error: ' + err.message);
+    } finally {
+        // Always re-enable sending, whichever path was taken.
+        streaming = false;
+    }
+}
+/**
+ * Render one card per model from /v1/models, adding size and a Delete
+ * button for models also listed by /api/models/downloaded.
+ * Fetch or parse failures are ignored.
+ */
+function loadModels() {
+    Promise.all([
+        fetch(API_BASE + '/v1/models').then(r => r.json()),
+        fetch(API_BASE + '/api/models/downloaded').then(r => r.json()),
+    ]).then(([modelsResp, downloaded]) => {
+        const container = document.getElementById('models-list');
+        if (!container) return;
+        container.innerHTML = '';
+        const models = modelsResp.data || [];
+        const downloadedList = Array.isArray(downloaded) ? downloaded : [];
+        if (!models.length) {
+            container.innerHTML = '<p style="color:var(--text-secondary)">No models found. Pull one with: bithub pull 2B-4T</p>';
+            return;
+        }
+        models.forEach(m => {
+            const dl = downloadedList.find(d => d.name === m.id);
+            const size = dl ? dl.size_mb + ' MB' : 'N/A';
+            const statusClass = m.status === 'loaded' ? 'status-loaded' : 'status-available';
+            // Build each card as its own element: appending avoids re-parsing
+            // the whole list per model, and every server-supplied string is
+            // escaped before it reaches innerHTML.
+            const card = document.createElement('div');
+            card.className = 'model-card';
+            card.innerHTML =
+                '<h3>' + escapeHtml(m.id) + '</h3>' +
+                '<div class="meta">Size: ' + escapeHtml(size) + '</div>' +
+                '<div class="status"><span class="status-badge ' + statusClass + '">' +
+                escapeHtml(String(m.status)) + '</span></div>';
+            if (dl) {
+                const button = document.createElement('button');
+                button.className = 'btn btn-danger btn-sm';
+                button.style.marginTop = '12px';
+                button.textContent = 'Delete';
+                // A listener closure passes the id safely; an inline onclick
+                // string would break (or execute) on ids containing quotes.
+                button.addEventListener('click', () => deleteModel(m.id));
+                card.appendChild(button);
+            }
+            container.appendChild(card);
+        });
+    }).catch(() => {});
+}
+/**
+ * Ask for confirmation, then DELETE /api/models/<name> and reload the
+ * model list on success. Network failures are ignored.
+ * @param {string} name - Model id to delete.
+ */
+function deleteModel(name) {
+    if (!confirm('Delete model ' + name + '?')) return;
+    // Encode the name so characters such as "/", "?" or "#" cannot alter the URL.
+    const url = API_BASE + '/api/models/' + encodeURIComponent(name);
+    fetch(url, { method: 'DELETE' }).then(r => { if (r.ok) loadModels(); }).catch(() => {});
+}
+/** Fetch /api/stats and render the four stat cards; failures are ignored. */
+function loadStats() {
+    fetch(API_BASE + '/api/stats')
+        .then(resp => resp.json())
+        .then(stats => {
+            const panel = document.getElementById('server-stats');
+            if (!panel) return;
+            const cards = [
+                statCard(formatUptime(stats.uptime_seconds || 0), 'Uptime'),
+                statCard(stats.total_requests || 0, 'Requests'),
+                statCard(stats.models_loaded || 0, 'Models Loaded'),
+                statCard(stats.models_registered || 0, 'Models Registered'),
+            ];
+            panel.innerHTML = cards.join('');
+        })
+        .catch(() => {});
+}
+/** Return the HTML for one stat card; value and label are inserted as-is. */
+function statCard(value, label) {
+    const valueHtml = `<div class="stat-value">${value}</div>`;
+    const labelHtml = `<div class="stat-label">${label}</div>`;
+    return `<div class="stat-card">${valueHtml}${labelHtml}</div>`;
+}
+/**
+ * Format a duration in seconds as "Ns", "Nm" or "Nh Mm".
+ * @param {number} s - Duration in seconds; fractions are dropped, and
+ *     negative or non-numeric input is treated as 0.
+ * @returns {string}
+ */
+function formatUptime(s) {
+    // Floor first so a fractional value renders as "12s", not "12.345678s".
+    const secs = Math.max(0, Math.floor(Number(s) || 0));
+    if (secs < 60) return secs + 's';
+    if (secs < 3600) return Math.floor(secs / 60) + 'm';
+    return Math.floor(secs / 3600) + 'h ' + Math.floor((secs % 3600) / 60) + 'm';
+}
+/**
+ * Fetch /api/config and render the read-only settings form plus the theme
+ * selector. Missing values fall back to the defaults written below;
+ * fetch failures are ignored.
+ */
+function loadSettings() {
+    fetch(API_BASE + '/api/config')
+        .then(resp => resp.json())
+        .then(config => {
+            const form = document.getElementById('settings-form');
+            if (!form) return;
+            const themeOption = (value, text) =>
+                '<option value="' + value + '"' + (getTheme() === value ? ' selected' : '') + '>' + text + '</option>';
+            const parts = [
+                formGroup('Server Port', 'settings-port', config.server?.port || 8080, 'number'),
+                formGroup('Server Host', 'settings-host', config.server?.host || '127.0.0.1', 'text'),
+                formGroup('Threads', 'settings-threads', config.server?.threads || 4, 'number'),
+                formGroup('Min Free GB', 'settings-free-gb', config.download?.min_free_gb || 5, 'number'),
+                '<div class="form-group"><label>Theme</label>',
+                '<select id="theme-select" onchange="toggleTheme(this.value)">',
+                themeOption('dark', 'Dark'),
+                themeOption('light', 'Light'),
+                '</select></div>',
+            ];
+            form.innerHTML = parts.join('');
+        })
+        .catch(() => {});
+}
+/**
+ * Return the HTML for one labelled, read-only input.
+ * @param {string} label - Visible label text.
+ * @param {string} id - Element id, also used for the label's "for".
+ * @param {*} value - Value shown in the input.
+ * @param {string} type - Input type attribute (e.g. "text", "number").
+ * @returns {string}
+ */
+function formGroup(label, id, value, type) {
+    // Values come from the server config; escape them so a quote or angle
+    // bracket cannot close the attribute or inject markup.
+    const esc = v => String(v)
+        .replace(/&/g, '&amp;')
+        .replace(/"/g, '&quot;')
+        .replace(/</g, '&lt;')
+        .replace(/>/g, '&gt;');
+    return '<div class="form-group"><label for="' + esc(id) + '">' + esc(label) + '</label>' +
+        '<input type="' + esc(type) + '" id="' + esc(id) + '" value="' + esc(value) + '" readonly></div>';
+}
+/** Return the theme stored under "bithub-theme", or "dark" when none is stored. */
+function getTheme() {
+    const stored = localStorage.getItem('bithub-theme');
+    return stored || 'dark';
+}
+/** Apply *theme* to the root element's data-theme attribute and persist it. */
+function toggleTheme(theme) {
+    const root = document.documentElement;
+    root.setAttribute('data-theme', theme);
+    localStorage.setItem('bithub-theme', theme);
+}
+// Page bootstrap: apply the stored theme, start routing, wire the chat
+// controls that exist, and refresh server stats every 10 s while visible.
+document.addEventListener('DOMContentLoaded', () => {
+    toggleTheme(getTheme());
+    initRouter();
+    const sendButton = document.getElementById('send-btn');
+    if (sendButton) sendButton.addEventListener('click', sendMessage);
+    const chatInput = document.getElementById('chat-input');
+    if (chatInput) {
+        chatInput.addEventListener('keydown', event => {
+            // Enter sends; Shift+Enter keeps the default behaviour.
+            if (event.key !== 'Enter' || event.shiftKey) return;
+            event.preventDefault();
+            sendMessage();
+        });
+    }
+    const clearButton = document.getElementById('clear-chat');
+    if (clearButton) {
+        clearButton.addEventListener('click', () => {
+            chatMessages.length = 0;
+            renderChat();
+        });
+    }
+    setInterval(() => {
+        const serverPage = document.getElementById('page-server');
+        if (serverPage && serverPage.classList.contains('active')) loadStats();
+    }, 10000);
+});