bithub 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bithub/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """bithub — Ollama for 1-bit LLMs."""
2
+
3
+ __version__ = "0.1.0"
bithub/api.py ADDED
@@ -0,0 +1,286 @@
1
+ """
2
+ OpenAI-compatible API layer for bithub.
3
+
4
+ Wraps the bitnet.cpp inference engine behind a FastAPI server that
5
+ speaks the OpenAI Chat Completions protocol. Any app that works with
6
+ OpenAI (Open WebUI, Cursor, custom scripts) can connect directly.
7
+
8
+ Endpoints:
9
+ GET /v1/models — list available/loaded models
10
+ POST /v1/chat/completions — chat completion (streaming + non-streaming)
11
+ GET /health — server health check
12
+ """
13
+
14
+ import json
15
+ import sys
16
+ from pathlib import Path
17
+ from typing import List, Optional, Union
18
+
19
+ import httpx
20
+ from fastapi import FastAPI, HTTPException
21
+ from fastapi.responses import StreamingResponse, JSONResponse
22
+ from pydantic import BaseModel, Field, validator
23
+ from rich.console import Console
24
+
25
+ from bithub.downloader import get_downloaded_models
26
+ from bithub.model_manager import ModelManager
27
+
28
+ console = Console()
29
+
30
+
31
+ # ──────────────────────────────────────────────────────────────
32
+ # Pydantic models for OpenAI-compatible request/response
33
+ # ──────────────────────────────────────────────────────────────
34
+
35
+
36
class ChatMessage(BaseModel):
    """One chat turn in the OpenAI Chat Completions wire format."""

    # Typically "system", "user", or "assistant"; not validated here —
    # the backend is trusted to reject unknown roles if it cares.
    role: str
    content: str
39
+
40
+
41
class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible subset).

    Field bounds mirror the OpenAI API: temperature in [0, 2],
    top_p in [0, 1], max_tokens strictly positive.
    """

    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0)
    top_p: Optional[float] = Field(default=0.9, ge=0.0, le=1.0)
    max_tokens: Optional[int] = Field(default=512, gt=0)
    stream: Optional[bool] = False
    # OpenAI accepts either a single stop string or a list of them.
    stop: Optional[Union[List[str], str]] = None

    @validator("messages")
    def messages_must_not_be_empty(cls, v):
        """Reject an empty messages list (surfaces as a 422 to the client)."""
        # Idiomatic truthiness test instead of len(v) == 0.
        if not v:
            raise ValueError("messages must not be empty")
        return v
55
+
56
+
57
class ChatCompletionChoice(BaseModel):
    """A single completion choice within a chat completion response."""

    # This server produces one choice per response, so index stays 0.
    index: int = 0
    message: ChatMessage
    # Defaults to "stop"; presumably overwritten when the backend reports
    # a different reason (e.g. token limit) — TODO confirm against backend.
    finish_reason: str = "stop"
61
+
62
+
63
class UsageInfo(BaseModel):
    """Token-accounting block of a chat completion response.

    All counts default to 0 when the backend does not report usage.
    """

    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
67
+
68
+
69
class ChatCompletionResponse(BaseModel):
    """Top-level OpenAI-compatible envelope for /v1/chat/completions."""

    id: str = "chatcmpl-bitnet"
    object: str = "chat.completion"
    created: int = 0
    model: str = ""
    # default_factory is the documented pydantic form for mutable defaults,
    # and matches the style already used for `usage` below.
    choices: List[ChatCompletionChoice] = Field(default_factory=list)
    usage: UsageInfo = Field(default_factory=UsageInfo)
76
+
77
+
78
+ # ──────────────────────────────────────────────────────────────
79
+ # FastAPI app
80
+ # ──────────────────────────────────────────────────────────────
81
+
82
+
83
def create_app(
    model_name: str,
    gguf_path: Path,
    threads: int = 2,
    context_size: int = 2048,
    backend_port: int = 8081,
    manager: Optional[ModelManager] = None,
) -> FastAPI:
    """
    Create the FastAPI app with model backend(s).

    In single-model mode (no manager provided), creates a ModelManager
    with a single registered model for backwards compatibility.

    In multi-model mode, uses the provided ModelManager which may have
    multiple models registered.

    Args:
        model_name: Name of the model to serve (single-model mode)
        gguf_path: Path to the GGUF file (single-model mode)
        threads: CPU threads for inference
        context_size: Context window size
        backend_port: Internal port for the llama-server backend
        manager: Optional ModelManager for multi-model mode

    Returns:
        Configured FastAPI app
    """
    # Single-model mode: wrap the one model in a manager so both modes
    # share the same request-handling code below.
    if manager is None:
        manager = ModelManager(base_port=backend_port)
        manager.register(model_name, gguf_path, threads=threads, context_size=context_size)

    app = FastAPI(
        title="bithub API",
        description="OpenAI-compatible API for BitNet 1-bit LLMs",
        version="0.1.0",
    )

    # Imported lazily — presumably to avoid a circular import at module
    # load time (dashboard_api likely imports from this package); confirm.
    from bithub.dashboard_api import init_dashboard
    dashboard_router = init_dashboard(manager)
    app.include_router(dashboard_router)

    # Serve the bundled dashboard UI only when its static assets exist.
    from fastapi.staticfiles import StaticFiles
    from fastapi.responses import FileResponse
    static_dir = Path(__file__).parent / "static"
    if static_dir.exists():
        @app.get("/")
        async def dashboard_root():
            return FileResponse(static_dir / "index.html")
        app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")

    @app.on_event("startup")
    async def startup():
        # Launch the backend process for every registered model.
        console.print("\n[bold]Starting model backends...[/bold]")
        manager.start_all()
        loaded = [m["name"] for m in manager.list_models() if m["loaded"]]
        if loaded:
            console.print(f"[green]Ready! Models: {', '.join(loaded)}[/green]\n")

    @app.on_event("shutdown")
    async def shutdown():
        console.print("\n[yellow]Shutting down backends...[/yellow]")
        manager.stop_all()

    # ── Health ──────────────────────────────────────────────

    @app.get("/health")
    async def health():
        """Liveness probe: reports how many model backends are up."""
        loaded = [m for m in manager.list_models() if m["loaded"]]
        return {
            "status": "ok" if loaded else "no_models_loaded",
            "models_loaded": len(loaded),
        }

    # ── Models ──────────────────────────────────────────────

    @app.get("/v1/models")
    async def list_models_endpoint():
        """List available models (OpenAI-compatible)."""
        models = []
        for m in manager.list_models():
            models.append({
                "id": m["name"],
                "object": "model",
                "created": 0,
                "owned_by": "bithub",
                "status": "loaded" if m["loaded"] else "available",
            })
        # Also advertise downloaded-but-unregistered models as "available".
        downloaded = get_downloaded_models()
        registered_names = {m["name"] for m in manager.list_models()}
        for d in downloaded:
            if d["name"] not in registered_names:
                models.append({
                    "id": d["name"],
                    "object": "model",
                    "created": 0,
                    "owned_by": "bithub",
                    "status": "available",
                })
        return {"object": "list", "data": models}

    # ── Chat Completions ────────────────────────────────────

    @app.post("/v1/chat/completions")
    async def chat_completions(request: ChatCompletionRequest):
        """OpenAI-compatible chat completion endpoint."""
        model_name = request.model  # intentionally shadows the outer arg
        # Lazy-load: a registered-but-stopped model is started on demand;
        # a completely unknown model is a 404.
        if not manager.is_loaded(model_name):
            if model_name in manager.models:
                if not manager.ensure_loaded(model_name):
                    raise HTTPException(
                        status_code=503,
                        detail=f"Failed to start backend for {model_name}",
                    )
            else:
                available = [m["name"] for m in manager.list_models()]
                raise HTTPException(
                    status_code=404,
                    detail=f"Model '{model_name}' not found. Available: {available}",
                )

        backend_url = manager.get_backend_url(model_name)
        if not backend_url:
            raise HTTPException(status_code=503, detail="Backend not available")

        url = f"{backend_url}/v1/chat/completions"
        payload = {
            "model": request.model,
            "messages": [{"role": m.role, "content": m.content} for m in request.messages],
            "temperature": request.temperature,
            "top_p": request.top_p,
            "max_tokens": request.max_tokens,
            "stream": request.stream,
        }

        # Only forward `stop` when the client supplied one.
        if request.stop:
            payload["stop"] = request.stop

        if request.stream:
            return await _stream_response(url, payload, model_name)
        else:
            return await _non_stream_response(url, payload, model_name)

    async def _non_stream_response(
        backend_url: str, payload: dict, model_name: str
    ) -> JSONResponse:
        """Forward a non-streaming request to the backend."""
        try:
            async with httpx.AsyncClient(timeout=300) as client:
                resp = await client.post(backend_url, json=payload)

            if resp.status_code != 200:
                raise HTTPException(
                    status_code=resp.status_code,
                    detail=f"Backend error: {resp.text[:500]}",
                )

            data = resp.json()
            # Ensure the model name matches what the user expects
            data["model"] = model_name
            return JSONResponse(content=data)

        except httpx.ConnectError:
            raise HTTPException(status_code=503, detail="Backend not reachable")
        except httpx.ReadTimeout:
            raise HTTPException(status_code=504, detail="Backend timed out")

    async def _stream_response(
        backend_url: str, payload: dict, model_name: str
    ) -> StreamingResponse:
        """Forward a streaming request to the backend as server-sent events."""
        async def generate():
            try:
                async with httpx.AsyncClient(timeout=300) as client:
                    async with client.stream(
                        "POST", backend_url, json=payload
                    ) as resp:
                        # FIX: surface backend HTTP errors as an SSE error
                        # event instead of silently streaming an empty body.
                        if resp.status_code != 200:
                            body = await resp.aread()
                            error = {
                                "error": {
                                    "message": f"Backend error: {body.decode(errors='replace')[:500]}",
                                    "type": "server_error",
                                }
                            }
                            yield f"data: {json.dumps(error)}\n\n"
                            yield "data: [DONE]\n\n"
                            return
                        async for line in resp.aiter_lines():
                            if line.startswith("data: "):
                                chunk_str = line[6:]
                                if chunk_str.strip() == "[DONE]":
                                    yield "data: [DONE]\n\n"
                                    break
                                try:
                                    chunk = json.loads(chunk_str)
                                    # Rewrite the model field so clients see
                                    # the name they asked for.
                                    chunk["model"] = model_name
                                    yield f"data: {json.dumps(chunk)}\n\n"
                                except json.JSONDecodeError:
                                    # Pass non-JSON payload lines through.
                                    yield f"data: {chunk_str}\n\n"
            except httpx.ConnectError:
                error = {"error": {"message": "Backend not reachable", "type": "server_error"}}
                yield f"data: {json.dumps(error)}\n\n"
                yield "data: [DONE]\n\n"
            # FIX: a read timeout previously escaped the generator and killed
            # the stream mid-flight; report it and close the stream cleanly.
            except httpx.ReadTimeout:
                error = {"error": {"message": "Backend timed out", "type": "server_error"}}
                yield f"data: {json.dumps(error)}\n\n"
                yield "data: [DONE]\n\n"

        return StreamingResponse(
            generate(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
            },
        )

    return app
bithub/builder.py ADDED
@@ -0,0 +1,235 @@
1
+ """
2
+ Model builder — clone and build bitnet.cpp for local inference.
3
+
4
+ bitnet.cpp is Microsoft's inference engine optimized for 1-bit LLMs.
5
+ This module handles cloning the repo and running the build process,
6
+ so the user doesn't have to do it manually.
7
+ """
8
+
9
+ import subprocess
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import List, Optional
13
+
14
+ from rich.console import Console
15
+
16
+ from bithub.config import BITNET_CPP_DIR, PREBUILT_DIR, ensure_dirs
17
+
18
+ console = Console()
19
+
20
+ BITNET_CPP_REPO = "https://github.com/microsoft/BitNet.git"
21
+
22
+
23
def is_bitnet_cpp_built() -> bool:
    """Return True when a usable bitnet.cpp binary exists (prebuilt or compiled)."""
    # A prebuilt server or CLI binary is sufficient on its own.
    if any((PREBUILT_DIR / name).exists() for name in ("llama-server", "llama-cli")):
        return True
    # Otherwise a compiled build tree must exist and contain a binary.
    if not (BITNET_CPP_DIR / "build").exists():
        return False
    return _find_inference_binary() is not None
32
+
33
+
34
def _find_inference_binary() -> Optional[Path]:
    """Locate the compiled bitnet.cpp inference binary, if present."""
    bin_dir = BITNET_CPP_DIR / "build" / "bin"
    # Checked in preference order: llama-cli, then the legacy name main,
    # then llama-server as a last resort.
    names = ("llama-cli", "main", "llama-server")
    return next((bin_dir / name for name in names if (bin_dir / name).exists()), None)
45
+
46
+
47
def _find_server_binary() -> Optional[Path]:
    """Locate the compiled bitnet.cpp HTTP server binary, if present."""
    bin_dir = BITNET_CPP_DIR / "build" / "bin"
    # llama-server is the current name; server is the legacy one.
    for name in ("llama-server", "server"):
        candidate = bin_dir / name
        if candidate.exists():
            return candidate
    return None
57
+
58
+
59
def get_inference_binary() -> Optional[Path]:
    """Return the preferred inference binary: prebuilt first, then compiled."""
    prebuilt = PREBUILT_DIR / "llama-cli"
    return prebuilt if prebuilt.exists() else _find_inference_binary()
65
+
66
+
67
def get_server_binary() -> Optional[Path]:
    """Return the preferred server binary: prebuilt first, then compiled."""
    prebuilt = PREBUILT_DIR / "llama-server"
    return prebuilt if prebuilt.exists() else _find_server_binary()
73
+
74
+
75
def _run_command(cmd: List[str], cwd: Optional[Path] = None, desc: str = "") -> bool:
    """Run a shell command with live output.

    Args:
        cmd: Command and arguments, passed to subprocess without a shell.
        cwd: Working directory for the command, or None for the current one.
        desc: Optional short description echoed before running.

    Returns:
        True when the command exits 0; False on non-zero exit or when the
        executable is not found on PATH.
    """
    if desc:
        console.print(f" [dim]{desc}[/dim]")

    try:
        # capture_output=False streams build output straight to the user's
        # terminal; check=True raises CalledProcessError on non-zero exit.
        # FIX: dropped the unused `result =` binding — the return value of
        # subprocess.run was never read.
        subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=False,
            text=True,
            check=True,
        )
        return True
    except subprocess.CalledProcessError as e:
        console.print(f"[red]Command failed (exit code {e.returncode}): {' '.join(cmd)}[/red]")
        return False
    except FileNotFoundError:
        console.print(f"[red]Command not found: {cmd[0]}[/red]")
        console.print("Make sure required build tools are installed (cmake, clang, git).")
        return False
96
+
97
+
98
def _check_prerequisites() -> List[str]:
    """Return the required build tools that cannot be invoked on this system."""

    def runnable(tool: str) -> bool:
        # A tool counts as present only if `<tool> --version` exits cleanly.
        try:
            subprocess.run(
                [tool, "--version"],
                capture_output=True,
                check=True,
            )
            return True
        except (subprocess.CalledProcessError, FileNotFoundError):
            return False

    return [tool for tool in ("git", "cmake", "python3") if not runnable(tool)]
111
+
112
+
113
def clone_bitnet_cpp(force: bool = False) -> bool:
    """
    Clone the bitnet.cpp repository.

    Args:
        force: If True, remove any existing clone and re-clone from scratch.

    Returns:
        True if the repository is present (already cloned or freshly cloned).
    """
    ensure_dirs()

    # Fast path: reuse an existing clone unless the caller forces a refresh.
    if BITNET_CPP_DIR.exists() and not force:
        console.print("[green]bitnet.cpp already cloned.[/green]")
        return True

    if BITNET_CPP_DIR.exists() and force:
        import shutil  # local import keeps the module's top-level deps minimal
        console.print("[yellow]Removing existing bitnet.cpp clone...[/yellow]")
        shutil.rmtree(BITNET_CPP_DIR)

    # FIX: dropped the extraneous f-prefix — this message has no placeholders
    # (ruff F541); the string itself is unchanged.
    console.print("\n[bold]Cloning bitnet.cpp[/bold]")
    console.print(f" From: {BITNET_CPP_REPO}")
    console.print(f" To: {BITNET_CPP_DIR}\n")

    # --recursive also fetches the repository's submodules.
    return _run_command(
        ["git", "clone", "--recursive", BITNET_CPP_REPO, str(BITNET_CPP_DIR)],
        desc="git clone --recursive ...",
    )
142
+
143
+
144
def build_bitnet_cpp() -> bool:
    """
    Build bitnet.cpp, preferring its bundled setup_env.py over raw cmake.

    The setup_env.py script in bitnet.cpp handles:
    - Installing Python dependencies
    - Running cmake configure
    - Building the project

    Returns:
        True if the build command(s) completed successfully.
    """
    if not BITNET_CPP_DIR.exists():
        console.print("[red]bitnet.cpp not cloned. Run clone first.[/red]")
        return False

    setup_script = BITNET_CPP_DIR / "setup_env.py"

    # Preferred path: upstream's own helper drives the whole build.
    if setup_script.exists():
        console.print("\n[bold]Building bitnet.cpp via setup_env.py[/bold]\n")
        return _run_command(
            [sys.executable, str(setup_script), "-md", "dummy"],
            cwd=BITNET_CPP_DIR,
            desc="python setup_env.py ...",
        )

    # Fallback path: plain two-step cmake configure + build.
    console.print("\n[bold]Building bitnet.cpp via cmake[/bold]\n")
    build_dir = BITNET_CPP_DIR / "build"
    build_dir.mkdir(exist_ok=True)

    configured = _run_command(
        ["cmake", "..", "-DCMAKE_BUILD_TYPE=Release"],
        cwd=build_dir,
        desc="cmake configure ...",
    )
    # Short-circuits exactly like the original early return on failure.
    return configured and _run_command(
        ["cmake", "--build", ".", "--config", "Release", "-j"],
        cwd=build_dir,
        desc="cmake build ...",
    )
188
+
189
+
190
def setup_bitnet_cpp(force: bool = False) -> bool:
    """
    Full setup: check prerequisites, clone, and build bitnet.cpp.

    This is the main entry point — it does everything needed to get
    the inference engine ready.

    Args:
        force: If True, re-clone and rebuild even when binaries already exist.

    Returns:
        True if the engine is ready to use.
    """
    # Skip all work when a usable binary is already present.
    if not force and is_bitnet_cpp_built():
        binary = get_inference_binary()
        console.print(f"[green]bitnet.cpp already built:[/green] {binary}")
        return True

    # Fail fast with install hints when build tools are missing.
    console.print("[bold]Checking build prerequisites...[/bold]")
    missing = _check_prerequisites()
    if missing:
        console.print(f"[red]Missing required tools: {', '.join(missing)}[/red]")
        console.print("\nInstall them first:")
        console.print(" macOS: brew install cmake llvm git")
        console.print(" Ubuntu: sudo apt install cmake clang git")
        return False
    console.print("[green]All prerequisites found.[/green]")

    # Clone
    if not clone_bitnet_cpp(force=force):
        return False

    # Build
    if not build_bitnet_cpp():
        return False

    # Verify the build actually produced a binary we can find.
    binary = get_inference_binary()
    if binary:
        # FIX: dropped the extraneous f-prefix — no placeholders in this
        # message (ruff F541); the text itself is unchanged.
        console.print("\n[green]bitnet.cpp built successfully![/green]")
        console.print(f" Binary: {binary}")
        return True
    else:
        console.print("[red]Build completed but inference binary not found.[/red]")
        console.print("You may need to build manually. See:")
        console.print(f" {BITNET_CPP_DIR}/README.md")
        return False