bithub 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bithub/__init__.py +3 -0
- bithub/api.py +286 -0
- bithub/builder.py +235 -0
- bithub/cli.py +401 -0
- bithub/config.py +102 -0
- bithub/dashboard_api.py +50 -0
- bithub/downloader.py +362 -0
- bithub/logging_setup.py +42 -0
- bithub/model_manager.py +206 -0
- bithub/registry.json +68 -0
- bithub/registry.py +55 -0
- bithub/repl.py +203 -0
- bithub/server.py +226 -0
- bithub/static/app.js +200 -0
- bithub/static/index.html +51 -0
- bithub/static/style.css +72 -0
- bithub-0.1.0.dist-info/METADATA +175 -0
- bithub-0.1.0.dist-info/RECORD +22 -0
- bithub-0.1.0.dist-info/WHEEL +5 -0
- bithub-0.1.0.dist-info/entry_points.txt +2 -0
- bithub-0.1.0.dist-info/licenses/LICENSE +21 -0
- bithub-0.1.0.dist-info/top_level.txt +1 -0
bithub/downloader.py
ADDED
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model downloader — pull BitNet GGUF models from HuggingFace.
|
|
3
|
+
|
|
4
|
+
Downloads into ~/.bithub/models/<model_name>/ with progress bars.
|
|
5
|
+
Uses huggingface_hub for reliable, resumable downloads.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
import shutil
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import List, Optional, Tuple
|
|
12
|
+
|
|
13
|
+
from huggingface_hub import hf_hub_download, HfApi
|
|
14
|
+
from huggingface_hub.utils import (
|
|
15
|
+
EntryNotFoundError,
|
|
16
|
+
RepositoryNotFoundError,
|
|
17
|
+
GatedRepoError,
|
|
18
|
+
)
|
|
19
|
+
from rich.console import Console
|
|
20
|
+
from rich.progress import (
|
|
21
|
+
Progress,
|
|
22
|
+
SpinnerColumn,
|
|
23
|
+
TextColumn,
|
|
24
|
+
BarColumn,
|
|
25
|
+
DownloadColumn,
|
|
26
|
+
TransferSpeedColumn,
|
|
27
|
+
TimeRemainingColumn,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
from bithub.config import MODELS_DIR, ensure_dirs
|
|
31
|
+
from bithub.registry import get_model_info
|
|
32
|
+
|
|
33
|
+
console = Console()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_gguf_filename(model_info: dict) -> str:
    """
    Determine the GGUF filename to download for a registry entry.

    Strategy:
    1. List the files of the HuggingFace repo and keep the ``.gguf`` ones.
    2. Prefer the first one whose name contains ``quant_type``
       (compared case-insensitively).
    3. Otherwise use the first GGUF file listed.
    4. If the repo cannot be listed, or lists no GGUF file, guess
       ``<name>-<quant_type>.gguf``.

    Args:
        model_info: Registry entry. Must contain ``hf_repo`` and ``name``;
            ``quant_type`` is optional and defaults to ``"i2_s"``.

    Returns:
        Repo-relative filename to request from the hub.

    Raises:
        KeyError: If ``hf_repo`` or ``name`` is missing from ``model_info``.
    """
    import logging

    repo_id = model_info["hf_repo"]
    name = model_info["name"]
    quant = model_info.get("quant_type", "i2_s")

    # Listing is best-effort: any failure falls through to the guessed name,
    # but the reason is logged instead of being silently discarded.
    try:
        files = HfApi().list_repo_files(repo_id)
    except Exception as exc:
        logging.getLogger(__name__).debug(
            "Could not list files of %s: %s", repo_id, exc
        )
        files = []

    gguf_files = [f for f in files if f.endswith(".gguf")]

    # Prefer the file matching our quant_type. A repo with a single GGUF file
    # needs no special case: it is returned here or by the fallback below.
    quant_key = str(quant).lower()
    for f in gguf_files:
        if quant_key in f.lower():
            return f

    # Fall back to the first GGUF file
    if gguf_files:
        return gguf_files[0]

    # Last resort: conventional name derived from the registry entry
    return f"{name}-{quant}.gguf"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def is_model_downloaded(model_name: str) -> bool:
    """Report whether ``MODELS_DIR/<model_name>`` exists and holds a ``*.gguf`` file."""
    candidate_dir = MODELS_DIR / model_name
    if candidate_dir.exists():
        # One match is enough; no need to collect them all.
        return any(True for _ in candidate_dir.glob("*.gguf"))
    return False
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def get_downloaded_models() -> List[dict]:
    """Describe every sub-directory of ``MODELS_DIR`` that contains GGUF files.

    Returns:
        One dict per model directory (sorted by path) with the keys ``name``,
        ``path``, ``gguf_file`` (first GGUF found) and ``size_mb`` (rounded
        total size of all GGUF files in that directory).
    """
    ensure_dirs()
    if not MODELS_DIR.exists():
        return []

    bytes_per_mb = 1024 * 1024
    entries = []
    for candidate in sorted(MODELS_DIR.iterdir()):
        # Only directories can hold a model; anything else is skipped.
        ggufs = list(candidate.glob("*.gguf")) if candidate.is_dir() else []
        if not ggufs:
            continue
        total_bytes = sum(item.stat().st_size for item in ggufs)
        entry = {
            "name": candidate.name,
            "path": str(candidate),
            "gguf_file": str(ggufs[0]),
            "size_mb": round(total_bytes / bytes_per_mb),
        }
        entries.append(entry)
    return entries
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def get_model_gguf_path(model_name: str) -> Optional[Path]:
    """Return the first ``*.gguf`` file found in the model's directory, or None."""
    matches = (MODELS_DIR / model_name).glob("*.gguf")
    return next(iter(matches), None)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _compute_sha256(file_path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *file_path*.

    The file is streamed through one reusable 1 MiB buffer, so hashing a
    multi-gigabyte model neither loads it into memory nor allocates a new
    bytes object per chunk.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    buffer = bytearray(1024 * 1024)
    view = memoryview(buffer)
    with open(file_path, "rb") as stream:
        while True:
            count = stream.readinto(view)
            if not count:
                break
            # Hash only the bytes actually read on this pass.
            digest.update(view[:count])
    return digest.hexdigest()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _write_checksum(gguf_path: Path) -> None:
    """Store the GGUF file's SHA-256 hex digest in a sibling file named ``sha256``."""
    digest = _compute_sha256(gguf_path)
    target = gguf_path.parent / "sha256"
    target.write_text(digest)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def verify_checksum(model_name: str) -> bool:
    """Check a downloaded model's GGUF file against its stored SHA-256.

    Returns:
        True only when the model has a GGUF file, a sibling ``sha256`` file
        exists, and the recomputed digest equals the stored one.
    """
    gguf_path = get_model_gguf_path(model_name)
    if gguf_path is not None:
        checksum_file = gguf_path.parent / "sha256"
        if checksum_file.exists():
            expected = checksum_file.read_text().strip()
            return _compute_sha256(gguf_path) == expected
    return False
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _parse_size_mb(size_mb: int) -> int:
    """Return the number of bytes in *size_mb* mebibytes."""
    bytes_per_mb = 1024 * 1024
    return size_mb * bytes_per_mb
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _check_disk_space(target_dir: Path, size_mb: int) -> None:
    """Abort if the filesystem holding *target_dir* lacks room for the download.

    A fixed 1GB safety buffer is required on top of the download size.

    Args:
        target_dir: Directory the download will be written to. It does not
            have to exist yet; the nearest existing ancestor is measured.
        size_mb: Expected download size in MB. A missing, zero or negative
            value means "unknown" and skips the check.

    Raises:
        SystemExit: With code 1 (after printing a message) when free space
            is below the download size plus the buffer.
    """
    if not size_mb or size_mb < 0:
        return
    required = _parse_size_mb(size_mb)

    # disk_usage needs an existing path: climb until one is found, stopping
    # at the filesystem root (or "." for a relative path).
    check_path = target_dir
    while not check_path.exists() and check_path != check_path.parent:
        check_path = check_path.parent

    usage = shutil.disk_usage(check_path)
    buffer = 1024**3  # 1GB buffer
    if usage.free < required + buffer:
        free_gb = usage.free / 1024**3
        req_gb = required / 1024**3
        # Mention the buffer so the numbers shown explain the refusal.
        console.print(
            f"[red]Insufficient disk space.[/red] "
            f"Need {req_gb:.1f}GB plus a 1GB safety buffer, "
            f"only {free_gb:.1f}GB free at {target_dir}"
        )
        raise SystemExit(1)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def is_direct_hf_pull(model_ref: str) -> bool:
    """Check whether *model_ref* is a direct HuggingFace reference.

    A direct reference has the form ``hf:<org>/<repo>`` where both the
    organisation and the repository part are non-empty, so malformed
    inputs such as ``hf:/``, ``hf:org/`` or ``hf:/repo`` are rejected.
    """
    prefix = "hf:"
    if not model_ref.startswith(prefix):
        return False
    org, slash, repo = model_ref[len(prefix):].partition("/")
    # Ignore trailing slashes when deciding whether a repo name is present.
    return bool(slash and org and repo.strip("/"))
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def parse_hf_uri(model_ref: str) -> Tuple[str, str]:
    """Parse ``hf:org/repo`` into ``(repo_id, short_name)``.

    The ``hf:`` prefix is removed only when it is actually present, and
    surrounding whitespace and slashes are dropped so the short name (the
    last path component of the repo id) is never empty for a valid reference.

    Returns:
        ``(repo_id, short_name)``, e.g. ``("org/repo", "repo")``.
    """
    prefix = "hf:"
    repo_id = model_ref[len(prefix):] if model_ref.startswith(prefix) else model_ref
    repo_id = repo_id.strip().strip("/")
    short_name = repo_id.rsplit("/", 1)[-1]
    return repo_id, short_name
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def download_model(model_name: str, force: bool = False) -> Path:
    """
    Download a model from HuggingFace.

    Args:
        model_name: Short name from registry (e.g. '2B-4T')
        force: If True, re-download even if already present

    Returns:
        Path to the downloaded GGUF file (or to the existing one when the
        model is already present and ``force`` is False)

    Raises:
        SystemExit on errors (after printing helpful messages)
    """
    ensure_dirs()

    # Look up model in registry
    info = get_model_info(model_name)
    if not info:
        console.print(f"[red]Unknown model: {model_name}[/red]")
        console.print("Run [bold]bithub models[/bold] to see available models.")
        raise SystemExit(1)

    model_dir = MODELS_DIR / model_name
    repo_id = info["hf_repo"]

    # Check if already downloaded. This runs before the disk-space check so a
    # nearly full disk cannot block returning a model that is already there.
    if not force and is_model_downloaded(model_name):
        existing = get_model_gguf_path(model_name)
        console.print(f"[green]Model {model_name} already downloaded:[/green] {existing}")
        console.print("Use [bold]--force[/bold] to re-download.")
        return existing

    # Check disk space before downloading
    size_hint = info.get("size_mb", 0)
    _check_disk_space(MODELS_DIR, size_hint)

    # Determine which file to download. Optional metadata is read with .get()
    # so an entry lacking it does not abort the download with a KeyError.
    console.print(f"\n[bold]Pulling {info['name']}[/bold]")
    console.print(f" Repository: [dim]{repo_id}[/dim]")
    console.print(f" Parameters: {info.get('parameters', 'unknown')}")
    if size_hint:
        console.print(f" Estimated size: ~{size_hint}MB\n")
    else:
        console.print(" Estimated size: unknown\n")

    with console.status("[bold blue]Finding GGUF file in repository..."):
        try:
            gguf_filename = get_gguf_filename(info)
        except Exception as e:
            console.print(f"[red]Failed to list repository files: {e}[/red]")
            raise SystemExit(1)

    console.print(f" Downloading: [cyan]{gguf_filename}[/cyan]\n")

    # Download with huggingface_hub (it handles progress, caching, and resumption)
    try:
        downloaded_path = hf_hub_download(
            repo_id=repo_id,
            filename=gguf_filename,
            local_dir=str(model_dir),
            local_dir_use_symlinks=False,
        )
        downloaded_path = Path(downloaded_path)

    # GatedRepoError is a subclass of RepositoryNotFoundError, so it must be
    # handled first or the gated-repo guidance is never shown.
    except GatedRepoError:
        console.print(f"[red]Access denied: {repo_id} is a gated repository.[/red]")
        console.print("You may need to accept the model's license on HuggingFace first:")
        console.print(f" [link]https://huggingface.co/{repo_id}[/link]")
        raise SystemExit(1)

    except RepositoryNotFoundError:
        console.print(f"[red]Repository not found: {repo_id}[/red]")
        console.print("The model may have been removed or the repo ID may be wrong.")
        raise SystemExit(1)

    except EntryNotFoundError:
        console.print(f"[red]File not found: {gguf_filename}[/red]")
        console.print(f"This file doesn't exist in {repo_id}.")
        console.print("The model registry may need updating.")
        raise SystemExit(1)

    except Exception as e:
        console.print(f"[red]Download failed: {e}[/red]")
        raise SystemExit(1)

    # Verify the file exists and has size
    if not downloaded_path.exists():
        console.print("[red]Download seemed to succeed but file not found.[/red]")
        raise SystemExit(1)

    size_mb = downloaded_path.stat().st_size / (1024 * 1024)
    console.print("\n[green]Downloaded successfully![/green]")
    console.print(f" File: {downloaded_path}")
    console.print(f" Size: {size_mb:.0f} MB")

    # Record a local checksum so verify_checksum() can detect later corruption
    _write_checksum(downloaded_path)
    console.print(" Checksum: [dim]SHA256 written[/dim]")

    return downloaded_path
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def download_direct_hf(repo_id: str, name: Optional[str] = None, force: bool = False) -> Path:
    """Download a GGUF model directly from any HuggingFace repo.

    Args:
        repo_id: HuggingFace repository id (``org/repo``).
        name: Local model name, used as the directory name under
            ``MODELS_DIR``. Defaults to the last component of ``repo_id``.
        force: If True, re-download even if already present.

    Returns:
        Path to the downloaded GGUF file (or to the existing one when the
        model is already present and ``force`` is False).

    Raises:
        SystemExit: With code 1, after printing a message, when the name is
            unusable, the repo cannot be listed, it has no GGUF file, or the
            download fails.
    """
    ensure_dirs()

    if not name:
        # Ignore trailing slashes so "org/repo/" still yields "repo".
        name = repo_id.rstrip("/").split("/")[-1]

    # The name becomes a directory directly under MODELS_DIR. An empty or
    # path-like name would target MODELS_DIR itself or a location outside it.
    if not name or name in (".", "..") or "/" in name or "\\" in name:
        console.print(f"[red]Invalid model name: {name!r}[/red]")
        raise SystemExit(1)

    model_dir = MODELS_DIR / name

    if not force and is_model_downloaded(name):
        existing = get_model_gguf_path(name)
        console.print(f"[green]Model {name} already downloaded:[/green] {existing}")
        console.print("Use [bold]--force[/bold] to re-download.")
        return existing

    console.print("\n[bold]Pulling from HuggingFace[/bold]")
    console.print(f" Repository: [dim]{repo_id}[/dim]")
    console.print(" [yellow]Not in curated registry. Compatibility not guaranteed.[/yellow]\n")

    with console.status("[bold blue]Finding GGUF file in repository..."):
        try:
            api = HfApi()
            files = api.list_repo_files(repo_id)
            gguf_files = [f for f in files if f.endswith(".gguf")]
        except Exception as e:
            console.print(f"[red]Failed to access repository: {e}[/red]")
            raise SystemExit(1)

    if not gguf_files:
        console.print(f"[red]No GGUF files found in {repo_id}[/red]")
        raise SystemExit(1)

    # The first listed GGUF file is used; the user is told when there were others.
    gguf_filename = gguf_files[0]
    if len(gguf_files) > 1:
        console.print(
            f" Found {len(gguf_files)} GGUF files, downloading: [cyan]{gguf_filename}[/cyan]\n"
        )
    else:
        console.print(f" Downloading: [cyan]{gguf_filename}[/cyan]\n")

    try:
        downloaded_path = hf_hub_download(
            repo_id=repo_id,
            filename=gguf_filename,
            local_dir=str(model_dir),
            local_dir_use_symlinks=False,
        )
        downloaded_path = Path(downloaded_path)
    except Exception as e:
        console.print(f"[red]Download failed: {e}[/red]")
        raise SystemExit(1)

    size_mb = downloaded_path.stat().st_size / (1024 * 1024)
    console.print("\n[green]Downloaded successfully![/green]")
    console.print(f" File: {downloaded_path}")
    console.print(f" Size: {size_mb:.0f} MB")

    _write_checksum(downloaded_path)
    console.print(" Checksum: [dim]SHA256 written[/dim]")

    # Record the model as a custom registry entry marked as a direct pull.
    from bithub.registry import save_custom_model
    save_custom_model(name, {
        "hf_repo": repo_id,
        "name": name,
        "source": "direct",
    })

    return downloaded_path
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def remove_model(model_name: str) -> bool:
    """
    Remove a downloaded model.

    Only a directory that is a direct child of ``MODELS_DIR`` is ever
    deleted. An empty name, ``"."``, ``".."``, an absolute path or a name
    containing path separators is refused, because it would resolve to the
    models directory itself or to a location outside it.

    Returns True if removed, False if not found (or if the name is refused).
    """
    model_dir = MODELS_DIR / model_name

    # pathlib collapses "" and "." to MODELS_DIR itself, so comparing the
    # parent also rejects those; ".." survives joining and is tested by name.
    is_direct_child = (
        model_dir.parent == MODELS_DIR and model_dir.name not in ("", "..")
    )
    if not is_direct_child or not model_dir.is_dir():
        return False

    shutil.rmtree(model_dir)
    return True
|
bithub/logging_setup.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Structured logging setup for bithub."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from logging.handlers import RotatingFileHandler
|
|
5
|
+
|
|
6
|
+
from bithub.config import LOG_PATH, BITHUB_HOME
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def setup_logging(debug: bool = False, verbose: bool = False) -> None:
    """Configure logging for bithub.

    - File handler always writes to LOG_PATH (DEBUG level and above),
      rotated at 10 MB with 3 backups.
    - Console handler only active with --debug (DEBUG) or --verbose (INFO).
    - Default: no console handler is attached to the "bithub" logger.

    Calling this again replaces the previous handlers; the old ones are
    closed so the log file handle is not leaked.

    Args:
        debug: Attach a console handler at DEBUG level.
        verbose: Attach a console handler at INFO level (ignored when
            ``debug`` is also set).
    """
    BITHUB_HOME.mkdir(parents=True, exist_ok=True)

    logger = logging.getLogger("bithub")
    logger.setLevel(logging.DEBUG)

    # Detach and close existing handlers; clearing the list alone would
    # leave their file descriptors open.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    # File handler — always on, rotated
    file_handler = RotatingFileHandler(
        LOG_PATH, maxBytes=10 * 1024 * 1024, backupCount=3, encoding="utf-8"
    )
    file_handler.setLevel(logging.DEBUG)
    file_fmt = logging.Formatter(
        "%(asctime)s %(levelname)-8s %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    file_handler.setFormatter(file_fmt)
    logger.addHandler(file_handler)

    # Console handler — only with flags
    if debug or verbose:
        try:
            from rich.logging import RichHandler
            console_handler = RichHandler(rich_tracebacks=True, show_path=False)
        except ImportError:
            # Plain stderr output when rich's logging handler is unavailable
            console_handler = logging.StreamHandler()  # type: ignore[assignment]
        console_handler.setLevel(logging.DEBUG if debug else logging.INFO)
        logger.addHandler(console_handler)
|
bithub/model_manager.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Model manager — manages multiple llama-server backend processes."""
|
|
2
|
+
|
|
3
|
+
import signal
|
|
4
|
+
import subprocess
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
|
|
13
|
+
from bithub.builder import get_server_binary
|
|
14
|
+
|
|
15
|
+
console = Console()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class BackendProcess:
    """Manages a single llama-server subprocess.

    The child's stdout is discarded and its stderr is redirected to a
    temporary file rather than a pipe. Nothing reads the child's output
    while it runs, so an undrained pipe could fill up and block it.
    """
    process: Optional[subprocess.Popen] = None
    model_name: str = ""
    backend_port: int = 8081
    ready: bool = False
    # Temp file receiving the child's stderr; None while no child is tracked.
    _stderr_path: Optional[Path] = field(
        default=None, init=False, repr=False, compare=False
    )

    def start(
        self,
        gguf_path: Path,
        threads: int = 2,
        context_size: int = 2048,
    ) -> bool:
        """Start the llama-server backend process.

        Args:
            gguf_path: Model file passed to the server with ``-m``.
            threads: Value passed with ``-t``.
            context_size: Value passed with ``-c``.

        Returns:
            True once the health endpoint answers 200. False if no server
            binary is available, the process cannot be launched, it exits
            early, or it is not ready within the timeout. In the failure
            cases no child process is left running.
        """
        import os
        import tempfile

        server_bin = get_server_binary()
        if not server_bin:
            console.print("[red]No server binary found. Run bithub setup first.[/red]")
            return False

        cmd = [
            str(server_bin),
            "-m", str(gguf_path),
            "--host", "127.0.0.1",
            "--port", str(self.backend_port),
            "-t", str(threads),
            "-c", str(context_size),
        ]

        # Never orphan a previously started child of this object.
        self.stop()

        fd, log_name = tempfile.mkstemp(prefix="bithub-backend-", suffix=".log")
        self._stderr_path = Path(log_name)
        try:
            self.process = subprocess.Popen(
                cmd, stdout=subprocess.DEVNULL, stderr=fd,
            )
        except OSError as exc:
            console.print(
                f"[red]Failed to launch backend for {self.model_name}: {exc}[/red]"
            )
            self._discard_stderr_log()
            return False
        finally:
            # The child holds its own copy of the descriptor.
            os.close(fd)

        self.ready = self._wait_for_ready()
        if not self.ready:
            # Covers the timeout case, where the child is still alive.
            self.stop()
        return self.ready

    def _wait_for_ready(self, timeout: int = 60) -> bool:
        """Poll the backend health endpoint until ready.

        Returns:
            True when ``/health`` answers with status 200; False when the
            process exits first or *timeout* seconds elapse.
        """
        deadline = time.monotonic() + timeout
        health_url = f"http://127.0.0.1:{self.backend_port}/health"
        while time.monotonic() < deadline:
            if self.process and self.process.poll() is not None:
                stderr = self._read_stderr_tail()
                console.print(f"[red]Backend for {self.model_name} exited unexpectedly.[/red]")
                if stderr:
                    console.print(f"[dim]{stderr}[/dim]")
                return False
            try:
                resp = httpx.get(health_url, timeout=2)
                if resp.status_code == 200:
                    return True
            except httpx.HTTPError:
                # Any transport failure just means "not up yet"; keep polling.
                pass
            time.sleep(1)
        return False

    def _read_stderr_tail(self, limit: int = 500) -> str:
        """Return the last *limit* characters the child wrote to stderr."""
        if self._stderr_path is None:
            return ""
        try:
            text = self._stderr_path.read_text(errors="replace")
        except OSError:
            return ""
        return text[-limit:]

    def _discard_stderr_log(self) -> None:
        """Delete the stderr capture file, if any."""
        if self._stderr_path is not None:
            try:
                self._stderr_path.unlink()
            except OSError:
                pass
            self._stderr_path = None

    def stop(self) -> None:
        """Gracefully stop the backend process.

        Sends SIGTERM, waits up to 5 seconds, then kills the process if it
        is still alive. Safe to call when nothing is running.
        """
        if self.process:
            if self.process.poll() is None:
                self.process.send_signal(signal.SIGTERM)
                try:
                    self.process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    self.process.kill()
                    self.process.wait()
            self.process = None
        self._discard_stderr_log()
        self.ready = False

    @property
    def is_running(self) -> bool:
        """True while a child process exists and has not exited."""
        return self.process is not None and self.process.poll() is None
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class ModelManager:
    """Manages multiple model backends with port allocation.

    Models are first registered (which assigns each a backend port counting
    up from ``base_port``) and started later, either all at once or lazily
    on first use.
    """

    def __init__(self, base_port: int = 8081, max_models: int = 3) -> None:
        """Create an empty manager.

        Args:
            base_port: Port given to the first registered model; each
                further registration gets the next port number.
            max_models: Maximum number of models that may be registered.
        """
        self.base_port = base_port
        self.max_models = max_models
        # name -> launch settings (gguf_path, threads, context_size, backend_port)
        self.models: Dict[str, dict] = {}
        # name -> backend that was started successfully
        self.backends: Dict[str, BackendProcess] = {}
        self._next_port = base_port
        self.stats: Dict[str, int] = {"requests": 0, "tokens_generated": 0}
        # Set by start_all(); None until then.
        self._start_time: Optional[float] = None

    def register(
        self,
        name: str,
        gguf_path: Path,
        threads: int = 2,
        context_size: int = 2048,
    ) -> None:
        """Register a model for serving (does not start the backend yet).

        Registering an already-known name is a no-op; the first
        registration's settings are kept.

        Raises:
            ValueError: If ``max_models`` models are already registered.
        """
        if name in self.models:
            return
        if len(self.models) >= self.max_models:
            raise ValueError(
                f"Maximum {self.max_models} models allowed. "
                f"Already registered: {list(self.models.keys())}"
            )
        port = self._next_port
        self._next_port += 1
        self.models[name] = {
            "gguf_path": gguf_path,
            "threads": threads,
            "context_size": context_size,
            "backend_port": port,
        }

    def start_model(self, name: str) -> bool:
        """Start the backend for a registered model.

        Returns:
            True if the backend is (or already was) running, False if the
            model is not registered or the backend failed to start.
        """
        if name not in self.models:
            return False
        if name in self.backends and self.backends[name].is_running:
            return True

        info = self.models[name]
        backend = BackendProcess(
            model_name=name, backend_port=info["backend_port"],
        )
        console.print(f" [bold]Starting backend for {name}...[/bold]")
        success = backend.start(
            gguf_path=info["gguf_path"],
            threads=info["threads"],
            context_size=info["context_size"],
        )
        if success:
            self.backends[name] = backend
            console.print(f" [green]{name} ready on port {info['backend_port']}[/green]")
        else:
            # A backend that failed to become ready may still have a live
            # child. It is not tracked in self.backends, so stop it here or
            # nothing ever will.
            backend.stop()
        return success

    def get_stats(self) -> dict:
        """Return uptime, request count and model counts as a dict."""
        uptime = time.time() - self._start_time if self._start_time else 0
        return {
            "uptime_seconds": int(uptime),
            "total_requests": self.stats["requests"],
            "models_loaded": sum(1 for m in self.list_models() if m["loaded"]),
            "models_registered": len(self.models),
        }

    def record_request(self) -> None:
        """Increment the request counter."""
        self.stats["requests"] += 1

    def start_all(self) -> bool:
        """Start backends for all registered models.

        Also records the start time used for the uptime statistic.

        Returns:
            True only if every registered model started successfully.
        """
        self._start_time = time.time()
        all_ok = True
        for name in self.models:
            if not self.start_model(name):
                all_ok = False
        return all_ok

    def stop_all(self) -> None:
        """Stop all running backends."""
        for name, backend in self.backends.items():
            console.print(f" [dim]Stopping {name}...[/dim]")
            backend.stop()
        self.backends.clear()

    def get_backend_url(self, model_name: str) -> Optional[str]:
        """Get the backend URL for a model, or None if it is not registered."""
        if model_name not in self.models:
            return None
        port = self.models[model_name]["backend_port"]
        return f"http://127.0.0.1:{port}"

    def is_loaded(self, model_name: str) -> bool:
        """Check if a model's backend is running."""
        return model_name in self.backends and self.backends[model_name].is_running

    def ensure_loaded(self, model_name: str) -> bool:
        """Ensure a model is loaded (lazy loading).

        Returns:
            True if the backend is running after the call, False if the
            model is unknown or could not be started.
        """
        if self.is_loaded(model_name):
            return True
        if model_name in self.models:
            return self.start_model(model_name)
        return False

    def list_models(self) -> List[dict]:
        """List all registered models with their status.

        Returns:
            One dict per registered model with keys ``name``, ``loaded``
            and ``port``, in registration order.
        """
        return [
            {
                "name": name,
                "loaded": self.is_loaded(name),
                "port": info["backend_port"],
            }
            for name, info in self.models.items()
        ]
|
bithub/registry.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
{
|
|
2
|
+
"models": {
|
|
3
|
+
"2B-4T": {
|
|
4
|
+
"name": "BitNet-b1.58-2B-4T",
|
|
5
|
+
"hf_repo": "microsoft/BitNet-b1.58-2B-4T-gguf",
|
|
6
|
+
"parameters": "2.4B",
|
|
7
|
+
"quant_type": "i2_s",
|
|
8
|
+
"description": "Microsoft's official 2.4B parameter 1-bit model",
|
|
9
|
+
"size_mb": 1800
|
|
10
|
+
},
|
|
11
|
+
"700M": {
|
|
12
|
+
"name": "bitnet_b1_58-large",
|
|
13
|
+
"hf_repo": "1bitLLM/bitnet_b1_58-large",
|
|
14
|
+
"parameters": "0.7B",
|
|
15
|
+
"quant_type": "i2_s",
|
|
16
|
+
"description": "Community 700M parameter 1-bit model",
|
|
17
|
+
"size_mb": 500
|
|
18
|
+
},
|
|
19
|
+
"3B": {
|
|
20
|
+
"name": "bitnet_b1_58-3B",
|
|
21
|
+
"hf_repo": "1bitLLM/bitnet_b1_58-3B",
|
|
22
|
+
"parameters": "3.3B",
|
|
23
|
+
"quant_type": "tl1",
|
|
24
|
+
"description": "Community 3.3B parameter 1-bit model",
|
|
25
|
+
"size_mb": 2500
|
|
26
|
+
},
|
|
27
|
+
"8B": {
|
|
28
|
+
"name": "Llama3-8B-1.58-100B-tokens",
|
|
29
|
+
"hf_repo": "HF1BitLLM/Llama3-8B-1.58-100B-tokens",
|
|
30
|
+
"parameters": "8.0B",
|
|
31
|
+
"quant_type": "i2_s",
|
|
32
|
+
"description": "Llama3 8B trained in 1.58-bit",
|
|
33
|
+
"size_mb": 5000
|
|
34
|
+
},
|
|
35
|
+
"falcon3-1B": {
|
|
36
|
+
"name": "Falcon3-1B-Instruct-1.58bit",
|
|
37
|
+
"hf_repo": "tiiuae/Falcon3-1B-Instruct-1.58bit",
|
|
38
|
+
"parameters": "1B",
|
|
39
|
+
"quant_type": "i2_s",
|
|
40
|
+
"description": "Falcon3 1B instruction-tuned 1-bit model",
|
|
41
|
+
"size_mb": 700
|
|
42
|
+
},
|
|
43
|
+
"falcon3-3B": {
|
|
44
|
+
"name": "Falcon3-3B-Instruct-1.58bit",
|
|
45
|
+
"hf_repo": "tiiuae/Falcon3-3B-Instruct-1.58bit",
|
|
46
|
+
"parameters": "3B",
|
|
47
|
+
"quant_type": "i2_s",
|
|
48
|
+
"description": "Falcon3 3B instruction-tuned 1-bit model",
|
|
49
|
+
"size_mb": 2000
|
|
50
|
+
},
|
|
51
|
+
"falcon3-7B": {
|
|
52
|
+
"name": "Falcon3-7B-Instruct-1.58bit",
|
|
53
|
+
"hf_repo": "tiiuae/Falcon3-7B-Instruct-1.58bit",
|
|
54
|
+
"parameters": "7B",
|
|
55
|
+
"quant_type": "i2_s",
|
|
56
|
+
"description": "Falcon3 7B instruction-tuned 1-bit model",
|
|
57
|
+
"size_mb": 4500
|
|
58
|
+
},
|
|
59
|
+
"falcon3-10B": {
|
|
60
|
+
"name": "Falcon3-10B-Instruct-1.58bit",
|
|
61
|
+
"hf_repo": "tiiuae/Falcon3-10B-Instruct-1.58bit",
|
|
62
|
+
"parameters": "10B",
|
|
63
|
+
"quant_type": "i2_s",
|
|
64
|
+
"description": "Falcon3 10B instruction-tuned 1-bit model",
|
|
65
|
+
"size_mb": 6500
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|