@mariozechner/pi 0.2.4 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/vllm_manager.py DELETED
@@ -1,662 +0,0 @@
- #!/usr/bin/env python3
- """
- Simple vLLM Manager - Run multiple models on different ports
- """
-
- import os
- import json
- import subprocess as sp
- import psutil
- import socket
- import base64
- from pathlib import Path
- from typing import Dict, Optional
- from datetime import datetime
-
- # Config
- CONFIG_FILE = Path.home() / ".vllm_manager.json"
- LOGS_DIR = Path.home() / ".vllm_logs"
- BASE_PORT = 8001 # Start from 8001, leave 8000 free
-
- class VLLMManager:
-     def __init__(self):
-         self.models = {} # name -> {pid, port, model_id, log_file}
-         self.load()
-         LOGS_DIR.mkdir(exist_ok=True)
-
-     def load(self):
-         if CONFIG_FILE.exists():
-             with open(CONFIG_FILE) as f:
-                 self.models = json.load(f)
-
-     def save(self):
-         with open(CONFIG_FILE, "w") as f:
-             json.dump(self.models, f, indent=2)
-
-     def is_running(self, pid: int) -> bool:
-         try:
-             process = psutil.Process(pid)
-             return process.is_running()
-         except:
-             return False
-
-     def find_free_port(self) -> int:
-         used_ports = {info['port'] for info in self.models.values()}
-         for port in range(BASE_PORT, BASE_PORT + 10):
-             if port not in used_ports:
-                 with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-                     try:
-                         s.bind(('', port))
-                         return port
-                     except:
-                         continue
-         raise Exception("No free ports")
-
-     def get_gpu_count(self) -> int:
-         try:
-             result = sp.run(['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
-                             capture_output=True, text=True)
-             if result.returncode == 0:
-                 return len(result.stdout.strip().split('\n'))
-         except:
-             pass
-         return 1
-
-     def find_available_gpu(self) -> Optional[int]:
-         """Find the next available GPU that's not heavily used"""
-         gpu_count = self.get_gpu_count()
-         if gpu_count == 1:
-             return None # Let vLLM use default
-
-         # Get GPUs used by our models
-         used_gpus = set()
-         for info in self.models.values():
-             if 'gpu_id' in info:
-                 used_gpus.add(info['gpu_id'])
-
-         # Find first unused GPU
-         for gpu_id in range(gpu_count):
-             if gpu_id not in used_gpus:
-                 return gpu_id
-
-         # If all GPUs have at least one model, find the least loaded
-         # For now, just cycle through
-         return len(self.models) % gpu_count
-
-     def list(self):
-         # Clean up dead processes
-         to_remove = []
-         for name, info in self.models.items():
-             if not self.is_running(info['pid']):
-                 to_remove.append(name)
-
-         for name in to_remove:
-             del self.models[name]
-
-         if to_remove:
-             self.save()
-
-         return self.models
-
-     def get_tool_parser_for_model(self, model_id: str) -> tuple[str, Optional[str]]:
-         """Determine the appropriate tool parser and chat template for a model."""
-         model_lower = model_id.lower()
-
-         # Qwen models
-         if 'qwen' in model_lower:
-             if 'qwen3-coder' in model_lower:
-                 return "qwen3_coder", None # Try qwen3_coder if it exists
-             elif 'qwen2.5' in model_lower or 'qwq' in model_lower:
-                 return "hermes", None # Qwen2.5 uses hermes
-             else:
-                 return "hermes", None # Default for other Qwen models
-
-         # Mistral models
-         elif 'mistral' in model_lower:
-             return "mistral", "examples/tool_chat_template_mistral_parallel.jinja"
-
-         # Llama models
-         elif 'llama' in model_lower or 'meta-llama' in model_lower:
-             if 'llama-4' in model_lower:
-                 return "llama4_pythonic", "examples/tool_chat_template_llama4_pythonic.jinja"
-             elif 'llama-3.2' in model_lower:
-                 return "llama3_json", "examples/tool_chat_template_llama3.2_json.jinja"
-             elif 'llama-3.1' in model_lower:
-                 return "llama3_json", "examples/tool_chat_template_llama3.1_json.jinja"
-             else:
-                 return "llama3_json", None
-
-         # InternLM models
-         elif 'internlm' in model_lower:
-             return "internlm", "examples/tool_chat_template_internlm2_tool.jinja"
-
-         # Jamba models
-         elif 'jamba' in model_lower:
-             return "jamba", None
-
-         # Granite models
-         elif 'granite' in model_lower:
-             if 'granite-20b-functioncalling' in model_lower:
-                 return "granite-20b-fc", "examples/tool_chat_template_granite_20b_fc.jinja"
-             elif 'granite-3.0' in model_lower:
-                 return "granite", "examples/tool_chat_template_granite.jinja"
-             else:
-                 return "granite", None
-
-         # DeepSeek models
-         elif 'deepseek' in model_lower:
-             if 'deepseek-r1' in model_lower:
-                 return "deepseek_v3", "examples/tool_chat_template_deepseekr1.jinja"
-             elif 'deepseek-v3' in model_lower:
-                 return "deepseek_v3", "examples/tool_chat_template_deepseekv3.jinja"
-             else:
-                 return "hermes", None # Fallback for other DeepSeek models
-
-         # xLAM models
-         elif 'xlam' in model_lower:
-             if 'llama-xlam' in model_lower:
-                 return "xlam", "examples/tool_chat_template_xlam_llama.jinja"
-             else:
-                 return "xlam", "examples/tool_chat_template_xlam_qwen.jinja"
-
-         # Phi models (Microsoft)
-         elif 'phi' in model_lower:
-             # Phi models don't have tool calling tokens, disable by default
-             return None, None
-
-         # Default fallback
-         else:
-             return "hermes", None
-
-     def start(self, model_id: str, name: Optional[str] = None, max_len: Optional[int] = None, gpu_memory_utilization: float = None, tensor_parallel_size: int = 1, gpu_ids: Optional[str] = None):
-         # Generate name
-         if not name:
-             name = model_id.split('/')[-1].lower().replace('-', '_')
-
-         # Check if already running
-         if name in self.models and self.is_running(self.models[name]['pid']):
-             return self.models[name]
-
-         # Find port
-         port = self.find_free_port()
-
-         # Create log file
-         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-         log_file = LOGS_DIR / f"{name}_{timestamp}.log"
-
-         # Set GPU memory utilization if not specified
-         if gpu_memory_utilization is None:
-             print("WARNING: No GPU memory utilization specified, defaulting to 90%")
-             print(" Consider specifying based on model size to run multiple models")
-             print(" Examples: 0.2 for small models, 0.5 for medium, 0.9 for large")
-             gpu_memory_utilization = 0.9
-
-         # Get appropriate tool parser for the model
-         tool_parser, chat_template = self.get_tool_parser_for_model(model_id)
-
-         # Start vLLM (use venv python if available)
-         python_cmd = str(Path.home() / "vllm_env/bin/python3") if (Path.home() / "vllm_env/bin/python3").exists() else "python3"
-         cmd = [
-             python_cmd, "-u", "-m", "vllm.entrypoints.openai.api_server",
-             "--model", model_id,
-             "--host", "0.0.0.0",
-             "--port", str(port),
-             "--gpu-memory-utilization", str(gpu_memory_utilization)
-         ]
-
-         # Only add tool calling if a parser is available
-         if tool_parser:
-             print(f"Auto-detected tool parser: {tool_parser}" + (f" with chat template: {chat_template}" if chat_template else ""))
-             cmd.extend([
-                 "--enable-auto-tool-choice",
-                 "--tool-call-parser", tool_parser
-             ])
-             # Add chat template if specified
-             if chat_template:
-                 cmd.extend(["--chat-template", chat_template])
-         else:
-             print(f"Tool calling disabled for {model_id} (no compatible parser)")
-
-         # Only add max-model-len if specified
-         if max_len is not None:
-             cmd.extend(["--max-model-len", str(max_len)])
-
-         # Add tensor parallel size if > 1
-         if tensor_parallel_size > 1:
-             cmd.extend(["--tensor-parallel-size", str(tensor_parallel_size)])
-
-         # Use environment as-is (already configured by .pirc)
-         env = os.environ.copy()
-
-         # Handle GPU assignment
-         assigned_gpu = None
-         if tensor_parallel_size > 1:
-             # Multi-GPU: use all GPUs
-             gpu_count = self.get_gpu_count()
-             if tensor_parallel_size > gpu_count:
-                 print(f"Warning: Requested {tensor_parallel_size} GPUs but only {gpu_count} available")
-                 tensor_parallel_size = gpu_count
-         else:
-             # Single GPU: find available GPU
-             if gpu_ids:
-                 env['CUDA_VISIBLE_DEVICES'] = gpu_ids
-                 assigned_gpu = int(gpu_ids.split(',')[0])
-             else:
-                 assigned_gpu = self.find_available_gpu()
-                 if assigned_gpu is not None:
-                     env['CUDA_VISIBLE_DEVICES'] = str(assigned_gpu)
-                     print(f"Auto-assigned to GPU {assigned_gpu}")
-
-
-         # Open log file and start process
-         with open(log_file, 'w') as f:
-             f.write(f"=== Starting {model_id} at {datetime.now()} ===\n")
-             f.write(f"Command: {' '.join(cmd)}\n")
-             if tool_parser:
-                 f.write(f"Tool Parser: {tool_parser}\n")
-                 if chat_template:
-                     f.write(f"Chat Template: {chat_template}\n")
-             else:
-                 f.write(f"Tool Calling: Disabled (no compatible parser)\n")
-             if gpu_ids:
-                 f.write(f"CUDA_VISIBLE_DEVICES: {gpu_ids}\n")
-             if tensor_parallel_size > 1:
-                 f.write(f"Tensor Parallel Size: {tensor_parallel_size}\n")
-             f.write("=" * 60 + "\n\n")
-             f.flush()
-
-             process = sp.Popen(
-                 cmd,
-                 stdout=f,
-                 stderr=sp.STDOUT, # Merge stderr into stdout
-                 bufsize=1, # Line buffered
-                 universal_newlines=True,
-                 env=env # Pass the modified environment
-             )
-
-         # Save info
-         self.models[name] = {
-             "pid": process.pid,
-             "port": port,
-             "model_id": model_id,
-             "log_file": str(log_file),
-             "gpu_id": assigned_gpu,
-             "tensor_parallel_size": tensor_parallel_size if tensor_parallel_size > 1 else 1
-         }
-         self.save()
-
-         return {"name": name, "port": port, "pid": process.pid, "log_file": str(log_file)}
-
-     def start_raw(self, model_id: str, name: str, vllm_args: str):
-         # Check if already running
-         if name in self.models and self.is_running(self.models[name]['pid']):
-             return self.models[name]
-
-         # Find port
-         port = self.find_free_port()
-
-         # Create log file
-         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-         log_file = LOGS_DIR / f"{name}_{timestamp}.log"
-
-         # Start vLLM with raw arguments
-         python_cmd = str(Path.home() / "vllm_env/bin/python3") if (Path.home() / "vllm_env/bin/python3").exists() else "python3"
-
-         # Base command - ensure vllm_args is properly quoted
-         cmd = f'{python_cmd} -u -m vllm.entrypoints.openai.api_server --model "{model_id}" --host 0.0.0.0 --port {port} {vllm_args}'
-
-         # Use environment as-is (already configured by .pirc)
-         env = os.environ.copy()
-
-
-         # Open log file and start process
-         with open(log_file, 'w') as f:
-             f.write(f"=== Starting {model_id} at {datetime.now()} ===")
-             f.write(f"\nCommand: {cmd}\n")
-             f.write("=" * 60 + "\n\n")
-             f.flush()
-
-             # Use shell=True for the command string
-             process = sp.Popen(
-                 cmd,
-                 shell=True,
-                 stdout=f,
-                 stderr=sp.STDOUT, # Merge stderr into stdout
-                 bufsize=1, # Line buffered
-                 universal_newlines=True,
-                 env=env # Pass the modified environment
-             )
-
-         # Save info
-         self.models[name] = {
-             "pid": process.pid,
-             "port": port,
-             "model_id": model_id,
-             "log_file": str(log_file),
-             "raw_args": vllm_args
-         }
-         self.save()
-
-         return {"name": name, "port": port, "pid": process.pid, "log_file": str(log_file)}
-
-     def stop(self, name: str):
-         if name not in self.models:
-             return False
-
-         info = self.models[name]
-         try:
-             process = psutil.Process(info['pid'])
-             process.terminate()
-             process.wait(timeout=5)
-         except:
-             pass
-
-         # Force kill all vLLM-related Python processes to ensure cleanup
-         max_attempts = 5
-         for attempt in range(max_attempts):
-             try:
-                 # Get all python processes containing 'vllm'
-                 ps_result = sp.run(['ps', 'aux'], capture_output=True, text=True)
-                 vllm_pids = []
-
-                 for line in ps_result.stdout.split('\n'):
-                     if 'python' in line and 'vllm' in line and 'vllm_manager.py' not in line:
-                         # Extract PID (second column)
-                         parts = line.split()
-                         if len(parts) > 1:
-                             vllm_pids.append(parts[1])
-
-                 if not vllm_pids:
-                     break # No vLLM processes found
-
-                 # Kill the vLLM processes
-                 for pid in vllm_pids:
-                     try:
-                         sp.run(['kill', '-9', pid], capture_output=True)
-                     except:
-                         pass
-
-                 # Small delay between attempts
-                 import time
-                 time.sleep(0.5)
-             except:
-                 break
-
-         del self.models[name]
-         self.save()
-         return True
-
-     def logs(self, name: str, lines: int = 50):
-         if name not in self.models:
-             return None
-
-         log_file = self.models[name].get('log_file')
-         if not log_file or not Path(log_file).exists():
-             return None
-
-         # Read last N lines
-         with open(log_file, 'r') as f:
-             all_lines = f.readlines()
-             return ''.join(all_lines[-lines:])
-
-     def check_downloads(self):
-         """Check model download progress in HuggingFace cache"""
-         import glob
-         import re
-
-         # Respect HuggingFace environment variables
-         if os.environ.get('HUGGINGFACE_HUB_CACHE'):
-             cache_dir = Path(os.environ['HUGGINGFACE_HUB_CACHE'])
-         elif os.environ.get('HF_HOME'):
-             cache_dir = Path(os.environ['HF_HOME']) / "hub"
-         else:
-             cache_dir = Path.home() / ".cache" / "huggingface" / "hub"
-
-         if not cache_dir.exists():
-             return {"status": "NO_CACHE", "cache_dir": str(cache_dir)}
-
-         model_dirs = list(cache_dir.glob("models--*"))
-         if not model_dirs:
-             return {"status": "NO_MODELS"}
-
-         results = []
-
-         for model_dir in model_dirs:
-             # Extract model name
-             model_name = model_dir.name.replace("models--", "").replace("--", "/")
-
-             # Get size (only count actual blob files, not symlinks)
-             total_size = 0
-             blobs_dir = model_dir / "blobs"
-             if blobs_dir.exists():
-                 for f in blobs_dir.iterdir():
-                     if f.is_file() and not f.name.endswith('.incomplete'):
-                         total_size += f.stat().st_size
-             size_gb = total_size / (1024**3)
-
-             # Count safetensors files in blobs directory (actual files)
-             safetensors_count = 0
-             snapshots_dir = model_dir / "snapshots"
-             if snapshots_dir.exists():
-                 for snapshot in snapshots_dir.iterdir():
-                     if snapshot.is_dir():
-                         safetensors_count = len(list(snapshot.glob("*.safetensors")))
-                         break # Use first snapshot
-             file_count = safetensors_count
-
-             # Get total expected files from filename pattern
-             total_files = 0
-             if snapshots_dir.exists():
-                 for snapshot in snapshots_dir.iterdir():
-                     if snapshot.is_dir():
-                         for f in snapshot.glob("*.safetensors"):
-                             match = re.search(r'model-\d+-of-(\d+)\.safetensors', f.name)
-                             if match:
-                                 total_files = max(total_files, int(match.group(1)))
-                         break # Only check first snapshot
-
-             # Check if actively downloading (check if any incomplete files exist in blobs)
-             incomplete_files = []
-             if blobs_dir.exists():
-                 incomplete_files = list(blobs_dir.glob("*.incomplete"))
-             is_active = len(incomplete_files) > 0
-
-             results.append({
-                 "model": model_name,
-                 "size_gb": round(size_gb, 1),
-                 "files": file_count,
-                 "total_files": total_files,
-                 "active": is_active
-             })
-
-         # Count vLLM processes
-         vllm_count = 0
-         for proc in psutil.process_iter(['pid', 'cmdline']):
-             try:
-                 cmdline = ' '.join(proc.info['cmdline'] or [])
-                 if 'python' in cmdline and 'vllm' in cmdline and 'vllm_manager.py' not in cmdline:
-                     vllm_count += 1
-             except:
-                 pass
-
-         return {
-             "status": "OK",
-             "models": results,
-             "vllm_processes": vllm_count
-         }
-
- def main():
-     import sys
-
-     manager = VLLMManager()
-
-     if len(sys.argv) < 2:
-         print("Usage: vllm_manager.py [list|start|stop|logs|downloads] ...")
-         sys.exit(1)
-
-     cmd = sys.argv[1]
-
-     if cmd == "list":
-         models = manager.list()
-         if not models:
-             print("No models running")
-         else:
-             # Get external IP
-             try:
-                 # Try to get IP from default interface
-                 result = sp.run(['hostname', '-I'], capture_output=True, text=True)
-                 if result.returncode == 0 and result.stdout.strip():
-                     host_ip = result.stdout.strip().split()[0]
-                 else:
-                     host_ip = socket.gethostbyname(socket.gethostname())
-             except:
-                 host_ip = socket.gethostbyname(socket.gethostname())
-             print(f"Running models:")
-             for name, info in models.items():
-                 print(f"\n{name}:")
-                 print(f" Model: {info['model_id']}")
-                 print(f" HF: https://huggingface.co/{info['model_id']}")
-                 print(f" Port: {info['port']}")
-                 if 'tensor_parallel_size' in info and info.get('tensor_parallel_size', 1) > 1:
-                     print(f" GPUs: {info.get('tensor_parallel_size', 1)} (tensor parallel)")
-                 elif 'gpu_id' in info and info['gpu_id'] is not None:
-                     print(f" GPU: {info['gpu_id']}")
-                 print(f" URL: http://{host_ip}:{info['port']}/v1")
-                 print(f"\n Export for OpenAI clients:")
-                 print(f" export OPENAI_BASE_URL='http://{host_ip}:{info['port']}/v1'")
-                 print(f" export OPENAI_API_KEY='dummy'")
-                 print(f" export OPENAI_MODEL='{info['model_id']}'")
-                 if 'log_file' in info:
-                     print(f"\n Logs: {info['log_file']}")
-
-     elif cmd == "start":
-         if len(sys.argv) < 3:
-             print("Usage: vllm_manager.py start <model_id> [name] [max_len] [gpu_memory] [tensor_parallel_size]")
-             sys.exit(1)
-
-         model_id = sys.argv[2]
-         name = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] not in ['""', ''] else None
-         max_len = int(sys.argv[4]) if len(sys.argv) > 4 and sys.argv[4] not in ['""', ''] else None
-         gpu_memory = float(sys.argv[5]) if len(sys.argv) > 5 else None
-         tensor_parallel = int(sys.argv[6]) if len(sys.argv) > 6 else 1
-
-         model_result = manager.start(model_id, name, max_len, gpu_memory, tensor_parallel)
-         # Get external IP
-         try:
-             # Try to get IP from default interface
-             ip_result = sp.run(['hostname', '-I'], capture_output=True, text=True)
-             if ip_result.returncode == 0 and ip_result.stdout.strip():
-                 host_ip = ip_result.stdout.strip().split()[0]
-             else:
-                 host_ip = socket.gethostbyname(socket.gethostname())
-         except:
-             host_ip = socket.gethostbyname(socket.gethostname())
-
-         print(f"Started {model_result['name']}")
-         print(f"URL: http://{host_ip}:{model_result['port']}/v1")
-         print(f"\nExport for OpenAI clients:")
-         print(f"export OPENAI_BASE_URL='http://{host_ip}:{model_result['port']}/v1'")
-         print(f"export OPENAI_MODEL='{model_id}'")
-
-     elif cmd == "start_raw":
-         if len(sys.argv) < 5:
-             print("Usage: vllm_manager.py start_raw <model_id> <name> <vllm_args>")
-             sys.exit(1)
-
-         model_id = sys.argv[2]
-         name = sys.argv[3]
-         vllm_args_base64 = sys.argv[4]
-
-         # Decode base64 arguments
-         vllm_args = base64.b64decode(vllm_args_base64).decode('utf-8')
-         print(f"DEBUG: Decoded vllm_args: '{vllm_args}'")
-
-         model_result = manager.start_raw(model_id, name, vllm_args)
-         # Get external IP
-         try:
-             # Try to get IP from default interface
-             ip_result = sp.run(['hostname', '-I'], capture_output=True, text=True)
-             if ip_result.returncode == 0 and ip_result.stdout.strip():
-                 host_ip = ip_result.stdout.strip().split()[0]
-             else:
-                 host_ip = socket.gethostbyname(socket.gethostname())
-         except:
-             host_ip = socket.gethostbyname(socket.gethostname())
-
-         print(f"Started {model_result['name']}")
-         print(f"URL: http://{host_ip}:{model_result['port']}/v1")
-         print(f"\nExport for OpenAI clients:")
-         print(f"export OPENAI_BASE_URL='http://{host_ip}:{model_result['port']}/v1'")
-         print(f"export OPENAI_MODEL='{model_id}'")
-
-     elif cmd == "stop":
-         if len(sys.argv) < 3:
-             print("Usage: vllm_manager.py stop <name>")
-             sys.exit(1)
-
-         name = sys.argv[2]
-         if manager.stop(name):
-             print(f"Stopped {name}")
-         else:
-             print(f"Model {name} not found")
-
-     elif cmd == "logs":
-         if len(sys.argv) < 3:
-             print("Usage: vllm_manager.py logs <name> [lines]")
-             sys.exit(1)
-
-         name = sys.argv[2]
-         lines = int(sys.argv[3]) if len(sys.argv) > 3 else 50
-
-         logs = manager.logs(name, lines)
-         if logs is None:
-             print(f"No logs found for {name}")
-         else:
-             print(logs, end='')
-
-     elif cmd == "downloads":
-         # Check if --stream flag is provided
-         stream = len(sys.argv) > 2 and sys.argv[2] == "--stream"
-
-         if stream:
-             # Streaming mode - continuously output status
-             import time
-             import signal
-
-             # Handle SIGTERM/SIGINT for clean shutdown
-             def signal_handler(sig, frame):
-                 sys.exit(0)
-
-             signal.signal(signal.SIGINT, signal_handler)
-             signal.signal(signal.SIGTERM, signal_handler)
-
-             while True:
-                 download_info = manager.check_downloads()
-
-                 if download_info["status"] == "NO_CACHE":
-                     print(json.dumps({"status": "NO_CACHE", "message": "No HuggingFace cache found"}))
-                 elif download_info["status"] == "NO_MODELS":
-                     print(json.dumps({"status": "NO_MODELS", "message": "No models in cache"}))
-                 else:
-                     print(json.dumps(download_info))
-
-                 sys.stdout.flush() # Force flush to ensure output is sent
-                 time.sleep(2) # Update every 2 seconds
-         else:
-             # Single check mode
-             download_info = manager.check_downloads()
-
-             if download_info["status"] == "NO_CACHE":
-                 print("No HuggingFace cache found")
-             elif download_info["status"] == "NO_MODELS":
-                 print("No models in cache")
-             else:
-                 # Output as JSON for easy parsing
-                 print(json.dumps(download_info))
-
-     else:
-         print(f"Unknown command: {cmd}")
-         sys.exit(1)
-
- if __name__ == "__main__":
-     main()