amd-gaia 0.15.1__py3-none-any.whl → 0.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/METADATA +2 -2
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/RECORD +38 -32
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/WHEEL +1 -1
- gaia/agents/base/agent.py +317 -113
- gaia/agents/base/api_agent.py +0 -1
- gaia/agents/base/console.py +334 -9
- gaia/agents/base/tools.py +7 -2
- gaia/agents/blender/__init__.py +7 -0
- gaia/agents/blender/agent.py +7 -10
- gaia/agents/blender/core/view.py +2 -2
- gaia/agents/chat/agent.py +22 -48
- gaia/agents/chat/app.py +7 -0
- gaia/agents/chat/tools/rag_tools.py +23 -8
- gaia/agents/chat/tools/shell_tools.py +1 -0
- gaia/agents/code/prompts/code_patterns.py +2 -4
- gaia/agents/docker/agent.py +1 -0
- gaia/agents/emr/agent.py +3 -5
- gaia/agents/emr/cli.py +1 -1
- gaia/agents/emr/dashboard/server.py +2 -4
- gaia/agents/tools/__init__.py +11 -0
- gaia/agents/tools/file_tools.py +715 -0
- gaia/apps/llm/app.py +14 -3
- gaia/chat/app.py +2 -4
- gaia/cli.py +751 -333
- gaia/installer/__init__.py +23 -0
- gaia/installer/init_command.py +1605 -0
- gaia/installer/lemonade_installer.py +678 -0
- gaia/llm/__init__.py +2 -1
- gaia/llm/lemonade_client.py +427 -99
- gaia/llm/lemonade_manager.py +55 -11
- gaia/llm/providers/lemonade.py +21 -14
- gaia/rag/sdk.py +1 -1
- gaia/security.py +24 -4
- gaia/talk/app.py +2 -4
- gaia/version.py +2 -2
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/entry_points.txt +0 -0
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/licenses/LICENSE.md +0 -0
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/top_level.txt +0 -0
gaia/llm/lemonade_client.py
CHANGED
```diff
@@ -89,8 +89,8 @@ DEFAULT_MODEL_NAME = "Qwen2.5-0.5B-Instruct-CPU"
 # Increased to accommodate long-running coding and evaluation tasks
 DEFAULT_REQUEST_TIMEOUT = 900
 # Default timeout in seconds for model loading operations
-# Increased for large model downloads and loading
-DEFAULT_MODEL_LOAD_TIMEOUT =
+# Increased for large model downloads and loading (10x increase for streaming stability)
+DEFAULT_MODEL_LOAD_TIMEOUT = 12000
 
 
 # =========================================================================
```
```diff
@@ -413,21 +413,24 @@ def _prompt_user_for_repair(model_name: str) -> bool:
         table.add_row(
             "Status:", "[yellow]Download incomplete or files corrupted[/yellow]"
         )
-        table.add_row(
+        table.add_row(
+            "Action:",
+            "[green]Resume download (Lemonade will continue where it left off)[/green]",
+        )
 
         console.print(
             Panel(
                 table,
-                title="[bold yellow]⚠️
+                title="[bold yellow]⚠️ Incomplete Model Download Detected[/bold yellow]",
                 border_style="yellow",
             )
         )
         console.print()
 
         while True:
-            response = input("
+            response = input("Resume download? [Y/n]: ").strip().lower()
             if response in ("", "y", "yes"):
-                console.print("[green]✓[/green]
+                console.print("[green]✓[/green] Resuming download...")
                 return True
             elif response in ("n", "no"):
                 console.print("[dim]Cancelled.[/dim]")
@@ -438,15 +441,15 @@ def _prompt_user_for_repair(model_name: str) -> bool:
     except ImportError:
         # Fall back to plain text formatting
         print("\n" + "=" * 60)
-        print(f"{_emoji('⚠️', '[WARNING]')}
+        print(f"{_emoji('⚠️', '[WARNING]')} Incomplete Model Download Detected")
         print("=" * 60)
         print(f"Model: {model_name}")
         print("Status: Download incomplete or files corrupted")
-        print("Action:
+        print("Action: Resume download (Lemonade will continue where it left off)")
         print("=" * 60)
 
         while True:
-            response = input("
+            response = input("Resume download? [Y/n]: ").strip().lower()
             if response in ("", "y", "yes"):
                 return True
             elif response in ("n", "no"):
```
```diff
@@ -455,6 +458,86 @@ def _prompt_user_for_repair(model_name: str) -> bool:
                 print("Please enter 'y' or 'n'")
 
 
+def _prompt_user_for_delete(model_name: str) -> bool:
+    """
+    Prompt user for confirmation to delete a model and re-download from scratch.
+
+    Args:
+        model_name: Name of the model to delete
+
+    Returns:
+        True if user confirms, False if user declines
+    """
+    # Get model storage paths
+    if sys.platform == "win32":
+        lemonade_cache = os.path.expandvars("%LOCALAPPDATA%\\lemonade\\")
+        hf_cache = os.path.expandvars("%USERPROFILE%\\.cache\\huggingface\\hub\\")
+    else:
+        lemonade_cache = os.path.expanduser("~/.local/share/lemonade/")
+        hf_cache = os.path.expanduser("~/.cache/huggingface/hub/")
+
+    try:
+        from rich.console import Console
+        from rich.panel import Panel
+        from rich.table import Table
+
+        console = Console()
+        console.print()
+
+        table = Table(show_header=False, box=None, padding=(0, 1))
+        table.add_column(style="dim")
+        table.add_column()
+        table.add_row("Model:", f"[cyan]{model_name}[/cyan]")
+        table.add_row(
+            "Status:", "[yellow]Resume failed, files may be corrupted[/yellow]"
+        )
+        table.add_row("Action:", "[red]Delete model and download fresh[/red]")
+        table.add_row("", "")
+        table.add_row("Storage:", f"[dim]{lemonade_cache}[/dim]")
+        table.add_row("", f"[dim]{hf_cache}[/dim]")
+
+        console.print(
+            Panel(
+                table,
+                title="[bold yellow]⚠️ Delete and Re-download?[/bold yellow]",
+                border_style="yellow",
+            )
+        )
+
+        while True:
+            response = (
+                input("Delete and re-download from scratch? [y/N]: ").strip().lower()
+            )
+            if response in ("y", "yes"):
+                console.print("[green]✓[/green] Deleting and re-downloading...")
+                return True
+            elif response in ("", "n", "no"):
+                console.print("[dim]Cancelled.[/dim]")
+                return False
+            else:
+                console.print("[dim]Please enter 'y' or 'n'[/dim]")
+
+    except ImportError:
+        print("\n" + "=" * 60)
+        print(f"{_emoji('⚠️', '[WARNING]')} Resume failed")
+        print(f"Model: {model_name}")
+        print(f"Storage: {lemonade_cache}")
+        print(f"         {hf_cache}")
+        print("Delete and download fresh?")
+        print("=" * 60)
+
+        while True:
+            response = (
+                input("Delete and re-download from scratch? [y/N]: ").strip().lower()
+            )
+            if response in ("y", "yes"):
+                return True
+            elif response in ("", "n", "no"):
+                return False
+            else:
+                print("Please enter 'y' or 'n'")
+
+
 def _check_disk_space(size_gb: float, path: Optional[str] = None) -> bool:
     """
     Check if there's enough disk space for download.
```
```diff
@@ -1518,6 +1601,139 @@ class LemonadeClient:
             self.log.error(f"Error generating embeddings: {str(e)}")
             raise LemonadeClientError(f"Error generating embeddings: {str(e)}")
 
+    # =========================================================================
+    # Image Generation (Stable Diffusion)
+    # =========================================================================
+
+    # Supported SD configurations
+    SD_MODELS = ["SD-1.5", "SD-Turbo", "SDXL-Base-1.0", "SDXL-Turbo"]
+    SD_SIZES = ["512x512", "768x768", "1024x1024"]
+
+    # Model-specific defaults
+    SD_MODEL_DEFAULTS = {
+        "SD-1.5": {"steps": 20, "cfg_scale": 7.5, "size": "512x512"},
+        "SD-Turbo": {"steps": 4, "cfg_scale": 1.0, "size": "512x512"},
+        "SDXL-Base-1.0": {"steps": 20, "cfg_scale": 7.5, "size": "1024x1024"},
+        "SDXL-Turbo": {"steps": 4, "cfg_scale": 1.0, "size": "512x512"},
+    }
+
+    def generate_image(
+        self,
+        prompt: str,
+        model: str = "SDXL-Turbo",
+        size: Optional[str] = None,
+        steps: Optional[int] = None,
+        cfg_scale: Optional[float] = None,
+        seed: Optional[int] = None,
+        timeout: int = 300,
+    ) -> Dict[str, Any]:
+        """
+        Generate an image from a text prompt using Stable Diffusion.
+
+        Args:
+            prompt: Text description of the image to generate
+            model: SD model - SD-1.5, SD-Turbo, SDXL-Base-1.0 (photorealistic), SDXL-Turbo
+            size: Image dimensions (auto-selected if None, or 512x512, 768x768, 1024x1024)
+            steps: Inference steps (auto-selected if None: Turbo=4, Base=20)
+            cfg_scale: CFG scale (auto-selected if None: Turbo=1.0, Base=7.5)
+            seed: Random seed for reproducibility (optional)
+            timeout: Request timeout in seconds (default: 300 for slower Base models)
+
+        Returns:
+            Dict with 'data' containing list of generated images in b64_json format
+
+        Raises:
+            LemonadeClientError: If generation fails or invalid parameters
+
+        Example:
+            # Photorealistic with SDXL-Base-1.0 (auto-settings)
+            result = client.generate_image(
+                prompt="a sunset over mountains, golden hour, photorealistic",
+                model="SDXL-Base-1.0"
+            )
+
+            # Fast stylized with SDXL-Turbo
+            result = client.generate_image(
+                prompt="cyberpunk city",
+                model="SDXL-Turbo"
+            )
+        """
+        # Validate model
+        if model not in self.SD_MODELS:
+            raise LemonadeClientError(
+                f"Invalid model '{model}'. Choose from: {self.SD_MODELS}"
+            )
+
+        # Apply model-specific defaults
+        defaults = self.SD_MODEL_DEFAULTS.get(model, {})
+        size = size or defaults.get("size", "512x512")
+        steps = steps if steps is not None else defaults.get("steps", 20)
+        cfg_scale = (
+            cfg_scale if cfg_scale is not None else defaults.get("cfg_scale", 7.5)
+        )
+
+        # Validate size
+        if size not in self.SD_SIZES:
+            raise LemonadeClientError(
+                f"Invalid size '{size}'. Choose from: {self.SD_SIZES}"
+            )
+
+        try:
+            # Generate random seed if not provided for varied results
+            import random
+
+            if seed is None:
+                seed = random.randint(0, 2**32 - 1)
+
+            payload = {
+                "prompt": prompt,
+                "model": model,
+                "size": size,
+                "n": 1,
+                "response_format": "b64_json",
+                "cfg_scale": cfg_scale,
+                "steps": steps,
+                "seed": seed,
+            }
+
+            self.log.info(
+                f"Generating image: model={model}, size={size}, steps={steps}, cfg={cfg_scale}"
+            )
+            url = f"{self.base_url}/images/generations"
+            response = self._send_request("POST", url, data=payload, timeout=timeout)
+
+            return response
+
+        except LemonadeClientError:
+            raise
+        except Exception as e:
+            self.log.error(f"Error generating image: {str(e)}")
+            raise LemonadeClientError(f"Error generating image: {str(e)}")
+
+    def list_sd_models(self) -> List[Dict[str, Any]]:
+        """
+        List available Stable Diffusion models from the server.
+
+        Returns:
+            List of SD model info dicts with id, labels, and image_defaults
+
+        Example:
+            sd_models = client.list_sd_models()
+            for m in sd_models:
+                print(f"{m['id']}: {m.get('image_defaults', {})}")
+        """
+        try:
+            models = self.list_models()
+            sd_models = [
+                m
+                for m in models.get("data", [])
+                if m.get("id") in self.SD_MODELS or "image" in m.get("labels", [])
+            ]
+            return sd_models
+        except Exception as e:
+            self.log.error(f"Error listing SD models: {str(e)}")
+            raise LemonadeClientError(f"Error listing SD models: {str(e)}")
+
     def list_models(self, show_all: bool = False) -> Dict[str, Any]:
         """
         List available models from the server.
```
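The new `generate_image` docstring states that results come back under `data` as `b64_json` entries. A minimal sketch of decoding and saving the first image under that assumption (`client` is an already-constructed `LemonadeClient`; the output filename is arbitrary):

```python
import base64

# Sketch: save the first image returned by generate_image().
# Assumes the documented response shape: {"data": [{"b64_json": "..."}]}.
result = client.generate_image(
    prompt="a sunset over mountains, golden hour, photorealistic",
    model="SDXL-Base-1.0",  # defaults: 20 steps, cfg_scale 7.5, 1024x1024
)
image_b64 = result["data"][0]["b64_json"]
with open("sunset.png", "wb") as f:  # arbitrary output path
    f.write(base64.b64decode(image_b64))
```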
```diff
@@ -1640,8 +1856,6 @@ class LemonadeClient:
         embedding: Optional[bool] = None,
         reranking: Optional[bool] = None,
         mmproj: Optional[str] = None,
-        timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
-        progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
     ) -> Generator[Dict[str, Any], None, None]:
         """
         Install a model on the server with streaming progress updates.
@@ -1658,34 +1872,23 @@ class LemonadeClient:
             embedding: Whether the model is an embedding model (for registering new models)
             reranking: Whether the model is a reranking model (for registering new models)
             mmproj: Multimodal Projector file for vision models (for registering new models)
-            timeout: Request timeout in seconds (longer for model installation)
-            progress_callback: Optional callback function called with progress dict on each event.
-                Signature: callback(event_type: str, data: dict) -> None
-                event_type is one of: "progress", "complete", "error"
 
         Yields:
             Dict containing progress event data with fields:
-            -
-
-            - For "complete"
-            - For "error"
+            - event: "progress", "complete", or "error"
+            - For "progress": file, file_index, total_files, bytes_downloaded, bytes_total, percent
+            - For "complete": file_index, total_files, percent (100)
+            - For "error": error message
 
         Raises:
             LemonadeClientError: If the model installation fails
 
         Example:
-            # Using as generator
             for event in client.pull_model_stream("Qwen3-0.6B-GGUF"):
-                if event
+                if event["event"] == "progress":
                     print(f"Downloading: {event['percent']}%")
-
-
-            def on_progress(event_type, data):
-                if event_type == "progress":
-                    print(f"{data['file']}: {data['percent']}%")
-
-            for _ in client.pull_model_stream("Qwen3-0.6B-GGUF", progress_callback=on_progress):
-                pass
+                elif event["event"] == "complete":
+                    print("Done!")
         """
         self.log.info(f"Installing {model_name} with streaming progress")
 
```
```diff
@@ -1708,12 +1911,21 @@ class LemonadeClient:
 
         url = f"{self.base_url}/pull"
 
+        # Use separate connect and read timeouts to handle SSE streams properly:
+        # - Connect timeout: 30 seconds (fast connection establishment)
+        # - Read timeout: 120 seconds (timeout if no data for 2 minutes)
+        # This detects stuck downloads while still allowing normal long downloads
+        # (as long as bytes keep flowing). The timeout is between receiving chunks,
+        # not total time, so long downloads with steady progress will work fine.
+        connect_timeout = 30
+        read_timeout = 120  # Timeout if no data received for 2 minutes
+
         try:
             response = requests.post(
                 url,
                 json=request_data,
                 headers={"Content-Type": "application/json"},
-                timeout=
+                timeout=(connect_timeout, read_timeout),
                 stream=True,
             )
 
```
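For context on the `(connect_timeout, read_timeout)` tuple: `requests` applies the first value to establishing the connection and the second to the wait between received chunks, not to the whole transfer, which is what makes it safe for long SSE downloads. A small illustration (the URL and payload are placeholders):

```python
import requests

try:
    resp = requests.post(
        "http://localhost:8000/api/v1/pull",   # placeholder endpoint
        json={"model_name": "example-model"},  # placeholder payload
        timeout=(30, 120),  # 30 s to connect, 120 s max between chunks
        stream=True,
    )
except requests.exceptions.ConnectTimeout:
    print("server did not accept the connection within 30 s")
except requests.exceptions.ReadTimeout:
    print("stream stalled: no data for 120 s")
```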
```diff
@@ -1725,11 +1937,14 @@ class LemonadeClient:
             # Parse SSE stream
             event_type = None
             received_complete = False
+
             try:
-                for
-                    if not
+                for line_bytes in response.iter_lines():
+                    if not line_bytes:
                         continue
 
+                    line = line_bytes.decode("utf-8", errors="replace")
+
                     if line.startswith("event:"):
                         event_type = line[6:].strip()
                     elif line.startswith("data:"):
@@ -1738,28 +1953,20 @@ class LemonadeClient:
                             data = json.loads(data_str)
                             data["event"] = event_type or "progress"
 
-                            #
-                            if progress_callback:
-                                progress_callback(event_type or "progress", data)
-
+                            # Yield all events - let the consumer handle throttling
                             yield data
 
-                            # Track complete event
                             if event_type == "complete":
                                 received_complete = True
-
-
-
-                                error_msg = data.get(
-                                    "error", "Unknown error during model pull"
+                            elif event_type == "error":
+                                raise LemonadeClientError(
+                                    data.get("error", "Unknown error during model pull")
                                 )
-                                raise LemonadeClientError(error_msg)
 
                         except json.JSONDecodeError:
                             self.log.warning(f"Failed to parse SSE data: {data_str}")
                             continue
             except requests.exceptions.ChunkedEncodingError:
-                # Connection closed by server - this is normal after complete event
                 if not received_complete:
                     raise
 
```
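Since the generator now yields every SSE event and leaves throttling to the consumer, a caller that wants rate-limited progress output can do something like this sketch (event fields as listed in the `pull_model_stream` docstring; `client` assumed constructed):

```python
import time

last_print = 0.0
for event in client.pull_model_stream("Qwen3-0.6B-GGUF"):
    if event["event"] == "progress":
        now = time.monotonic()
        if now - last_print >= 1.0:  # print at most once per second
            print(f"Downloading: {event.get('percent', 0)}%")
            last_print = now
    elif event["event"] == "complete":
        print("Done!")
```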
```diff
@@ -2073,9 +2280,32 @@ class LemonadeClient:
                 return
 
             # Model not loaded - load it (will download if needed without prompting)
-            self.log.
+            self.log.debug(f"Model '{model}' not loaded, loading...")
+
+            try:
+                from rich.console import Console
+
+                console = Console()
+                console.print(
+                    f"[bold blue]🔄 Loading model:[/bold blue] [cyan]{model}[/cyan]..."
+                )
+            except ImportError:
+                console = None
+                print(f"🔄 Loading model: {model}...")
+
             self.load_model(model, auto_download=True, prompt=False)
 
+            # Print model ready message
+            try:
+                if console:
+                    console.print(
+                        f"[bold green]✅ Model loaded:[/bold green] [cyan]{model}[/cyan]"
+                    )
+                else:
+                    print(f"✅ Model loaded: {model}")
+            except Exception:
+                pass  # Ignore print errors
+
         except Exception as e:
             # Log but don't fail - let the actual request fail with proper error
             self.log.debug(f"Could not pre-check model status: {e}")
```
```diff
@@ -2085,8 +2315,10 @@ class LemonadeClient:
         model_name: str,
         timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
         auto_download: bool = False,
-
+        _download_timeout: int = 7200,  # Reserved for future use
         llamacpp_args: Optional[str] = None,
+        ctx_size: Optional[int] = None,
+        save_options: bool = False,
         prompt: bool = True,
     ) -> Dict[str, Any]:
         """
@@ -2106,6 +2338,10 @@ class LemonadeClient:
                 Large models can be 100GB+ and take hours to download
             llamacpp_args: Optional llama.cpp arguments (e.g., "--ubatch-size 2048").
                 Used to configure model loading parameters like batch sizes.
+            ctx_size: Context size for the model in tokens (e.g., 8192, 32768).
+                Overrides the default value for this model.
+            save_options: If True, persists ctx_size and llamacpp_args to config file.
+                Model will use these settings on future loads.
             prompt: If True, prompt user before downloading (default: True).
                 Set to False to download automatically without user confirmation.
 
@@ -2122,6 +2358,10 @@ class LemonadeClient:
         request_data = {"model_name": model_name}
         if llamacpp_args:
             request_data["llamacpp_args"] = llamacpp_args
+        if ctx_size is not None:
+            request_data["ctx_size"] = ctx_size
+        if save_options:
+            request_data["save_options"] = save_options
         url = f"{self.base_url}/load"
 
         try:
```
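A usage sketch for the new `ctx_size` and `save_options` parameters (model name taken from the docstring examples; the values are illustrative):

```python
# Load with a larger context window and persist the settings so the
# server reuses them on future loads of this model.
client.load_model(
    "Qwen3-0.6B-GGUF",
    ctx_size=32768,       # overrides the model's default context size
    save_options=True,    # persist ctx_size/llamacpp_args to config
    llamacpp_args="--ubatch-size 2048",
)
```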
```diff
@@ -2133,39 +2373,97 @@ class LemonadeClient:
             original_error = str(e)
 
             # Check if this is a corrupt/incomplete download error
-
+            is_corrupt = self._is_corrupt_download_error(e)
+            if is_corrupt:
                 self.log.warning(
-                    f"{_emoji('⚠️', '[
+                    f"{_emoji('⚠️', '[INCOMPLETE]')} Model '{model_name}' has incomplete "
                     f"or corrupted files"
                 )
 
-                # Prompt user for confirmation to
+                # Prompt user for confirmation to resume download
                 if not _prompt_user_for_repair(model_name):
                     raise ModelDownloadCancelledError(
-                        f"User declined to repair
+                        f"User declined to repair incomplete model: {model_name}"
                     )
 
-                #
+                # Try to resume download first (Lemonade handles partial files)
                 self.log.info(
-                    f"{_emoji('
+                    f"{_emoji('📥', '[RESUME]')} Attempting to resume download..."
                 )
+
                 try:
-
-
-
+                    # First attempt: resume download
+                    download_complete = False
+                    for event in self.pull_model_stream(model_name=model_name):
+                        event_type = event.get("event")
+                        if event_type == "complete":
+                            download_complete = True
+                        elif event_type == "error":
+                            raise LemonadeClientError(event.get("error", "Unknown"))
+
+                    if download_complete:
+                        # Retry loading
+                        response = self._send_request(
+                            "post", url, request_data, timeout=timeout
+                        )
+                        self.log.info(
+                            f"{_emoji('✅', '[OK]')} Loaded {model_name} after resume"
+                        )
+                        self.model = model_name
+                        return response
+
+                except Exception as resume_error:
+                    self.log.warning(
+                        f"{_emoji('⚠️', '[RETRY]')} Resume failed: {resume_error}"
                     )
-                except Exception as delete_error:
-                    self.log.warning(f"Failed to delete corrupt model: {delete_error}")
-                    # Continue anyway - the download may still work
 
-
-
+                # Prompt user before deleting
+                if not _prompt_user_for_delete(model_name):
+                    raise LemonadeClientError(
+                        f"Resume download failed for '{model_name}'. "
+                        f"You can manually delete the model and try again."
+                    )
+
+                # Second attempt: delete and re-download from scratch
+                try:
+                    self.log.info(
+                        f"{_emoji('🗑️', '[DELETE]')} Deleting corrupt model..."
+                    )
+                    self.delete_model(model_name)
+
+                    self.log.info(
+                        f"{_emoji('📥', '[FRESH]')} Starting fresh download..."
+                    )
+                    download_complete = False
+                    for event in self.pull_model_stream(model_name=model_name):
+                        event_type = event.get("event")
+                        if event_type == "complete":
+                            download_complete = True
+                        elif event_type == "error":
+                            raise LemonadeClientError(event.get("error", "Unknown"))
+
+                    if download_complete:
+                        # Retry loading
+                        response = self._send_request(
+                            "post", url, request_data, timeout=timeout
+                        )
+                        self.log.info(
+                            f"{_emoji('✅', '[OK]')} Loaded {model_name} after fresh download"
+                        )
+                        self.model = model_name
+                        return response
+
+                except Exception as fresh_error:
+                    self.log.error(
+                        f"{_emoji('❌', '[FAIL]')} Fresh download also failed: {fresh_error}"
+                    )
+                    raise LemonadeClientError(
+                        f"Failed to repair model '{model_name}' after both resume and fresh download attempts. "
+                        f"Please check your network connection and disk space, then try again."
+                    )
 
             # Check if this is a "model not found" error and auto_download is enabled
-            if not (
-                auto_download
-                and (self._is_model_error(e) or self._is_corrupt_download_error(e))
-            ):
+            if not (auto_download and self._is_model_error(e)):
                 # Not a model error or auto_download disabled - re-raise
                 self.log.error(f"Failed to load {model_name}: {original_error}")
                 if isinstance(e, LemonadeClientError):
```
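From the caller's side, the two-stage repair flow surfaces as two exception types: `ModelDownloadCancelledError` when the user declines a prompt, and `LemonadeClientError` when both the resume and the fresh download fail. A sketch (assuming both exceptions are importable from `gaia.llm.lemonade_client`):

```python
from gaia.llm.lemonade_client import (  # import path assumed
    LemonadeClientError,
    ModelDownloadCancelledError,
)

try:
    client.load_model("Qwen3-0.6B-GGUF", auto_download=True)
except ModelDownloadCancelledError:
    print("User declined the repair; model left untouched.")
except LemonadeClientError as e:
    print(f"Repair failed after resume and fresh download: {e}")
```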
```diff
@@ -2211,24 +2509,45 @@ class LemonadeClient:
             self.active_downloads[model_name] = download_task
 
             try:
-                #
-                self.pull_model(model_name, timeout=download_timeout)
-
-                # Wait for download to complete (with cancellation support)
-                self.log.info(
-                    f"   {_emoji('⏳', '[WAIT]')} Waiting for model download to complete..."
-                )
+                # Use streaming download for better performance and no timeouts
                 self.log.info(
-                    f"   {_emoji('
-                    f"client.cancel_download(model_name)"
+                    f"   {_emoji('⏳', '[DOWNLOAD]')} Downloading model with streaming..."
                 )
 
-
-
-
-
-
-
+                # Stream download with simple progress logging
+                download_complete = False
+                last_logged_percent = -10  # Log at 0%, 10%, 20%, etc.
+
+                for event in self.pull_model_stream(model_name=model_name):
+                    # Check for cancellation
+                    if download_task and download_task.is_cancelled():
+                        raise ModelDownloadCancelledError(
+                            f"Download cancelled: {model_name}"
+                        )
+
+                    event_type = event.get("event")
+                    if event_type == "progress":
+                        percent = event.get("percent", 0)
+                        # Log every 10%
+                        if percent >= last_logged_percent + 10:
+                            bytes_dl = event.get("bytes_downloaded", 0)
+                            bytes_total = event.get("bytes_total", 0)
+                            if bytes_total > 0:
+                                gb_dl = bytes_dl / (1024**3)
+                                gb_total = bytes_total / (1024**3)
+                                self.log.info(
+                                    f"   {_emoji('📥', '[PROGRESS]')} "
+                                    f"{percent}% ({gb_dl:.1f}/{gb_total:.1f} GB)"
+                                )
+                            last_logged_percent = percent
+                    elif event_type == "complete":
+                        download_complete = True
+                    elif event_type == "error":
+                        raise LemonadeClientError(
+                            f"Download failed: {event.get('error', 'Unknown error')}"
+                        )
+
+                if download_complete:
                     # Retry loading after successful download
                     self.log.info(
                         f"{_emoji('🔄', '[RETRY]')} Retrying model load: {model_name}"
@@ -2243,7 +2562,7 @@ class LemonadeClient:
                     return response
                 else:
                     raise LemonadeClientError(
-                        f"Model download
+                        f"Model download did not complete for '{model_name}'"
                     )
 
         except ModelDownloadCancelledError:
```
```diff
@@ -2421,7 +2740,17 @@ class LemonadeClient:
         """
         try:
             health = self.health_check()
-
+
+            # Lemonade 9.1.4+: context_size moved to all_models_loaded[N].recipe_options.ctx_size
+            all_models = health.get("all_models_loaded", [])
+            if all_models:
+                # Get context size from the first loaded model (typically the LLM)
+                reported_ctx = (
+                    all_models[0].get("recipe_options", {}).get("ctx_size", 0)
+                )
+            else:
+                # Fallback for older Lemonade versions
+                reported_ctx = health.get("context_size", 0)
 
             if reported_ctx >= required_tokens:
                 self.log.debug(
@@ -2457,7 +2786,16 @@ class LemonadeClient:
             health = self.health_check()
             status.running = True
             status.health_data = health
-
+
+            # Lemonade 9.1.4+: context_size moved to all_models_loaded[N].recipe_options.ctx_size
+            all_models = health.get("all_models_loaded", [])
+            if all_models:
+                status.context_size = (
+                    all_models[0].get("recipe_options", {}).get("ctx_size", 0)
+                )
+            else:
+                # Fallback for older Lemonade versions
+                status.context_size = health.get("context_size", 0)
 
             # Get loaded models
             models_response = self.list_models()
```
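Both context-size readers walk the same path, `all_models_loaded[0].recipe_options.ctx_size`, with a top-level `context_size` fallback for pre-9.1.4 servers. A self-contained sketch against a hypothetical health payload (only the fields used by the diff are assumed):

```python
# Hypothetical /health payloads illustrating the two shapes.
health_new = {
    "all_models_loaded": [
        {"model_name": "Qwen3-0.6B-GGUF", "recipe_options": {"ctx_size": 32768}}
    ]
}
health_old = {"context_size": 4096}  # pre-9.1.4 shape

def read_ctx(health: dict) -> int:
    all_models = health.get("all_models_loaded", [])
    if all_models:
        return all_models[0].get("recipe_options", {}).get("ctx_size", 0)
    return health.get("context_size", 0)

print(read_ctx(health_new))  # 32768
print(read_ctx(health_old))  # 4096
```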
```diff
@@ -2541,8 +2879,6 @@ class LemonadeClient:
     def download_agent_models(
         self,
         agent: str = "all",
-        timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
-        progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
     ) -> Dict[str, Any]:
         """
         Download all models required for an agent with streaming progress.
@@ -2552,9 +2888,6 @@ class LemonadeClient:
 
         Args:
             agent: Agent name (chat, code, rag, etc.) or "all" for all models
-            timeout: Timeout per model in seconds
-            progress_callback: Optional callback for progress updates.
-                Signature: callback(event_type: str, data: dict) -> None
 
         Returns:
             Dict with download results:
@@ -2563,11 +2896,9 @@ class LemonadeClient:
             - errors: List[str] - Any error messages
 
         Example:
-
-
-
-
-            result = client.download_agent_models("chat", progress_callback=on_progress)
+            result = client.download_agent_models("chat")
+            for event in client.pull_model_stream("model-id"):
+                print(f"{event.get('percent', 0)}%")
         """
         model_ids = self.get_required_models(agent)
 
@@ -2597,15 +2928,12 @@ class LemonadeClient:
                 self.log.info(f"Downloading model: {model_id}")
                 completed = False
 
-                for event in self.pull_model_stream(
-
-
-                    progress_callback=progress_callback,
-                ):
-                    if event.get("event") == "complete":
+                for event in self.pull_model_stream(model_name=model_id):
+                    event_type = event.get("event")
+                    if event_type == "complete":
                         completed = True
                         model_result["status"] = "completed"
-                    elif
+                    elif event_type == "error":
                         model_result["status"] = "error"
                         model_result["error"] = event.get("error", "Unknown error")
                         results["errors"].append(f"{model_id}: {model_result['error']}")
```