amd-gaia 0.15.1__py3-none-any.whl → 0.15.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.2.dist-info}/METADATA +1 -2
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.2.dist-info}/RECORD +35 -31
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.2.dist-info}/WHEEL +1 -1
- gaia/agents/base/agent.py +45 -90
- gaia/agents/base/api_agent.py +0 -1
- gaia/agents/base/console.py +126 -0
- gaia/agents/base/tools.py +7 -2
- gaia/agents/blender/__init__.py +7 -0
- gaia/agents/blender/agent.py +7 -10
- gaia/agents/blender/core/view.py +2 -2
- gaia/agents/chat/agent.py +22 -48
- gaia/agents/chat/app.py +7 -0
- gaia/agents/chat/tools/rag_tools.py +23 -8
- gaia/agents/chat/tools/shell_tools.py +1 -0
- gaia/agents/code/prompts/code_patterns.py +2 -4
- gaia/agents/docker/agent.py +1 -0
- gaia/agents/emr/agent.py +3 -5
- gaia/agents/emr/cli.py +1 -1
- gaia/agents/emr/dashboard/server.py +2 -4
- gaia/apps/llm/app.py +14 -3
- gaia/chat/app.py +2 -4
- gaia/cli.py +511 -333
- gaia/installer/__init__.py +23 -0
- gaia/installer/init_command.py +1275 -0
- gaia/installer/lemonade_installer.py +619 -0
- gaia/llm/__init__.py +2 -1
- gaia/llm/lemonade_client.py +284 -99
- gaia/llm/providers/lemonade.py +12 -14
- gaia/rag/sdk.py +1 -1
- gaia/security.py +24 -4
- gaia/talk/app.py +2 -4
- gaia/version.py +2 -2
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.2.dist-info}/entry_points.txt +0 -0
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.2.dist-info}/licenses/LICENSE.md +0 -0
- {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.2.dist-info}/top_level.txt +0 -0
gaia/llm/lemonade_client.py
CHANGED
@@ -89,8 +89,8 @@ DEFAULT_MODEL_NAME = "Qwen2.5-0.5B-Instruct-CPU"
 # Increased to accommodate long-running coding and evaluation tasks
 DEFAULT_REQUEST_TIMEOUT = 900
 # Default timeout in seconds for model loading operations
-# Increased for large model downloads and loading
-DEFAULT_MODEL_LOAD_TIMEOUT =
+# Increased for large model downloads and loading (10x increase for streaming stability)
+DEFAULT_MODEL_LOAD_TIMEOUT = 12000
 
 
 # =========================================================================
@@ -413,21 +413,24 @@ def _prompt_user_for_repair(model_name: str) -> bool:
         table.add_row(
             "Status:", "[yellow]Download incomplete or files corrupted[/yellow]"
         )
-        table.add_row(
+        table.add_row(
+            "Action:",
+            "[green]Resume download (Lemonade will continue where it left off)[/green]",
+        )
 
         console.print(
             Panel(
                 table,
-                title="[bold yellow]⚠️
+                title="[bold yellow]⚠️ Incomplete Model Download Detected[/bold yellow]",
                 border_style="yellow",
             )
         )
         console.print()
 
         while True:
-            response = input("
+            response = input("Resume download? [Y/n]: ").strip().lower()
             if response in ("", "y", "yes"):
-                console.print("[green]✓[/green]
+                console.print("[green]✓[/green] Resuming download...")
                 return True
             elif response in ("n", "no"):
                 console.print("[dim]Cancelled.[/dim]")
@@ -438,15 +441,15 @@ def _prompt_user_for_repair(model_name: str) -> bool:
     except ImportError:
         # Fall back to plain text formatting
         print("\n" + "=" * 60)
-        print(f"{_emoji('⚠️', '[WARNING]')}
+        print(f"{_emoji('⚠️', '[WARNING]')} Incomplete Model Download Detected")
         print("=" * 60)
         print(f"Model: {model_name}")
         print("Status: Download incomplete or files corrupted")
-        print("Action:
+        print("Action: Resume download (Lemonade will continue where it left off)")
         print("=" * 60)
 
         while True:
-            response = input("
+            response = input("Resume download? [Y/n]: ").strip().lower()
             if response in ("", "y", "yes"):
                 return True
             elif response in ("n", "no"):
@@ -455,6 +458,86 @@ def _prompt_user_for_repair(model_name: str) -> bool:
                 print("Please enter 'y' or 'n'")
 
 
+def _prompt_user_for_delete(model_name: str) -> bool:
+    """
+    Prompt user for confirmation to delete a model and re-download from scratch.
+
+    Args:
+        model_name: Name of the model to delete
+
+    Returns:
+        True if user confirms, False if user declines
+    """
+    # Get model storage paths
+    if sys.platform == "win32":
+        lemonade_cache = os.path.expandvars("%LOCALAPPDATA%\\lemonade\\")
+        hf_cache = os.path.expandvars("%USERPROFILE%\\.cache\\huggingface\\hub\\")
+    else:
+        lemonade_cache = os.path.expanduser("~/.local/share/lemonade/")
+        hf_cache = os.path.expanduser("~/.cache/huggingface/hub/")
+
+    try:
+        from rich.console import Console
+        from rich.panel import Panel
+        from rich.table import Table
+
+        console = Console()
+        console.print()
+
+        table = Table(show_header=False, box=None, padding=(0, 1))
+        table.add_column(style="dim")
+        table.add_column()
+        table.add_row("Model:", f"[cyan]{model_name}[/cyan]")
+        table.add_row(
+            "Status:", "[yellow]Resume failed, files may be corrupted[/yellow]"
+        )
+        table.add_row("Action:", "[red]Delete model and download fresh[/red]")
+        table.add_row("", "")
+        table.add_row("Storage:", f"[dim]{lemonade_cache}[/dim]")
+        table.add_row("", f"[dim]{hf_cache}[/dim]")
+
+        console.print(
+            Panel(
+                table,
+                title="[bold yellow]⚠️ Delete and Re-download?[/bold yellow]",
+                border_style="yellow",
+            )
+        )
+
+        while True:
+            response = (
+                input("Delete and re-download from scratch? [y/N]: ").strip().lower()
+            )
+            if response in ("y", "yes"):
+                console.print("[green]✓[/green] Deleting and re-downloading...")
+                return True
+            elif response in ("", "n", "no"):
+                console.print("[dim]Cancelled.[/dim]")
+                return False
+            else:
+                console.print("[dim]Please enter 'y' or 'n'[/dim]")
+
+    except ImportError:
+        print("\n" + "=" * 60)
+        print(f"{_emoji('⚠️', '[WARNING]')} Resume failed")
+        print(f"Model: {model_name}")
+        print(f"Storage: {lemonade_cache}")
+        print(f"         {hf_cache}")
+        print("Delete and download fresh?")
+        print("=" * 60)
+
+        while True:
+            response = (
+                input("Delete and re-download from scratch? [y/N]: ").strip().lower()
+            )
+            if response in ("y", "yes"):
+                return True
+            elif response in ("", "n", "no"):
+                return False
+            else:
+                print("Please enter 'y' or 'n'")
+
+
 def _check_disk_space(size_gb: float, path: Optional[str] = None) -> bool:
     """
     Check if there's enough disk space for download.
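
A detail worth noting in the two prompts above: the resume prompt defaults to Yes ([Y/n], pressing Enter resumes) while the delete prompt defaults to No ([y/N], pressing Enter cancels), so the destructive action always requires an explicit keystroke. A minimal sketch of that shared pattern; the confirm helper is hypothetical and only mirrors the structure above:

def confirm(question: str, default_yes: bool) -> bool:
    # Hypothetical helper mirroring _prompt_user_for_repair (default Yes)
    # and _prompt_user_for_delete (default No).
    suffix = "[Y/n]" if default_yes else "[y/N]"
    while True:
        response = input(f"{question} {suffix}: ").strip().lower()
        if response == "":
            return default_yes  # Enter takes the safe default
        if response in ("y", "yes"):
            return True
        if response in ("n", "no"):
            return False
        print("Please enter 'y' or 'n'")

# confirm("Resume download?", default_yes=True)
# confirm("Delete and re-download from scratch?", default_yes=False)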
@@ -1640,8 +1723,6 @@ class LemonadeClient:
         embedding: Optional[bool] = None,
         reranking: Optional[bool] = None,
         mmproj: Optional[str] = None,
-        timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
-        progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
     ) -> Generator[Dict[str, Any], None, None]:
         """
         Install a model on the server with streaming progress updates.
@@ -1658,34 +1739,23 @@ class LemonadeClient:
             embedding: Whether the model is an embedding model (for registering new models)
             reranking: Whether the model is a reranking model (for registering new models)
             mmproj: Multimodal Projector file for vision models (for registering new models)
-            timeout: Request timeout in seconds (longer for model installation)
-            progress_callback: Optional callback function called with progress dict on each event.
-                Signature: callback(event_type: str, data: dict) -> None
-                event_type is one of: "progress", "complete", "error"
 
         Yields:
             Dict containing progress event data with fields:
-            -
-
-            - For "complete"
-            - For "error"
+            - event: "progress", "complete", or "error"
+            - For "progress": file, file_index, total_files, bytes_downloaded, bytes_total, percent
+            - For "complete": file_index, total_files, percent (100)
+            - For "error": error message
 
         Raises:
             LemonadeClientError: If the model installation fails
 
         Example:
-            # Using as generator
             for event in client.pull_model_stream("Qwen3-0.6B-GGUF"):
-                if event
+                if event["event"] == "progress":
                     print(f"Downloading: {event['percent']}%")
-
-
-            def on_progress(event_type, data):
-                if event_type == "progress":
-                    print(f"{data['file']}: {data['percent']}%")
-
-            for _ in client.pull_model_stream("Qwen3-0.6B-GGUF", progress_callback=on_progress):
-                pass
+                elif event["event"] == "complete":
+                    print("Done!")
         """
         self.log.info(f"Installing {model_name} with streaming progress")
 
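
Reading the updated docstring literally, the three event payloads have roughly these shapes (keys from the docstring, values invented for illustration):

progress_event = {
    "event": "progress",
    "file": "model.gguf",          # illustrative filename
    "file_index": 1,
    "total_files": 2,
    "bytes_downloaded": 536870912,
    "bytes_total": 2147483648,
    "percent": 25,
}
complete_event = {"event": "complete", "file_index": 2, "total_files": 2, "percent": 100}
error_event = {"event": "error", "error": "connection reset"}  # illustrative message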
@@ -1708,12 +1778,21 @@ class LemonadeClient:
 
         url = f"{self.base_url}/pull"
 
+        # Use separate connect and read timeouts to handle SSE streams properly:
+        # - Connect timeout: 30 seconds (fast connection establishment)
+        # - Read timeout: 120 seconds (timeout if no data for 2 minutes)
+        # This detects stuck downloads while still allowing normal long downloads
+        # (as long as bytes keep flowing). The timeout is between receiving chunks,
+        # not total time, so long downloads with steady progress will work fine.
+        connect_timeout = 30
+        read_timeout = 120  # Timeout if no data received for 2 minutes
+
         try:
             response = requests.post(
                 url,
                 json=request_data,
                 headers={"Content-Type": "application/json"},
-                timeout=
+                timeout=(connect_timeout, read_timeout),
                 stream=True,
             )
 
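
The tuple form is standard requests behavior: timeout=(connect, read) bounds connection establishment and the gap between received bytes separately, so a multi-hour download never times out as long as data keeps arriving. A small standalone illustration (the URL is a placeholder):

import requests

# Fail within 30 s if the server is unreachable, but allow an arbitrarily
# long transfer as long as some bytes arrive at least every 120 s.
resp = requests.get("https://example.com/stream", stream=True, timeout=(30, 120))
for chunk in resp.iter_content(chunk_size=8192):
    ...  # each inter-chunk gap is bounded by the 120 s read timeout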
@@ -1725,11 +1804,14 @@ class LemonadeClient:
             # Parse SSE stream
             event_type = None
             received_complete = False
+
             try:
-                for
-                    if not
+                for line_bytes in response.iter_lines():
+                    if not line_bytes:
                         continue
 
+                    line = line_bytes.decode("utf-8", errors="replace")
+
                     if line.startswith("event:"):
                         event_type = line[6:].strip()
                     elif line.startswith("data:"):
@@ -1738,28 +1820,20 @@ class LemonadeClient:
                             data = json.loads(data_str)
                             data["event"] = event_type or "progress"
 
-                            #
-                            if progress_callback:
-                                progress_callback(event_type or "progress", data)
-
+                            # Yield all events - let the consumer handle throttling
                             yield data
 
-                            # Track complete event
                             if event_type == "complete":
                                 received_complete = True
-
-
-
-                                error_msg = data.get(
-                                    "error", "Unknown error during model pull"
+                            elif event_type == "error":
+                                raise LemonadeClientError(
+                                    data.get("error", "Unknown error during model pull")
                                 )
-                                raise LemonadeClientError(error_msg)
 
                         except json.JSONDecodeError:
                             self.log.warning(f"Failed to parse SSE data: {data_str}")
                             continue
             except requests.exceptions.ChunkedEncodingError:
-                # Connection closed by server - this is normal after complete event
                 if not received_complete:
                     raise
 
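
Since pull_model_stream now yields every raw SSE event and "lets the consumer handle throttling", callers that want quiet output must de-duplicate progress themselves. A sketch of such a consumer, assuming client is an already-constructed LemonadeClient:

last_percent = -1
for event in client.pull_model_stream("Qwen3-0.6B-GGUF"):
    if event["event"] == "progress":
        percent = event.get("percent", 0)
        if percent != last_percent:  # print each percentage once
            print(f"Downloading: {percent}%")
            last_percent = percent
    elif event["event"] == "complete":
        print("Done!")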
@@ -2073,9 +2147,32 @@ class LemonadeClient:
                 return
 
             # Model not loaded - load it (will download if needed without prompting)
-            self.log.
+            self.log.debug(f"Model '{model}' not loaded, loading...")
+
+            try:
+                from rich.console import Console
+
+                console = Console()
+                console.print(
+                    f"[bold blue]🔄 Loading model:[/bold blue] [cyan]{model}[/cyan]..."
+                )
+            except ImportError:
+                console = None
+                print(f"🔄 Loading model: {model}...")
+
             self.load_model(model, auto_download=True, prompt=False)
 
+            # Print model ready message
+            try:
+                if console:
+                    console.print(
+                        f"[bold green]✅ Model loaded:[/bold green] [cyan]{model}[/cyan]"
+                    )
+                else:
+                    print(f"✅ Model loaded: {model}")
+            except Exception:
+                pass  # Ignore print errors
+
         except Exception as e:
             # Log but don't fail - let the actual request fail with proper error
             self.log.debug(f"Could not pre-check model status: {e}")
@@ -2085,7 +2182,7 @@ class LemonadeClient:
         model_name: str,
         timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
         auto_download: bool = False,
-
+        _download_timeout: int = 7200,  # Reserved for future use
         llamacpp_args: Optional[str] = None,
         prompt: bool = True,
     ) -> Dict[str, Any]:
@@ -2133,39 +2230,97 @@ class LemonadeClient:
             original_error = str(e)
 
             # Check if this is a corrupt/incomplete download error
-
+            is_corrupt = self._is_corrupt_download_error(e)
+            if is_corrupt:
                 self.log.warning(
-                    f"{_emoji('⚠️', '[
+                    f"{_emoji('⚠️', '[INCOMPLETE]')} Model '{model_name}' has incomplete "
                     f"or corrupted files"
                 )
 
-                # Prompt user for confirmation to
+                # Prompt user for confirmation to resume download
                 if not _prompt_user_for_repair(model_name):
                     raise ModelDownloadCancelledError(
-                        f"User declined to repair
+                        f"User declined to repair incomplete model: {model_name}"
                     )
 
-                #
+                # Try to resume download first (Lemonade handles partial files)
                 self.log.info(
-                    f"{_emoji('
+                    f"{_emoji('📥', '[RESUME]')} Attempting to resume download..."
                 )
+
                 try:
-
-
-
+                    # First attempt: resume download
+                    download_complete = False
+                    for event in self.pull_model_stream(model_name=model_name):
+                        event_type = event.get("event")
+                        if event_type == "complete":
+                            download_complete = True
+                        elif event_type == "error":
+                            raise LemonadeClientError(event.get("error", "Unknown"))
+
+                    if download_complete:
+                        # Retry loading
+                        response = self._send_request(
+                            "post", url, request_data, timeout=timeout
+                        )
+                        self.log.info(
+                            f"{_emoji('✅', '[OK]')} Loaded {model_name} after resume"
+                        )
+                        self.model = model_name
+                        return response
+
+                except Exception as resume_error:
+                    self.log.warning(
+                        f"{_emoji('⚠️', '[RETRY]')} Resume failed: {resume_error}"
                     )
-                except Exception as delete_error:
-                    self.log.warning(f"Failed to delete corrupt model: {delete_error}")
-                    # Continue anyway - the download may still work
 
-
-
+                # Prompt user before deleting
+                if not _prompt_user_for_delete(model_name):
+                    raise LemonadeClientError(
+                        f"Resume download failed for '{model_name}'. "
+                        f"You can manually delete the model and try again."
+                    )
+
+                # Second attempt: delete and re-download from scratch
+                try:
+                    self.log.info(
+                        f"{_emoji('🗑️', '[DELETE]')} Deleting corrupt model..."
+                    )
+                    self.delete_model(model_name)
+
+                    self.log.info(
+                        f"{_emoji('📥', '[FRESH]')} Starting fresh download..."
+                    )
+                    download_complete = False
+                    for event in self.pull_model_stream(model_name=model_name):
+                        event_type = event.get("event")
+                        if event_type == "complete":
+                            download_complete = True
+                        elif event_type == "error":
+                            raise LemonadeClientError(event.get("error", "Unknown"))
+
+                    if download_complete:
+                        # Retry loading
+                        response = self._send_request(
+                            "post", url, request_data, timeout=timeout
+                        )
+                        self.log.info(
+                            f"{_emoji('✅', '[OK]')} Loaded {model_name} after fresh download"
+                        )
+                        self.model = model_name
+                        return response
+
+                except Exception as fresh_error:
+                    self.log.error(
+                        f"{_emoji('❌', '[FAIL]')} Fresh download also failed: {fresh_error}"
+                    )
+                    raise LemonadeClientError(
+                        f"Failed to repair model '{model_name}' after both resume and fresh download attempts. "
+                        f"Please check your network connection and disk space, then try again."
+                    )
 
             # Check if this is a "model not found" error and auto_download is enabled
-            if not (
-                auto_download
-                and (self._is_model_error(e) or self._is_corrupt_download_error(e))
-            ):
+            if not (auto_download and self._is_model_error(e)):
                 # Not a model error or auto_download disabled - re-raise
                 self.log.error(f"Failed to load {model_name}: {original_error}")
                 if isinstance(e, LemonadeClientError):
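
Stripped of logging and messages, the new repair path is: confirm resume, resume via pull_model_stream, retry the load; on failure, confirm deletion, delete_model, fresh pull_model_stream, retry the load. A condensed sketch of that control flow; the wrapper function is hypothetical:

def repair_and_load(client, model_name: str):
    # Hypothetical condensation of the logic added above.
    if not _prompt_user_for_repair(model_name):
        raise ModelDownloadCancelledError(f"User declined: {model_name}")
    try:
        # First attempt: resume the partial download in place.
        for event in client.pull_model_stream(model_name=model_name):
            if event.get("event") == "error":
                raise LemonadeClientError(event.get("error", "Unknown"))
        return client.load_model(model_name)
    except Exception:
        if not _prompt_user_for_delete(model_name):
            raise
        # Second attempt: wipe the corrupt files and download from scratch.
        client.delete_model(model_name)
        for event in client.pull_model_stream(model_name=model_name):
            if event.get("event") == "error":
                raise LemonadeClientError(event.get("error", "Unknown"))
        return client.load_model(model_name)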
@@ -2211,24 +2366,45 @@ class LemonadeClient:
             self.active_downloads[model_name] = download_task
 
             try:
-                #
-                self.pull_model(model_name, timeout=download_timeout)
-
-                # Wait for download to complete (with cancellation support)
-                self.log.info(
-                    f" {_emoji('⏳', '[WAIT]')} Waiting for model download to complete..."
-                )
+                # Use streaming download for better performance and no timeouts
                 self.log.info(
-                    f" {_emoji('
-                    f"client.cancel_download(model_name)"
+                    f" {_emoji('⏳', '[DOWNLOAD]')} Downloading model with streaming..."
                 )
 
-
-
-
-
-
+                # Stream download with simple progress logging
+                download_complete = False
+                last_logged_percent = -10  # Log at 0%, 10%, 20%, etc.
+
+                for event in self.pull_model_stream(model_name=model_name):
+                    # Check for cancellation
+                    if download_task and download_task.is_cancelled():
+                        raise ModelDownloadCancelledError(
+                            f"Download cancelled: {model_name}"
+                        )
+
+                    event_type = event.get("event")
+                    if event_type == "progress":
+                        percent = event.get("percent", 0)
+                        # Log every 10%
+                        if percent >= last_logged_percent + 10:
+                            bytes_dl = event.get("bytes_downloaded", 0)
+                            bytes_total = event.get("bytes_total", 0)
+                            if bytes_total > 0:
+                                gb_dl = bytes_dl / (1024**3)
+                                gb_total = bytes_total / (1024**3)
+                                self.log.info(
+                                    f" {_emoji('📥', '[PROGRESS]')} "
+                                    f"{percent}% ({gb_dl:.1f}/{gb_total:.1f} GB)"
+                                )
+                            last_logged_percent = percent
+                    elif event_type == "complete":
+                        download_complete = True
+                    elif event_type == "error":
+                        raise LemonadeClientError(
+                            f"Download failed: {event.get('error', 'Unknown error')}"
+                        )
+
+                if download_complete:
                     # Retry loading after successful download
                     self.log.info(
                         f"{_emoji('🔄', '[RETRY]')} Retrying model load: {model_name}"
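
Because the cancellation flag is polled once per SSE event, a download can be stopped from another thread between chunks. A hedged sketch; it assumes client is a LemonadeClient and that cancel_download() (referenced in the pre-change log message) marks the task in active_downloads as cancelled:

import threading
import time

def cancel_after_delay():
    time.sleep(5)
    client.cancel_download("Qwen3-0.6B-GGUF")  # assumed API; see lead-in

threading.Thread(target=cancel_after_delay, daemon=True).start()
# The streaming loop above raises ModelDownloadCancelledError on the
# next event after the flag is set.
client.load_model("Qwen3-0.6B-GGUF", auto_download=True)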
@@ -2243,7 +2419,7 @@ class LemonadeClient:
                     return response
                 else:
                     raise LemonadeClientError(
-                        f"Model download
+                        f"Model download did not complete for '{model_name}'"
                     )
 
             except ModelDownloadCancelledError:
@@ -2421,7 +2597,17 @@ class LemonadeClient:
         """
         try:
             health = self.health_check()
-
+
+            # Lemonade 9.1.4+: context_size moved to all_models_loaded[N].recipe_options.ctx_size
+            all_models = health.get("all_models_loaded", [])
+            if all_models:
+                # Get context size from the first loaded model (typically the LLM)
+                reported_ctx = (
+                    all_models[0].get("recipe_options", {}).get("ctx_size", 0)
+                )
+            else:
+                # Fallback for older Lemonade versions
+                reported_ctx = health.get("context_size", 0)
 
             if reported_ctx >= required_tokens:
                 self.log.debug(
@@ -2457,7 +2643,16 @@ class LemonadeClient:
             health = self.health_check()
             status.running = True
             status.health_data = health
-
+
+            # Lemonade 9.1.4+: context_size moved to all_models_loaded[N].recipe_options.ctx_size
+            all_models = health.get("all_models_loaded", [])
+            if all_models:
+                status.context_size = (
+                    all_models[0].get("recipe_options", {}).get("ctx_size", 0)
+                )
+            else:
+                # Fallback for older Lemonade versions
+                status.context_size = health.get("context_size", 0)
 
             # Get loaded models
             models_response = self.list_models()
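
Both call sites parse the same reshaped health payload. Illustratively (keys per the comments above, values invented), the extraction logic is:

health_new = {  # Lemonade 9.1.4+
    "all_models_loaded": [{"recipe_options": {"ctx_size": 32768}}],
}
health_old = {"context_size": 32768}  # pre-9.1.4 shape

def extract_ctx(health: dict) -> int:
    models = health.get("all_models_loaded", [])
    if models:
        return models[0].get("recipe_options", {}).get("ctx_size", 0)
    return health.get("context_size", 0)

assert extract_ctx(health_new) == extract_ctx(health_old) == 32768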
@@ -2541,8 +2736,6 @@ class LemonadeClient:
     def download_agent_models(
         self,
         agent: str = "all",
-        timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
-        progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
     ) -> Dict[str, Any]:
         """
         Download all models required for an agent with streaming progress.
@@ -2552,9 +2745,6 @@ class LemonadeClient:
 
         Args:
             agent: Agent name (chat, code, rag, etc.) or "all" for all models
-            timeout: Timeout per model in seconds
-            progress_callback: Optional callback for progress updates.
-                Signature: callback(event_type: str, data: dict) -> None
 
         Returns:
             Dict with download results:
@@ -2563,11 +2753,9 @@ class LemonadeClient:
             - errors: List[str] - Any error messages
 
         Example:
-
-
-
-
-            result = client.download_agent_models("chat", progress_callback=on_progress)
+            result = client.download_agent_models("chat")
+            for event in client.pull_model_stream("model-id"):
+                print(f"{event.get('percent', 0)}%")
         """
         model_ids = self.get_required_models(agent)
 
@@ -2597,15 +2785,12 @@ class LemonadeClient:
             self.log.info(f"Downloading model: {model_id}")
             completed = False
 
-            for event in self.pull_model_stream(
-
-
-                progress_callback=progress_callback,
-            ):
-                if event.get("event") == "complete":
+            for event in self.pull_model_stream(model_name=model_id):
+                event_type = event.get("event")
+                if event_type == "complete":
                     completed = True
                     model_result["status"] = "completed"
-                elif
+                elif event_type == "error":
                     model_result["status"] = "error"
                     model_result["error"] = event.get("error", "Unknown error")
                     results["errors"].append(f"{model_id}: {model_result['error']}")
gaia/llm/providers/lemonade.py
CHANGED
@@ -47,18 +47,13 @@ class LemonadeProvider(LLMClient):
         stream: bool = False,
         **kwargs,
     ) -> Union[str, Iterator[str]]:
-        # Use
-
-
-
-
-
-        response = self._backend.completions(
-            model=effective_model, prompt=prompt, stream=stream, **kwargs
+        # Use chat endpoint (completions endpoint not available in Lemonade v9.1+)
+        return self.chat(
+            [{"role": "user", "content": prompt}],
+            model=model,
+            stream=stream,
+            **kwargs,
         )
-        if stream:
-            return self._handle_stream(response)
-        return self._extract_text(response)
 
     def chat(
         self,
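
complete() is now a thin wrapper: the prompt is wrapped as a single user message and delegated to chat(). Roughly, assuming provider is an already-constructed LemonadeProvider, these two calls take the same code path:

text = provider.complete("Summarize SSE in one sentence.")
same = provider.chat([{"role": "user", "content": "Summarize SSE in one sentence."}])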
@@ -114,7 +109,10 @@ class LemonadeProvider(LLMClient):
                 for chunk in response:
                     if "choices" in chunk and chunk["choices"]:
                         delta = chunk["choices"][0].get("delta", {})
-
-
+                        content = delta.get("content")
+                        if content:
+                            yield content
                         elif "text" in chunk["choices"][0]:
-
+                            text = chunk["choices"][0]["text"]
+                            if text:
+                                yield text
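
With empty deltas filtered out, a streaming caller only ever receives non-empty tokens. A usage sketch, again assuming a constructed provider:

for token in provider.chat(
    [{"role": "user", "content": "Write a haiku about lemons."}],
    stream=True,
):
    print(token, end="", flush=True)  # every yielded token is non-empty
print()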
gaia/rag/sdk.py
CHANGED