amd-gaia 0.15.1__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/METADATA +2 -2
  2. {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/RECORD +38 -32
  3. {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/WHEEL +1 -1
  4. gaia/agents/base/agent.py +317 -113
  5. gaia/agents/base/api_agent.py +0 -1
  6. gaia/agents/base/console.py +334 -9
  7. gaia/agents/base/tools.py +7 -2
  8. gaia/agents/blender/__init__.py +7 -0
  9. gaia/agents/blender/agent.py +7 -10
  10. gaia/agents/blender/core/view.py +2 -2
  11. gaia/agents/chat/agent.py +22 -48
  12. gaia/agents/chat/app.py +7 -0
  13. gaia/agents/chat/tools/rag_tools.py +23 -8
  14. gaia/agents/chat/tools/shell_tools.py +1 -0
  15. gaia/agents/code/prompts/code_patterns.py +2 -4
  16. gaia/agents/docker/agent.py +1 -0
  17. gaia/agents/emr/agent.py +3 -5
  18. gaia/agents/emr/cli.py +1 -1
  19. gaia/agents/emr/dashboard/server.py +2 -4
  20. gaia/agents/tools/__init__.py +11 -0
  21. gaia/agents/tools/file_tools.py +715 -0
  22. gaia/apps/llm/app.py +14 -3
  23. gaia/chat/app.py +2 -4
  24. gaia/cli.py +751 -333
  25. gaia/installer/__init__.py +23 -0
  26. gaia/installer/init_command.py +1605 -0
  27. gaia/installer/lemonade_installer.py +678 -0
  28. gaia/llm/__init__.py +2 -1
  29. gaia/llm/lemonade_client.py +427 -99
  30. gaia/llm/lemonade_manager.py +55 -11
  31. gaia/llm/providers/lemonade.py +21 -14
  32. gaia/rag/sdk.py +1 -1
  33. gaia/security.py +24 -4
  34. gaia/talk/app.py +2 -4
  35. gaia/version.py +2 -2
  36. {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/entry_points.txt +0 -0
  37. {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/licenses/LICENSE.md +0 -0
  38. {amd_gaia-0.15.1.dist-info → amd_gaia-0.15.3.dist-info}/top_level.txt +0 -0
@@ -89,8 +89,8 @@ DEFAULT_MODEL_NAME = "Qwen2.5-0.5B-Instruct-CPU"
89
89
  # Increased to accommodate long-running coding and evaluation tasks
90
90
  DEFAULT_REQUEST_TIMEOUT = 900
91
91
  # Default timeout in seconds for model loading operations
92
- # Increased for large model downloads and loading
93
- DEFAULT_MODEL_LOAD_TIMEOUT = 1200
92
+ # Increased for large model downloads and loading (10x increase for streaming stability)
93
+ DEFAULT_MODEL_LOAD_TIMEOUT = 12000
94
94
 
95
95
 
96
96
  # =========================================================================
@@ -413,21 +413,24 @@ def _prompt_user_for_repair(model_name: str) -> bool:
413
413
  table.add_row(
414
414
  "Status:", "[yellow]Download incomplete or files corrupted[/yellow]"
415
415
  )
416
- table.add_row("Action:", "Delete and re-download the model")
416
+ table.add_row(
417
+ "Action:",
418
+ "[green]Resume download (Lemonade will continue where it left off)[/green]",
419
+ )
417
420
 
418
421
  console.print(
419
422
  Panel(
420
423
  table,
421
- title="[bold yellow]⚠️ Corrupt Model Download Detected[/bold yellow]",
424
+ title="[bold yellow]⚠️ Incomplete Model Download Detected[/bold yellow]",
422
425
  border_style="yellow",
423
426
  )
424
427
  )
425
428
  console.print()
426
429
 
427
430
  while True:
428
- response = input("Delete and re-download? [Y/n]: ").strip().lower()
431
+ response = input("Resume download? [Y/n]: ").strip().lower()
429
432
  if response in ("", "y", "yes"):
430
- console.print("[green]✓[/green] Proceeding with repair...")
433
+ console.print("[green]✓[/green] Resuming download...")
431
434
  return True
432
435
  elif response in ("n", "no"):
433
436
  console.print("[dim]Cancelled.[/dim]")
@@ -438,15 +441,15 @@ def _prompt_user_for_repair(model_name: str) -> bool:
438
441
  except ImportError:
439
442
  # Fall back to plain text formatting
440
443
  print("\n" + "=" * 60)
441
- print(f"{_emoji('⚠️', '[WARNING]')} Corrupt Model Download Detected")
444
+ print(f"{_emoji('⚠️', '[WARNING]')} Incomplete Model Download Detected")
442
445
  print("=" * 60)
443
446
  print(f"Model: {model_name}")
444
447
  print("Status: Download incomplete or files corrupted")
445
- print("Action: Delete and re-download the model")
448
+ print("Action: Resume download (Lemonade will continue where it left off)")
446
449
  print("=" * 60)
447
450
 
448
451
  while True:
449
- response = input("Delete and re-download? [Y/n]: ").strip().lower()
452
+ response = input("Resume download? [Y/n]: ").strip().lower()
450
453
  if response in ("", "y", "yes"):
451
454
  return True
452
455
  elif response in ("n", "no"):
@@ -455,6 +458,86 @@ def _prompt_user_for_repair(model_name: str) -> bool:
455
458
  print("Please enter 'y' or 'n'")
456
459
 
457
460
 
461
+ def _prompt_user_for_delete(model_name: str) -> bool:
462
+ """
463
+ Prompt user for confirmation to delete a model and re-download from scratch.
464
+
465
+ Args:
466
+ model_name: Name of the model to delete
467
+
468
+ Returns:
469
+ True if user confirms, False if user declines
470
+ """
471
+ # Get model storage paths
472
+ if sys.platform == "win32":
473
+ lemonade_cache = os.path.expandvars("%LOCALAPPDATA%\\lemonade\\")
474
+ hf_cache = os.path.expandvars("%USERPROFILE%\\.cache\\huggingface\\hub\\")
475
+ else:
476
+ lemonade_cache = os.path.expanduser("~/.local/share/lemonade/")
477
+ hf_cache = os.path.expanduser("~/.cache/huggingface/hub/")
478
+
479
+ try:
480
+ from rich.console import Console
481
+ from rich.panel import Panel
482
+ from rich.table import Table
483
+
484
+ console = Console()
485
+ console.print()
486
+
487
+ table = Table(show_header=False, box=None, padding=(0, 1))
488
+ table.add_column(style="dim")
489
+ table.add_column()
490
+ table.add_row("Model:", f"[cyan]{model_name}[/cyan]")
491
+ table.add_row(
492
+ "Status:", "[yellow]Resume failed, files may be corrupted[/yellow]"
493
+ )
494
+ table.add_row("Action:", "[red]Delete model and download fresh[/red]")
495
+ table.add_row("", "")
496
+ table.add_row("Storage:", f"[dim]{lemonade_cache}[/dim]")
497
+ table.add_row("", f"[dim]{hf_cache}[/dim]")
498
+
499
+ console.print(
500
+ Panel(
501
+ table,
502
+ title="[bold yellow]⚠️ Delete and Re-download?[/bold yellow]",
503
+ border_style="yellow",
504
+ )
505
+ )
506
+
507
+ while True:
508
+ response = (
509
+ input("Delete and re-download from scratch? [y/N]: ").strip().lower()
510
+ )
511
+ if response in ("y", "yes"):
512
+ console.print("[green]✓[/green] Deleting and re-downloading...")
513
+ return True
514
+ elif response in ("", "n", "no"):
515
+ console.print("[dim]Cancelled.[/dim]")
516
+ return False
517
+ else:
518
+ console.print("[dim]Please enter 'y' or 'n'[/dim]")
519
+
520
+ except ImportError:
521
+ print("\n" + "=" * 60)
522
+ print(f"{_emoji('⚠️', '[WARNING]')} Resume failed")
523
+ print(f"Model: {model_name}")
524
+ print(f"Storage: {lemonade_cache}")
525
+ print(f" {hf_cache}")
526
+ print("Delete and download fresh?")
527
+ print("=" * 60)
528
+
529
+ while True:
530
+ response = (
531
+ input("Delete and re-download from scratch? [y/N]: ").strip().lower()
532
+ )
533
+ if response in ("y", "yes"):
534
+ return True
535
+ elif response in ("", "n", "no"):
536
+ return False
537
+ else:
538
+ print("Please enter 'y' or 'n'")
539
+
540
+
458
541
  def _check_disk_space(size_gb: float, path: Optional[str] = None) -> bool:
459
542
  """
460
543
  Check if there's enough disk space for download.
@@ -1518,6 +1601,139 @@ class LemonadeClient:
1518
1601
  self.log.error(f"Error generating embeddings: {str(e)}")
1519
1602
  raise LemonadeClientError(f"Error generating embeddings: {str(e)}")
1520
1603
 
1604
+ # =========================================================================
1605
+ # Image Generation (Stable Diffusion)
1606
+ # =========================================================================
1607
+
1608
+ # Supported SD configurations
1609
+ SD_MODELS = ["SD-1.5", "SD-Turbo", "SDXL-Base-1.0", "SDXL-Turbo"]
1610
+ SD_SIZES = ["512x512", "768x768", "1024x1024"]
1611
+
1612
+ # Model-specific defaults
1613
+ SD_MODEL_DEFAULTS = {
1614
+ "SD-1.5": {"steps": 20, "cfg_scale": 7.5, "size": "512x512"},
1615
+ "SD-Turbo": {"steps": 4, "cfg_scale": 1.0, "size": "512x512"},
1616
+ "SDXL-Base-1.0": {"steps": 20, "cfg_scale": 7.5, "size": "1024x1024"},
1617
+ "SDXL-Turbo": {"steps": 4, "cfg_scale": 1.0, "size": "512x512"},
1618
+ }
1619
+
1620
+ def generate_image(
1621
+ self,
1622
+ prompt: str,
1623
+ model: str = "SDXL-Turbo",
1624
+ size: Optional[str] = None,
1625
+ steps: Optional[int] = None,
1626
+ cfg_scale: Optional[float] = None,
1627
+ seed: Optional[int] = None,
1628
+ timeout: int = 300,
1629
+ ) -> Dict[str, Any]:
1630
+ """
1631
+ Generate an image from a text prompt using Stable Diffusion.
1632
+
1633
+ Args:
1634
+ prompt: Text description of the image to generate
1635
+ model: SD model - SD-1.5, SD-Turbo, SDXL-Base-1.0 (photorealistic), SDXL-Turbo
1636
+ size: Image dimensions (auto-selected if None, or 512x512, 768x768, 1024x1024)
1637
+ steps: Inference steps (auto-selected if None: Turbo=4, Base=20)
1638
+ cfg_scale: CFG scale (auto-selected if None: Turbo=1.0, Base=7.5)
1639
+ seed: Random seed for reproducibility (optional)
1640
+ timeout: Request timeout in seconds (default: 300 for slower Base models)
1641
+
1642
+ Returns:
1643
+ Dict with 'data' containing list of generated images in b64_json format
1644
+
1645
+ Raises:
1646
+ LemonadeClientError: If generation fails or invalid parameters
1647
+
1648
+ Example:
1649
+ # Photorealistic with SDXL-Base-1.0 (auto-settings)
1650
+ result = client.generate_image(
1651
+ prompt="a sunset over mountains, golden hour, photorealistic",
1652
+ model="SDXL-Base-1.0"
1653
+ )
1654
+
1655
+ # Fast stylized with SDXL-Turbo
1656
+ result = client.generate_image(
1657
+ prompt="cyberpunk city",
1658
+ model="SDXL-Turbo"
1659
+ )
1660
+ """
1661
+ # Validate model
1662
+ if model not in self.SD_MODELS:
1663
+ raise LemonadeClientError(
1664
+ f"Invalid model '{model}'. Choose from: {self.SD_MODELS}"
1665
+ )
1666
+
1667
+ # Apply model-specific defaults
1668
+ defaults = self.SD_MODEL_DEFAULTS.get(model, {})
1669
+ size = size or defaults.get("size", "512x512")
1670
+ steps = steps if steps is not None else defaults.get("steps", 20)
1671
+ cfg_scale = (
1672
+ cfg_scale if cfg_scale is not None else defaults.get("cfg_scale", 7.5)
1673
+ )
1674
+
1675
+ # Validate size
1676
+ if size not in self.SD_SIZES:
1677
+ raise LemonadeClientError(
1678
+ f"Invalid size '{size}'. Choose from: {self.SD_SIZES}"
1679
+ )
1680
+
1681
+ try:
1682
+ # Generate random seed if not provided for varied results
1683
+ import random
1684
+
1685
+ if seed is None:
1686
+ seed = random.randint(0, 2**32 - 1)
1687
+
1688
+ payload = {
1689
+ "prompt": prompt,
1690
+ "model": model,
1691
+ "size": size,
1692
+ "n": 1,
1693
+ "response_format": "b64_json",
1694
+ "cfg_scale": cfg_scale,
1695
+ "steps": steps,
1696
+ "seed": seed,
1697
+ }
1698
+
1699
+ self.log.info(
1700
+ f"Generating image: model={model}, size={size}, steps={steps}, cfg={cfg_scale}"
1701
+ )
1702
+ url = f"{self.base_url}/images/generations"
1703
+ response = self._send_request("POST", url, data=payload, timeout=timeout)
1704
+
1705
+ return response
1706
+
1707
+ except LemonadeClientError:
1708
+ raise
1709
+ except Exception as e:
1710
+ self.log.error(f"Error generating image: {str(e)}")
1711
+ raise LemonadeClientError(f"Error generating image: {str(e)}")
1712
+
1713
+ def list_sd_models(self) -> List[Dict[str, Any]]:
1714
+ """
1715
+ List available Stable Diffusion models from the server.
1716
+
1717
+ Returns:
1718
+ List of SD model info dicts with id, labels, and image_defaults
1719
+
1720
+ Example:
1721
+ sd_models = client.list_sd_models()
1722
+ for m in sd_models:
1723
+ print(f"{m['id']}: {m.get('image_defaults', {})}")
1724
+ """
1725
+ try:
1726
+ models = self.list_models()
1727
+ sd_models = [
1728
+ m
1729
+ for m in models.get("data", [])
1730
+ if m.get("id") in self.SD_MODELS or "image" in m.get("labels", [])
1731
+ ]
1732
+ return sd_models
1733
+ except Exception as e:
1734
+ self.log.error(f"Error listing SD models: {str(e)}")
1735
+ raise LemonadeClientError(f"Error listing SD models: {str(e)}")
1736
+
1521
1737
  def list_models(self, show_all: bool = False) -> Dict[str, Any]:
1522
1738
  """
1523
1739
  List available models from the server.
@@ -1640,8 +1856,6 @@ class LemonadeClient:
1640
1856
  embedding: Optional[bool] = None,
1641
1857
  reranking: Optional[bool] = None,
1642
1858
  mmproj: Optional[str] = None,
1643
- timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
1644
- progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
1645
1859
  ) -> Generator[Dict[str, Any], None, None]:
1646
1860
  """
1647
1861
  Install a model on the server with streaming progress updates.
@@ -1658,34 +1872,23 @@ class LemonadeClient:
1658
1872
  embedding: Whether the model is an embedding model (for registering new models)
1659
1873
  reranking: Whether the model is a reranking model (for registering new models)
1660
1874
  mmproj: Multimodal Projector file for vision models (for registering new models)
1661
- timeout: Request timeout in seconds (longer for model installation)
1662
- progress_callback: Optional callback function called with progress dict on each event.
1663
- Signature: callback(event_type: str, data: dict) -> None
1664
- event_type is one of: "progress", "complete", "error"
1665
1875
 
1666
1876
  Yields:
1667
1877
  Dict containing progress event data with fields:
1668
- - For "progress" events: file, file_index, total_files, bytes_downloaded,
1669
- bytes_total, percent
1670
- - For "complete" events: file_index, total_files, percent (100)
1671
- - For "error" events: error message
1878
+ - event: "progress", "complete", or "error"
1879
+ - For "progress": file, file_index, total_files, bytes_downloaded, bytes_total, percent
1880
+ - For "complete": file_index, total_files, percent (100)
1881
+ - For "error": error message
1672
1882
 
1673
1883
  Raises:
1674
1884
  LemonadeClientError: If the model installation fails
1675
1885
 
1676
1886
  Example:
1677
- # Using as generator
1678
1887
  for event in client.pull_model_stream("Qwen3-0.6B-GGUF"):
1679
- if event.get("event") == "progress":
1888
+ if event["event"] == "progress":
1680
1889
  print(f"Downloading: {event['percent']}%")
1681
-
1682
- # Using with callback
1683
- def on_progress(event_type, data):
1684
- if event_type == "progress":
1685
- print(f"{data['file']}: {data['percent']}%")
1686
-
1687
- for _ in client.pull_model_stream("Qwen3-0.6B-GGUF", progress_callback=on_progress):
1688
- pass
1890
+ elif event["event"] == "complete":
1891
+ print("Done!")
1689
1892
  """
1690
1893
  self.log.info(f"Installing {model_name} with streaming progress")
1691
1894
 
@@ -1708,12 +1911,21 @@ class LemonadeClient:
1708
1911
 
1709
1912
  url = f"{self.base_url}/pull"
1710
1913
 
1914
+ # Use separate connect and read timeouts to handle SSE streams properly:
1915
+ # - Connect timeout: 30 seconds (fast connection establishment)
1916
+ # - Read timeout: 120 seconds (timeout if no data for 2 minutes)
1917
+ # This detects stuck downloads while still allowing normal long downloads
1918
+ # (as long as bytes keep flowing). The timeout is between receiving chunks,
1919
+ # not total time, so long downloads with steady progress will work fine.
1920
+ connect_timeout = 30
1921
+ read_timeout = 120 # Timeout if no data received for 2 minutes
1922
+
1711
1923
  try:
1712
1924
  response = requests.post(
1713
1925
  url,
1714
1926
  json=request_data,
1715
1927
  headers={"Content-Type": "application/json"},
1716
- timeout=timeout,
1928
+ timeout=(connect_timeout, read_timeout),
1717
1929
  stream=True,
1718
1930
  )
1719
1931
 
@@ -1725,11 +1937,14 @@ class LemonadeClient:
1725
1937
  # Parse SSE stream
1726
1938
  event_type = None
1727
1939
  received_complete = False
1940
+
1728
1941
  try:
1729
- for line in response.iter_lines(decode_unicode=True):
1730
- if not line:
1942
+ for line_bytes in response.iter_lines():
1943
+ if not line_bytes:
1731
1944
  continue
1732
1945
 
1946
+ line = line_bytes.decode("utf-8", errors="replace")
1947
+
1733
1948
  if line.startswith("event:"):
1734
1949
  event_type = line[6:].strip()
1735
1950
  elif line.startswith("data:"):
@@ -1738,28 +1953,20 @@ class LemonadeClient:
1738
1953
  data = json.loads(data_str)
1739
1954
  data["event"] = event_type or "progress"
1740
1955
 
1741
- # Call the progress callback if provided
1742
- if progress_callback:
1743
- progress_callback(event_type or "progress", data)
1744
-
1956
+ # Yield all events - let the consumer handle throttling
1745
1957
  yield data
1746
1958
 
1747
- # Track complete event
1748
1959
  if event_type == "complete":
1749
1960
  received_complete = True
1750
-
1751
- # Check for error event
1752
- if event_type == "error":
1753
- error_msg = data.get(
1754
- "error", "Unknown error during model pull"
1961
+ elif event_type == "error":
1962
+ raise LemonadeClientError(
1963
+ data.get("error", "Unknown error during model pull")
1755
1964
  )
1756
- raise LemonadeClientError(error_msg)
1757
1965
 
1758
1966
  except json.JSONDecodeError:
1759
1967
  self.log.warning(f"Failed to parse SSE data: {data_str}")
1760
1968
  continue
1761
1969
  except requests.exceptions.ChunkedEncodingError:
1762
- # Connection closed by server - this is normal after complete event
1763
1970
  if not received_complete:
1764
1971
  raise
1765
1972
 
@@ -2073,9 +2280,32 @@ class LemonadeClient:
2073
2280
  return
2074
2281
 
2075
2282
  # Model not loaded - load it (will download if needed without prompting)
2076
- self.log.info(f"Model '{model}' not loaded, loading...")
2283
+ self.log.debug(f"Model '{model}' not loaded, loading...")
2284
+
2285
+ try:
2286
+ from rich.console import Console
2287
+
2288
+ console = Console()
2289
+ console.print(
2290
+ f"[bold blue]🔄 Loading model:[/bold blue] [cyan]{model}[/cyan]..."
2291
+ )
2292
+ except ImportError:
2293
+ console = None
2294
+ print(f"🔄 Loading model: {model}...")
2295
+
2077
2296
  self.load_model(model, auto_download=True, prompt=False)
2078
2297
 
2298
+ # Print model ready message
2299
+ try:
2300
+ if console:
2301
+ console.print(
2302
+ f"[bold green]✅ Model loaded:[/bold green] [cyan]{model}[/cyan]"
2303
+ )
2304
+ else:
2305
+ print(f"✅ Model loaded: {model}")
2306
+ except Exception:
2307
+ pass # Ignore print errors
2308
+
2079
2309
  except Exception as e:
2080
2310
  # Log but don't fail - let the actual request fail with proper error
2081
2311
  self.log.debug(f"Could not pre-check model status: {e}")
@@ -2085,8 +2315,10 @@ class LemonadeClient:
2085
2315
  model_name: str,
2086
2316
  timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
2087
2317
  auto_download: bool = False,
2088
- download_timeout: int = 7200,
2318
+ _download_timeout: int = 7200, # Reserved for future use
2089
2319
  llamacpp_args: Optional[str] = None,
2320
+ ctx_size: Optional[int] = None,
2321
+ save_options: bool = False,
2090
2322
  prompt: bool = True,
2091
2323
  ) -> Dict[str, Any]:
2092
2324
  """
@@ -2106,6 +2338,10 @@ class LemonadeClient:
2106
2338
  Large models can be 100GB+ and take hours to download
2107
2339
  llamacpp_args: Optional llama.cpp arguments (e.g., "--ubatch-size 2048").
2108
2340
  Used to configure model loading parameters like batch sizes.
2341
+ ctx_size: Context size for the model in tokens (e.g., 8192, 32768).
2342
+ Overrides the default value for this model.
2343
+ save_options: If True, persists ctx_size and llamacpp_args to config file.
2344
+ Model will use these settings on future loads.
2109
2345
  prompt: If True, prompt user before downloading (default: True).
2110
2346
  Set to False to download automatically without user confirmation.
2111
2347
 
@@ -2122,6 +2358,10 @@ class LemonadeClient:
2122
2358
  request_data = {"model_name": model_name}
2123
2359
  if llamacpp_args:
2124
2360
  request_data["llamacpp_args"] = llamacpp_args
2361
+ if ctx_size is not None:
2362
+ request_data["ctx_size"] = ctx_size
2363
+ if save_options:
2364
+ request_data["save_options"] = save_options
2125
2365
  url = f"{self.base_url}/load"
2126
2366
 
2127
2367
  try:
@@ -2133,39 +2373,97 @@ class LemonadeClient:
2133
2373
  original_error = str(e)
2134
2374
 
2135
2375
  # Check if this is a corrupt/incomplete download error
2136
- if self._is_corrupt_download_error(e):
2376
+ is_corrupt = self._is_corrupt_download_error(e)
2377
+ if is_corrupt:
2137
2378
  self.log.warning(
2138
- f"{_emoji('⚠️', '[CORRUPT]')} Model '{model_name}' has incomplete "
2379
+ f"{_emoji('⚠️', '[INCOMPLETE]')} Model '{model_name}' has incomplete "
2139
2380
  f"or corrupted files"
2140
2381
  )
2141
2382
 
2142
- # Prompt user for confirmation to delete and re-download
2383
+ # Prompt user for confirmation to resume download
2143
2384
  if not _prompt_user_for_repair(model_name):
2144
2385
  raise ModelDownloadCancelledError(
2145
- f"User declined to repair corrupt model: {model_name}"
2386
+ f"User declined to repair incomplete model: {model_name}"
2146
2387
  )
2147
2388
 
2148
- # Delete the corrupt model
2389
+ # Try to resume download first (Lemonade handles partial files)
2149
2390
  self.log.info(
2150
- f"{_emoji('🗑️', '[DELETE]')} Deleting corrupt model: {model_name}"
2391
+ f"{_emoji('📥', '[RESUME]')} Attempting to resume download..."
2151
2392
  )
2393
+
2152
2394
  try:
2153
- self.delete_model(model_name)
2154
- self.log.info(
2155
- f"{_emoji('✅', '[OK]')} Deleted corrupt model: {model_name}"
2395
+ # First attempt: resume download
2396
+ download_complete = False
2397
+ for event in self.pull_model_stream(model_name=model_name):
2398
+ event_type = event.get("event")
2399
+ if event_type == "complete":
2400
+ download_complete = True
2401
+ elif event_type == "error":
2402
+ raise LemonadeClientError(event.get("error", "Unknown"))
2403
+
2404
+ if download_complete:
2405
+ # Retry loading
2406
+ response = self._send_request(
2407
+ "post", url, request_data, timeout=timeout
2408
+ )
2409
+ self.log.info(
2410
+ f"{_emoji('✅', '[OK]')} Loaded {model_name} after resume"
2411
+ )
2412
+ self.model = model_name
2413
+ return response
2414
+
2415
+ except Exception as resume_error:
2416
+ self.log.warning(
2417
+ f"{_emoji('⚠️', '[RETRY]')} Resume failed: {resume_error}"
2156
2418
  )
2157
- except Exception as delete_error:
2158
- self.log.warning(f"Failed to delete corrupt model: {delete_error}")
2159
- # Continue anyway - the download may still work
2160
2419
 
2161
- # Now trigger a fresh download by falling through to auto-download flow
2162
- # (the model is now "not found" so _is_model_error will match)
2420
+ # Prompt user before deleting
2421
+ if not _prompt_user_for_delete(model_name):
2422
+ raise LemonadeClientError(
2423
+ f"Resume download failed for '{model_name}'. "
2424
+ f"You can manually delete the model and try again."
2425
+ )
2426
+
2427
+ # Second attempt: delete and re-download from scratch
2428
+ try:
2429
+ self.log.info(
2430
+ f"{_emoji('🗑️', '[DELETE]')} Deleting corrupt model..."
2431
+ )
2432
+ self.delete_model(model_name)
2433
+
2434
+ self.log.info(
2435
+ f"{_emoji('📥', '[FRESH]')} Starting fresh download..."
2436
+ )
2437
+ download_complete = False
2438
+ for event in self.pull_model_stream(model_name=model_name):
2439
+ event_type = event.get("event")
2440
+ if event_type == "complete":
2441
+ download_complete = True
2442
+ elif event_type == "error":
2443
+ raise LemonadeClientError(event.get("error", "Unknown"))
2444
+
2445
+ if download_complete:
2446
+ # Retry loading
2447
+ response = self._send_request(
2448
+ "post", url, request_data, timeout=timeout
2449
+ )
2450
+ self.log.info(
2451
+ f"{_emoji('✅', '[OK]')} Loaded {model_name} after fresh download"
2452
+ )
2453
+ self.model = model_name
2454
+ return response
2455
+
2456
+ except Exception as fresh_error:
2457
+ self.log.error(
2458
+ f"{_emoji('❌', '[FAIL]')} Fresh download also failed: {fresh_error}"
2459
+ )
2460
+ raise LemonadeClientError(
2461
+ f"Failed to repair model '{model_name}' after both resume and fresh download attempts. "
2462
+ f"Please check your network connection and disk space, then try again."
2463
+ )
2163
2464
 
2164
2465
  # Check if this is a "model not found" error and auto_download is enabled
2165
- if not (
2166
- auto_download
2167
- and (self._is_model_error(e) or self._is_corrupt_download_error(e))
2168
- ):
2466
+ if not (auto_download and self._is_model_error(e)):
2169
2467
  # Not a model error or auto_download disabled - re-raise
2170
2468
  self.log.error(f"Failed to load {model_name}: {original_error}")
2171
2469
  if isinstance(e, LemonadeClientError):
@@ -2211,24 +2509,45 @@ class LemonadeClient:
2211
2509
  self.active_downloads[model_name] = download_task
2212
2510
 
2213
2511
  try:
2214
- # Trigger model download
2215
- self.pull_model(model_name, timeout=download_timeout)
2216
-
2217
- # Wait for download to complete (with cancellation support)
2218
- self.log.info(
2219
- f" {_emoji('⏳', '[WAIT]')} Waiting for model download to complete..."
2220
- )
2512
+ # Use streaming download for better performance and no timeouts
2221
2513
  self.log.info(
2222
- f" {_emoji('💡', '[TIP]')} Tip: You can cancel with "
2223
- f"client.cancel_download(model_name)"
2514
+ f" {_emoji('', '[DOWNLOAD]')} Downloading model with streaming..."
2224
2515
  )
2225
2516
 
2226
- if self._wait_for_model_download(
2227
- model_name,
2228
- timeout=download_timeout,
2229
- show_progress=True,
2230
- download_task=download_task,
2231
- ):
2517
+ # Stream download with simple progress logging
2518
+ download_complete = False
2519
+ last_logged_percent = -10 # Log at 0%, 10%, 20%, etc.
2520
+
2521
+ for event in self.pull_model_stream(model_name=model_name):
2522
+ # Check for cancellation
2523
+ if download_task and download_task.is_cancelled():
2524
+ raise ModelDownloadCancelledError(
2525
+ f"Download cancelled: {model_name}"
2526
+ )
2527
+
2528
+ event_type = event.get("event")
2529
+ if event_type == "progress":
2530
+ percent = event.get("percent", 0)
2531
+ # Log every 10%
2532
+ if percent >= last_logged_percent + 10:
2533
+ bytes_dl = event.get("bytes_downloaded", 0)
2534
+ bytes_total = event.get("bytes_total", 0)
2535
+ if bytes_total > 0:
2536
+ gb_dl = bytes_dl / (1024**3)
2537
+ gb_total = bytes_total / (1024**3)
2538
+ self.log.info(
2539
+ f" {_emoji('📥', '[PROGRESS]')} "
2540
+ f"{percent}% ({gb_dl:.1f}/{gb_total:.1f} GB)"
2541
+ )
2542
+ last_logged_percent = percent
2543
+ elif event_type == "complete":
2544
+ download_complete = True
2545
+ elif event_type == "error":
2546
+ raise LemonadeClientError(
2547
+ f"Download failed: {event.get('error', 'Unknown error')}"
2548
+ )
2549
+
2550
+ if download_complete:
2232
2551
  # Retry loading after successful download
2233
2552
  self.log.info(
2234
2553
  f"{_emoji('🔄', '[RETRY]')} Retrying model load: {model_name}"
@@ -2243,7 +2562,7 @@ class LemonadeClient:
2243
2562
  return response
2244
2563
  else:
2245
2564
  raise LemonadeClientError(
2246
- f"Model download timed out for '{model_name}'"
2565
+ f"Model download did not complete for '{model_name}'"
2247
2566
  )
2248
2567
 
2249
2568
  except ModelDownloadCancelledError:
@@ -2421,7 +2740,17 @@ class LemonadeClient:
2421
2740
  """
2422
2741
  try:
2423
2742
  health = self.health_check()
2424
- reported_ctx = health.get("context_size", 0)
2743
+
2744
+ # Lemonade 9.1.4+: context_size moved to all_models_loaded[N].recipe_options.ctx_size
2745
+ all_models = health.get("all_models_loaded", [])
2746
+ if all_models:
2747
+ # Get context size from the first loaded model (typically the LLM)
2748
+ reported_ctx = (
2749
+ all_models[0].get("recipe_options", {}).get("ctx_size", 0)
2750
+ )
2751
+ else:
2752
+ # Fallback for older Lemonade versions
2753
+ reported_ctx = health.get("context_size", 0)
2425
2754
 
2426
2755
  if reported_ctx >= required_tokens:
2427
2756
  self.log.debug(
@@ -2457,7 +2786,16 @@ class LemonadeClient:
2457
2786
  health = self.health_check()
2458
2787
  status.running = True
2459
2788
  status.health_data = health
2460
- status.context_size = health.get("context_size", 0)
2789
+
2790
+ # Lemonade 9.1.4+: context_size moved to all_models_loaded[N].recipe_options.ctx_size
2791
+ all_models = health.get("all_models_loaded", [])
2792
+ if all_models:
2793
+ status.context_size = (
2794
+ all_models[0].get("recipe_options", {}).get("ctx_size", 0)
2795
+ )
2796
+ else:
2797
+ # Fallback for older Lemonade versions
2798
+ status.context_size = health.get("context_size", 0)
2461
2799
 
2462
2800
  # Get loaded models
2463
2801
  models_response = self.list_models()
@@ -2541,8 +2879,6 @@ class LemonadeClient:
2541
2879
  def download_agent_models(
2542
2880
  self,
2543
2881
  agent: str = "all",
2544
- timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
2545
- progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
2546
2882
  ) -> Dict[str, Any]:
2547
2883
  """
2548
2884
  Download all models required for an agent with streaming progress.
@@ -2552,9 +2888,6 @@ class LemonadeClient:
2552
2888
 
2553
2889
  Args:
2554
2890
  agent: Agent name (chat, code, rag, etc.) or "all" for all models
2555
- timeout: Timeout per model in seconds
2556
- progress_callback: Optional callback for progress updates.
2557
- Signature: callback(event_type: str, data: dict) -> None
2558
2891
 
2559
2892
  Returns:
2560
2893
  Dict with download results:
@@ -2563,11 +2896,9 @@ class LemonadeClient:
2563
2896
  - errors: List[str] - Any error messages
2564
2897
 
2565
2898
  Example:
2566
- def on_progress(event_type, data):
2567
- if event_type == "progress":
2568
- print(f"{data['file']}: {data['percent']}%")
2569
-
2570
- result = client.download_agent_models("chat", progress_callback=on_progress)
2899
+ result = client.download_agent_models("chat")
2900
+ for event in client.pull_model_stream("model-id"):
2901
+ print(f"{event.get('percent', 0)}%")
2571
2902
  """
2572
2903
  model_ids = self.get_required_models(agent)
2573
2904
 
@@ -2597,15 +2928,12 @@ class LemonadeClient:
2597
2928
  self.log.info(f"Downloading model: {model_id}")
2598
2929
  completed = False
2599
2930
 
2600
- for event in self.pull_model_stream(
2601
- model_name=model_id,
2602
- timeout=timeout,
2603
- progress_callback=progress_callback,
2604
- ):
2605
- if event.get("event") == "complete":
2931
+ for event in self.pull_model_stream(model_name=model_id):
2932
+ event_type = event.get("event")
2933
+ if event_type == "complete":
2606
2934
  completed = True
2607
2935
  model_result["status"] = "completed"
2608
- elif event.get("event") == "error":
2936
+ elif event_type == "error":
2609
2937
  model_result["status"] = "error"
2610
2938
  model_result["error"] = event.get("error", "Unknown error")
2611
2939
  results["errors"].append(f"{model_id}: {model_result['error']}")