amd-gaia 0.15.1__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry; it is provided for informational purposes and reflects the changes between those versions as published.
@@ -89,8 +89,8 @@ DEFAULT_MODEL_NAME = "Qwen2.5-0.5B-Instruct-CPU"
  # Increased to accommodate long-running coding and evaluation tasks
  DEFAULT_REQUEST_TIMEOUT = 900
  # Default timeout in seconds for model loading operations
- # Increased for large model downloads and loading
- DEFAULT_MODEL_LOAD_TIMEOUT = 1200
+ # Increased for large model downloads and loading (10x increase for streaming stability)
+ DEFAULT_MODEL_LOAD_TIMEOUT = 12000


  # =========================================================================
@@ -413,21 +413,24 @@ def _prompt_user_for_repair(model_name: str) -> bool:
  table.add_row(
  "Status:", "[yellow]Download incomplete or files corrupted[/yellow]"
  )
- table.add_row("Action:", "Delete and re-download the model")
+ table.add_row(
+ "Action:",
+ "[green]Resume download (Lemonade will continue where it left off)[/green]",
+ )

  console.print(
  Panel(
  table,
- title="[bold yellow]⚠️ Corrupt Model Download Detected[/bold yellow]",
+ title="[bold yellow]⚠️ Incomplete Model Download Detected[/bold yellow]",
  border_style="yellow",
  )
  )
  console.print()

  while True:
- response = input("Delete and re-download? [Y/n]: ").strip().lower()
+ response = input("Resume download? [Y/n]: ").strip().lower()
  if response in ("", "y", "yes"):
- console.print("[green]✓[/green] Proceeding with repair...")
+ console.print("[green]✓[/green] Resuming download...")
  return True
  elif response in ("n", "no"):
  console.print("[dim]Cancelled.[/dim]")
@@ -438,15 +441,15 @@ def _prompt_user_for_repair(model_name: str) -> bool:
  except ImportError:
  # Fall back to plain text formatting
  print("\n" + "=" * 60)
- print(f"{_emoji('⚠️', '[WARNING]')} Corrupt Model Download Detected")
+ print(f"{_emoji('⚠️', '[WARNING]')} Incomplete Model Download Detected")
  print("=" * 60)
  print(f"Model: {model_name}")
  print("Status: Download incomplete or files corrupted")
- print("Action: Delete and re-download the model")
+ print("Action: Resume download (Lemonade will continue where it left off)")
  print("=" * 60)

  while True:
- response = input("Delete and re-download? [Y/n]: ").strip().lower()
+ response = input("Resume download? [Y/n]: ").strip().lower()
  if response in ("", "y", "yes"):
  return True
  elif response in ("n", "no"):
@@ -455,6 +458,86 @@ def _prompt_user_for_repair(model_name: str) -> bool:
  print("Please enter 'y' or 'n'")


+ def _prompt_user_for_delete(model_name: str) -> bool:
+ """
+ Prompt user for confirmation to delete a model and re-download from scratch.
+
+ Args:
+ model_name: Name of the model to delete
+
+ Returns:
+ True if user confirms, False if user declines
+ """
+ # Get model storage paths
+ if sys.platform == "win32":
+ lemonade_cache = os.path.expandvars("%LOCALAPPDATA%\\lemonade\\")
+ hf_cache = os.path.expandvars("%USERPROFILE%\\.cache\\huggingface\\hub\\")
+ else:
+ lemonade_cache = os.path.expanduser("~/.local/share/lemonade/")
+ hf_cache = os.path.expanduser("~/.cache/huggingface/hub/")
+
+ try:
+ from rich.console import Console
+ from rich.panel import Panel
+ from rich.table import Table
+
+ console = Console()
+ console.print()
+
+ table = Table(show_header=False, box=None, padding=(0, 1))
+ table.add_column(style="dim")
+ table.add_column()
+ table.add_row("Model:", f"[cyan]{model_name}[/cyan]")
+ table.add_row(
+ "Status:", "[yellow]Resume failed, files may be corrupted[/yellow]"
+ )
+ table.add_row("Action:", "[red]Delete model and download fresh[/red]")
+ table.add_row("", "")
+ table.add_row("Storage:", f"[dim]{lemonade_cache}[/dim]")
+ table.add_row("", f"[dim]{hf_cache}[/dim]")
+
+ console.print(
+ Panel(
+ table,
+ title="[bold yellow]⚠️ Delete and Re-download?[/bold yellow]",
+ border_style="yellow",
+ )
+ )
+
+ while True:
+ response = (
+ input("Delete and re-download from scratch? [y/N]: ").strip().lower()
+ )
+ if response in ("y", "yes"):
+ console.print("[green]✓[/green] Deleting and re-downloading...")
+ return True
+ elif response in ("", "n", "no"):
+ console.print("[dim]Cancelled.[/dim]")
+ return False
+ else:
+ console.print("[dim]Please enter 'y' or 'n'[/dim]")
+
+ except ImportError:
+ print("\n" + "=" * 60)
+ print(f"{_emoji('⚠️', '[WARNING]')} Resume failed")
+ print(f"Model: {model_name}")
+ print(f"Storage: {lemonade_cache}")
+ print(f" {hf_cache}")
+ print("Delete and download fresh?")
+ print("=" * 60)
+
+ while True:
+ response = (
+ input("Delete and re-download from scratch? [y/N]: ").strip().lower()
+ )
+ if response in ("y", "yes"):
+ return True
+ elif response in ("", "n", "no"):
+ return False
+ else:
+ print("Please enter 'y' or 'n'")
+
+
  def _check_disk_space(size_gb: float, path: Optional[str] = None) -> bool:
  """
  Check if there's enough disk space for download.
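Note: `_prompt_user_for_delete` follows the same rich-UI-with-plain-text-fallback pattern as `_prompt_user_for_repair`, but defaults to No (`[y/N]`) because deletion is destructive. A minimal sketch of the intended call pattern (the call site below is illustrative, not taken from the diff):

```python
# Hypothetical caller: delete only after explicit confirmation,
# then pull the model again from scratch.
if _prompt_user_for_delete("Qwen3-0.6B-GGUF"):
    client.delete_model("Qwen3-0.6B-GGUF")  # drop partial/corrupt files
    for event in client.pull_model_stream("Qwen3-0.6B-GGUF"):
        pass  # fresh download
```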
@@ -1640,8 +1723,6 @@ class LemonadeClient:
  embedding: Optional[bool] = None,
  reranking: Optional[bool] = None,
  mmproj: Optional[str] = None,
- timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
- progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
  ) -> Generator[Dict[str, Any], None, None]:
  """
  Install a model on the server with streaming progress updates.
@@ -1658,34 +1739,23 @@ class LemonadeClient:
  embedding: Whether the model is an embedding model (for registering new models)
  reranking: Whether the model is a reranking model (for registering new models)
  mmproj: Multimodal Projector file for vision models (for registering new models)
- timeout: Request timeout in seconds (longer for model installation)
- progress_callback: Optional callback function called with progress dict on each event.
- Signature: callback(event_type: str, data: dict) -> None
- event_type is one of: "progress", "complete", "error"

  Yields:
  Dict containing progress event data with fields:
- - For "progress" events: file, file_index, total_files, bytes_downloaded,
- bytes_total, percent
- - For "complete" events: file_index, total_files, percent (100)
- - For "error" events: error message
+ - event: "progress", "complete", or "error"
+ - For "progress": file, file_index, total_files, bytes_downloaded, bytes_total, percent
+ - For "complete": file_index, total_files, percent (100)
+ - For "error": error message

  Raises:
  LemonadeClientError: If the model installation fails

  Example:
- # Using as generator
  for event in client.pull_model_stream("Qwen3-0.6B-GGUF"):
- if event.get("event") == "progress":
+ if event["event"] == "progress":
  print(f"Downloading: {event['percent']}%")
-
- # Using with callback
- def on_progress(event_type, data):
- if event_type == "progress":
- print(f"{data['file']}: {data['percent']}%")
-
- for _ in client.pull_model_stream("Qwen3-0.6B-GGUF", progress_callback=on_progress):
- pass
+ elif event["event"] == "complete":
+ print("Done!")
  """
  self.log.info(f"Installing {model_name} with streaming progress")

@@ -1708,12 +1778,21 @@ class LemonadeClient:

  url = f"{self.base_url}/pull"

+ # Use separate connect and read timeouts to handle SSE streams properly:
+ # - Connect timeout: 30 seconds (fast connection establishment)
+ # - Read timeout: 120 seconds (timeout if no data for 2 minutes)
+ # This detects stuck downloads while still allowing normal long downloads
+ # (as long as bytes keep flowing). The timeout is between receiving chunks,
+ # not total time, so long downloads with steady progress will work fine.
+ connect_timeout = 30
+ read_timeout = 120  # Timeout if no data received for 2 minutes
+
  try:
  response = requests.post(
  url,
  json=request_data,
  headers={"Content-Type": "application/json"},
- timeout=timeout,
+ timeout=(connect_timeout, read_timeout),
  stream=True,
  )

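Note: `requests` accepts either a single number (applied to both the connect and read phases) or a `(connect, read)` tuple. The read timeout bounds the gap between received chunks, not the total transfer time, which is what makes it safe for long streaming downloads. A standalone illustration (placeholder URL):

```python
import requests

url = "https://example.com/stream"  # placeholder

# Single value: 120 s to connect AND 120 s max between received chunks.
requests.post(url, json={}, stream=True, timeout=120)

# Tuple: fail fast (30 s) on an unreachable server, but let the stream
# run indefinitely as long as a chunk arrives at least every 120 s.
requests.post(url, json={}, stream=True, timeout=(30, 120))
```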
@@ -1725,11 +1804,14 @@ class LemonadeClient:
  # Parse SSE stream
  event_type = None
  received_complete = False
+
  try:
- for line in response.iter_lines(decode_unicode=True):
- if not line:
+ for line_bytes in response.iter_lines():
+ if not line_bytes:
  continue

+ line = line_bytes.decode("utf-8", errors="replace")
+
  if line.startswith("event:"):
  event_type = line[6:].strip()
  elif line.startswith("data:"):
@@ -1738,28 +1820,20 @@ class LemonadeClient:
  data = json.loads(data_str)
  data["event"] = event_type or "progress"

- # Call the progress callback if provided
- if progress_callback:
- progress_callback(event_type or "progress", data)
-
+ # Yield all events - let the consumer handle throttling
  yield data

- # Track complete event
  if event_type == "complete":
  received_complete = True
-
- # Check for error event
- if event_type == "error":
- error_msg = data.get(
- "error", "Unknown error during model pull"
+ elif event_type == "error":
+ raise LemonadeClientError(
+ data.get("error", "Unknown error during model pull")
  )
- raise LemonadeClientError(error_msg)

  except json.JSONDecodeError:
  self.log.warning(f"Failed to parse SSE data: {data_str}")
  continue
  except requests.exceptions.ChunkedEncodingError:
- # Connection closed by server - this is normal after complete event
  if not received_complete:
  raise

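Note: the two hunks above amount to a minimal Server-Sent Events reader: `event:` lines set the event type, `data:` lines carry a JSON payload, and blank lines delimit events. A self-contained sketch of the same pattern (function name and URL are illustrative):

```python
import json
import requests

def iter_sse_events(url: str, payload: dict):
    """Yield parsed SSE events from a streaming POST (minimal sketch)."""
    response = requests.post(url, json=payload, stream=True, timeout=(30, 120))
    event_type = None
    for raw in response.iter_lines():      # yields bytes; empty lines separate events
        if not raw:
            continue
        line = raw.decode("utf-8", errors="replace")
        if line.startswith("event:"):      # e.g. "event: progress"
            event_type = line[6:].strip()
        elif line.startswith("data:"):     # e.g. 'data: {"percent": 42}'
            data = json.loads(line[5:].strip())
            data["event"] = event_type or "progress"
            yield data
```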
@@ -2073,9 +2147,32 @@ class LemonadeClient:
  return

  # Model not loaded - load it (will download if needed without prompting)
- self.log.info(f"Model '{model}' not loaded, loading...")
+ self.log.debug(f"Model '{model}' not loaded, loading...")
+
+ try:
+ from rich.console import Console
+
+ console = Console()
+ console.print(
+ f"[bold blue]🔄 Loading model:[/bold blue] [cyan]{model}[/cyan]..."
+ )
+ except ImportError:
+ console = None
+ print(f"🔄 Loading model: {model}...")
+
  self.load_model(model, auto_download=True, prompt=False)

+ # Print model ready message
+ try:
+ if console:
+ console.print(
+ f"[bold green]✅ Model loaded:[/bold green] [cyan]{model}[/cyan]"
+ )
+ else:
+ print(f"✅ Model loaded: {model}")
+ except Exception:
+ pass # Ignore print errors
+
  except Exception as e:
  # Log but don't fail - let the actual request fail with proper error
  self.log.debug(f"Could not pre-check model status: {e}")
@@ -2085,7 +2182,7 @@ class LemonadeClient:
  model_name: str,
  timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
  auto_download: bool = False,
- download_timeout: int = 7200,
+ _download_timeout: int = 7200, # Reserved for future use
  llamacpp_args: Optional[str] = None,
  prompt: bool = True,
  ) -> Dict[str, Any]:
@@ -2133,39 +2230,97 @@ class LemonadeClient:
  original_error = str(e)

  # Check if this is a corrupt/incomplete download error
- if self._is_corrupt_download_error(e):
+ is_corrupt = self._is_corrupt_download_error(e)
+ if is_corrupt:
  self.log.warning(
- f"{_emoji('⚠️', '[CORRUPT]')} Model '{model_name}' has incomplete "
+ f"{_emoji('⚠️', '[INCOMPLETE]')} Model '{model_name}' has incomplete "
  f"or corrupted files"
  )

- # Prompt user for confirmation to delete and re-download
+ # Prompt user for confirmation to resume download
  if not _prompt_user_for_repair(model_name):
  raise ModelDownloadCancelledError(
- f"User declined to repair corrupt model: {model_name}"
+ f"User declined to repair incomplete model: {model_name}"
  )

- # Delete the corrupt model
+ # Try to resume download first (Lemonade handles partial files)
  self.log.info(
- f"{_emoji('🗑️', '[DELETE]')} Deleting corrupt model: {model_name}"
+ f"{_emoji('📥', '[RESUME]')} Attempting to resume download..."
  )
+
  try:
- self.delete_model(model_name)
- self.log.info(
- f"{_emoji('✅', '[OK]')} Deleted corrupt model: {model_name}"
+ # First attempt: resume download
+ download_complete = False
+ for event in self.pull_model_stream(model_name=model_name):
+ event_type = event.get("event")
+ if event_type == "complete":
+ download_complete = True
+ elif event_type == "error":
+ raise LemonadeClientError(event.get("error", "Unknown"))
+
+ if download_complete:
+ # Retry loading
+ response = self._send_request(
+ "post", url, request_data, timeout=timeout
+ )
+ self.log.info(
+ f"{_emoji('✅', '[OK]')} Loaded {model_name} after resume"
+ )
+ self.model = model_name
+ return response
+
+ except Exception as resume_error:
+ self.log.warning(
+ f"{_emoji('⚠️', '[RETRY]')} Resume failed: {resume_error}"
  )
- except Exception as delete_error:
- self.log.warning(f"Failed to delete corrupt model: {delete_error}")
- # Continue anyway - the download may still work

- # Now trigger a fresh download by falling through to auto-download flow
- # (the model is now "not found" so _is_model_error will match)
+ # Prompt user before deleting
+ if not _prompt_user_for_delete(model_name):
+ raise LemonadeClientError(
+ f"Resume download failed for '{model_name}'. "
+ f"You can manually delete the model and try again."
+ )
+
+ # Second attempt: delete and re-download from scratch
+ try:
+ self.log.info(
+ f"{_emoji('🗑️', '[DELETE]')} Deleting corrupt model..."
+ )
+ self.delete_model(model_name)
+
+ self.log.info(
+ f"{_emoji('📥', '[FRESH]')} Starting fresh download..."
+ )
+ download_complete = False
+ for event in self.pull_model_stream(model_name=model_name):
+ event_type = event.get("event")
+ if event_type == "complete":
+ download_complete = True
+ elif event_type == "error":
+ raise LemonadeClientError(event.get("error", "Unknown"))
+
+ if download_complete:
+ # Retry loading
+ response = self._send_request(
+ "post", url, request_data, timeout=timeout
+ )
+ self.log.info(
+ f"{_emoji('✅', '[OK]')} Loaded {model_name} after fresh download"
+ )
+ self.model = model_name
+ return response
+
+ except Exception as fresh_error:
+ self.log.error(
+ f"{_emoji('❌', '[FAIL]')} Fresh download also failed: {fresh_error}"
+ )
+ raise LemonadeClientError(
+ f"Failed to repair model '{model_name}' after both resume and fresh download attempts. "
+ f"Please check your network connection and disk space, then try again."
+ )

  # Check if this is a "model not found" error and auto_download is enabled
- if not (
- auto_download
- and (self._is_model_error(e) or self._is_corrupt_download_error(e))
- ):
+ if not (auto_download and self._is_model_error(e)):
  # Not a model error or auto_download disabled - re-raise
  self.log.error(f"Failed to load {model_name}: {original_error}")
  if isinstance(e, LemonadeClientError):
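Note: stripped of logging and console output, the new repair path is a two-stage strategy: resume first, and only delete and re-download if the resume itself fails and the user consents. A condensed sketch (hypothetical standalone helper; in the diff this logic is inlined in `load_model`):

```python
def repair_and_load(client, model_name: str):
    """Two-stage repair: resume the download, fall back to a fresh one."""
    if not _prompt_user_for_repair(model_name):
        raise ModelDownloadCancelledError(model_name)
    try:
        # Stage 1: resume -- the server picks up existing partial files.
        for event in client.pull_model_stream(model_name=model_name):
            if event.get("event") == "error":
                raise LemonadeClientError(event.get("error", "Unknown"))
        return client.load_model(model_name)
    except Exception:
        if not _prompt_user_for_delete(model_name):
            raise
        # Stage 2: wipe the partial files and download from scratch.
        client.delete_model(model_name)
        for event in client.pull_model_stream(model_name=model_name):
            if event.get("event") == "error":
                raise LemonadeClientError(event.get("error", "Unknown"))
        return client.load_model(model_name)
```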
@@ -2211,24 +2366,45 @@ class LemonadeClient:
  self.active_downloads[model_name] = download_task

  try:
- # Trigger model download
- self.pull_model(model_name, timeout=download_timeout)
-
- # Wait for download to complete (with cancellation support)
- self.log.info(
- f" {_emoji('⏳', '[WAIT]')} Waiting for model download to complete..."
- )
+ # Use streaming download for better performance and no timeouts
  self.log.info(
- f" {_emoji('💡', '[TIP]')} Tip: You can cancel with "
- f"client.cancel_download(model_name)"
+ f" {_emoji('', '[DOWNLOAD]')} Downloading model with streaming..."
  )

- if self._wait_for_model_download(
- model_name,
- timeout=download_timeout,
- show_progress=True,
- download_task=download_task,
- ):
+ # Stream download with simple progress logging
+ download_complete = False
+ last_logged_percent = -10 # Log at 0%, 10%, 20%, etc.
+
+ for event in self.pull_model_stream(model_name=model_name):
+ # Check for cancellation
+ if download_task and download_task.is_cancelled():
+ raise ModelDownloadCancelledError(
+ f"Download cancelled: {model_name}"
+ )
+
+ event_type = event.get("event")
+ if event_type == "progress":
+ percent = event.get("percent", 0)
+ # Log every 10%
+ if percent >= last_logged_percent + 10:
+ bytes_dl = event.get("bytes_downloaded", 0)
+ bytes_total = event.get("bytes_total", 0)
+ if bytes_total > 0:
+ gb_dl = bytes_dl / (1024**3)
+ gb_total = bytes_total / (1024**3)
+ self.log.info(
+ f" {_emoji('📥', '[PROGRESS]')} "
+ f"{percent}% ({gb_dl:.1f}/{gb_total:.1f} GB)"
+ )
+ last_logged_percent = percent
+ elif event_type == "complete":
+ download_complete = True
+ elif event_type == "error":
+ raise LemonadeClientError(
+ f"Download failed: {event.get('error', 'Unknown error')}"
+ )
+
+ if download_complete:
  # Retry loading after successful download
  self.log.info(
  f"{_emoji('🔄', '[RETRY]')} Retrying model load: {model_name}"
@@ -2243,7 +2419,7 @@ class LemonadeClient:
  return response
  else:
  raise LemonadeClientError(
- f"Model download timed out for '{model_name}'"
+ f"Model download did not complete for '{model_name}'"
  )

  except ModelDownloadCancelledError:
@@ -2421,7 +2597,17 @@ class LemonadeClient:
  """
  try:
  health = self.health_check()
- reported_ctx = health.get("context_size", 0)
+
+ # Lemonade 9.1.4+: context_size moved to all_models_loaded[N].recipe_options.ctx_size
+ all_models = health.get("all_models_loaded", [])
+ if all_models:
+ # Get context size from the first loaded model (typically the LLM)
+ reported_ctx = (
+ all_models[0].get("recipe_options", {}).get("ctx_size", 0)
+ )
+ else:
+ # Fallback for older Lemonade versions
+ reported_ctx = health.get("context_size", 0)

  if reported_ctx >= required_tokens:
  self.log.debug(
@@ -2457,7 +2643,16 @@ class LemonadeClient:
  health = self.health_check()
  status.running = True
  status.health_data = health
- status.context_size = health.get("context_size", 0)
+
+ # Lemonade 9.1.4+: context_size moved to all_models_loaded[N].recipe_options.ctx_size
+ all_models = health.get("all_models_loaded", [])
+ if all_models:
+ status.context_size = (
+ all_models[0].get("recipe_options", {}).get("ctx_size", 0)
+ )
+ else:
+ # Fallback for older Lemonade versions
+ status.context_size = health.get("context_size", 0)

  # Get loaded models
  models_response = self.list_models()
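Note: the same version-tolerant lookup now appears in two places. As a single helper it reads (hypothetical function, not part of the diff):

```python
def get_ctx_size(health: dict) -> int:
    """Extract the context size from a Lemonade /health payload.

    Lemonade 9.1.4+ nests it at all_models_loaded[N].recipe_options.ctx_size;
    older servers report a top-level context_size field.
    """
    models = health.get("all_models_loaded", [])
    if models:
        # The first loaded model is typically the LLM.
        return models[0].get("recipe_options", {}).get("ctx_size", 0)
    return health.get("context_size", 0)
```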
@@ -2541,8 +2736,6 @@ class LemonadeClient:
  def download_agent_models(
  self,
  agent: str = "all",
- timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
- progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
  ) -> Dict[str, Any]:
  """
  Download all models required for an agent with streaming progress.
@@ -2552,9 +2745,6 @@ class LemonadeClient:

  Args:
  agent: Agent name (chat, code, rag, etc.) or "all" for all models
- timeout: Timeout per model in seconds
- progress_callback: Optional callback for progress updates.
- Signature: callback(event_type: str, data: dict) -> None

  Returns:
  Dict with download results:
@@ -2563,11 +2753,9 @@ class LemonadeClient:
  - errors: List[str] - Any error messages

  Example:
- def on_progress(event_type, data):
- if event_type == "progress":
- print(f"{data['file']}: {data['percent']}%")
-
- result = client.download_agent_models("chat", progress_callback=on_progress)
+ result = client.download_agent_models("chat")
+ for event in client.pull_model_stream("model-id"):
+ print(f"{event.get('percent', 0)}%")
  """
  model_ids = self.get_required_models(agent)

@@ -2597,15 +2785,12 @@ class LemonadeClient:
  self.log.info(f"Downloading model: {model_id}")
  completed = False

- for event in self.pull_model_stream(
- model_name=model_id,
- timeout=timeout,
- progress_callback=progress_callback,
- ):
- if event.get("event") == "complete":
+ for event in self.pull_model_stream(model_name=model_id):
+ event_type = event.get("event")
+ if event_type == "complete":
  completed = True
  model_result["status"] = "completed"
- elif event.get("event") == "error":
+ elif event_type == "error":
  model_result["status"] = "error"
  model_result["error"] = event.get("error", "Unknown error")
  results["errors"].append(f"{model_id}: {model_result['error']}")
@@ -47,18 +47,13 @@ class LemonadeProvider(LLMClient):
  stream: bool = False,
  **kwargs,
  ) -> Union[str, Iterator[str]]:
- # Use provided model, instance model, or default CPU model
- effective_model = model or self._model or DEFAULT_MODEL_NAME
-
- # Default to low temperature for deterministic responses (matches old LLMClient behavior)
- kwargs.setdefault("temperature", 0.1)
-
- response = self._backend.completions(
- model=effective_model, prompt=prompt, stream=stream, **kwargs
+ # Use chat endpoint (completions endpoint not available in Lemonade v9.1+)
+ return self.chat(
+ [{"role": "user", "content": prompt}],
+ model=model,
+ stream=stream,
+ **kwargs,
  )
- if stream:
- return self._handle_stream(response)
- return self._extract_text(response)

  def chat(
  self,
@@ -114,7 +109,10 @@ class LemonadeProvider(LLMClient):
  for chunk in response:
  if "choices" in chunk and chunk["choices"]:
  delta = chunk["choices"][0].get("delta", {})
- if "content" in delta:
- yield delta["content"]
+ content = delta.get("content")
+ if content:
+ yield content
  elif "text" in chunk["choices"][0]:
- yield chunk["choices"][0]["text"]
+ text = chunk["choices"][0]["text"]
+ if text:
+ yield text
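Note: the streaming fix above guards against deltas whose `content` is `None` or empty, which OpenAI-compatible streams commonly emit for role-only and final stop chunks. The defensive pattern in isolation (generic chunk shape assumed):

```python
from typing import Any, Dict, Iterable, Iterator

def stream_text(chunks: Iterable[Dict[str, Any]]) -> Iterator[str]:
    """Yield only non-empty text pieces from OpenAI-style stream chunks."""
    for chunk in chunks:
        choices = chunk.get("choices") or []
        if not choices:
            continue
        choice = choices[0]
        # Chat streams put text at choices[0].delta.content; the first and
        # last chunks often carry delta == {"role": ...} or content == None.
        piece = choice.get("delta", {}).get("content") or choice.get("text")
        if piece:
            yield piece
```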
gaia/rag/sdk.py CHANGED
@@ -432,7 +432,7 @@ class RAGSDK:
  vlm = None
  vlm_available = False
  try:
- from gaia.llm.vlm_client import VLMClient
+ from gaia.llm import VLMClient
  from gaia.rag.pdf_utils import (
  count_images_in_page,
  extract_images_from_page_pymupdf,