lemonade-sdk 8.1.11-py3-none-any.whl → 8.2.2-py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of lemonade-sdk might be problematic.

Files changed (38)
  1. lemonade/cache.py +6 -1
  2. lemonade/common/status.py +4 -4
  3. lemonade/common/system_info.py +0 -26
  4. lemonade/tools/accuracy.py +143 -48
  5. lemonade/tools/adapter.py +6 -1
  6. lemonade/tools/bench.py +26 -8
  7. lemonade/tools/flm/utils.py +70 -22
  8. lemonade/tools/huggingface/bench.py +6 -1
  9. lemonade/tools/llamacpp/bench.py +146 -27
  10. lemonade/tools/llamacpp/load.py +30 -2
  11. lemonade/tools/llamacpp/utils.py +317 -21
  12. lemonade/tools/oga/bench.py +5 -26
  13. lemonade/tools/oga/load.py +49 -123
  14. lemonade/tools/oga/migration.py +403 -0
  15. lemonade/tools/report/table.py +76 -8
  16. lemonade/tools/server/flm.py +2 -6
  17. lemonade/tools/server/llamacpp.py +43 -2
  18. lemonade/tools/server/serve.py +354 -18
  19. lemonade/tools/server/static/js/chat.js +15 -77
  20. lemonade/tools/server/static/js/model-settings.js +24 -3
  21. lemonade/tools/server/static/js/models.js +440 -37
  22. lemonade/tools/server/static/js/shared.js +61 -8
  23. lemonade/tools/server/static/logs.html +157 -13
  24. lemonade/tools/server/static/styles.css +204 -0
  25. lemonade/tools/server/static/webapp.html +39 -1
  26. lemonade/version.py +1 -1
  27. lemonade_install/install.py +33 -579
  28. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +6 -4
  29. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/RECORD +38 -37
  30. lemonade_server/cli.py +10 -0
  31. lemonade_server/model_manager.py +172 -11
  32. lemonade_server/pydantic_models.py +3 -0
  33. lemonade_server/server_models.json +102 -66
  34. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
  35. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
  36. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
  37. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
  38. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
lemonade/tools/server/serve.py

@@ -7,12 +7,12 @@ import logging
  import platform
  import tempfile
  import traceback
- from typing import Optional, Union
+ from typing import Optional, Union, List
  import json
  from pathlib import Path
  import os
-
- from fastapi import FastAPI, HTTPException, status, Request, WebSocket
+ import shutil
+ from fastapi import FastAPI, HTTPException, status, Request, WebSocket, Form, UploadFile
  from fastapi.responses import StreamingResponse
  from fastapi.middleware.cors import CORSMiddleware
  from fastapi.staticfiles import StaticFiles
@@ -83,10 +83,31 @@ if platform.system() in ["Windows", "Darwin"]:
      from lemonade.tools.server.tray import LemonadeTray, OutputDuplicator


- class WebsocketTextFilter(logging.Filter):
+ class ServerLogFilter(logging.Filter):
+     def __init__(self, server):
+         super().__init__()
+         self.server = server
+         self.noisy_paths = {
+             "/api/v1/health",
+             "/api/v0/health",
+             "/api/v1/models",
+             "/api/v0/models",
+         }
+
      def filter(self, record: logging.LogRecord) -> bool:
-         # Only allow logs that don't include "> TEXT"
-         return "> TEXT" not in record.getMessage()
+         msg = record.getMessage()
+
+         # Filter out websocket logs
+         if "> TEXT" in msg:
+             return False
+
+         # Filter out noisy HTTP routes if debug logs are OFF
+         if not self.server.debug_logging_enabled:
+             if any(path in msg for path in self.noisy_paths):
+                 return False
+
+         # Otherwise, allow the log
+         return True


  async def log_streamer(websocket: WebSocket, path: str, interval: float = 1.0):
@@ -94,7 +115,7 @@ async def log_streamer(websocket: WebSocket, path: str, interval: float = 1.0):
      await websocket.accept()
      try:
          with open(path, "r", encoding="utf-8") as f:
-             f.seek(0, os.SEEK_END) # start at end
+             f.seek(0) # start at the beginning of the file
              while True:
                  # Try reading a line
                  line = f.readline()
@@ -325,6 +346,7 @@ class Server:
          self.app.post(f"{prefix}/responses")(self.responses)
          self.app.post(f"{prefix}/log-level")(self.set_log_level)
          self.app.websocket(f"{prefix}/logs/ws")(self.logs_ws)
+         self.app.post(f"{prefix}/add-local-model")(self.add_local_model)

          # OpenAI-compatible routes
          self.app.post(f"{prefix}/chat/completions")(self.chat_completions)
@@ -336,6 +358,186 @@ class Server:
          self.app.post(f"{prefix}/reranking")(self.reranking)
          self.app.post(f"{prefix}/rerank")(self.reranking)

+         # Migration routes
+         self.app.get(f"{prefix}/migration/incompatible-models")(
+             self.get_incompatible_models
+         )
+         self.app.post(f"{prefix}/migration/cleanup")(
+             self.cleanup_incompatible_models
+         )
+
+     async def add_local_model(
+         self,
+         model_name: str = Form(...),
+         checkpoint: str = Form(""),
+         recipe: str = Form(...),
+         reasoning: bool = Form(False),
+         vision: bool = Form(False),
+         mmproj: str = Form(None),
+         model_files: List[UploadFile] = None,
+     ):
+         from huggingface_hub.constants import HF_HUB_CACHE
+         from lemonade.tools.llamacpp.utils import parse_checkpoint
+
+         # Upload and register a local model from files.
+         try:
+             if not model_files:
+                 raise HTTPException(
+                     status_code=status.HTTP_400_BAD_REQUEST,
+                     detail="No model files provided for upload",
+                 )
+
+             if not model_name.startswith("user."):
+                 raise HTTPException(
+                     status_code=status.HTTP_400_BAD_REQUEST,
+                     detail="Model name must start with 'user.'",
+                 )
+
+             valid_recipes = ["llamacpp", "oga-npu", "oga-hybrid", "oga-cpu"]
+             if recipe not in valid_recipes:
+                 raise HTTPException(
+                     status_code=status.HTTP_400_BAD_REQUEST,
+                     detail=f"Invalid recipe. Must be one of: {', '.join(valid_recipes)}",
+                 )
+
+             if recipe == "llamacpp" and not any(
+                 f.filename.lower().endswith(".gguf") for f in model_files
+             ):
+                 raise HTTPException(
+                     status_code=status.HTTP_400_BAD_REQUEST,
+                     detail="At least one .gguf file is required for llamacpp",
+                 )
+
+             # Check if model name already exists
+             if model_name in ModelManager().supported_models:
+                 raise HTTPException(
+                     status_code=status.HTTP_409_CONFLICT,
+                     detail=(
+                         f"Model name '{model_name}' already exists. "
+                         "Please use a different name."
+                     ),
+                 )
+
+             model_name_clean = model_name.replace("user.", "")
+
+             # Files are saved to models--{model_name_clean}
+             # Note: This is based on the user's custom model name, NOT the checkpoint field
+             repo_cache_name = model_name_clean.replace("/", "--")
+             snapshot_path = os.path.join(HF_HUB_CACHE, f"models--{repo_cache_name}")
+             os.makedirs(snapshot_path, exist_ok=True)
+
+             # Extract variant from checkpoint field if provided
+             # checkpoint field format: "folder:variant" or just "folder"
+             variant = None
+             if checkpoint and ":" in checkpoint:
+                 _, variant = parse_checkpoint(checkpoint)
+                 # variant now contains just the variant[can be with or without the
+                 # .gguf extension] filename (e.g., "LFM2-VL-1.6B-F16 or LFM2-VL-1.6B-F16.gguf")
+
+             # Save uploaded files, preserving folder structure
+             for file in model_files:
+                 relative_path = file.filename
+                 path_parts = relative_path.split("/")
+
+                 if len(path_parts) > 1:
+                     internal_path = "/".join(path_parts[1:])
+                     file_path = os.path.join(snapshot_path, internal_path)
+                 else:
+                     file_path = os.path.join(snapshot_path, path_parts[0])
+
+                 os.makedirs(os.path.dirname(file_path), exist_ok=True)
+                 with open(file_path, "wb") as f:
+                     content = await file.read()
+                     f.write(content)
+
+             # Resolve actual file paths after upload (for faster loading later)
+             resolved_checkpoint = None
+             resolved_mmproj = None
+
+             # For OGA models, find genai_config.json
+             if recipe.startswith("oga-"):
+                 for root, _, files in os.walk(snapshot_path):
+                     if "genai_config.json" in files:
+                         resolved_checkpoint = root
+                         break
+                 if not resolved_checkpoint:
+                     resolved_checkpoint = snapshot_path
+
+             # For llamacpp models, find the GGUF file
+             elif recipe == "llamacpp":
+                 gguf_file_found = None
+
+                 # If variant is specified, look for that specific file
+                 if variant:
+                     search_term = (
+                         variant if variant.endswith(".gguf") else f"{variant}.gguf"
+                     )
+                     for root, _, files in os.walk(snapshot_path):
+                         if search_term in files:
+                             gguf_file_found = os.path.join(root, search_term)
+                             break
+
+                 # If no variant or variant not found, search for any .gguf file (excluding mmproj)
+                 if not gguf_file_found:
+                     for root, _, files in os.walk(snapshot_path):
+                         gguf_files = [
+                             f
+                             for f in files
+                             if f.endswith(".gguf") and "mmproj" not in f.lower()
+                         ]
+                         if gguf_files:
+                             gguf_file_found = os.path.join(root, gguf_files[0])
+                             break
+
+                 resolved_checkpoint = (
+                     gguf_file_found if gguf_file_found else snapshot_path
+                 )
+
+             # Search for mmproj file if provided
+             if mmproj:
+                 for root, _, files in os.walk(snapshot_path):
+                     if mmproj in files:
+                         resolved_mmproj = os.path.join(root, mmproj)
+                         break
+
+             # Build checkpoint for registration
+             # For llamacpp with resolved path, store the full path relative to HF_HUB_CACHE
+             if resolved_checkpoint:
+                 # Store as relative path from HF_HUB_CACHE for portability
+                 checkpoint_to_register = os.path.relpath(
+                     resolved_checkpoint, HF_HUB_CACHE
+                 )
+             elif variant:
+                 checkpoint_to_register = f"models--{repo_cache_name}:{variant}"
+             else:
+                 checkpoint_to_register = f"models--{repo_cache_name}"
+
+             # Register the model
+             ModelManager().register_local_model(
+                 model_name=model_name,
+                 checkpoint=checkpoint_to_register,
+                 recipe=recipe,
+                 reasoning=reasoning,
+                 vision=vision,
+                 mmproj=resolved_mmproj if resolved_mmproj else mmproj,
+                 snapshot_path=snapshot_path,
+             )
+
+             # Refresh local models
+             self.local_models = ModelManager().downloaded_models_enabled
+
+             return {
+                 "status": "success",
+                 "message": f"Model {model_name} uploaded and registered successfully",
+             }
+         except Exception as e:
+             if os.path.exists(checkpoint_to_register):
+                 shutil.rmtree(checkpoint_to_register)
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=f"Failed to upload model: {str(e)}",
+             )
+
      async def set_log_level(self, config: LogLevelConfig):
          """
          Set the logging level of the server.
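For orientation, the new endpoint can be exercised from a small client script. A minimal sketch, assuming the server's default /api/v1 prefix, a local base URL/port, and illustrative file names; the multipart field names mirror the Form/UploadFile parameters in the handler above:

    # Hypothetical example: upload a local GGUF and register it as "user.my-model".
    # Base URL, port, and file names are assumptions; field names come from the diff above.
    import requests

    base_url = "http://localhost:8000/api/v1"  # assumed default server address
    data = {
        "model_name": "user.my-model",  # must start with "user."
        "recipe": "llamacpp",           # llamacpp, oga-npu, oga-hybrid, or oga-cpu
        "checkpoint": "",               # optional "folder:variant" hint
        "reasoning": "false",
        "vision": "false",
    }
    files = [
        ("model_files", ("my-model/model-F16.gguf", open("model-F16.gguf", "rb"))),
    ]
    response = requests.post(f"{base_url}/add-local-model", data=data, files=files)
    response.raise_for_status()
    print(response.json())  # expected: {"status": "success", "message": "..."}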
@@ -454,13 +656,13 @@ class Server:
          )
          file_handler.setLevel(logging_level)
          file_handler.setFormatter(uvicorn_formatter)
-         file_handler.addFilter(WebsocketTextFilter())
+         file_handler.addFilter(ServerLogFilter(self))

          # Set up console handler
          console_handler = logging.StreamHandler()
          console_handler.setLevel(logging_level)
          console_handler.setFormatter(uvicorn_formatter)
-         console_handler.addFilter(WebsocketTextFilter())
+         console_handler.addFilter(ServerLogFilter(self))

          # Configure root logger with both handlers
          logging.basicConfig(
@@ -746,6 +948,23 @@ class Server:
          await self.load_llm(lc)

          if self.llm_loaded.recipe == "llamacpp" or self.llm_loaded.recipe == "flm":
+             if (
+                 hasattr(chat_completion_request, "enable_thinking")
+                 and chat_completion_request.enable_thinking is False
+                 and "qwen3" in self.llm_loaded.model_name.lower()
+             ):
+
+                 # Modify the last user message to include /no_think
+                 if chat_completion_request.messages:
+                     for i in range(len(chat_completion_request.messages) - 1, -1, -1):
+                         if chat_completion_request.messages[i].get("role") == "user":
+                             original_content = chat_completion_request.messages[i][
+                                 "content"
+                             ]
+                             chat_completion_request.messages[i][
+                                 "content"
+                             ] = f"/no_think\n{original_content}"
+                             break
              return self.wrapped_server.chat_completion(chat_completion_request)

          # Convert chat messages to text using the model's chat template
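As a rough illustration of the enable_thinking handling above: a request like the following sketch (model name, base URL, and port are assumptions; the rewrite only fires when the loaded model's name contains "qwen3") would have "/no_think" prepended to its last user message before being handed to the wrapped llamacpp/FLM server:

    # Hypothetical request; model name, port, and URL are assumptions.
    import requests

    payload = {
        "model": "Qwen3-0.6B-GGUF",  # any loaded model whose name contains "qwen3"
        "messages": [{"role": "user", "content": "What is 2 + 2?"}],
        "enable_thinking": False,    # triggers the /no_think injection shown above
        "stream": False,
    }
    r = requests.post("http://localhost:8000/api/v1/chat/completions", json=payload)
    print(r.json()["choices"][0]["message"]["content"])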
@@ -807,6 +1026,11 @@ class Server:

          # Keep track of the full response for tool call extraction
          full_response = ""
+
+         # Track whether we're still in the thinking phase (before </think> tag)
+         in_thinking_phase = self.llm_loaded.reasoning
+         reasoning_buffer = "" # Accumulate reasoning tokens to detect </think>
+
          try:
              async for token in self._generate_tokens(**generation_args):
                  # Handle client disconnect: stop generation and exit
845
1069
  )
846
1070
  )
847
1071
 
848
- # Create a ChatCompletionChunk
1072
+ # Create a ChatCompletionChunk with reasoning_content support
1073
+ # If we're in reasoning mode and haven't seen </think> yet,
1074
+ # send tokens as reasoning_content instead of content
1075
+ delta_content = None
1076
+ delta_reasoning = None
1077
+
1078
+ if reasoning_first_token:
1079
+ # First token - include opening tag in reasoning
1080
+ delta_reasoning = "<think>" + token
1081
+ reasoning_first_token = False
1082
+ reasoning_buffer = token
1083
+ elif in_thinking_phase:
1084
+ # Still in thinking phase - accumulate and check for </think>
1085
+ reasoning_buffer += token
1086
+
1087
+ # Check if we've seen the closing tag
1088
+ if "</think>" in reasoning_buffer:
1089
+ # Split at the closing tag
1090
+ before_close, after_close = reasoning_buffer.split(
1091
+ "</think>", 1
1092
+ )
1093
+
1094
+ # Send everything before + closing tag as reasoning
1095
+ if before_close or not reasoning_buffer.startswith(
1096
+ "</think>"
1097
+ ):
1098
+ delta_reasoning = before_close + "</think>"
1099
+ else:
1100
+ delta_reasoning = "</think>"
1101
+
1102
+ # Everything after goes to content (will be sent in next iteration)
1103
+ # For now, mark that we've exited thinking phase
1104
+ in_thinking_phase = False
1105
+
1106
+ # If there's content after </think>, we need to send it too
1107
+ # But we send it in the current chunk as regular content
1108
+ if after_close:
1109
+ # We have both reasoning and content in this token
1110
+ # Send reasoning first, content will accumulate
1111
+ delta_content = after_close
1112
+ else:
1113
+ # Still accumulating thinking, send as reasoning_content
1114
+ delta_reasoning = token
1115
+ else:
1116
+ # Normal content (after thinking phase ended)
1117
+ delta_content = token
1118
+
849
1119
  chunk = ChatCompletionChunk.model_construct(
850
1120
  id="0",
851
1121
  object="chat.completion.chunk",
@@ -855,11 +1125,8 @@ class Server:
                          Choice.model_construct(
                              index=0,
                              delta=ChoiceDelta(
-                                 content=(
-                                     "<think>" + token
-                                     if reasoning_first_token
-                                     else token
-                                 ),
+                                 content=delta_content,
+                                 reasoning_content=delta_reasoning,
                                  function_call=None,
                                  role="assistant",
                                  tool_calls=openai_tool_calls,
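A sketch of how a streaming client might consume the split fields, parsing the SSE lines directly rather than relying on any particular client library (URL, port, and model name are assumptions):

    # Hypothetical streaming consumer: separates reasoning_content from content.
    import json
    import requests

    payload = {
        "model": "some-reasoning-model",  # placeholder name
        "messages": [{"role": "user", "content": "Think step by step: 17 * 23?"}],
        "stream": True,
    }
    with requests.post(
        "http://localhost:8000/api/v1/chat/completions", json=payload, stream=True
    ) as r:
        for line in r.iter_lines():
            if not line or not line.startswith(b"data: "):
                continue
            data = line[len(b"data: "):]
            if data == b"[DONE]":
                break
            delta = json.loads(data)["choices"][0]["delta"]
            if delta.get("reasoning_content"):
                print("[thinking]", delta["reasoning_content"], end="", flush=True)
            if delta.get("content"):
                print(delta["content"], end="", flush=True)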
@@ -872,7 +1139,6 @@ class Server:
                  )

                  # Format as SSE
-                 reasoning_first_token = False
                  yield f"data: {chunk.model_dump_json()}\n\n".encode("utf-8")

              # Send the [DONE] marker only if still connected
@@ -1125,9 +1391,10 @@ class Server:
                          "<think>" + token if reasoning_first_token else token
                      ),
                      item_id="0 ",
+                     logprobs=[],
                      output_index=0,
-                     type="response.output_text.delta",
                      sequence_number=0,
+                     type="response.output_text.delta",
                  )
                  full_response += token

@@ -1570,9 +1837,10 @@ class Server:
          Load a registered LLM into system memory. Install the model first, if needed.
          config: the information required to load the model
          """
+         from huggingface_hub.constants import HF_HUB_CACHE
+
          try:
              await self._load_lock.acquire()
-
              # Acquire all generate locks
              for _ in range(self.max_concurrent_generations):
                  await self._generate_semaphore.acquire()
@@ -1597,6 +1865,38 @@ class Server:
              # Get additional properties from the model registry
              config_to_use = LoadConfig(**supported_models[config.model_name])

+             # For locally uploaded models, convert the relative checkpoint path to absolute path
+             model_source = supported_models.get(config.model_name, {}).get(
+                 "source", None
+             )
+             if (
+                 model_source == "local_upload"
+                 and config_to_use.checkpoint
+                 and not config_to_use.recipe.startswith("hf-")
+             ):
+                 # Check if checkpoint is a relative path (stored during upload)
+                 if not os.path.isabs(config_to_use.checkpoint):
+                     # Convert relative path to absolute by joining with HF_HUB_CACHE
+                     absolute_checkpoint = os.path.join(
+                         HF_HUB_CACHE, config_to_use.checkpoint
+                     )
+                     if os.path.exists(absolute_checkpoint):
+                         config_to_use.checkpoint = absolute_checkpoint
+                     else:
+                         logging.warning(
+                             f"Checkpoint path does not exist: {absolute_checkpoint}"
+                         )
+
+                 # Also resolve mmproj path if present
+                 if config_to_use.mmproj and not os.path.isabs(config_to_use.mmproj):
+                     absolute_mmproj = os.path.join(HF_HUB_CACHE, config_to_use.mmproj)
+                     if os.path.exists(absolute_mmproj):
+                         config_to_use.mmproj = absolute_mmproj
+                     else:
+                         logging.warning(
+                             f"MMProj path does not exist: {absolute_mmproj}"
+                         )
+
              # Caching mechanism: if the checkpoint is already loaded there is nothing else to do
              if (
                  self.llm_loaded
@@ -1782,6 +2082,42 @@ class Server:
              return
          await log_streamer(websocket, self.log_file)

+     async def get_incompatible_models(self):
+         """
+         Get information about incompatible RyzenAI models in the cache.
+         """
+         try:
+             return ModelManager().get_incompatible_ryzenai_models()
+         except Exception as e:
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=f"Failed to scan for incompatible models: {str(e)}",
+             )
+
+     async def cleanup_incompatible_models(self, request: Request):
+         """
+         Delete selected incompatible RyzenAI models from the cache.
+         """
+         try:
+             body = await request.json()
+             model_paths = body.get("model_paths", [])
+
+             if not model_paths:
+                 raise HTTPException(
+                     status_code=status.HTTP_400_BAD_REQUEST,
+                     detail="No model_paths provided",
+                 )
+
+             result = ModelManager().cleanup_incompatible_models(model_paths)
+             return result
+         except HTTPException:
+             raise
+         except Exception as e:
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=f"Failed to cleanup models: {str(e)}",
+             )
+

  # This file was originally licensed under Apache 2.0. It has been modified.
  # Modifications Copyright (c) 2025 AMD
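For reference, a minimal sketch of driving the two new migration routes from a script. The cleanup request body must contain "model_paths"; the base URL, port, and the shape of the scan response are assumptions, so adapt the key names to the actual ModelManager output:

    # Hypothetical cleanup flow using the new migration routes.
    import requests

    base_url = "http://localhost:8000/api/v1"  # assumed default

    # 1) Ask the server which cached RyzenAI models are incompatible
    scan = requests.get(f"{base_url}/migration/incompatible-models")
    scan.raise_for_status()
    print(scan.json())

    # 2) Delete selected models by path (paths taken from the scan output)
    cleanup = requests.post(
        f"{base_url}/migration/cleanup",
        json={"model_paths": ["/path/to/incompatible/model"]},  # placeholder path
    )
    print(cleanup.json())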
lemonade/tools/server/static/js/chat.js

@@ -4,9 +4,6 @@ let attachedFiles = [];
  let systemMessageElement = null;
  let abortController = null;

- // Default model configuration
- const DEFAULT_MODEL = 'Qwen2.5-0.5B-Instruct-CPU';
-
  const THINKING_ANIM_INTERVAL_MS = 550;
  // Toggle this to false if you prefer plain dots only.
  const THINKING_USE_LEMON = true;
@@ -165,17 +162,22 @@ async function handleModelSelectChange() {
              loadingOption.hidden = true;
              select.appendChild(loadingOption);
          }
+         // Gray out send button during loading
+         updateAttachmentButtonState();
      },
      onLoadingEnd: (modelId, success) => {
          // Reset the default option text
          const defaultOption = modelSelect.querySelector('option[value=""]');
          if (defaultOption) defaultOption.textContent = 'Click to select a model ▼';
+         // Update button state after loading completes
+         updateAttachmentButtonState();
      },
      onSuccess: () => {
          updateAttachmentButtonState();
      },
      onError: () => {
          updateModelSelectValue();
+         updateAttachmentButtonState();
      }
  });
  }
@@ -192,7 +194,8 @@ function updateAttachmentButtonState() {
          toggleBtn.disabled = false;
          toggleBtn.textContent = 'Stop';
      } else {
-         toggleBtn.disabled = loading;
+         // Gray out send button if no model is loaded or if loading
+         toggleBtn.disabled = loading || !currentLoadedModel;
          toggleBtn.textContent = 'Send';
      }
  }
@@ -225,43 +228,6 @@ window.updateAttachmentButtonState = updateAttachmentButtonState;
  // Make displaySystemMessage accessible globally
  window.displaySystemMessage = displaySystemMessage;

- // Auto-load default model and send message
- async function autoLoadDefaultModelAndSend() {
-     // Check if default model is available and installed
-     if (!window.SERVER_MODELS || !window.SERVER_MODELS[DEFAULT_MODEL]) {
-         showErrorBanner('No models available. Please install a model first.');
-         return;
-     }
-
-     if (!window.installedModels || !window.installedModels.has(DEFAULT_MODEL)) {
-         showErrorBanner('Default model is not installed. Please install it from the Model Management tab.');
-         return;
-     }
-
-     // Store the message to send after loading
-     const messageToSend = chatInput.value.trim();
-     if (!messageToSend && attachedFiles.length === 0) return;
-
-     // Use the standardized load function
-     const success = await loadModelStandardized(DEFAULT_MODEL, {
-         // Custom UI updates for auto-loading
-         onLoadingStart: () => {
-             if (toggleBtn) {
-                 toggleBtn.disabled = true;
-                 toggleBtn.textContent = 'Send';
-             }
-         },
-         // Reset send button state
-         onLoadingEnd: () => { updateAttachmentButtonState(); },
-         // Send the message after successful load
-         onSuccess: () => { sendMessage(messageToSend); },
-         onError: (error) => {
-             console.error('Error auto-loading default model:', error);
-             showErrorBanner('Failed to load model: ' + error.message);
-         }
-     });
- }
-
  // Check if model supports vision and update attachment button
  function checkCurrentModel() {
      if (attachedFiles.length > 0 && currentLoadedModel && !isVisionModel(currentLoadedModel)) {
@@ -320,13 +286,11 @@ function handleChatInputKeydown(e) {
          clearAttachments();
      } else if (e.key === 'Enter' && !e.shiftKey) {
          e.preventDefault();
-         // Check if we have a loaded model
+         // Only send if we have a loaded model
          if (currentLoadedModel && modelSelect.value !== '' && !modelSelect.disabled) {
              sendMessage();
-         } else if (!currentLoadedModel) {
-             // Auto-load default model and send
-             autoLoadDefaultModelAndSend();
          }
+         // Otherwise do nothing - button is grayed out
      }
  }

@@ -860,39 +824,13 @@ async function sendMessage(existingTextIfAny) {
          systemMessageElement = null;
      }

-     // Check if a model is loaded, if not, automatically load the default model
+     // Check if a model is loaded
      if (!currentLoadedModel) {
-         const allModels = window.SERVER_MODELS || {};
-
-         if (allModels[DEFAULT_MODEL]) {
-             try {
-                 // Show loading message
-                 const loadingBubble = appendMessage('system', 'Loading default model, please wait...');
-
-                 // Load the default model
-                 await httpRequest(getServerBaseUrl() + '/api/v1/load', {
-                     method: 'POST',
-                     headers: { 'Content-Type': 'application/json' },
-                     body: JSON.stringify({ model_name: DEFAULT_MODEL })
-                 });
-
-                 // Update model status
-                 await updateModelStatusIndicator();
-
-                 // Remove loading message
-                 loadingBubble.parentElement.remove();
-
-                 // Show success message briefly
-                 const successBubble = appendMessage('system', `Loaded ${DEFAULT_MODEL} successfully!`);
-                 setTimeout(() => { successBubble.parentElement.remove(); }, 2000);
-             } catch (error) {
-                 alert('Please load a model first before sending messages.');
-                 return;
-             }
-         } else {
-             alert('Please load a model first before sending messages.');
-             return;
-         }
+         alert('Please load a model first before sending messages.');
+         abortController = null;
+         isStreaming = false;
+         updateAttachmentButtonState();
+         return;
      }

      // Check if trying to send images to non-vision model