lemonade-sdk 7.0.2__tar.gz → 7.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (77)
  1. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/PKG-INFO +1 -1
  2. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/setup.py +1 -1
  3. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/huggingface_load.py +6 -0
  4. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/ort_genai/oga.py +6 -4
  5. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/prompt.py +28 -1
  6. lemonade_sdk-7.0.3/src/lemonade/tools/server/instructions.py +37 -0
  7. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/server/llamacpp.py +16 -16
  8. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/server/serve.py +5 -5
  9. lemonade_sdk-7.0.3/src/lemonade/tools/server/static/instructions.html +262 -0
  10. lemonade_sdk-7.0.3/src/lemonade/version.py +1 -0
  11. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_sdk.egg-info/PKG-INFO +1 -1
  12. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_sdk.egg-info/SOURCES.txt +2 -1
  13. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_server/model_manager.py +45 -12
  14. {lemonade_sdk-7.0.2/src/lemonade/tools/server → lemonade_sdk-7.0.3/src/lemonade_server}/pydantic_models.py +2 -0
  15. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_server/server_models.json +14 -0
  16. lemonade_sdk-7.0.2/src/lemonade/tools/server/instructions.py +0 -294
  17. lemonade_sdk-7.0.2/src/lemonade/version.py +0 -1
  18. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/LICENSE +0 -0
  19. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/NOTICE.md +0 -0
  20. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/README.md +0 -0
  21. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/pyproject.toml +0 -0
  22. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/setup.cfg +0 -0
  23. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/__init__.py +0 -0
  24. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/api.py +0 -0
  25. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/cache.py +0 -0
  26. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/cli.py +0 -0
  27. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/__init__.py +0 -0
  28. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/analyze_model.py +0 -0
  29. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/build.py +0 -0
  30. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/cli_helpers.py +0 -0
  31. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/exceptions.py +0 -0
  32. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/filesystem.py +0 -0
  33. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/labels.py +0 -0
  34. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/onnx_helpers.py +0 -0
  35. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/plugins.py +0 -0
  36. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/printing.py +0 -0
  37. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/status.py +0 -0
  38. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/system_info.py +0 -0
  39. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/tensor_helpers.py +0 -0
  40. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/common/test_helpers.py +0 -0
  41. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/profilers/__init__.py +0 -0
  42. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/profilers/memory_tracker.py +0 -0
  43. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/profilers/profiler.py +0 -0
  44. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/sequence.py +0 -0
  45. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/state.py +0 -0
  46. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/__init__.py +0 -0
  47. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/accuracy.py +0 -0
  48. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/adapter.py +0 -0
  49. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/bench.py +0 -0
  50. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/huggingface_bench.py +0 -0
  51. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/humaneval.py +0 -0
  52. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/llamacpp.py +0 -0
  53. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/llamacpp_bench.py +0 -0
  54. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/management_tools.py +0 -0
  55. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/mmlu.py +0 -0
  56. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/ort_genai/__init__.py +0 -0
  57. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/ort_genai/oga_bench.py +0 -0
  58. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/perplexity.py +0 -0
  59. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/quark/__init__.py +0 -0
  60. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/quark/quark_load.py +0 -0
  61. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/quark/quark_quantize.py +0 -0
  62. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/report/__init__.py +0 -0
  63. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/report/llm_report.py +0 -0
  64. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/report/table.py +0 -0
  65. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/server/__init__.py +0 -0
  66. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/server/port_utils.py +0 -0
  67. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/server/static/styles.css +0 -0
  68. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/server/thread_utils.py +0 -0
  69. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/server/tool_calls.py +0 -0
  70. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/tool.py +0 -0
  71. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_install/__init__.py +0 -0
  72. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_install/install.py +0 -0
  73. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
  74. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
  75. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_sdk.egg-info/requires.txt +0 -0
  76. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
  77. {lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_server/cli.py +0 -0
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lemonade-sdk
- Version: 7.0.2
+ Version: 7.0.3
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
  Author-email: lemonade@amd.com
  Requires-Python: >=3.10, <3.12
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/setup.py
@@ -107,7 +107,7 @@ setup(
      include_package_data=True,
      package_data={
          "lemonade_server": ["server_models.json"],
-         "lemonade": ["tools/server/static/styles.css"],
+         "lemonade": ["tools/server/static/*"],
      },
  )
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/huggingface_load.py
@@ -326,6 +326,7 @@ class HuggingfaceAdapter(ModelAdapter):
      def generate(
          self,
          input_ids,
+         random_seed=1,
          **kwargs,
      ):
@@ -346,6 +347,11 @@ class HuggingfaceAdapter(ModelAdapter):
              **kwargs,
          }

+         if random_seed is None:
+             torch.random.seed()
+         else:
+             torch.random.manual_seed(random_seed)
+
          with torch.no_grad(), torch.inference_mode():
              outputs = self.model.generate(input_ids=input_ids, **generation_kwargs)
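
The seeding added above is the whole mechanism for repeatable Hugging Face generations. A minimal sketch, not package code, of the same pattern in isolation (gpt2 is a stand-in model, not something lemonade prescribes):

```python
# Sketch: seeding torch's global RNG before generate() makes sampled
# outputs repeatable; torch.random.seed() re-seeds from OS entropy instead.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in model
model = AutoModelForCausalLM.from_pretrained("gpt2")
input_ids = tokenizer("Hello", return_tensors="pt").input_ids

def sample(random_seed):
    if random_seed is None:
        torch.random.seed()                    # non-repeatable
    else:
        torch.random.manual_seed(random_seed)  # repeatable
    output = model.generate(input_ids, do_sample=True, max_new_tokens=8)
    return tokenizer.decode(output[0])

assert sample(1) == sample(1)  # same seed, same tokens
```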
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/ort_genai/oga.py
@@ -139,6 +139,7 @@ class OrtGenaiModel(ModelAdapter):
          pad_token_id=None,
          stopping_criteria=None,
          max_length=None,
+         random_seed=1,
      ):
          params = og.GeneratorParams(self.model)
@@ -179,6 +180,9 @@ class OrtGenaiModel(ModelAdapter):
          if use_oga_pre_6_api:
              params.input_ids = input_ids

+         if random_seed is None:
+             random_seed = -1  # In og.Generator, -1 = seed with random device
+
          if self.config and "search" in self.config:
              search_config = self.config["search"]
              params.set_search_options(
@@ -196,10 +200,7 @@ class OrtGenaiModel(ModelAdapter):
                  past_present_share_buffer=search_config.get(
                      "past_present_share_buffer", True
                  ),
-                 # Make sure that results do not vary across laptops
-                 # by default, random_seed=-1 causes different laptops to give
-                 # different results
-                 random_seed=1,
+                 random_seed=random_seed,
                  # Not currently supported by OGA
                  # diversity_penalty=search_config.get('diversity_penalty', 0.0),
                  # no_repeat_ngram_size=search_config.get('no_repeat_ngram_size', 0),
@@ -212,6 +213,7 @@ class OrtGenaiModel(ModelAdapter):
                  temperature=temperature,
                  max_length=max_length_to_use,
                  min_length=min_length,
+                 random_seed=random_seed,
              )
              params.try_graph_capture_with_max_batch_size(1)
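
On the OGA path the seed flows into `set_search_options`, where `-1` means "seed from the random device." A hedged sketch of that semantic, with a placeholder model folder:

```python
# Hedged sketch of onnxruntime-genai seeding; "path/to/oga-model" is a
# placeholder, not a path lemonade ships.
import onnxruntime_genai as og

model = og.Model("path/to/oga-model")
params = og.GeneratorParams(model)
params.set_search_options(
    do_sample=True,
    temperature=0.7,
    max_length=64,
    random_seed=1,  # fixed seed: the same outputs across runs and machines
)
# random_seed=-1 would instead seed from the random device, which is what
# the hunk above maps random_seed=None to.
```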
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/prompt.py
@@ -15,6 +15,7 @@ DEFAULT_GENERATE_PARAMS = {
      "temperature": 0.7,
  }

+ DEFAULT_RANDOM_SEED = 1
  DEFAULT_MAX_NEW_TOKENS = 512
  DEFAULT_N_TRIALS = 1
@@ -108,6 +109,19 @@ class LLMPrompt(Tool):
              f"(useful for testing, default is {DEFAULT_N_TRIALS})",
          )

+         parser.add_argument(
+             "--random-seed",
+             "-r",
+             default=str(DEFAULT_RANDOM_SEED),
+             help="Positive integer seed for random number generator used in "
+             "sampling tokens "
+             f"(default is {DEFAULT_RANDOM_SEED}). If the number of trials is "
+             "greater than one, then the seed is incremented by one for each "
+             "trial. Set to `None` for random, non-repeatable results. This "
+             "random seed behavior only applies to models loaded with "
+             "`oga-load` or `huggingface-load`.",
+         )
+
          return parser

      def parse(self, state: State, args, known_only=True) -> argparse.Namespace:
@@ -123,6 +137,11 @@ class LLMPrompt(Tool):
              with open(parsed_args.prompt, "r", encoding="utf-8") as f:
                  parsed_args.prompt = f.read()

+         if parsed_args.random_seed == "None":
+             parsed_args.random_seed = None
+         else:
+             parsed_args.random_seed = int(parsed_args.random_seed)
+
          return parsed_args

      def run(
@@ -132,6 +151,7 @@ class LLMPrompt(Tool):
          max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
          n_trials: int = DEFAULT_N_TRIALS,
          template: bool = False,
+         random_seed: int = DEFAULT_RANDOM_SEED,
      ) -> State:

          model: ModelAdapter = state.model
@@ -170,9 +190,16 @@ class LLMPrompt(Tool):

          # Get the response from the LLM, which may include the prompt in it
          response = model.generate(
-             input_ids, max_new_tokens=max_new_tokens, **DEFAULT_GENERATE_PARAMS
+             input_ids,
+             max_new_tokens=max_new_tokens,
+             random_seed=random_seed,
+             **DEFAULT_GENERATE_PARAMS,
          )

+         # Increment random seed if not none
+         if random_seed is not None:
+             random_seed += 1
+
          # Flatten the input and response
          input_ids_array = (
              input_ids if isinstance(input_ids, (list, str)) else input_ids[0]
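
Taken together, the prompt.py hunks implement a small, self-contained seed policy: the flag arrives as a string so the literal `None` can be typed on the command line, and the seed advances by one per trial so trials differ but stay reproducible. A standalone sketch of just that logic:

```python
# Standalone sketch of the seed policy in the hunks above; no lemonade
# imports are needed to follow it.
def parse_seed(raw: str):
    # "--random-seed None" disables seeding entirely
    return None if raw == "None" else int(raw)

def seeds_for_trials(raw: str, n_trials: int):
    seed = parse_seed(raw)
    for _ in range(n_trials):
        yield seed
        if seed is not None:
            seed += 1  # next trial gets a new, but still deterministic, seed

assert list(seeds_for_trials("1", 3)) == [1, 2, 3]
assert list(seeds_for_trials("None", 2)) == [None, None]
```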
lemonade_sdk-7.0.3/src/lemonade/tools/server/instructions.py (new file)
@@ -0,0 +1,37 @@
+ from pathlib import Path
+ import json
+ from fastapi.responses import HTMLResponse
+ from lemonade_server.model_manager import ModelManager
+
+
+ def get_instructions_html(port=8000):
+     """
+     Show instructions on how to use the server.
+     """
+     # Load server models from JSON
+     server_models_path = (
+         Path(__file__).parent.parent.parent.parent
+         / "lemonade_server"
+         / "server_models.json"
+     )
+     with open(server_models_path, "r", encoding="utf-8") as f:
+         server_models = json.load(f)
+
+     # Use shared filter function from model_manager.py
+     filtered_models = ModelManager().filter_models_by_backend(server_models)
+
+     # Pass filtered server_models to JS
+     server_models_js = (
+         f"<script>window.SERVER_MODELS = {json.dumps(filtered_models)};</script>"
+     )
+
+     # Load HTML template
+     template_path = Path(__file__).parent / "static" / "instructions.html"
+     with open(template_path, "r", encoding="utf-8") as f:
+         html_template = f.read()
+
+     # Replace template variables
+     html_content = html_template.replace("{{SERVER_PORT}}", str(port))
+     html_content = html_content.replace("{{SERVER_MODELS_JS}}", server_models_js)
+
+     return HTMLResponse(content=html_content)
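
A hedged sketch of how `get_instructions_html` could be mounted; the app object and route path here are illustrative (serve.py wires up the real ones):

```python
# Illustrative only: the route path and port are assumptions, not the
# wiring serve.py actually uses.
from fastapi import FastAPI
from lemonade.tools.server.instructions import get_instructions_html

app = FastAPI()

@app.get("/")
async def home():
    # Returns an HTMLResponse with {{SERVER_PORT}} and {{SERVER_MODELS_JS}}
    # already substituted into the static template
    return get_instructions_html(port=8000)
```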
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/server/llamacpp.py
@@ -14,8 +14,8 @@ from fastapi.responses import StreamingResponse

  from openai import OpenAI

+ from lemonade_server.pydantic_models import ChatCompletionRequest
  from lemonade_server.model_manager import ModelManager
- from lemonade.tools.server.pydantic_models import ChatCompletionRequest
  from lemonade.tools.server.port_utils import find_free_port

  LLAMA_VERSION = "b5543"
@@ -165,24 +165,25 @@ def _wait_for_load(


  def _launch_llama_subprocess(
-     model_path: str, use_gpu: bool, telemetry: LlamaTelemetry
+     snapshot_files: dict, use_gpu: bool, telemetry: LlamaTelemetry
  ) -> subprocess.Popen:
      """
      Launch llama server subprocess with GPU or CPU configuration
      """

+     # Build the base command
+     base_command = [LLAMA_SERVER_EXE_PATH, "-m", snapshot_files["variant"]]
+     if "mmproj" in snapshot_files:
+         base_command.extend(["--mmproj", snapshot_files["mmproj"]])
+     if not use_gpu:
+         base_command.extend(["--no-mmproj-offload"])
+
      # Find a port, and save it in the telemetry object for future reference
      # by other functions
      telemetry.choose_port()

-     base_command = [
-         LLAMA_SERVER_EXE_PATH,
-         "-m",
-         model_path,
-         "--port",
-         str(telemetry.port),
-         "--jinja",
-     ]
+     # Add port and jinja to enable tool use
+     base_command.extend(["--port", str(telemetry.port), "--jinja"])

      # Configure GPU layers: 99 for GPU, 0 for CPU-only
      ngl_value = "99" if use_gpu else "0"
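
An illustrative reconstruction of the command this refactor builds for a vision model forced onto CPU. The filenames and port are placeholders, and the `-ngl` flag name is assumed from llama.cpp conventions rather than shown in this hunk:

```python
# Placeholder values throughout; mirrors the command-building logic above.
snapshot_files = {
    "variant": "gemma-3-4b-it-Q4_K_M.gguf",
    "mmproj": "mmproj-model-f16.gguf",
}
use_gpu = False

cmd = ["llama-server", "-m", snapshot_files["variant"]]
if "mmproj" in snapshot_files:
    cmd += ["--mmproj", snapshot_files["mmproj"]]
if not use_gpu:
    cmd += ["--no-mmproj-offload"]  # keep the projector off the GPU
cmd += ["--port", "8081", "--jinja"]  # --jinja enables tool use
cmd += ["-ngl", "0"]  # GPU layers; flag name assumed from llama.cpp usage
print(" ".join(cmd))
```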
@@ -204,7 +205,7 @@ def _launch_llama_subprocess(
      return process


- def server_load(checkpoint: str, model_reference: str, telemetry: LlamaTelemetry):
+ def server_load(model_config: dict, model_reference: str, telemetry: LlamaTelemetry):
      # Download llama.cpp server if it isn't already available
      if not os.path.exists(LLAMA_SERVER_EXE_DIR):
          # Download llama.cpp server zip
@@ -236,16 +237,15 @@ def server_load(checkpoint: str, model_reference: str, telemetry: LlamaTelemetry
          logging.info("Cleaned up zip file")

      # Download the gguf to the hugging face cache
-     snapshot_path = ModelManager().download_gguf(checkpoint)
-     model_path = os.path.join(snapshot_path, os.listdir(snapshot_path)[0])
-     logging.debug(f"GGUF file path: {model_path}")
+     snapshot_files = ModelManager().download_gguf(model_config)
+     logging.debug(f"GGUF file paths: {snapshot_files}")

      # Start the llama-serve.exe process
      logging.debug(f"Using llama_server for GGUF model: {LLAMA_SERVER_EXE_PATH}")

      # Attempt loading on GPU first
      llama_server_process = _launch_llama_subprocess(
-         model_path, use_gpu=True, telemetry=telemetry
+         snapshot_files, use_gpu=True, telemetry=telemetry
      )

      # Check the /health endpoint until GPU server is ready
@@ -258,7 +258,7 @@ def server_load(checkpoint: str, model_reference: str, telemetry: LlamaTelemetry
      # If loading on GPU failed, try loading on CPU
      if llama_server_process.poll():
          llama_server_process = _launch_llama_subprocess(
-             model_path, use_gpu=False, telemetry=telemetry
+             snapshot_files, use_gpu=False, telemetry=telemetry
          )

      # Check the /health endpoint until CPU server is ready
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade/tools/server/serve.py
@@ -46,9 +46,7 @@ from openai.types.responses import (

  import lemonade.api as lemonade_api
  from lemonade_server.model_manager import ModelManager
- from lemonade.tools.management_tools import ManagementTool
- import lemonade.tools.server.llamacpp as llamacpp
- from lemonade.tools.server.pydantic_models import (
+ from lemonade_server.pydantic_models import (
      DEFAULT_MAX_NEW_TOKENS,
      LoadConfig,
      CompletionRequest,
@@ -56,6 +54,8 @@ from lemonade.tools.server.pydantic_models import (
      ResponsesRequest,
      PullConfig,
  )
+ from lemonade.tools.management_tools import ManagementTool
+ import lemonade.tools.server.llamacpp as llamacpp
  from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
  from lemonade.tools.server.instructions import get_instructions_html
  from lemonade.tools.server.port_utils import lifespan
@@ -1200,7 +1200,7 @@ class Server(ManagementTool):
          # We will populate a LoadConfig that has all of the required fields
          config_to_use: LoadConfig

-         # First, validate that the arguments are valid
+         # First, ensure that the arguments are valid
          if config.model_name:
              # Get the dictionary of supported model from disk
              supported_models = ModelManager().supported_models
@@ -1293,7 +1293,7 @@ class Server(ManagementTool):
          try:
              if config_to_use.recipe == "llamacpp":
                  self.llama_server_process = llamacpp.server_load(
-                     checkpoint=config_to_use.checkpoint,
+                     model_config=config_to_use,
                      model_reference=model_reference,
                      telemetry=self.llama_telemetry,
                  )
lemonade_sdk-7.0.3/src/lemonade/tools/server/static/instructions.html (new file)
@@ -0,0 +1,262 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Lemonade Server</title>
+     <link rel="icon" href="data:,">
+     <link rel="stylesheet" href="/static/styles.css">
+     <script>
+         window.SERVER_PORT = {{SERVER_PORT}};
+     </script>
+     {{SERVER_MODELS_JS}}
+ </head>
+ <body>
+     <nav class="navbar">
+         <a href="https://github.com/lemonade-sdk/lemonade">GitHub</a>
+         <a href="https://lemonade-server.ai/docs/">Docs</a>
+         <a href="https://lemonade-server.ai/docs/server/server_models/">Models</a>
+         <a href="https://lemonade-server.ai/docs/server/apps/">Featured Apps</a>
+     </nav>
+     <main class="main">
+         <div class="title">🍋 Lemonade Server</div>
+         <div class="tab-container">
+             <div class="tabs">
+                 <button class="tab active" id="tab-chat" onclick="showTab('chat')">LLM Chat</button>
+                 <button class="tab" id="tab-models" onclick="showTab('models')">Model Management</button>
+             </div>
+             <div class="tab-content active" id="content-chat">
+                 <div class="chat-container">
+                     <div class="chat-history" id="chat-history"></div>
+                     <div class="chat-input-row">
+                         <select id="model-select"></select>
+                         <input type="text" id="chat-input" placeholder="Type your message..." />
+                         <button id="send-btn">Send</button>
+                     </div>
+                 </div>
+             </div>
+             <div class="tab-content" id="content-models">
+                 <div class="model-mgmt-container">
+                     <div class="model-mgmt-pane">
+                         <h3>Installed Models</h3>
+                         <table class="model-table" id="installed-models-table">
+                             <colgroup><col style="width:100%"></colgroup>
+                             <tbody id="installed-models-tbody"></tbody>
+                         </table>
+                     </div>
+                     <div class="model-mgmt-pane">
+                         <h3>Suggested Models</h3>
+                         <table class="model-table" id="suggested-models-table">
+                             <tbody id="suggested-models-tbody"></tbody>
+                         </table>
+                     </div>
+                 </div>
+             </div>
+         </div>
+     </main>
+     <footer class="site-footer">
+         <div class="dad-joke">When life gives you LLMs, make an LLM aide.</div>
+         <div class="copyright">Copyright 2025 AMD</div>
+     </footer>
+     <script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
+     <script>
+         // Tab switching logic
+         function showTab(tab) {
+             document.getElementById('tab-chat').classList.remove('active');
+             document.getElementById('tab-models').classList.remove('active');
+             document.getElementById('content-chat').classList.remove('active');
+             document.getElementById('content-models').classList.remove('active');
+             if (tab === 'chat') {
+                 document.getElementById('tab-chat').classList.add('active');
+                 document.getElementById('content-chat').classList.add('active');
+             } else {
+                 document.getElementById('tab-models').classList.add('active');
+                 document.getElementById('content-models').classList.add('active');
+             }
+         }
+
+         // Helper to get server base URL
+         function getServerBaseUrl() {
+             const port = window.SERVER_PORT || 8000;
+             return `http://localhost:${port}`;
+         }
+
+         // Populate model dropdown from /api/v1/models endpoint
+         async function loadModels() {
+             try {
+                 const resp = await fetch(getServerBaseUrl() + '/api/v1/models');
+                 const data = await resp.json();
+                 const select = document.getElementById('model-select');
+                 select.innerHTML = '';
+                 if (!data.data || !Array.isArray(data.data)) {
+                     select.innerHTML = '<option>No models found (malformed response)</option>';
+                     return;
+                 }
+                 if (data.data.length === 0) {
+                     select.innerHTML = '<option>No models available</option>';
+                     return;
+                 }
+                 let defaultIndex = 0;
+                 data.data.forEach(function(model, index) {
+                     const modelId = model.id || model.name || model;
+                     const opt = document.createElement('option');
+                     opt.value = modelId;
+                     opt.textContent = modelId;
+                     if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {
+                         defaultIndex = index;
+                     }
+                     select.appendChild(opt);
+                 });
+                 select.selectedIndex = defaultIndex;
+             } catch (e) {
+                 const select = document.getElementById('model-select');
+                 select.innerHTML = `<option>Error loading models: ${e.message}</option>`;
+                 console.error('Error loading models:', e);
+             }
+         }
+         loadModels();
+
+         // Model Management Tab Logic
+         async function refreshModelMgmtUI() {
+             // Get installed models from /api/v1/models
+             let installed = [];
+             try {
+                 const resp = await fetch(getServerBaseUrl() + '/api/v1/models');
+                 const data = await resp.json();
+                 if (data.data && Array.isArray(data.data)) {
+                     installed = data.data.map(m => m.id || m.name || m);
+                 }
+             } catch (e) {}
+             // All models from server_models.json (window.SERVER_MODELS)
+             const allModels = window.SERVER_MODELS || {};
+             // Filter suggested models not installed
+             const suggested = Object.keys(allModels).filter(
+                 k => allModels[k].suggested && !installed.includes(k)
+             );
+             // Render installed models as a table (two columns, second is invisible)
+             const installedTbody = document.getElementById('installed-models-tbody');
+             installedTbody.innerHTML = '';
+             installed.forEach(function(mid) {
+                 var tr = document.createElement('tr');
+                 var tdName = document.createElement('td');
+                 tdName.textContent = mid;
+                 var tdEmpty = document.createElement('td');
+                 tdEmpty.style.width = '0';
+                 tdEmpty.style.padding = '0';
+                 tdEmpty.style.border = 'none';
+                 tr.appendChild(tdName);
+                 tr.appendChild(tdEmpty);
+                 installedTbody.appendChild(tr);
+             });
+             // Render suggested models as a table
+             const suggestedTbody = document.getElementById('suggested-models-tbody');
+             suggestedTbody.innerHTML = '';
+             suggested.forEach(mid => {
+                 const tr = document.createElement('tr');
+                 const tdName = document.createElement('td');
+                 tdName.textContent = mid;
+                 tdName.style.paddingRight = '1em';
+                 tdName.style.verticalAlign = 'middle';
+                 const tdBtn = document.createElement('td');
+                 tdBtn.style.width = '1%';
+                 tdBtn.style.verticalAlign = 'middle';
+                 const btn = document.createElement('button');
+                 btn.textContent = '+';
+                 btn.title = 'Install model';
+                 btn.onclick = async function() {
+                     btn.disabled = true;
+                     btn.textContent = 'Installing...';
+                     btn.classList.add('installing-btn');
+                     try {
+                         await fetch(getServerBaseUrl() + '/api/v1/pull', {
+                             method: 'POST',
+                             headers: { 'Content-Type': 'application/json' },
+                             body: JSON.stringify({ model_name: mid })
+                         });
+                         await refreshModelMgmtUI();
+                         await loadModels(); // update chat dropdown too
+                     } catch (e) {
+                         btn.textContent = 'Error';
+                     }
+                 };
+                 tdBtn.appendChild(btn);
+                 tr.appendChild(tdName);
+                 tr.appendChild(tdBtn);
+                 suggestedTbody.appendChild(tr);
+             });
+         }
+         // Initial load
+         refreshModelMgmtUI();
+         // Optionally, refresh when switching to the tab
+         document.getElementById('tab-models').addEventListener('click', refreshModelMgmtUI);
+
+         // Chat logic (streaming with OpenAI JS client placeholder)
+         const chatHistory = document.getElementById('chat-history');
+         const chatInput = document.getElementById('chat-input');
+         const sendBtn = document.getElementById('send-btn');
+         const modelSelect = document.getElementById('model-select');
+         let messages = [];
+
+         function appendMessage(role, text) {
+             const div = document.createElement('div');
+             div.className = 'chat-message ' + role;
+             // Add a bubble for iMessage style
+             const bubble = document.createElement('div');
+             bubble.className = 'chat-bubble ' + role;
+             bubble.innerHTML = text;
+             div.appendChild(bubble);
+             chatHistory.appendChild(div);
+             chatHistory.scrollTop = chatHistory.scrollHeight;
+         }
+
+         async function sendMessage() {
+             const text = chatInput.value.trim();
+             if (!text) return;
+             appendMessage('user', text);
+             messages.push({ role: 'user', content: text });
+             chatInput.value = '';
+             sendBtn.disabled = true;
+             // Streaming OpenAI completions (placeholder, adapt as needed)
+             let llmText = '';
+             appendMessage('llm', '...');
+             const llmDiv = chatHistory.lastChild.querySelector('.chat-bubble.llm');
+             try {
+                 // Use the correct endpoint for chat completions
+                 const resp = await fetch(getServerBaseUrl() + '/api/v1/chat/completions', {
+                     method: 'POST',
+                     headers: { 'Content-Type': 'application/json' },
+                     body: JSON.stringify({
+                         model: modelSelect.value,
+                         messages: messages,
+                         stream: true
+                     })
+                 });
+                 if (!resp.body) throw new Error('No stream');
+                 const reader = resp.body.getReader();
+                 let decoder = new TextDecoder();
+                 llmDiv.textContent = '';
+                 while (true) {
+                     const { done, value } = await reader.read();
+                     if (done) break;
+                     const chunk = decoder.decode(value);
+                     if (chunk.trim() === 'data: [DONE]' || chunk.trim() === '[DONE]') continue;
+                     // Try to extract the content from the OpenAI chunk
+                     const match = chunk.match(/"content"\s*:\s*"([^"]*)"/);
+                     if (match && match[1]) {
+                         llmText += match[1];
+                         llmDiv.textContent = llmText;
+                     }
+                 }
+                 messages.push({ role: 'assistant', content: llmText });
+             } catch (e) {
+                 llmDiv.textContent = '[Error: ' + e.message + ']';
+             }
+             sendBtn.disabled = false;
+         }
+         sendBtn.onclick = sendMessage;
+         chatInput.addEventListener('keydown', function(e) {
+             if (e.key === 'Enter') sendMessage();
+         });
+     </script>
+ </body>
+ </html>
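
The page's chat tab streams from the same OpenAI-compatible endpoint any client can use. A hedged sketch with the `openai` Python package; the base URL and dummy API key are assumptions for a local server on the default port:

```python
# Assumes lemonade-server is running locally on port 8000; the key is a
# dummy value, since a local server typically does not check it.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="unused")

stream = client.chat.completions.create(
    model="Llama-3.2-1B-Instruct-Hybrid",  # the page's default model above
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
```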
lemonade_sdk-7.0.3/src/lemonade/version.py (new file)
@@ -0,0 +1 @@
+ __version__ = "7.0.3"
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_sdk.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lemonade-sdk
- Version: 7.0.2
+ Version: 7.0.3
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
  Author-email: lemonade@amd.com
  Requires-Python: >=3.10, <3.12
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_sdk.egg-info/SOURCES.txt
@@ -54,10 +54,10 @@ src/lemonade/tools/server/__init__.py
  src/lemonade/tools/server/instructions.py
  src/lemonade/tools/server/llamacpp.py
  src/lemonade/tools/server/port_utils.py
- src/lemonade/tools/server/pydantic_models.py
  src/lemonade/tools/server/serve.py
  src/lemonade/tools/server/thread_utils.py
  src/lemonade/tools/server/tool_calls.py
+ src/lemonade/tools/server/static/instructions.html
  src/lemonade/tools/server/static/styles.css
  src/lemonade_install/__init__.py
  src/lemonade_install/install.py
@@ -69,4 +69,5 @@ src/lemonade_sdk.egg-info/requires.txt
  src/lemonade_sdk.egg-info/top_level.txt
  src/lemonade_server/cli.py
  src/lemonade_server/model_manager.py
+ src/lemonade_server/pydantic_models.py
  src/lemonade_server/server_models.json
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_server/model_manager.py
@@ -1,7 +1,8 @@
  import json
  import os
  import huggingface_hub
- import pkg_resources
+ from importlib.metadata import distributions
+ from lemonade_server.pydantic_models import LoadConfig


  class ModelManager:
@@ -64,16 +65,45 @@ class ModelManager:
          """
          return self.filter_models_by_backend(self.downloaded_models)

-     def download_gguf(self, checkpoint) -> str:
-         # The colon after the checkpoint name indicates which
-         # specific GGUF to download
-         repo_id = checkpoint.split(":")[0]
-         pattern_to_match = f'*{checkpoint.split(":")[1]}.gguf'
-         return huggingface_hub.snapshot_download(
-             repo_id=repo_id,
-             allow_patterns=[pattern_to_match],
+     def download_gguf(self, model_config: LoadConfig) -> dict:
+         """
+         Downloads the GGUF file for the given model configuration.
+         """
+
+         # The variant parameter can be either:
+         #   1. A full GGUF filename (e.g. "model-Q4_0.gguf")
+         #   2. A quantization variant (e.g. "Q4_0")
+         # This code handles both cases by constructing the appropriate filename
+         checkpoint, variant = model_config.checkpoint.split(":")
+         hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
+         variant_name = (
+             variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
+         )
+
+         # If there is a mmproj file, add it to the patterns
+         expected_files = {"variant": variant_name}
+         if model_config.mmproj:
+             expected_files["mmproj"] = model_config.mmproj
+
+         # Download the files
+         snapshot_folder = huggingface_hub.snapshot_download(
+             repo_id=checkpoint,
+             allow_patterns=list(expected_files.values()),
          )

+         # Ensure we downloaded all expected files while creating a dict of the downloaded files
+         snapshot_files = {}
+         for file in expected_files:
+             snapshot_files[file] = os.path.join(snapshot_folder, expected_files[file])
+             if expected_files[file] not in os.listdir(snapshot_folder):
+                 raise ValueError(
+                     f"Hugging Face snapshot download for {model_config.checkpoint} "
+                     f"expected file {expected_files[file]} not found in {snapshot_folder}"
+                 )
+
+         # Return a dict that points to the snapshot path of the downloaded GGUF files
+         return snapshot_files
+
      def download_models(self, models: list[str]):
          """
          Downloads the specified models from Hugging Face.
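
A worked example of the new `checkpoint:variant` resolution, using the Gemma entry added to server_models.json later in this diff:

```python
# Mirrors the filename construction in download_gguf above.
checkpoint, variant = "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M".split(":")

hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
assert hf_base_name == "gemma-3-4b-it"

variant_name = (
    variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
)
assert variant_name == "gemma-3-4b-it-Q4_K_M.gguf"
```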
@@ -88,7 +118,8 @@ class ModelManager:
              print(f"Downloading {model} ({checkpoint})")

              if "gguf" in checkpoint.lower():
-                 self.download_gguf(checkpoint)
+                 model_config = LoadConfig(**self.supported_models[model])
+                 self.download_gguf(model_config)
              else:
                  huggingface_hub.snapshot_download(repo_id=checkpoint)
@@ -97,9 +128,11 @@ class ModelManager:
          Returns a filtered dict of models that are enabled by the
          current environment.
          """
+         installed_packages = {dist.metadata["Name"].lower() for dist in distributions()}
+
          hybrid_installed = (
-             "onnxruntime-vitisai" in pkg_resources.working_set.by_key
-             and "onnxruntime-genai-directml-ryzenai" in pkg_resources.working_set.by_key
+             "onnxruntime-vitisai" in installed_packages
+             and "onnxruntime-genai-directml-ryzenai" in installed_packages
          )
          filtered = {}
          for model, value in models.items():
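
The `pkg_resources` → `importlib.metadata` swap asks the same question ("is this distribution installed?") with the standard library instead of the deprecated setuptools API. A minimal sketch:

```python
# Stdlib-only: no setuptools/pkg_resources import required.
from importlib.metadata import distributions

installed = {dist.metadata["Name"].lower() for dist in distributions()}

hybrid_installed = (
    "onnxruntime-vitisai" in installed
    and "onnxruntime-genai-directml-ryzenai" in installed
)
print(hybrid_installed)
```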
{lemonade_sdk-7.0.2/src/lemonade/tools/server → lemonade_sdk-7.0.3/src/lemonade_server}/pydantic_models.py
@@ -24,6 +24,8 @@ class LoadConfig(BaseModel):
      max_prompt_length: Optional[int] = None
      # Indicates whether the model is a reasoning model, like DeepSeek
      reasoning: Optional[bool] = False
+     # Indicates which Multimodal Projector (mmproj) file to use
+     mmproj: Optional[str] = None


  class CompletionRequest(BaseModel):
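
A hedged sketch of a `LoadConfig` for one of the new vision models. Only fields visible in this diff are set (checkpoint and recipe appear in serve.py, mmproj in this hunk), assuming no other fields are required:

```python
# Field values copied from the Gemma entry added to server_models.json
# below; other LoadConfig fields are assumed to keep their defaults.
from lemonade_server.pydantic_models import LoadConfig

config = LoadConfig(
    checkpoint="ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
    recipe="llamacpp",
    mmproj="mmproj-model-f16.gguf",
)
assert config.mmproj is not None  # download_gguf will also fetch the mmproj
```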
{lemonade_sdk-7.0.2 → lemonade_sdk-7.0.3}/src/lemonade_server/server_models.json
@@ -187,5 +187,19 @@
          "recipe": "llamacpp",
          "reasoning": true,
          "suggested": true
+     },
+     "Gemma-3-4b-it-GGUF": {
+         "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
+         "mmproj": "mmproj-model-f16.gguf",
+         "recipe": "llamacpp",
+         "reasoning": false,
+         "suggested": true
+     },
+     "Qwen2.5-VL-7B-Instruct": {
+         "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
+         "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
+         "recipe": "llamacpp",
+         "reasoning": false,
+         "suggested": true
      }
  }
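
Either entry can be installed through the same `/api/v1/pull` endpoint the web page uses. A stdlib-only sketch, assuming the server is on the default port 8000:

```python
# POSTs the same payload the page's "+" button sends.
import json
from urllib.request import Request, urlopen

req = Request(
    "http://localhost:8000/api/v1/pull",
    data=json.dumps({"model_name": "Qwen2.5-VL-7B-Instruct"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urlopen(req) as resp:
    print(resp.status)
```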
lemonade_sdk-7.0.2/src/lemonade/tools/server/instructions.py (deleted; the loader logic moves to the new instructions.py and the inline HTML moves to static/instructions.html, both shown above)
@@ -1,294 +0,0 @@
- from pathlib import Path
- import json
- from fastapi.responses import HTMLResponse
- from lemonade_server.model_manager import ModelManager
-
-
- def get_instructions_html(port=8000):
-     """
-     Show instructions on how to use the server.
-     """
-     # Load server models from JSON
-     server_models_path = (
-         Path(__file__).parent.parent.parent.parent
-         / "lemonade_server"
-         / "server_models.json"
-     )
-     with open(server_models_path, "r", encoding="utf-8") as f:
-         server_models = json.load(f)
-
-     # Use shared filter function from model_manager.py
-     filtered_models = ModelManager().filter_models_by_backend(server_models)
-
-     # Pass filtered server_models to JS
-     server_models_js = (
-         f"<script>window.SERVER_MODELS = {json.dumps(filtered_models)};</script>"
-     )
-
-     # New lemon-themed HTML structure
-     # pylint: disable=W1401
-     styled_html = f"""
-     ... [262 lines elided: the same HTML/JS now in static/instructions.html above,
-          inlined here as an f-string with doubled braces and escaped quotes] ...
-     """
-     return HTMLResponse(content=styled_html)
lemonade_sdk-7.0.2/src/lemonade/version.py (deleted)
@@ -1 +0,0 @@
- __version__ = "7.0.2"