lemonade_sdk-8.0.1-py3-none-any.whl → lemonade_sdk-8.0.3-py3-none-any.whl


lemonade/cli.py CHANGED
@@ -90,9 +90,9 @@ https://github.com/lemonade-sdk/lemonade/blob/main/docs/README.md""",
     )
 
     profiler_instances = [
-        profiler(global_args[profiler.unique_name])
+        profiler(global_args[profiler.unique_name.replace("-", "_")])
        for profiler in profilers
-        if global_args.get(profiler.unique_name, None) is not None
+        if global_args.get(profiler.unique_name.replace("-", "_"), None) is not None
    ]
 
    if len(evaluation_tools) > 0:
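
The dash-to-underscore replacement mirrors argparse's attribute naming: a flag registered as `--memory-tracker` is stored on the parsed namespace as `memory_tracker`, so a profiler whose `unique_name` contains dashes could never be found in `global_args` before this fix. A minimal sketch of the mismatch (hypothetical flag name, standard-library argparse only):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--memory-tracker", default=None)
    args = vars(parser.parse_args(["--memory-tracker", "on"]))

    unique_name = "memory-tracker"                   # as a profiler might declare it
    print(args.get(unique_name))                     # None: argparse keys use underscores
    print(args.get(unique_name.replace("-", "_")))   # "on": matches argparse's key
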
lemonade/profilers/profiler.py CHANGED
@@ -48,7 +48,10 @@ class Profiler(abc.ABC):
         This method is called so that the profiler can create its output files.
         The state is passed so that build info can be gathered and stats can be written.
         The timestamp can be used for filename in current working directory.
-        The start times contain a list of tools and start times.
+        The start times parameter is a dict with the keys being the tools names and
+        the values being the time the tool started. There is an initial "warmup" key
+        that has a start time before the first tool and a "cool down" key that contains the
+        time when the last tool ended.
         """
 
 
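Per the updated docstring, per-tool durations fall out of consecutive entries of that dict. A sketch of consuming it (hypothetical tool names and times; relies on Python dicts preserving insertion order):

    start_times = {
        "warmup": 100.0,       # before the first tool starts
        "load-model": 101.5,
        "llm-prompt": 130.2,
        "cool down": 245.9,    # when the last tool ended
    }

    entries = list(start_times.items())
    for (name, start), (_, next_start) in zip(entries, entries[1:]):
        print(f"{name}: {next_start - start:.1f}s")
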
lemonade/tools/oga/load.py CHANGED
@@ -1,12 +1,6 @@
 # onnxruntime_genai is not lint-friendly yet and PyLint can't
 # find any of the class methods
 # pylint: disable=no-member
-#
-# Model builder constraints:
-# 11/10/24 Need transformers <4.45.0 OR onnxruntime-genai 0.5.0 (which must be built from source)
-# (transformers v4.45 changes the format of the tokenizer.json file which will be supported in
-# onnxruntime-genai 0.5)
-#
 
 import argparse
 import os
@@ -51,8 +45,8 @@ def import_error_heler(e: Exception):
     """
     raise ImportError(
         f"{e}\n Please install lemonade-sdk with "
-        "one of the llm-oga extras, for example:\n"
-        "pip install lemonade-sdk[llm-oga-cpu]\n"
+        "one of the oga extras, for example:\n"
+        "pip install lemonade-sdk[dev,oga-cpu]\n"
         "See https://lemonade_server.ai/install_options.html for details"
     )
 
lemonade/tools/prompt.py CHANGED
@@ -176,12 +176,21 @@ class LLMPrompt(Tool):
 
         input_ids = tokenizer(prompt, return_tensors="pt").input_ids
         if isinstance(input_ids, (list, str)):
-            # OGA models return a list of tokens
+            # OGA models return a list of tokens (older versions)
             # Our llama.cpp adapter returns a string
             len_tokens_in = len(input_ids)
-        else:
+        elif hasattr(input_ids, "shape"):
             # HF models return a 2-D tensor
-            len_tokens_in = input_ids.shape[1]
+            # OGA models with newer versions may return numpy arrays
+            if len(input_ids.shape) == 1:
+                # 1-D array from newer OGA versions
+                len_tokens_in = len(input_ids)
+            else:
+                # 2-D tensor from HF models
+                len_tokens_in = input_ids.shape[1]
+        else:
+            # Fallback: try to get length directly
+            len_tokens_in = len(input_ids)
 
         len_tokens_out = []
         response_texts = []
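
The new branching normalizes three tokenizer return types into one token count. A standalone sketch of the same logic (hypothetical inputs; a 2-D numpy array stands in for a Hugging Face tensor):

    import numpy as np

    def count_input_tokens(input_ids):
        if isinstance(input_ids, (list, str)):
            return len(input_ids)          # token list (older OGA) or raw string (llama.cpp adapter)
        if hasattr(input_ids, "shape"):
            if len(input_ids.shape) == 1:  # 1-D array (newer OGA)
                return len(input_ids)
            return input_ids.shape[1]      # 2-D tensor (Hugging Face)
        return len(input_ids)              # fallback

    print(count_input_tokens([1, 2, 3]))             # 3
    print(count_input_tokens(np.array([1, 2, 3])))   # 3
    print(count_input_tokens(np.array([[1, 2, 3]]))) # 3 (stands in for a 2-D HF tensor)
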
@@ -202,9 +211,15 @@ class LLMPrompt(Tool):
             random_seed += 1
 
         # Flatten the input and response
-        input_ids_array = (
-            input_ids if isinstance(input_ids, (list, str)) else input_ids[0]
-        )
+        if isinstance(input_ids, (list, str)):
+            input_ids_array = input_ids
+        elif hasattr(input_ids, "shape") and len(input_ids.shape) == 1:
+            # 1-D array from newer OGA versions - already flat
+            input_ids_array = input_ids
+        else:
+            # 2-D tensor from HF models - take first row
+            input_ids_array = input_ids[0]
+
         response_array = response if isinstance(response, str) else response[0]
 
         # Separate the prompt from the response
lemonade/tools/report/table.py CHANGED
@@ -7,6 +7,7 @@ from tabulate import tabulate
 import lemonade.common.build as build
 import lemonade.common.filesystem as fs
 from lemonade.cache import Keys
+from lemonade.tools.accuracy import LMEvalHarness
 from lemonade.tools.huggingface.bench import HuggingfaceBench
 from lemonade.tools.llamacpp.bench import LlamaCppBench
 from lemonade.tools.mmlu import AccuracyMMLU
@@ -73,6 +74,7 @@ class SimpleStat(TableColumn):
         align="center",
         omit_if_lean=False,
         wrap=None,
+        stat_fn=None,
     ):
         self.column_header = column_header
         self.stat = stat
@@ -80,6 +82,7 @@
         self.align = align
         self.omit_if_lean = omit_if_lean
         self.wrap = wrap or self.default_wrap
+        self.stat_fn = stat_fn
 
     def get_str(self, build_stats, lean=False):
         if lean and self.omit_if_lean:
@@ -87,6 +90,8 @@
         data = build_stats.get(self.stat, None)
         if data is None:
             return ""
+        if self.stat_fn:
+            data = self.stat_fn(data)
         cell_str = "\n".join(
             [_wrap(f"{x:{self.format_str}}", self.wrap) for x in _to_list(data)]
         )
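
`stat_fn` lets a `SimpleStat` column reduce or transform the raw statistic before formatting; the new "Total Generated Tokens" column later in this diff uses it with `stat_fn=sum` over a per-iteration token list. A minimal sketch of the idea (simplified column class, hypothetical stats dict):

    class Column:
        def __init__(self, stat, format_str, stat_fn=None):
            self.stat, self.format_str, self.stat_fn = stat, format_str, stat_fn

        def get_str(self, build_stats):
            data = build_stats.get(self.stat)
            if data is None:
                return ""
            if self.stat_fn:
                data = self.stat_fn(data)  # e.g. collapse a per-iteration list into one total
            return f"{data:{self.format_str}}"

    stats = {"response_tokens": [128, 130, 127]}
    print(Column("response_tokens", "d", stat_fn=sum).get_str(stats))  # 385
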
@@ -232,6 +237,47 @@ class AdditionalStat(TableColumn):
         return "\n".join(cell_entry)
 
 
+class DictListStat(TableColumn):
+    """
+    A statistic that is a list of dicts and values from a given list of keys will be
+    pulled out of each dict and placed in the cell
+    """
+
+    def __init__(
+        self,
+        column_header,
+        statistic_name,
+        key_format_list,
+        align="center",
+        omit_if_lean=False,
+        wrap=None,
+    ):
+        self.column_header = column_header
+        self.statistic_name = statistic_name
+        self.key_format_list = key_format_list
+        self.align = align
+        self.omit_if_lean = omit_if_lean
+        self.wrap = wrap or self.default_wrap
+
+    def get_str(self, build_stats, lean=False):
+        if lean and self.omit_if_lean:
+            return None
+        stat = build_stats.get(self.statistic_name, None)
+        if not stat:
+            return ""
+        cell_entry = []
+        for stat_dict in stat:
+            line = [
+                format_str.format(stat_dict[key])
+                for key, format_str in self.key_format_list
+            ]
+            cell_entry.append(" ".join(line))
+        return "\n".join(cell_entry)
+
+    def get_keys(self):
+        return [self.statistic_name]
+
+
 ################################################################################
 # ABSTRACT BASE CLASS FOR DEFINING A TABLE
 ################################################################################
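
A sketch of how `DictListStat` renders a list-of-dicts statistic into one multi-line cell (hypothetical statistic name and keys; the format strings follow `str.format` conventions):

    build_stats = {
        "lm_eval_results": [
            {"task": "hellaswag", "acc": 71.3},
            {"task": "arc_easy", "acc": 82.9},
        ],
    }
    key_format_list = [("task", "{}"), ("acc", "{:.1f}")]

    cell_entry = []
    for stat_dict in build_stats["lm_eval_results"]:
        line = [fmt.format(stat_dict[key]) for key, fmt in key_format_list]
        cell_entry.append(" ".join(line))
    print("\n".join(cell_entry))
    # hellaswag 71.3
    # arc_easy 82.9
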
@@ -349,6 +395,28 @@ class Table(ABC):
             headers.append(column.column_header)
             col_align += (column.align,)
 
+        # Stat column headers
+        stat_columns = self.table_descriptor.get("stat_columns", [])
+        stat_columns_include = []
+        for column in stat_columns:
+            # Check to see that at least one build has data for the column
+            keep_column = False
+            if not (self.lean and column.omit_if_lean):
+                keys = column.get_keys()
+                for build_stats in self.all_stats:
+                    found = [(key in build_stats) for key in keys]
+                    if any(found):
+                        keep_column = True
+                        headers.append(column.column_header)
+                        col_align += (column.align,)
+                        break
+            stat_columns_include.append(keep_column)
+        stat_columns = [
+            column
+            for column, include in zip(stat_columns, stat_columns_include)
+            if include
+        ]
+
 
        # Final headers
        last_columns = self.table_descriptor.get("last_columns", [])
        for column in last_columns:
@@ -385,6 +453,12 @@ class Table(ABC):
                 if entry_str is not None:
                     row.append(entry_str)
 
+            # Per stat columns
+            for entry in stat_columns:
+                entry_str = entry.get_str(build_stats, self.lean)
+                if entry_str is not None:
+                    row.append(entry_str)
+
             # Final columns
             for entry in last_columns:
                 entry_str = entry.get_str(build_stats, self.lean)
@@ -513,6 +587,12 @@ class LemonadePerfTable(Table):
                     Keys.STD_DEV_TOKENS_PER_SECOND,
                     ".2f",
                 ),
+                SimpleStat(
+                    _wrap("Total Generated Tokens", 9),
+                    Keys.RESPONSE_TOKENS,
+                    "d",
+                    stat_fn=sum,
+                ),
                 SimpleStat(
                     _wrap("Memory Used (GB)", 8), Keys.MAX_MEMORY_USED_GBYTE, ".3f"
                 ),
@@ -527,7 +607,16 @@ class LemonadePerfTable(Table):
                     ".2f",
                 )
             ],
+            LMEvalHarness: [
+                AdditionalStat(
+                    "EleutherAI\nLM Evaluation",
+                    "^lm_eval_",
+                    "^lm_eval_",
+                    ".1f",
+                )
+            ],
         },
+        "stat_columns": [],
         "last_columns": [
             SimpleStat(
                 "System Info",
lemonade/tools/server/llamacpp.py CHANGED
@@ -210,15 +210,20 @@ def _log_subprocess_output(
     """
 
     if process.stdout:
-        for line in iter(process.stdout.readline, ""):
-            if line:
-                line_stripped = line.strip()
-                logging.debug("%s: %s", prefix, line_stripped)
+        try:
+            for line in iter(process.stdout.readline, ""):
+                if line:
+                    line_stripped = line.strip()
+                    logging.debug("%s: %s", prefix, line_stripped)
 
-                telemetry.parse_telemetry_line(line_stripped)
+                    telemetry.parse_telemetry_line(line_stripped)
 
-            if process.poll() is not None:
-                break
+                if process.poll() is not None:
+                    break
+        except UnicodeDecodeError as e:
+            logging.debug("Unicode decode error reading subprocess output: %s", str(e))
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            logging.error("Unexpected error reading subprocess output: %s", str(e))
 
 
 def _wait_for_load(llama_server_process: subprocess.Popen, port: int):
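
Together with the `encoding="utf-8", errors="replace"` arguments added to the `Popen` call in the next hunk, this makes log draining robust to non-UTF-8 bytes in llama-server output: invalid sequences become replacement characters instead of raising mid-stream. A minimal sketch of the pattern (hypothetical command):

    import subprocess

    # errors="replace" substitutes U+FFFD for undecodable bytes instead of
    # raising UnicodeDecodeError while iterating over the stream
    proc = subprocess.Popen(
        ["echo", "hello"],  # hypothetical command
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        encoding="utf-8",
        errors="replace",
        bufsize=1,
    )
    for line in iter(proc.stdout.readline, ""):
        print(line.strip())
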
@@ -287,6 +292,8 @@ def _launch_llama_subprocess(
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
+        encoding="utf-8",
+        errors="replace",
         bufsize=1,
         env=env,
     )
@@ -383,6 +390,10 @@ def server_load(model_config: PullConfig, telemetry: LlamaTelemetry):
             f"Loading {model_config.model_name} on GPU didn't work, re-attempting on CPU"
         )
 
+        if os.environ.get("LEMONADE_LLAMACPP_NO_FALLBACK"):
+            # Used for testing, when the test should fail if GPU didn't work
+            raise Exception("llamacpp GPU loading failed")
+
         llama_server_process = _launch_llama_subprocess(
             snapshot_files, use_gpu=False, telemetry=telemetry
         )
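
Because the guard reads the raw environment string, any non-empty value disables the CPU fallback while an unset or empty variable leaves it intact:

    import os

    os.environ["LEMONADE_LLAMACPP_NO_FALLBACK"] = "1"
    assert os.environ.get("LEMONADE_LLAMACPP_NO_FALLBACK")      # truthy: fallback disabled

    os.environ["LEMONADE_LLAMACPP_NO_FALLBACK"] = ""
    assert not os.environ.get("LEMONADE_LLAMACPP_NO_FALLBACK")  # empty string: fallback kept
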
lemonade/tools/server/static/webapp.html CHANGED
@@ -110,20 +110,53 @@
     </footer>
     <script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
     <script> // Tab switching logic
-        function showTab(tab) {
+        function showTab(tab, updateHash = true) {
             document.getElementById('tab-chat').classList.remove('active');
             document.getElementById('tab-models').classList.remove('active');
             document.getElementById('content-chat').classList.remove('active');
             document.getElementById('content-models').classList.remove('active');
             if (tab === 'chat') {
                 document.getElementById('tab-chat').classList.add('active');
-                document.getElementById('content-chat').classList.add('active');
+                document.getElementById('content-chat').classList.add('active');
+                if (updateHash) {
+                    window.location.hash = 'llm-chat';
+                }
             } else {
                 document.getElementById('tab-models').classList.add('active');
-                document.getElementById('content-models').classList.add('active');
+                document.getElementById('content-models').classList.add('active');
+                if (updateHash) {
+                    window.location.hash = 'model-management';
+                }
             }
         }
 
+        // Handle hash changes for anchor navigation
+        function handleHashChange() {
+            const hash = window.location.hash.slice(1); // Remove the # symbol
+            if (hash === 'llm-chat') {
+                showTab('chat', false);
+            } else if (hash === 'model-management') {
+                showTab('models', false);
+            }
+        }
+
+        // Initialize tab based on URL hash on page load
+        function initializeTabFromHash() {
+            const hash = window.location.hash.slice(1);
+            if (hash === 'llm-chat') {
+                showTab('chat', false);
+            } else if (hash === 'model-management') {
+                showTab('models', false);
+            }
+            // If no hash or unrecognized hash, keep default (chat tab is already active)
+        }
+
+        // Listen for hash changes
+        window.addEventListener('hashchange', handleHashChange);
+
+        // Initialize on page load
+        document.addEventListener('DOMContentLoaded', initializeTabFromHash);
+
         // Toggle Add Model form
         function toggleAddModelForm() {
             const form = document.querySelector('.model-mgmt-register-form');
lemonade/tools/server/tray.py CHANGED
@@ -197,11 +197,17 @@ class LemonadeTray(SystemTray):
         """
         webbrowser.open("https://lemonade-server.ai/docs/")
 
+    def open_llm_chat(self, _, __):
+        """
+        Open the LLM chat in the default web browser.
+        """
+        webbrowser.open(f"http://localhost:{self.port}/#llm-chat")
+
     def open_model_manager(self, _, __):
         """
         Open the model manager in the default web browser.
         """
-        webbrowser.open(f"http://localhost:{self.port}/")
+        webbrowser.open(f"http://localhost:{self.port}/#model-management")
 
     def check_server_state(self):
         """
@@ -266,7 +272,7 @@ class LemonadeTray(SystemTray):
             self.logger.error(f"Error changing port: {str(e)}")
             self.show_balloon_notification("Error", f"Failed to change port: {str(e)}")
 
-    def upgrade_to_latest(self, icon, item):
+    def upgrade_to_latest(self, _, __):
         """
         Download and launch the Lemonade Server installer
         """
@@ -281,21 +287,34 @@ class LemonadeTray(SystemTray):
             installer_path = os.path.join(
                 tempfile.gettempdir(), "Lemonade_Server_Installer.exe"
             )
+            if os.path.exists(installer_path):
+                os.remove(installer_path)
 
             # Download the installer
             response = requests.get(self.latest_version_url, stream=True)
             response.raise_for_status()
 
-            # Save the installer to disk
+            # Save the installer to disk and force write to disk
             with open(installer_path, "wb") as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     f.write(chunk)
+                f.flush()
+                os.fsync(f.fileno())
 
-            # Launch the installer
-            subprocess.Popen([installer_path], shell=True)
+            # Launch the installer as a completely detached process
+            # subprocess.DETACHED_PROCESS - Creates a process that's not attached to the console
+            # subprocess.CREATE_NEW_PROCESS_GROUP - Creates a new process group
+            # close_fds=True - Closes file descriptors to prevent inheritance
+            subprocess.Popen(
+                [installer_path],
+                creationflags=subprocess.DETACHED_PROCESS
+                | subprocess.CREATE_NEW_PROCESS_GROUP,
+                close_fds=True,
+                shell=True,
+                cwd=tempfile.gettempdir(),
+            )
 
-            # Quit the application
-            self.exit_app(icon, item)
+            # No need to quit the application, the installer will handle it
 
     def create_menu(self):
         """
@@ -326,16 +345,25 @@ class LemonadeTray(SystemTray):
 
         # Create menu items for all downloaded models
         model_menu_items = []
-        for model_name, _ in self.downloaded_models.items():
-            # Create a function that returns the lambda to properly capture the variables
-            def create_handler(mod):
-                return lambda icon, item: self.load_llm(icon, item, mod)
+        if not self.downloaded_models:
+            model_menu_items.append(
+                MenuItem(
+                    "No models available: Use the Model Manager to pull models",
+                    None,
+                    enabled=False,
+                )
+            )
+        else:
+            for model_name, _ in self.downloaded_models.items():
+                # Create a function that returns the lambda to properly capture the variables
+                def create_handler(mod):
+                    return lambda icon, item: self.load_llm(icon, item, mod)
 
-            model_item = MenuItem(model_name, create_handler(model_name))
+                model_item = MenuItem(model_name, create_handler(model_name))
 
-            # Set checked property instead of modifying the text
-            model_item.checked = model_name == self.loaded_llm
-            model_menu_items.append(model_item)
+                # Set checked property instead of modifying the text
+                model_item.checked = model_name == self.loaded_llm
+                model_menu_items.append(model_item)
 
         load_submenu = Menu(*model_menu_items)
 
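The `create_handler` indirection that this hunk preserves exists because Python closures capture variables late: lambdas created directly in the loop would all see the final `model_name`. A minimal demonstration of the pitfall and the factory fix:

    handlers_bad = [lambda: name for name in ["a", "b", "c"]]
    print([h() for h in handlers_bad])   # ['c', 'c', 'c'] - all share the last binding

    def make_handler(captured):
        return lambda: captured          # factory freezes the value per iteration

    handlers_good = [make_handler(name) for name in ["a", "b", "c"]]
    print([h() for h in handlers_good])  # ['a', 'b', 'c']
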
@@ -378,6 +406,7 @@ class LemonadeTray(SystemTray):
         )
 
         items.append(MenuItem("Documentation", self.open_documentation))
+        items.append(MenuItem("LLM Chat", self.open_llm_chat))
         items.append(MenuItem("Model Manager", self.open_model_manager))
         items.append(MenuItem("Show Logs", self.show_logs))
         items.append(Menu.SEPARATOR)
lemonade/version.py CHANGED
@@ -1 +1 @@
-__version__ = "8.0.1"
+__version__ = "8.0.3"
lemonade_sdk-8.0.1.dist-info/METADATA → lemonade_sdk-8.0.3.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.0.1
+Version: 8.0.3
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.12
@@ -26,45 +26,49 @@ Requires-Dist: openai>=1.81.0
 Requires-Dist: transformers<=4.51.3
 Requires-Dist: jinja2
 Requires-Dist: tabulate
-Requires-Dist: huggingface-hub==0.30.2
+Requires-Dist: sentencepiece
+Requires-Dist: huggingface-hub==0.33.0
+Provides-Extra: oga-hybrid
+Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
+Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
+Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
+Provides-Extra: oga-cpu
+Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
+Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
+Provides-Extra: dev
+Requires-Dist: torch>=2.6.0; extra == "dev"
+Requires-Dist: accelerate; extra == "dev"
+Requires-Dist: datasets; extra == "dev"
+Requires-Dist: pandas>=1.5.3; extra == "dev"
+Requires-Dist: matplotlib; extra == "dev"
+Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
+Requires-Dist: lm-eval[api]; extra == "dev"
 Provides-Extra: oga-hybrid-minimal
-Requires-Dist: onnx==1.16.1; extra == "oga-hybrid-minimal"
-Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
-Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
+Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
 Provides-Extra: oga-cpu-minimal
-Requires-Dist: onnxruntime-genai==0.6.0; extra == "oga-cpu-minimal"
-Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
+Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
 Provides-Extra: llm
-Requires-Dist: torch>=2.6.0; extra == "llm"
-Requires-Dist: accelerate; extra == "llm"
-Requires-Dist: sentencepiece; extra == "llm"
-Requires-Dist: datasets; extra == "llm"
-Requires-Dist: pandas>=1.5.3; extra == "llm"
-Requires-Dist: matplotlib; extra == "llm"
-Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
-Requires-Dist: lm-eval[api]; extra == "llm"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm"
 Provides-Extra: llm-oga-cpu
-Requires-Dist: lemonade-sdk[oga-cpu-minimal]; extra == "llm-oga-cpu"
-Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
+Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
 Provides-Extra: llm-oga-igpu
 Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
 Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
 Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
-Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-igpu"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
 Provides-Extra: llm-oga-cuda
-Requires-Dist: onnxruntime-genai-cuda==0.6.0; extra == "llm-oga-cuda"
-Requires-Dist: onnxruntime-gpu<1.22.0,>=1.19.1; extra == "llm-oga-cuda"
-Requires-Dist: transformers<4.45.0; extra == "llm-oga-cuda"
-Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cuda"
+Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
+Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
+Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
 Provides-Extra: llm-oga-npu
 Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
 Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
 Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
 Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
-Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-npu"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
 Provides-Extra: llm-oga-hybrid
-Requires-Dist: lemonade-sdk[oga-hybrid-minimal]; extra == "llm-oga-hybrid"
-Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
+Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
 Provides-Extra: llm-oga-unified
 Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
 Dynamic: author-email
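
Functionally, the extras restructuring keeps every legacy extra as a thin alias onto the new names (`llm-oga-cpu` now just pulls in `lemonade-sdk[dev,oga-cpu]`), so an existing `pip install lemonade-sdk[llm-oga-cpu]` keeps resolving while the shorter `pip install lemonade-sdk[dev,oga-cpu]` spelling from the updated import-error hint becomes the preferred form.
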
lemonade_sdk-8.0.1.dist-info/RECORD → lemonade_sdk-8.0.3.dist-info/RECORD RENAMED
@@ -1,10 +1,10 @@
 lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
 lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
 lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
-lemonade/cli.py,sha256=XzptHh6LTl5OdGRnxiLykQ8QBl2rQmhWH5w0KPJVyY4,4359
+lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
 lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
 lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
-lemonade/version.py,sha256=qR-61NMOca8p2Rty8s6xwXQSXLDufw2os6i4zdyqfak,22
+lemonade/version.py,sha256=GImAlzwPDxsACkYFf5rTrX8QMH23tcqdm6vgjfFYD10,22
 lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
 lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
@@ -17,7 +17,7 @@ lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD4
 lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
 lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
 lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
-lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
+lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
 lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
 lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
 lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
@@ -26,7 +26,7 @@ lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9
 lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
 lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
 lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
-lemonade/tools/prompt.py,sha256=AT3p5rCGHEs9ozeGxwWl07iKF-mgLxFOkYLjU2btFHs,8638
+lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
 lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
 lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
 lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
@@ -35,36 +35,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
 lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
 lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
-lemonade/tools/oga/load.py,sha256=7Sdf6PFPrqbadPabyJb_uPRUIP09qj21ZYdXz47MqsE,28570
+lemonade/tools/oga/load.py,sha256=xSP0DWoGd5zBRozSafj1MMyIQyHJuIRj_vNlCTx8mfs,28309
 lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
 lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
 lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
 lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
-lemonade/tools/report/table.py,sha256=di8IZkolt_kaZfWri6GQkhPE1zCELqcrBcG1x1fzWqg,24843
+lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
 lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/server/llamacpp.py,sha256=aDVjjkU2Z2PN25Uuy-lk6ByKPR8kg5r2X-YsVSs4vi8,15624
+lemonade/tools/server/llamacpp.py,sha256=vjFNelm_VyKBBgWmltsAwLI7ncQ9AwVFQD7krZnF42w,16199
 lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
 lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
-lemonade/tools/server/tray.py,sha256=SakwhZKPgo7VtWP4q10SaCcZdxKG95dnNsXdTu9Eei0,16030
+lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
 lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
 lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
 lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
-lemonade/tools/server/static/webapp.html,sha256=im7YQkwvbuqrbO-sLhStVqtA6B7HKAn2azZka1KoeJQ,21260
+lemonade/tools/server/static/webapp.html,sha256=kPzORaogVRdFQewXyNI_JaH2ZZCTaq5zfMSyzuoFTuA,22414
 lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
 lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
 lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
 lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
 lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
-lemonade_sdk-8.0.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lemonade_sdk-8.0.1.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
-lemonade_server/cli.py,sha256=fm1eORLKElHfzqO5VVicDmn9EbmqIffi1bynqacJeyw,11744
-lemonade_server/model_manager.py,sha256=HqbahDMRv1x8jyQj4pa1rXanlPmcCykt8tlI6WfaxjE,13023
-lemonade_server/pydantic_models.py,sha256=2ALw47C1VWGe2nKWjlEAzP1ggKYsky4xlahUFxQJCMs,2298
-lemonade_server/server_models.json,sha256=wTK_H9XDHLxqMWQJqbBsJwm50PhOR4gURyVj9Jm35PQ,6992
-lemonade_sdk-8.0.1.dist-info/METADATA,sha256=s5q-KKS3Drrxxm1-wGLUP9c0HymN2RgC7PjMqr0biog,8225
-lemonade_sdk-8.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lemonade_sdk-8.0.1.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
-lemonade_sdk-8.0.1.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
-lemonade_sdk-8.0.1.dist-info/RECORD,,
+lemonade_sdk-8.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lemonade_sdk-8.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
+lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
+lemonade_server/model_manager.py,sha256=Yvlsl0wipKfryKULH5ASQ9INhLQXPq9dTGQVBXf2_h0,16167
+lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
+lemonade_server/server_models.json,sha256=O5zk94gH_zRq6GSwbqvi2SNwx51eY9uqgAl_kxTi0iM,7271
+lemonade_sdk-8.0.3.dist-info/METADATA,sha256=WesWziLri9jQjZILRENliiJbggTVF8LmXKVIERInVbE,8285
+lemonade_sdk-8.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lemonade_sdk-8.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+lemonade_sdk-8.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+lemonade_sdk-8.0.3.dist-info/RECORD,,
lemonade_server/cli.py CHANGED
@@ -4,7 +4,6 @@ import os
 from typing import Tuple, Optional
 import psutil
 from typing import List
-import subprocess
 
 
 # Error codes for different CLI scenarios
@@ -88,23 +87,26 @@ def stop():
         # Terminate the main process first
         process.terminate()
 
-        # Then terminate all children
+        # Then terminate llama-server child process (known to be stubborn)
+        # We avoid killing other child processes, such as the installer
         for child in children:
-            try:
-                child.terminate()
-            except psutil.NoSuchProcess:
-                pass  # Child already terminated
+            if "llama-server" in child.name():
+                try:
+                    child.terminate()
+                except psutil.NoSuchProcess:
+                    pass  # Child already terminated
 
         # Wait for main process
         process.wait(timeout=10)
 
-        # Kill any children that didn't terminate gracefully
+        # Kill llama-server child process if it didn't terminate gracefully
         for child in children:
-            try:
-                if child.is_running():
-                    child.kill()
-            except psutil.NoSuchProcess:
-                pass  # Child already terminated
+            if "llama-server" in child.name():
+                try:
+                    if child.is_running():
+                        child.kill()
+                except psutil.NoSuchProcess:
+                    pass  # Child already terminated
     except psutil.NoSuchProcess:
         # Process already terminated
         pass
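
The name-based filtering narrows the blast radius of `stop` to llama-server workers. A standalone sketch of the same pattern (run against the current process; `psutil` calls as documented):

    import psutil

    parent = psutil.Process()  # stand-in for the lemonade-server process
    for child in parent.children(recursive=True):
        if "llama-server" in child.name():
            try:
                child.terminate()  # leave other children (e.g. an installer) alone
            except psutil.NoSuchProcess:
                pass  # already gone
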
lemonade_server/model_manager.py CHANGED
@@ -102,57 +102,131 @@ class ModelManager:
         """
         return self.filter_models_by_backend(self.downloaded_models)
 
+    def identify_gguf_models(
+        self, checkpoint: str, variant: str, mmproj: str
+    ) -> tuple[dict, list[str]]:
+        """
+        Identifies the GGUF model files in the repository that match the variant.
+        """
+
+        hint = """
+        The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
+
+        The VARIANT format can be one of several types:
+        1. Full filename: exact file to download
+        2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
+        3. Quantization variant: find a single file ending with the variant name (case insensitive)
+        4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
+
+        Examples:
+        - "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
+        - "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
+        - "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
+        - "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
+        """
+
+        repo_files = huggingface_hub.list_repo_files(checkpoint)
+        sharded_files = []
+
+        # (case 1) If variant ends in .gguf, use it directly
+        if variant and variant.endswith(".gguf"):
+            variant_name = variant
+            if variant_name not in repo_files:
+                raise ValueError(
+                    f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
+                )
+        # (case 2) If no variant is provided, get the first .gguf file in the repository
+        elif variant is None:
+            all_variants = [
+                f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
+            ]
+            if len(all_variants) == 0:
+                raise ValueError(
+                    f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
+                )
+            variant_name = all_variants[0]
+        else:
+            # (case 3) Find a single file ending with the variant name (case insensitive)
+            end_with_variant = [
+                f
+                for f in repo_files
+                if f.lower().endswith(f"{variant}.gguf".lower())
+                and "mmproj" not in f.lower()
+            ]
+            if len(end_with_variant) == 1:
+                variant_name = end_with_variant[0]
+            elif len(end_with_variant) > 1:
+                raise ValueError(
+                    f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
+                )
+            # (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
+            else:
+                sharded_files = [
+                    f
+                    for f in repo_files
+                    if f.endswith(".gguf")
+                    and f.lower().startswith(f"{variant}/".lower())
+                ]
+
+                if not sharded_files:
+                    raise ValueError(
+                        f"No .gguf files found for variant {variant}. {hint}"
+                    )
+
+                # Sort to ensure consistent ordering
+                sharded_files.sort()
+
+                # Use first file as primary (this is how llamacpp handles it)
+                variant_name = sharded_files[0]
+
+        core_files = {"variant": variant_name}
+
+        # If there is a mmproj file, add it to the patterns
+        if mmproj:
+            if mmproj not in repo_files:
+                raise ValueError(
+                    f"The provided mmproj file {mmproj} was not found in {checkpoint}."
+                )
+            core_files["mmproj"] = mmproj
+
+        return core_files, sharded_files
+
     def download_gguf(self, model_config: PullConfig) -> dict:
         """
         Downloads the GGUF file for the given model configuration.
+
+        For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
+        will be downloaded but only the first file will be returned for loading.
         """
 
-        # The variant parameter can be either:
-        # 1. A full GGUF filename (e.g. "model-Q4_0.gguf")
-        # 2. A quantization variant (e.g. "Q4_0")
-        # This code handles both cases by constructing the appropriate filename
+        # This code handles all cases by constructing the appropriate filename or pattern
         checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
-        hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
-        variant_name = (
-            variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
-        )
 
-        # If there is a mmproj file, add it to the patterns
-        expected_files = {"variant": variant_name}
-        if model_config.mmproj:
-            expected_files["mmproj"] = model_config.mmproj
+        # Identify the GGUF model files in the repository that match the variant
+        core_files, sharded_files = self.identify_gguf_models(
+            checkpoint, variant, model_config.mmproj
+        )
 
         # Download the files
         snapshot_folder = huggingface_hub.snapshot_download(
             repo_id=checkpoint,
-            allow_patterns=list(expected_files.values()),
+            allow_patterns=list(core_files.values()) + sharded_files,
         )
 
-        # Make sure we downloaded something
-        # If we didn't that can indicate that no patterns from allow_patterns match
-        # any files in the HF repo
-        if not os.path.exists(snapshot_folder):
-            raise ValueError(
-                "No patterns matched the variant parameter (CHECKPOINT:VARIANT). "
-                "Try again, providing the full filename of your target .gguf file as the variant."
-                " For example: Qwen/Qwen2.5-Coder-3B-Instruct-GGUF:"
-                "qwen2.5-coder-3b-instruct-q4_0.gguf"
-            )
-
-        # Ensure we downloaded all expected files while creating a dict of the downloaded files
-        snapshot_files = {}
-        for file in expected_files:
-            snapshot_files[file] = os.path.join(snapshot_folder, expected_files[file])
-            if expected_files[file].lower() not in [
-                name.lower() for name in os.listdir(snapshot_folder)
-            ]:
+        # Ensure we downloaded all expected files
+        for file in list(core_files.values()) + sharded_files:
+            expected_path = os.path.join(snapshot_folder, file)
+            if not os.path.exists(expected_path):
                 raise ValueError(
                     f"Hugging Face snapshot download for {model_config.checkpoint} "
-                    f"expected file {expected_files[file]} not found in {snapshot_folder}"
+                    f"expected file {file} not found at {expected_path}"
                 )
 
-        # Return a dict that points to the snapshot path of the downloaded GGUF files
-        return snapshot_files
+        # Return a dict of the full path of the core GGUF files
+        return {
+            file_name: os.path.join(snapshot_folder, file_path)
+            for file_name, file_path in core_files.items()
+        }
 
     def download_models(
         self,
  self,
@@ -249,6 +323,9 @@ class ModelManager:
 
         user_models[model_name] = new_user_model
 
+        # Ensure the cache directory exists before writing the file
+        os.makedirs(os.path.dirname(USER_MODELS_FILE), exist_ok=True)
+
         with open(USER_MODELS_FILE, mode="w", encoding="utf-8") as file:
             json.dump(user_models, fp=file)
 
lemonade_server/pydantic_models.py CHANGED
@@ -62,6 +62,7 @@ class ChatCompletionRequest(BaseModel):
     tools: list[dict] | None = None
     max_tokens: int | None = None
     max_completion_tokens: int | None = None
+    response_format: dict | None = None
 
 
 class ResponsesRequest(BaseModel):
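
With `response_format` on the request model, an OpenAI-style structured-output request can round-trip through validation instead of being rejected. A sketch of a client call (assumes OpenAI-compatible `json_object` semantics on the server side; endpoint, port, and model name are illustrative):

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="-")
    completion = client.chat.completions.create(
        model="Qwen3-8B-GGUF",  # illustrative model name
        messages=[{"role": "user", "content": "Reply with a JSON object."}],
        response_format={"type": "json_object"},
    )
    print(completion.choices[0].message.content)
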
lemonade_server/server_models.json CHANGED
@@ -203,5 +203,13 @@
         "reasoning": false,
         "suggested": true,
         "labels": ["vision"]
+    },
+    "Llama-4-Scout-17B-16E-Instruct-GGUF": {
+        "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
+        "mmproj": "mmproj-F16.gguf",
+        "recipe": "llamacpp",
+        "reasoning": false,
+        "suggested": true,
+        "labels": ["vision"]
     }
 }