lemonade-sdk 8.0.2__py3-none-any.whl → 8.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

lemonade/cli.py CHANGED
@@ -90,9 +90,9 @@ https://github.com/lemonade-sdk/lemonade/blob/main/docs/README.md""",
90
90
  )
91
91
 
92
92
  profiler_instances = [
93
- profiler(global_args[profiler.unique_name])
93
+ profiler(global_args[profiler.unique_name.replace("-", "_")])
94
94
  for profiler in profilers
95
- if global_args.get(profiler.unique_name, None) is not None
95
+ if global_args.get(profiler.unique_name.replace("-", "_"), None) is not None
96
96
  ]
97
97
 
98
98
  if len(evaluation_tools) > 0:
lemonade/profilers/profiler.py CHANGED
@@ -48,7 +48,10 @@ class Profiler(abc.ABC):
48
48
  This method is called so that the profiler can create its output files.
49
49
  The state is passed so that build info can be gathered and stats can be written.
50
50
  The timestamp can be used for filename in current working directory.
51
- The start times contain a list of tools and start times.
51
+ The start times parameter is a dict with the keys being the tools names and
52
+ the values being the time the tool started. There is an initial "warmup" key
53
+ that has a start time before the first tool and a "cool down" key that contains the
54
+ time when the last tool ended.
52
55
  """
53
56
 
54
57
 
lemonade/tools/oga/load.py CHANGED
@@ -1,12 +1,6 @@
1
1
  # onnxruntime_genai is not lint-friendly yet and PyLint can't
2
2
  # find any of the class methods
3
3
  # pylint: disable=no-member
4
- #
5
- # Model builder constraints:
6
- # 11/10/24 Need transformers <4.45.0 OR onnxruntime-genai 0.5.0 (which must be built from source)
7
- # (transformers v4.45 changes the format of the tokenizer.json file which will be supported in
8
- # onnxruntime-genai 0.5)
9
- #
10
4
 
11
5
  import argparse
12
6
  import os
@@ -51,8 +45,8 @@ def import_error_heler(e: Exception):
51
45
  """
52
46
  raise ImportError(
53
47
  f"{e}\n Please install lemonade-sdk with "
54
- "one of the llm-oga extras, for example:\n"
55
- "pip install lemonade-sdk[llm-oga-cpu]\n"
48
+ "one of the oga extras, for example:\n"
49
+ "pip install lemonade-sdk[dev,oga-cpu]\n"
56
50
  "See https://lemonade_server.ai/install_options.html for details"
57
51
  )
58
52
 
lemonade/tools/prompt.py CHANGED
@@ -176,12 +176,21 @@ class LLMPrompt(Tool):
176
176
 
177
177
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids
178
178
  if isinstance(input_ids, (list, str)):
179
- # OGA models return a list of tokens
179
+ # OGA models return a list of tokens (older versions)
180
180
  # Our llama.cpp adapter returns a string
181
181
  len_tokens_in = len(input_ids)
182
- else:
182
+ elif hasattr(input_ids, "shape"):
183
183
  # HF models return a 2-D tensor
184
- len_tokens_in = input_ids.shape[1]
184
+ # OGA models with newer versions may return numpy arrays
185
+ if len(input_ids.shape) == 1:
186
+ # 1-D array from newer OGA versions
187
+ len_tokens_in = len(input_ids)
188
+ else:
189
+ # 2-D tensor from HF models
190
+ len_tokens_in = input_ids.shape[1]
191
+ else:
192
+ # Fallback: try to get length directly
193
+ len_tokens_in = len(input_ids)
185
194
 
186
195
  len_tokens_out = []
187
196
  response_texts = []
@@ -202,9 +211,15 @@ class LLMPrompt(Tool):
202
211
  random_seed += 1
203
212
 
204
213
  # Flatten the input and response
205
- input_ids_array = (
206
- input_ids if isinstance(input_ids, (list, str)) else input_ids[0]
207
- )
214
+ if isinstance(input_ids, (list, str)):
215
+ input_ids_array = input_ids
216
+ elif hasattr(input_ids, "shape") and len(input_ids.shape) == 1:
217
+ # 1-D array from newer OGA versions - already flat
218
+ input_ids_array = input_ids
219
+ else:
220
+ # 2-D tensor from HF models - take first row
221
+ input_ids_array = input_ids[0]
222
+
208
223
  response_array = response if isinstance(response, str) else response[0]
209
224
 
210
225
  # Separate the prompt from the response
lemonade/tools/report/table.py CHANGED
@@ -74,6 +74,7 @@ class SimpleStat(TableColumn):
74
74
  align="center",
75
75
  omit_if_lean=False,
76
76
  wrap=None,
77
+ stat_fn=None,
77
78
  ):
78
79
  self.column_header = column_header
79
80
  self.stat = stat
@@ -81,6 +82,7 @@ class SimpleStat(TableColumn):
81
82
  self.align = align
82
83
  self.omit_if_lean = omit_if_lean
83
84
  self.wrap = wrap or self.default_wrap
85
+ self.stat_fn = stat_fn
84
86
 
85
87
  def get_str(self, build_stats, lean=False):
86
88
  if lean and self.omit_if_lean:
@@ -88,6 +90,8 @@ class SimpleStat(TableColumn):
88
90
  data = build_stats.get(self.stat, None)
89
91
  if data is None:
90
92
  return ""
93
+ if self.stat_fn:
94
+ data = self.stat_fn(data)
91
95
  cell_str = "\n".join(
92
96
  [_wrap(f"{x:{self.format_str}}", self.wrap) for x in _to_list(data)]
93
97
  )
@@ -233,6 +237,47 @@ class AdditionalStat(TableColumn):
233
237
  return "\n".join(cell_entry)
234
238
 
235
239
 
240
+ class DictListStat(TableColumn):
241
+ """
242
+ A statistic that is a list of dicts and values from a given list of keys will be
243
+ pulled out of each dict and placed in the cell
244
+ """
245
+
246
+ def __init__(
247
+ self,
248
+ column_header,
249
+ statistic_name,
250
+ key_format_list,
251
+ align="center",
252
+ omit_if_lean=False,
253
+ wrap=None,
254
+ ):
255
+ self.column_header = column_header
256
+ self.statistic_name = statistic_name
257
+ self.key_format_list = key_format_list
258
+ self.align = align
259
+ self.omit_if_lean = omit_if_lean
260
+ self.wrap = wrap or self.default_wrap
261
+
262
+ def get_str(self, build_stats, lean=False):
263
+ if lean and self.omit_if_lean:
264
+ return None
265
+ stat = build_stats.get(self.statistic_name, None)
266
+ if not stat:
267
+ return ""
268
+ cell_entry = []
269
+ for stat_dict in stat:
270
+ line = [
271
+ format_str.format(stat_dict[key])
272
+ for key, format_str in self.key_format_list
273
+ ]
274
+ cell_entry.append(" ".join(line))
275
+ return "\n".join(cell_entry)
276
+
277
+ def get_keys(self):
278
+ return [self.statistic_name]
279
+
280
+
236
281
  ################################################################################
237
282
  # ABSTRACT BASE CLASS FOR DEFINING A TABLE
238
283
  ################################################################################
@@ -350,6 +395,28 @@ class Table(ABC):
350
395
  headers.append(column.column_header)
351
396
  col_align += (column.align,)
352
397
 
398
+ # Stat column headers
399
+ stat_columns = self.table_descriptor.get("stat_columns", [])
400
+ stat_columns_include = []
401
+ for column in stat_columns:
402
+ # Check to see that at least one build has data for the column
403
+ keep_column = False
404
+ if not (self.lean and column.omit_if_lean):
405
+ keys = column.get_keys()
406
+ for build_stats in self.all_stats:
407
+ found = [(key in build_stats) for key in keys]
408
+ if any(found):
409
+ keep_column = True
410
+ headers.append(column.column_header)
411
+ col_align += (column.align,)
412
+ break
413
+ stat_columns_include.append(keep_column)
414
+ stat_columns = [
415
+ column
416
+ for column, include in zip(stat_columns, stat_columns_include)
417
+ if include
418
+ ]
419
+
353
420
  # Final headers
354
421
  last_columns = self.table_descriptor.get("last_columns", [])
355
422
  for column in last_columns:
@@ -386,6 +453,12 @@ class Table(ABC):
386
453
  if entry_str is not None:
387
454
  row.append(entry_str)
388
455
 
456
+ # Per stat columns
457
+ for entry in stat_columns:
458
+ entry_str = entry.get_str(build_stats, self.lean)
459
+ if entry_str is not None:
460
+ row.append(entry_str)
461
+
389
462
  # Final columns
390
463
  for entry in last_columns:
391
464
  entry_str = entry.get_str(build_stats, self.lean)
@@ -514,6 +587,12 @@ class LemonadePerfTable(Table):
514
587
  Keys.STD_DEV_TOKENS_PER_SECOND,
515
588
  ".2f",
516
589
  ),
590
+ SimpleStat(
591
+ _wrap("Total Generated Tokens", 9),
592
+ Keys.RESPONSE_TOKENS,
593
+ "d",
594
+ stat_fn=sum,
595
+ ),
517
596
  SimpleStat(
518
597
  _wrap("Memory Used (GB)", 8), Keys.MAX_MEMORY_USED_GBYTE, ".3f"
519
598
  ),
@@ -537,6 +616,7 @@ class LemonadePerfTable(Table):
537
616
  )
538
617
  ],
539
618
  },
619
+ "stat_columns": [],
540
620
  "last_columns": [
541
621
  SimpleStat(
542
622
  "System Info",
lemonade/tools/server/llamacpp.py CHANGED
@@ -210,15 +210,20 @@ def _log_subprocess_output(
210
210
  """
211
211
 
212
212
  if process.stdout:
213
- for line in iter(process.stdout.readline, ""):
214
- if line:
215
- line_stripped = line.strip()
216
- logging.debug("%s: %s", prefix, line_stripped)
213
+ try:
214
+ for line in iter(process.stdout.readline, ""):
215
+ if line:
216
+ line_stripped = line.strip()
217
+ logging.debug("%s: %s", prefix, line_stripped)
217
218
 
218
- telemetry.parse_telemetry_line(line_stripped)
219
+ telemetry.parse_telemetry_line(line_stripped)
219
220
 
220
- if process.poll() is not None:
221
- break
221
+ if process.poll() is not None:
222
+ break
223
+ except UnicodeDecodeError as e:
224
+ logging.debug("Unicode decode error reading subprocess output: %s", str(e))
225
+ except Exception as e: # pylint: disable=broad-exception-caught
226
+ logging.error("Unexpected error reading subprocess output: %s", str(e))
222
227
 
223
228
 
224
229
  def _wait_for_load(llama_server_process: subprocess.Popen, port: int):
@@ -287,6 +292,8 @@ def _launch_llama_subprocess(
287
292
  stdout=subprocess.PIPE,
288
293
  stderr=subprocess.STDOUT,
289
294
  text=True,
295
+ encoding="utf-8",
296
+ errors="replace",
290
297
  bufsize=1,
291
298
  env=env,
292
299
  )
@@ -383,6 +390,10 @@ def server_load(model_config: PullConfig, telemetry: LlamaTelemetry):
383
390
  f"Loading {model_config.model_name} on GPU didn't work, re-attempting on CPU"
384
391
  )
385
392
 
393
+ if os.environ.get("LEMONADE_LLAMACPP_NO_FALLBACK"):
394
+ # Used for testing, when the test should fail if GPU didn't work
395
+ raise Exception("llamacpp GPU loading failed")
396
+
386
397
  llama_server_process = _launch_llama_subprocess(
387
398
  snapshot_files, use_gpu=False, telemetry=telemetry
388
399
  )
lemonade/tools/server/static/webapp.html CHANGED
@@ -110,20 +110,53 @@
110
110
  </footer>
111
111
  <script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
112
112
  <script> // Tab switching logic
113
- function showTab(tab) {
113
+ function showTab(tab, updateHash = true) {
114
114
  document.getElementById('tab-chat').classList.remove('active');
115
115
  document.getElementById('tab-models').classList.remove('active');
116
116
  document.getElementById('content-chat').classList.remove('active');
117
117
  document.getElementById('content-models').classList.remove('active');
118
118
  if (tab === 'chat') {
119
119
  document.getElementById('tab-chat').classList.add('active');
120
- document.getElementById('content-chat').classList.add('active');
120
+ document.getElementById('content-chat').classList.add('active');
121
+ if (updateHash) {
122
+ window.location.hash = 'llm-chat';
123
+ }
121
124
  } else {
122
125
  document.getElementById('tab-models').classList.add('active');
123
- document.getElementById('content-models').classList.add('active');
126
+ document.getElementById('content-models').classList.add('active');
127
+ if (updateHash) {
128
+ window.location.hash = 'model-management';
129
+ }
124
130
  }
125
131
  }
126
132
 
133
+ // Handle hash changes for anchor navigation
134
+ function handleHashChange() {
135
+ const hash = window.location.hash.slice(1); // Remove the # symbol
136
+ if (hash === 'llm-chat') {
137
+ showTab('chat', false);
138
+ } else if (hash === 'model-management') {
139
+ showTab('models', false);
140
+ }
141
+ }
142
+
143
+ // Initialize tab based on URL hash on page load
144
+ function initializeTabFromHash() {
145
+ const hash = window.location.hash.slice(1);
146
+ if (hash === 'llm-chat') {
147
+ showTab('chat', false);
148
+ } else if (hash === 'model-management') {
149
+ showTab('models', false);
150
+ }
151
+ // If no hash or unrecognized hash, keep default (chat tab is already active)
152
+ }
153
+
154
+ // Listen for hash changes
155
+ window.addEventListener('hashchange', handleHashChange);
156
+
157
+ // Initialize on page load
158
+ document.addEventListener('DOMContentLoaded', initializeTabFromHash);
159
+
127
160
  // Toggle Add Model form
128
161
  function toggleAddModelForm() {
129
162
  const form = document.querySelector('.model-mgmt-register-form');
lemonade/tools/server/tray.py CHANGED
@@ -197,11 +197,17 @@ class LemonadeTray(SystemTray):
197
197
  """
198
198
  webbrowser.open("https://lemonade-server.ai/docs/")
199
199
 
200
+ def open_llm_chat(self, _, __):
201
+ """
202
+ Open the LLM chat in the default web browser.
203
+ """
204
+ webbrowser.open(f"http://localhost:{self.port}/#llm-chat")
205
+
200
206
  def open_model_manager(self, _, __):
201
207
  """
202
208
  Open the model manager in the default web browser.
203
209
  """
204
- webbrowser.open(f"http://localhost:{self.port}/")
210
+ webbrowser.open(f"http://localhost:{self.port}/#model-management")
205
211
 
206
212
  def check_server_state(self):
207
213
  """
@@ -339,16 +345,25 @@ class LemonadeTray(SystemTray):
339
345
 
340
346
  # Create menu items for all downloaded models
341
347
  model_menu_items = []
342
- for model_name, _ in self.downloaded_models.items():
343
- # Create a function that returns the lambda to properly capture the variables
344
- def create_handler(mod):
345
- return lambda icon, item: self.load_llm(icon, item, mod)
348
+ if not self.downloaded_models:
349
+ model_menu_items.append(
350
+ MenuItem(
351
+ "No models available: Use the Model Manager to pull models",
352
+ None,
353
+ enabled=False,
354
+ )
355
+ )
356
+ else:
357
+ for model_name, _ in self.downloaded_models.items():
358
+ # Create a function that returns the lambda to properly capture the variables
359
+ def create_handler(mod):
360
+ return lambda icon, item: self.load_llm(icon, item, mod)
346
361
 
347
- model_item = MenuItem(model_name, create_handler(model_name))
362
+ model_item = MenuItem(model_name, create_handler(model_name))
348
363
 
349
- # Set checked property instead of modifying the text
350
- model_item.checked = model_name == self.loaded_llm
351
- model_menu_items.append(model_item)
364
+ # Set checked property instead of modifying the text
365
+ model_item.checked = model_name == self.loaded_llm
366
+ model_menu_items.append(model_item)
352
367
 
353
368
  load_submenu = Menu(*model_menu_items)
354
369
 
@@ -391,6 +406,7 @@ class LemonadeTray(SystemTray):
391
406
  )
392
407
 
393
408
  items.append(MenuItem("Documentation", self.open_documentation))
409
+ items.append(MenuItem("LLM Chat", self.open_llm_chat))
394
410
  items.append(MenuItem("Model Manager", self.open_model_manager))
395
411
  items.append(MenuItem("Show Logs", self.show_logs))
396
412
  items.append(Menu.SEPARATOR)
lemonade/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "8.0.2"
1
+ __version__ = "8.0.3"
lemonade_sdk-8.0.2.dist-info/METADATA → lemonade_sdk-8.0.3.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.0.2
3
+ Version: 8.0.3
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.12
@@ -26,45 +26,49 @@ Requires-Dist: openai>=1.81.0
26
26
  Requires-Dist: transformers<=4.51.3
27
27
  Requires-Dist: jinja2
28
28
  Requires-Dist: tabulate
29
- Requires-Dist: huggingface-hub==0.30.2
29
+ Requires-Dist: sentencepiece
30
+ Requires-Dist: huggingface-hub==0.33.0
31
+ Provides-Extra: oga-hybrid
32
+ Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
33
+ Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
34
+ Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
35
+ Provides-Extra: oga-cpu
36
+ Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
37
+ Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
38
+ Provides-Extra: dev
39
+ Requires-Dist: torch>=2.6.0; extra == "dev"
40
+ Requires-Dist: accelerate; extra == "dev"
41
+ Requires-Dist: datasets; extra == "dev"
42
+ Requires-Dist: pandas>=1.5.3; extra == "dev"
43
+ Requires-Dist: matplotlib; extra == "dev"
44
+ Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
45
+ Requires-Dist: lm-eval[api]; extra == "dev"
30
46
  Provides-Extra: oga-hybrid-minimal
31
- Requires-Dist: onnx==1.16.1; extra == "oga-hybrid-minimal"
32
- Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
33
- Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
47
+ Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
34
48
  Provides-Extra: oga-cpu-minimal
35
- Requires-Dist: onnxruntime-genai==0.6.0; extra == "oga-cpu-minimal"
36
- Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
49
+ Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
37
50
  Provides-Extra: llm
38
- Requires-Dist: torch>=2.6.0; extra == "llm"
39
- Requires-Dist: accelerate; extra == "llm"
40
- Requires-Dist: sentencepiece; extra == "llm"
41
- Requires-Dist: datasets; extra == "llm"
42
- Requires-Dist: pandas>=1.5.3; extra == "llm"
43
- Requires-Dist: matplotlib; extra == "llm"
44
- Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
45
- Requires-Dist: lm-eval[api]; extra == "llm"
51
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm"
46
52
  Provides-Extra: llm-oga-cpu
47
- Requires-Dist: lemonade-sdk[oga-cpu-minimal]; extra == "llm-oga-cpu"
48
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
53
+ Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
49
54
  Provides-Extra: llm-oga-igpu
50
55
  Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
51
56
  Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
52
57
  Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
53
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-igpu"
58
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
54
59
  Provides-Extra: llm-oga-cuda
55
- Requires-Dist: onnxruntime-genai-cuda==0.6.0; extra == "llm-oga-cuda"
56
- Requires-Dist: onnxruntime-gpu<1.22.0,>=1.19.1; extra == "llm-oga-cuda"
57
- Requires-Dist: transformers<4.45.0; extra == "llm-oga-cuda"
58
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cuda"
60
+ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
61
+ Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
62
+ Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
63
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
59
64
  Provides-Extra: llm-oga-npu
60
65
  Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
61
66
  Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
62
67
  Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
63
68
  Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
64
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-npu"
69
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
65
70
  Provides-Extra: llm-oga-hybrid
66
- Requires-Dist: lemonade-sdk[oga-hybrid-minimal]; extra == "llm-oga-hybrid"
67
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
71
+ Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
68
72
  Provides-Extra: llm-oga-unified
69
73
  Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
70
74
  Dynamic: author-email
lemonade_sdk-8.0.2.dist-info/RECORD → lemonade_sdk-8.0.3.dist-info/RECORD RENAMED
@@ -1,10 +1,10 @@
1
1
  lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
2
2
  lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
3
3
  lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
4
- lemonade/cli.py,sha256=XzptHh6LTl5OdGRnxiLykQ8QBl2rQmhWH5w0KPJVyY4,4359
4
+ lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=hTIZ_8cc-ggqcFeOYQQKOHudFQCQNQlM4ZltuYIIjD4,22
7
+ lemonade/version.py,sha256=GImAlzwPDxsACkYFf5rTrX8QMH23tcqdm6vgjfFYD10,22
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
10
  lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
@@ -17,7 +17,7 @@ lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD4
17
17
  lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
18
18
  lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
19
19
  lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
20
- lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
20
+ lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
21
21
  lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
22
22
  lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
23
23
  lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
@@ -26,7 +26,7 @@ lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9
26
26
  lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
27
27
  lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
28
28
  lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
29
- lemonade/tools/prompt.py,sha256=AT3p5rCGHEs9ozeGxwWl07iKF-mgLxFOkYLjU2btFHs,8638
29
+ lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
30
30
  lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
31
31
  lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
32
32
  lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
@@ -35,36 +35,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
35
35
  lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
36
36
  lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
38
- lemonade/tools/oga/load.py,sha256=7Sdf6PFPrqbadPabyJb_uPRUIP09qj21ZYdXz47MqsE,28570
38
+ lemonade/tools/oga/load.py,sha256=xSP0DWoGd5zBRozSafj1MMyIQyHJuIRj_vNlCTx8mfs,28309
39
39
  lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
40
40
  lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
41
  lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
42
42
  lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
43
43
  lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
45
- lemonade/tools/report/table.py,sha256=VkTv5Vd0HOXudEthCBnFMrWK73Dm2AQP2_B83vEKBzI,25129
45
+ lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
46
46
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
- lemonade/tools/server/llamacpp.py,sha256=aDVjjkU2Z2PN25Uuy-lk6ByKPR8kg5r2X-YsVSs4vi8,15624
47
+ lemonade/tools/server/llamacpp.py,sha256=vjFNelm_VyKBBgWmltsAwLI7ncQ9AwVFQD7krZnF42w,16199
48
48
  lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
49
49
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
50
- lemonade/tools/server/tray.py,sha256=OI2uCncs8UgnYFLCKHHXq06RETO2RFEcn4xLzMq-q_c,16675
50
+ lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
51
51
  lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
52
52
  lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
53
53
  lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
54
- lemonade/tools/server/static/webapp.html,sha256=im7YQkwvbuqrbO-sLhStVqtA6B7HKAn2azZka1KoeJQ,21260
54
+ lemonade/tools/server/static/webapp.html,sha256=kPzORaogVRdFQewXyNI_JaH2ZZCTaq5zfMSyzuoFTuA,22414
55
55
  lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
56
56
  lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
57
57
  lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
58
58
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
59
59
  lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
60
- lemonade_sdk-8.0.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
- lemonade_sdk-8.0.2.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
60
+ lemonade_sdk-8.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
+ lemonade_sdk-8.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
62
62
  lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
63
- lemonade_server/model_manager.py,sha256=HqbahDMRv1x8jyQj4pa1rXanlPmcCykt8tlI6WfaxjE,13023
63
+ lemonade_server/model_manager.py,sha256=Yvlsl0wipKfryKULH5ASQ9INhLQXPq9dTGQVBXf2_h0,16167
64
64
  lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
65
- lemonade_server/server_models.json,sha256=wTK_H9XDHLxqMWQJqbBsJwm50PhOR4gURyVj9Jm35PQ,6992
66
- lemonade_sdk-8.0.2.dist-info/METADATA,sha256=hS5Xn5Pjq0RbdLlhedz3HQMCvkRrMWFoAI0Mao4cHwg,8225
67
- lemonade_sdk-8.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
- lemonade_sdk-8.0.2.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
- lemonade_sdk-8.0.2.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
- lemonade_sdk-8.0.2.dist-info/RECORD,,
65
+ lemonade_server/server_models.json,sha256=O5zk94gH_zRq6GSwbqvi2SNwx51eY9uqgAl_kxTi0iM,7271
66
+ lemonade_sdk-8.0.3.dist-info/METADATA,sha256=WesWziLri9jQjZILRENliiJbggTVF8LmXKVIERInVbE,8285
67
+ lemonade_sdk-8.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ lemonade_sdk-8.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
+ lemonade_sdk-8.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
+ lemonade_sdk-8.0.3.dist-info/RECORD,,
lemonade_server/model_manager.py CHANGED
@@ -102,57 +102,131 @@ class ModelManager:
102
102
  """
103
103
  return self.filter_models_by_backend(self.downloaded_models)
104
104
 
105
+ def identify_gguf_models(
106
+ self, checkpoint: str, variant: str, mmproj: str
107
+ ) -> tuple[dict, list[str]]:
108
+ """
109
+ Identifies the GGUF model files in the repository that match the variant.
110
+ """
111
+
112
+ hint = """
113
+ The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
114
+
115
+ The VARIANT format can be one of several types:
116
+ 1. Full filename: exact file to download
117
+ 2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
118
+ 3. Quantization variant: find a single file ending with the variant name (case insensitive)
119
+ 4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
120
+
121
+ Examples:
122
+ - "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
123
+ - "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
124
+ - "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
125
+ - "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
126
+ """
127
+
128
+ repo_files = huggingface_hub.list_repo_files(checkpoint)
129
+ sharded_files = []
130
+
131
+ # (case 1) If variant ends in .gguf, use it directly
132
+ if variant and variant.endswith(".gguf"):
133
+ variant_name = variant
134
+ if variant_name not in repo_files:
135
+ raise ValueError(
136
+ f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
137
+ )
138
+ # (case 2) If no variant is provided, get the first .gguf file in the repository
139
+ elif variant is None:
140
+ all_variants = [
141
+ f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
142
+ ]
143
+ if len(all_variants) == 0:
144
+ raise ValueError(
145
+ f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
146
+ )
147
+ variant_name = all_variants[0]
148
+ else:
149
+ # (case 3) Find a single file ending with the variant name (case insensitive)
150
+ end_with_variant = [
151
+ f
152
+ for f in repo_files
153
+ if f.lower().endswith(f"{variant}.gguf".lower())
154
+ and "mmproj" not in f.lower()
155
+ ]
156
+ if len(end_with_variant) == 1:
157
+ variant_name = end_with_variant[0]
158
+ elif len(end_with_variant) > 1:
159
+ raise ValueError(
160
+ f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
161
+ )
162
+ # (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
163
+ else:
164
+ sharded_files = [
165
+ f
166
+ for f in repo_files
167
+ if f.endswith(".gguf")
168
+ and f.lower().startswith(f"{variant}/".lower())
169
+ ]
170
+
171
+ if not sharded_files:
172
+ raise ValueError(
173
+ f"No .gguf files found for variant {variant}. {hint}"
174
+ )
175
+
176
+ # Sort to ensure consistent ordering
177
+ sharded_files.sort()
178
+
179
+ # Use first file as primary (this is how llamacpp handles it)
180
+ variant_name = sharded_files[0]
181
+
182
+ core_files = {"variant": variant_name}
183
+
184
+ # If there is a mmproj file, add it to the patterns
185
+ if mmproj:
186
+ if mmproj not in repo_files:
187
+ raise ValueError(
188
+ f"The provided mmproj file {mmproj} was not found in {checkpoint}."
189
+ )
190
+ core_files["mmproj"] = mmproj
191
+
192
+ return core_files, sharded_files
193
+
105
194
  def download_gguf(self, model_config: PullConfig) -> dict:
106
195
  """
107
196
  Downloads the GGUF file for the given model configuration.
197
+
198
+ For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
199
+ will be downloaded but only the first file will be returned for loading.
108
200
  """
109
201
 
110
- # The variant parameter can be either:
111
- # 1. A full GGUF filename (e.g. "model-Q4_0.gguf")
112
- # 2. A quantization variant (e.g. "Q4_0")
113
- # This code handles both cases by constructing the appropriate filename
202
+ # This code handles all cases by constructing the appropriate filename or pattern
114
203
  checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
115
- hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
116
- variant_name = (
117
- variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
118
- )
119
204
 
120
- # If there is a mmproj file, add it to the patterns
121
- expected_files = {"variant": variant_name}
122
- if model_config.mmproj:
123
- expected_files["mmproj"] = model_config.mmproj
205
+ # Identify the GGUF model files in the repository that match the variant
206
+ core_files, sharded_files = self.identify_gguf_models(
207
+ checkpoint, variant, model_config.mmproj
208
+ )
124
209
 
125
210
  # Download the files
126
211
  snapshot_folder = huggingface_hub.snapshot_download(
127
212
  repo_id=checkpoint,
128
- allow_patterns=list(expected_files.values()),
213
+ allow_patterns=list(core_files.values()) + sharded_files,
129
214
  )
130
215
 
131
- # Make sure we downloaded something
132
- # If we didn't that can indicate that no patterns from allow_patterns match
133
- # any files in the HF repo
134
- if not os.path.exists(snapshot_folder):
135
- raise ValueError(
136
- "No patterns matched the variant parameter (CHECKPOINT:VARIANT). "
137
- "Try again, providing the full filename of your target .gguf file as the variant."
138
- " For example: Qwen/Qwen2.5-Coder-3B-Instruct-GGUF:"
139
- "qwen2.5-coder-3b-instruct-q4_0.gguf"
140
- )
141
-
142
- # Ensure we downloaded all expected files while creating a dict of the downloaded files
143
- snapshot_files = {}
144
- for file in expected_files:
145
- snapshot_files[file] = os.path.join(snapshot_folder, expected_files[file])
146
- if expected_files[file].lower() not in [
147
- name.lower() for name in os.listdir(snapshot_folder)
148
- ]:
216
+ # Ensure we downloaded all expected files
217
+ for file in list(core_files.values()) + sharded_files:
218
+ expected_path = os.path.join(snapshot_folder, file)
219
+ if not os.path.exists(expected_path):
149
220
  raise ValueError(
150
221
  f"Hugging Face snapshot download for {model_config.checkpoint} "
151
- f"expected file {expected_files[file]} not found in {snapshot_folder}"
222
+ f"expected file {file} not found at {expected_path}"
152
223
  )
153
224
 
154
- # Return a dict that points to the snapshot path of the downloaded GGUF files
155
- return snapshot_files
225
+ # Return a dict of the full path of the core GGUF files
226
+ return {
227
+ file_name: os.path.join(snapshot_folder, file_path)
228
+ for file_name, file_path in core_files.items()
229
+ }
156
230
 
157
231
  def download_models(
158
232
  self,
@@ -249,6 +323,9 @@ class ModelManager:
249
323
 
250
324
  user_models[model_name] = new_user_model
251
325
 
326
+ # Ensure the cache directory exists before writing the file
327
+ os.makedirs(os.path.dirname(USER_MODELS_FILE), exist_ok=True)
328
+
252
329
  with open(USER_MODELS_FILE, mode="w", encoding="utf-8") as file:
253
330
  json.dump(user_models, fp=file)
254
331
 
lemonade_server/server_models.json CHANGED
@@ -203,5 +203,13 @@
203
203
  "reasoning": false,
204
204
  "suggested": true,
205
205
  "labels": ["vision"]
206
+ },
207
+ "Llama-4-Scout-17B-16E-Instruct-GGUF": {
208
+ "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
209
+ "mmproj": "mmproj-F16.gguf",
210
+ "recipe": "llamacpp",
211
+ "reasoning": false,
212
+ "suggested": true,
213
+ "labels": ["vision"]
206
214
  }
207
215
  }