lemonade-sdk 8.0.1__py3-none-any.whl → 8.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/cli.py +2 -2
- lemonade/profilers/profiler.py +4 -1
- lemonade/tools/oga/load.py +2 -8
- lemonade/tools/prompt.py +21 -6
- lemonade/tools/report/table.py +89 -0
- lemonade/tools/server/llamacpp.py +18 -7
- lemonade/tools/server/static/webapp.html +36 -3
- lemonade/tools/server/tray.py +44 -15
- lemonade/version.py +1 -1
- {lemonade_sdk-8.0.1.dist-info → lemonade_sdk-8.0.3.dist-info}/METADATA +29 -25
- {lemonade_sdk-8.0.1.dist-info → lemonade_sdk-8.0.3.dist-info}/RECORD +20 -20
- lemonade_server/cli.py +14 -12
- lemonade_server/model_manager.py +111 -34
- lemonade_server/pydantic_models.py +1 -0
- lemonade_server/server_models.json +8 -0
- {lemonade_sdk-8.0.1.dist-info → lemonade_sdk-8.0.3.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.1.dist-info → lemonade_sdk-8.0.3.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.1.dist-info → lemonade_sdk-8.0.3.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.1.dist-info → lemonade_sdk-8.0.3.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.1.dist-info → lemonade_sdk-8.0.3.dist-info}/top_level.txt +0 -0
lemonade/cli.py
CHANGED
|
@@ -90,9 +90,9 @@ https://github.com/lemonade-sdk/lemonade/blob/main/docs/README.md""",
|
|
|
90
90
|
)
|
|
91
91
|
|
|
92
92
|
profiler_instances = [
|
|
93
|
-
profiler(global_args[profiler.unique_name])
|
|
93
|
+
profiler(global_args[profiler.unique_name.replace("-", "_")])
|
|
94
94
|
for profiler in profilers
|
|
95
|
-
if global_args.get(profiler.unique_name, None) is not None
|
|
95
|
+
if global_args.get(profiler.unique_name.replace("-", "_"), None) is not None
|
|
96
96
|
]
|
|
97
97
|
|
|
98
98
|
if len(evaluation_tools) > 0:
|
lemonade/profilers/profiler.py
CHANGED
|
@@ -48,7 +48,10 @@ class Profiler(abc.ABC):
|
|
|
48
48
|
This method is called so that the profiler can create its output files.
|
|
49
49
|
The state is passed so that build info can be gathered and stats can be written.
|
|
50
50
|
The timestamp can be used for filename in current working directory.
|
|
51
|
-
The start times
|
|
51
|
+
The start times parameter is a dict with the keys being the tools names and
|
|
52
|
+
the values being the time the tool started. There is an initial "warmup" key
|
|
53
|
+
that has a start time before the first tool and a "cool down" key that contains the
|
|
54
|
+
time when the last tool ended.
|
|
52
55
|
"""
|
|
53
56
|
|
|
54
57
|
|
lemonade/tools/oga/load.py
CHANGED
|
@@ -1,12 +1,6 @@
|
|
|
1
1
|
# onnxruntime_genai is not lint-friendly yet and PyLint can't
|
|
2
2
|
# find any of the class methods
|
|
3
3
|
# pylint: disable=no-member
|
|
4
|
-
#
|
|
5
|
-
# Model builder constraints:
|
|
6
|
-
# 11/10/24 Need transformers <4.45.0 OR onnxruntime-genai 0.5.0 (which must be built from source)
|
|
7
|
-
# (transformers v4.45 changes the format of the tokenizer.json file which will be supported in
|
|
8
|
-
# onnxruntime-genai 0.5)
|
|
9
|
-
#
|
|
10
4
|
|
|
11
5
|
import argparse
|
|
12
6
|
import os
|
|
@@ -51,8 +45,8 @@ def import_error_heler(e: Exception):
|
|
|
51
45
|
"""
|
|
52
46
|
raise ImportError(
|
|
53
47
|
f"{e}\n Please install lemonade-sdk with "
|
|
54
|
-
"one of the
|
|
55
|
-
"pip install lemonade-sdk[
|
|
48
|
+
"one of the oga extras, for example:\n"
|
|
49
|
+
"pip install lemonade-sdk[dev,oga-cpu]\n"
|
|
56
50
|
"See https://lemonade_server.ai/install_options.html for details"
|
|
57
51
|
)
|
|
58
52
|
|
lemonade/tools/prompt.py
CHANGED
|
@@ -176,12 +176,21 @@ class LLMPrompt(Tool):
|
|
|
176
176
|
|
|
177
177
|
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
|
|
178
178
|
if isinstance(input_ids, (list, str)):
|
|
179
|
-
# OGA models return a list of tokens
|
|
179
|
+
# OGA models return a list of tokens (older versions)
|
|
180
180
|
# Our llama.cpp adapter returns a string
|
|
181
181
|
len_tokens_in = len(input_ids)
|
|
182
|
-
|
|
182
|
+
elif hasattr(input_ids, "shape"):
|
|
183
183
|
# HF models return a 2-D tensor
|
|
184
|
-
|
|
184
|
+
# OGA models with newer versions may return numpy arrays
|
|
185
|
+
if len(input_ids.shape) == 1:
|
|
186
|
+
# 1-D array from newer OGA versions
|
|
187
|
+
len_tokens_in = len(input_ids)
|
|
188
|
+
else:
|
|
189
|
+
# 2-D tensor from HF models
|
|
190
|
+
len_tokens_in = input_ids.shape[1]
|
|
191
|
+
else:
|
|
192
|
+
# Fallback: try to get length directly
|
|
193
|
+
len_tokens_in = len(input_ids)
|
|
185
194
|
|
|
186
195
|
len_tokens_out = []
|
|
187
196
|
response_texts = []
|
|
@@ -202,9 +211,15 @@ class LLMPrompt(Tool):
|
|
|
202
211
|
random_seed += 1
|
|
203
212
|
|
|
204
213
|
# Flatten the input and response
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
)
|
|
214
|
+
if isinstance(input_ids, (list, str)):
|
|
215
|
+
input_ids_array = input_ids
|
|
216
|
+
elif hasattr(input_ids, "shape") and len(input_ids.shape) == 1:
|
|
217
|
+
# 1-D array from newer OGA versions - already flat
|
|
218
|
+
input_ids_array = input_ids
|
|
219
|
+
else:
|
|
220
|
+
# 2-D tensor from HF models - take first row
|
|
221
|
+
input_ids_array = input_ids[0]
|
|
222
|
+
|
|
208
223
|
response_array = response if isinstance(response, str) else response[0]
|
|
209
224
|
|
|
210
225
|
# Separate the prompt from the response
|
lemonade/tools/report/table.py
CHANGED
|
@@ -7,6 +7,7 @@ from tabulate import tabulate
|
|
|
7
7
|
import lemonade.common.build as build
|
|
8
8
|
import lemonade.common.filesystem as fs
|
|
9
9
|
from lemonade.cache import Keys
|
|
10
|
+
from lemonade.tools.accuracy import LMEvalHarness
|
|
10
11
|
from lemonade.tools.huggingface.bench import HuggingfaceBench
|
|
11
12
|
from lemonade.tools.llamacpp.bench import LlamaCppBench
|
|
12
13
|
from lemonade.tools.mmlu import AccuracyMMLU
|
|
@@ -73,6 +74,7 @@ class SimpleStat(TableColumn):
|
|
|
73
74
|
align="center",
|
|
74
75
|
omit_if_lean=False,
|
|
75
76
|
wrap=None,
|
|
77
|
+
stat_fn=None,
|
|
76
78
|
):
|
|
77
79
|
self.column_header = column_header
|
|
78
80
|
self.stat = stat
|
|
@@ -80,6 +82,7 @@ class SimpleStat(TableColumn):
|
|
|
80
82
|
self.align = align
|
|
81
83
|
self.omit_if_lean = omit_if_lean
|
|
82
84
|
self.wrap = wrap or self.default_wrap
|
|
85
|
+
self.stat_fn = stat_fn
|
|
83
86
|
|
|
84
87
|
def get_str(self, build_stats, lean=False):
|
|
85
88
|
if lean and self.omit_if_lean:
|
|
@@ -87,6 +90,8 @@ class SimpleStat(TableColumn):
|
|
|
87
90
|
data = build_stats.get(self.stat, None)
|
|
88
91
|
if data is None:
|
|
89
92
|
return ""
|
|
93
|
+
if self.stat_fn:
|
|
94
|
+
data = self.stat_fn(data)
|
|
90
95
|
cell_str = "\n".join(
|
|
91
96
|
[_wrap(f"{x:{self.format_str}}", self.wrap) for x in _to_list(data)]
|
|
92
97
|
)
|
|
@@ -232,6 +237,47 @@ class AdditionalStat(TableColumn):
|
|
|
232
237
|
return "\n".join(cell_entry)
|
|
233
238
|
|
|
234
239
|
|
|
240
|
+
class DictListStat(TableColumn):
|
|
241
|
+
"""
|
|
242
|
+
A statistic that is a list of dicts and values from a given list of keys will be
|
|
243
|
+
pulled out of each dict and placed in the cell
|
|
244
|
+
"""
|
|
245
|
+
|
|
246
|
+
def __init__(
|
|
247
|
+
self,
|
|
248
|
+
column_header,
|
|
249
|
+
statistic_name,
|
|
250
|
+
key_format_list,
|
|
251
|
+
align="center",
|
|
252
|
+
omit_if_lean=False,
|
|
253
|
+
wrap=None,
|
|
254
|
+
):
|
|
255
|
+
self.column_header = column_header
|
|
256
|
+
self.statistic_name = statistic_name
|
|
257
|
+
self.key_format_list = key_format_list
|
|
258
|
+
self.align = align
|
|
259
|
+
self.omit_if_lean = omit_if_lean
|
|
260
|
+
self.wrap = wrap or self.default_wrap
|
|
261
|
+
|
|
262
|
+
def get_str(self, build_stats, lean=False):
|
|
263
|
+
if lean and self.omit_if_lean:
|
|
264
|
+
return None
|
|
265
|
+
stat = build_stats.get(self.statistic_name, None)
|
|
266
|
+
if not stat:
|
|
267
|
+
return ""
|
|
268
|
+
cell_entry = []
|
|
269
|
+
for stat_dict in stat:
|
|
270
|
+
line = [
|
|
271
|
+
format_str.format(stat_dict[key])
|
|
272
|
+
for key, format_str in self.key_format_list
|
|
273
|
+
]
|
|
274
|
+
cell_entry.append(" ".join(line))
|
|
275
|
+
return "\n".join(cell_entry)
|
|
276
|
+
|
|
277
|
+
def get_keys(self):
|
|
278
|
+
return [self.statistic_name]
|
|
279
|
+
|
|
280
|
+
|
|
235
281
|
################################################################################
|
|
236
282
|
# ABSTRACT BASE CLASS FOR DEFINING A TABLE
|
|
237
283
|
################################################################################
|
|
@@ -349,6 +395,28 @@ class Table(ABC):
|
|
|
349
395
|
headers.append(column.column_header)
|
|
350
396
|
col_align += (column.align,)
|
|
351
397
|
|
|
398
|
+
# Stat column headers
|
|
399
|
+
stat_columns = self.table_descriptor.get("stat_columns", [])
|
|
400
|
+
stat_columns_include = []
|
|
401
|
+
for column in stat_columns:
|
|
402
|
+
# Check to see that at least one build has data for the column
|
|
403
|
+
keep_column = False
|
|
404
|
+
if not (self.lean and column.omit_if_lean):
|
|
405
|
+
keys = column.get_keys()
|
|
406
|
+
for build_stats in self.all_stats:
|
|
407
|
+
found = [(key in build_stats) for key in keys]
|
|
408
|
+
if any(found):
|
|
409
|
+
keep_column = True
|
|
410
|
+
headers.append(column.column_header)
|
|
411
|
+
col_align += (column.align,)
|
|
412
|
+
break
|
|
413
|
+
stat_columns_include.append(keep_column)
|
|
414
|
+
stat_columns = [
|
|
415
|
+
column
|
|
416
|
+
for column, include in zip(stat_columns, stat_columns_include)
|
|
417
|
+
if include
|
|
418
|
+
]
|
|
419
|
+
|
|
352
420
|
# Final headers
|
|
353
421
|
last_columns = self.table_descriptor.get("last_columns", [])
|
|
354
422
|
for column in last_columns:
|
|
@@ -385,6 +453,12 @@ class Table(ABC):
|
|
|
385
453
|
if entry_str is not None:
|
|
386
454
|
row.append(entry_str)
|
|
387
455
|
|
|
456
|
+
# Per stat columns
|
|
457
|
+
for entry in stat_columns:
|
|
458
|
+
entry_str = entry.get_str(build_stats, self.lean)
|
|
459
|
+
if entry_str is not None:
|
|
460
|
+
row.append(entry_str)
|
|
461
|
+
|
|
388
462
|
# Final columns
|
|
389
463
|
for entry in last_columns:
|
|
390
464
|
entry_str = entry.get_str(build_stats, self.lean)
|
|
@@ -513,6 +587,12 @@ class LemonadePerfTable(Table):
|
|
|
513
587
|
Keys.STD_DEV_TOKENS_PER_SECOND,
|
|
514
588
|
".2f",
|
|
515
589
|
),
|
|
590
|
+
SimpleStat(
|
|
591
|
+
_wrap("Total Generated Tokens", 9),
|
|
592
|
+
Keys.RESPONSE_TOKENS,
|
|
593
|
+
"d",
|
|
594
|
+
stat_fn=sum,
|
|
595
|
+
),
|
|
516
596
|
SimpleStat(
|
|
517
597
|
_wrap("Memory Used (GB)", 8), Keys.MAX_MEMORY_USED_GBYTE, ".3f"
|
|
518
598
|
),
|
|
@@ -527,7 +607,16 @@ class LemonadePerfTable(Table):
|
|
|
527
607
|
".2f",
|
|
528
608
|
)
|
|
529
609
|
],
|
|
610
|
+
LMEvalHarness: [
|
|
611
|
+
AdditionalStat(
|
|
612
|
+
"EleutherAI\nLM Evaluation",
|
|
613
|
+
"^lm_eval_",
|
|
614
|
+
"^lm_eval_",
|
|
615
|
+
".1f",
|
|
616
|
+
)
|
|
617
|
+
],
|
|
530
618
|
},
|
|
619
|
+
"stat_columns": [],
|
|
531
620
|
"last_columns": [
|
|
532
621
|
SimpleStat(
|
|
533
622
|
"System Info",
|
|
@@ -210,15 +210,20 @@ def _log_subprocess_output(
|
|
|
210
210
|
"""
|
|
211
211
|
|
|
212
212
|
if process.stdout:
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
213
|
+
try:
|
|
214
|
+
for line in iter(process.stdout.readline, ""):
|
|
215
|
+
if line:
|
|
216
|
+
line_stripped = line.strip()
|
|
217
|
+
logging.debug("%s: %s", prefix, line_stripped)
|
|
217
218
|
|
|
218
|
-
|
|
219
|
+
telemetry.parse_telemetry_line(line_stripped)
|
|
219
220
|
|
|
220
|
-
|
|
221
|
-
|
|
221
|
+
if process.poll() is not None:
|
|
222
|
+
break
|
|
223
|
+
except UnicodeDecodeError as e:
|
|
224
|
+
logging.debug("Unicode decode error reading subprocess output: %s", str(e))
|
|
225
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
|
226
|
+
logging.error("Unexpected error reading subprocess output: %s", str(e))
|
|
222
227
|
|
|
223
228
|
|
|
224
229
|
def _wait_for_load(llama_server_process: subprocess.Popen, port: int):
|
|
@@ -287,6 +292,8 @@ def _launch_llama_subprocess(
|
|
|
287
292
|
stdout=subprocess.PIPE,
|
|
288
293
|
stderr=subprocess.STDOUT,
|
|
289
294
|
text=True,
|
|
295
|
+
encoding="utf-8",
|
|
296
|
+
errors="replace",
|
|
290
297
|
bufsize=1,
|
|
291
298
|
env=env,
|
|
292
299
|
)
|
|
@@ -383,6 +390,10 @@ def server_load(model_config: PullConfig, telemetry: LlamaTelemetry):
|
|
|
383
390
|
f"Loading {model_config.model_name} on GPU didn't work, re-attempting on CPU"
|
|
384
391
|
)
|
|
385
392
|
|
|
393
|
+
if os.environ.get("LEMONADE_LLAMACPP_NO_FALLBACK"):
|
|
394
|
+
# Used for testing, when the test should fail if GPU didn't work
|
|
395
|
+
raise Exception("llamacpp GPU loading failed")
|
|
396
|
+
|
|
386
397
|
llama_server_process = _launch_llama_subprocess(
|
|
387
398
|
snapshot_files, use_gpu=False, telemetry=telemetry
|
|
388
399
|
)
|
|
@@ -110,20 +110,53 @@
|
|
|
110
110
|
</footer>
|
|
111
111
|
<script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
|
|
112
112
|
<script> // Tab switching logic
|
|
113
|
-
function showTab(tab) {
|
|
113
|
+
function showTab(tab, updateHash = true) {
|
|
114
114
|
document.getElementById('tab-chat').classList.remove('active');
|
|
115
115
|
document.getElementById('tab-models').classList.remove('active');
|
|
116
116
|
document.getElementById('content-chat').classList.remove('active');
|
|
117
117
|
document.getElementById('content-models').classList.remove('active');
|
|
118
118
|
if (tab === 'chat') {
|
|
119
119
|
document.getElementById('tab-chat').classList.add('active');
|
|
120
|
-
document.getElementById('content-chat').classList.add('active');
|
|
120
|
+
document.getElementById('content-chat').classList.add('active');
|
|
121
|
+
if (updateHash) {
|
|
122
|
+
window.location.hash = 'llm-chat';
|
|
123
|
+
}
|
|
121
124
|
} else {
|
|
122
125
|
document.getElementById('tab-models').classList.add('active');
|
|
123
|
-
document.getElementById('content-models').classList.add('active');
|
|
126
|
+
document.getElementById('content-models').classList.add('active');
|
|
127
|
+
if (updateHash) {
|
|
128
|
+
window.location.hash = 'model-management';
|
|
129
|
+
}
|
|
124
130
|
}
|
|
125
131
|
}
|
|
126
132
|
|
|
133
|
+
// Handle hash changes for anchor navigation
|
|
134
|
+
function handleHashChange() {
|
|
135
|
+
const hash = window.location.hash.slice(1); // Remove the # symbol
|
|
136
|
+
if (hash === 'llm-chat') {
|
|
137
|
+
showTab('chat', false);
|
|
138
|
+
} else if (hash === 'model-management') {
|
|
139
|
+
showTab('models', false);
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Initialize tab based on URL hash on page load
|
|
144
|
+
function initializeTabFromHash() {
|
|
145
|
+
const hash = window.location.hash.slice(1);
|
|
146
|
+
if (hash === 'llm-chat') {
|
|
147
|
+
showTab('chat', false);
|
|
148
|
+
} else if (hash === 'model-management') {
|
|
149
|
+
showTab('models', false);
|
|
150
|
+
}
|
|
151
|
+
// If no hash or unrecognized hash, keep default (chat tab is already active)
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
// Listen for hash changes
|
|
155
|
+
window.addEventListener('hashchange', handleHashChange);
|
|
156
|
+
|
|
157
|
+
// Initialize on page load
|
|
158
|
+
document.addEventListener('DOMContentLoaded', initializeTabFromHash);
|
|
159
|
+
|
|
127
160
|
// Toggle Add Model form
|
|
128
161
|
function toggleAddModelForm() {
|
|
129
162
|
const form = document.querySelector('.model-mgmt-register-form');
|
lemonade/tools/server/tray.py
CHANGED
|
@@ -197,11 +197,17 @@ class LemonadeTray(SystemTray):
|
|
|
197
197
|
"""
|
|
198
198
|
webbrowser.open("https://lemonade-server.ai/docs/")
|
|
199
199
|
|
|
200
|
+
def open_llm_chat(self, _, __):
|
|
201
|
+
"""
|
|
202
|
+
Open the LLM chat in the default web browser.
|
|
203
|
+
"""
|
|
204
|
+
webbrowser.open(f"http://localhost:{self.port}/#llm-chat")
|
|
205
|
+
|
|
200
206
|
def open_model_manager(self, _, __):
|
|
201
207
|
"""
|
|
202
208
|
Open the model manager in the default web browser.
|
|
203
209
|
"""
|
|
204
|
-
webbrowser.open(f"http://localhost:{self.port}
|
|
210
|
+
webbrowser.open(f"http://localhost:{self.port}/#model-management")
|
|
205
211
|
|
|
206
212
|
def check_server_state(self):
|
|
207
213
|
"""
|
|
@@ -266,7 +272,7 @@ class LemonadeTray(SystemTray):
|
|
|
266
272
|
self.logger.error(f"Error changing port: {str(e)}")
|
|
267
273
|
self.show_balloon_notification("Error", f"Failed to change port: {str(e)}")
|
|
268
274
|
|
|
269
|
-
def upgrade_to_latest(self,
|
|
275
|
+
def upgrade_to_latest(self, _, __):
|
|
270
276
|
"""
|
|
271
277
|
Download and launch the Lemonade Server installer
|
|
272
278
|
"""
|
|
@@ -281,21 +287,34 @@ class LemonadeTray(SystemTray):
|
|
|
281
287
|
installer_path = os.path.join(
|
|
282
288
|
tempfile.gettempdir(), "Lemonade_Server_Installer.exe"
|
|
283
289
|
)
|
|
290
|
+
if os.path.exists(installer_path):
|
|
291
|
+
os.remove(installer_path)
|
|
284
292
|
|
|
285
293
|
# Download the installer
|
|
286
294
|
response = requests.get(self.latest_version_url, stream=True)
|
|
287
295
|
response.raise_for_status()
|
|
288
296
|
|
|
289
|
-
# Save the installer to disk
|
|
297
|
+
# Save the installer to disk and force write to disk
|
|
290
298
|
with open(installer_path, "wb") as f:
|
|
291
299
|
for chunk in response.iter_content(chunk_size=8192):
|
|
292
300
|
f.write(chunk)
|
|
301
|
+
f.flush()
|
|
302
|
+
os.fsync(f.fileno())
|
|
293
303
|
|
|
294
|
-
# Launch the installer
|
|
295
|
-
subprocess.
|
|
304
|
+
# Launch the installer as a completely detached process
|
|
305
|
+
# subprocess.DETACHED_PROCESS - Creates a process that's not attached to the console
|
|
306
|
+
# subprocess.CREATE_NEW_PROCESS_GROUP - Creates a new process group
|
|
307
|
+
# close_fds=True - Closes file descriptors to prevent inheritance
|
|
308
|
+
subprocess.Popen(
|
|
309
|
+
[installer_path],
|
|
310
|
+
creationflags=subprocess.DETACHED_PROCESS
|
|
311
|
+
| subprocess.CREATE_NEW_PROCESS_GROUP,
|
|
312
|
+
close_fds=True,
|
|
313
|
+
shell=True,
|
|
314
|
+
cwd=tempfile.gettempdir(),
|
|
315
|
+
)
|
|
296
316
|
|
|
297
|
-
#
|
|
298
|
-
self.exit_app(icon, item)
|
|
317
|
+
# No need to quit the application, the installer will handle it
|
|
299
318
|
|
|
300
319
|
def create_menu(self):
|
|
301
320
|
"""
|
|
@@ -326,16 +345,25 @@ class LemonadeTray(SystemTray):
|
|
|
326
345
|
|
|
327
346
|
# Create menu items for all downloaded models
|
|
328
347
|
model_menu_items = []
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
348
|
+
if not self.downloaded_models:
|
|
349
|
+
model_menu_items.append(
|
|
350
|
+
MenuItem(
|
|
351
|
+
"No models available: Use the Model Manager to pull models",
|
|
352
|
+
None,
|
|
353
|
+
enabled=False,
|
|
354
|
+
)
|
|
355
|
+
)
|
|
356
|
+
else:
|
|
357
|
+
for model_name, _ in self.downloaded_models.items():
|
|
358
|
+
# Create a function that returns the lambda to properly capture the variables
|
|
359
|
+
def create_handler(mod):
|
|
360
|
+
return lambda icon, item: self.load_llm(icon, item, mod)
|
|
333
361
|
|
|
334
|
-
|
|
362
|
+
model_item = MenuItem(model_name, create_handler(model_name))
|
|
335
363
|
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
364
|
+
# Set checked property instead of modifying the text
|
|
365
|
+
model_item.checked = model_name == self.loaded_llm
|
|
366
|
+
model_menu_items.append(model_item)
|
|
339
367
|
|
|
340
368
|
load_submenu = Menu(*model_menu_items)
|
|
341
369
|
|
|
@@ -378,6 +406,7 @@ class LemonadeTray(SystemTray):
|
|
|
378
406
|
)
|
|
379
407
|
|
|
380
408
|
items.append(MenuItem("Documentation", self.open_documentation))
|
|
409
|
+
items.append(MenuItem("LLM Chat", self.open_llm_chat))
|
|
381
410
|
items.append(MenuItem("Model Manager", self.open_model_manager))
|
|
382
411
|
items.append(MenuItem("Show Logs", self.show_logs))
|
|
383
412
|
items.append(Menu.SEPARATOR)
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.0.
|
|
1
|
+
__version__ = "8.0.3"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.0.
|
|
3
|
+
Version: 8.0.3
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.12
|
|
@@ -26,45 +26,49 @@ Requires-Dist: openai>=1.81.0
|
|
|
26
26
|
Requires-Dist: transformers<=4.51.3
|
|
27
27
|
Requires-Dist: jinja2
|
|
28
28
|
Requires-Dist: tabulate
|
|
29
|
-
Requires-Dist:
|
|
29
|
+
Requires-Dist: sentencepiece
|
|
30
|
+
Requires-Dist: huggingface-hub==0.33.0
|
|
31
|
+
Provides-Extra: oga-hybrid
|
|
32
|
+
Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
|
|
33
|
+
Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
|
|
34
|
+
Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
|
|
35
|
+
Provides-Extra: oga-cpu
|
|
36
|
+
Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
|
|
37
|
+
Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
|
|
38
|
+
Provides-Extra: dev
|
|
39
|
+
Requires-Dist: torch>=2.6.0; extra == "dev"
|
|
40
|
+
Requires-Dist: accelerate; extra == "dev"
|
|
41
|
+
Requires-Dist: datasets; extra == "dev"
|
|
42
|
+
Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
43
|
+
Requires-Dist: matplotlib; extra == "dev"
|
|
44
|
+
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
45
|
+
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
30
46
|
Provides-Extra: oga-hybrid-minimal
|
|
31
|
-
Requires-Dist:
|
|
32
|
-
Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
|
|
33
|
-
Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
|
|
47
|
+
Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
|
|
34
48
|
Provides-Extra: oga-cpu-minimal
|
|
35
|
-
Requires-Dist:
|
|
36
|
-
Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
|
|
49
|
+
Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
|
|
37
50
|
Provides-Extra: llm
|
|
38
|
-
Requires-Dist:
|
|
39
|
-
Requires-Dist: accelerate; extra == "llm"
|
|
40
|
-
Requires-Dist: sentencepiece; extra == "llm"
|
|
41
|
-
Requires-Dist: datasets; extra == "llm"
|
|
42
|
-
Requires-Dist: pandas>=1.5.3; extra == "llm"
|
|
43
|
-
Requires-Dist: matplotlib; extra == "llm"
|
|
44
|
-
Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
|
|
45
|
-
Requires-Dist: lm-eval[api]; extra == "llm"
|
|
51
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm"
|
|
46
52
|
Provides-Extra: llm-oga-cpu
|
|
47
|
-
Requires-Dist: lemonade-sdk[oga-cpu
|
|
48
|
-
Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
|
|
53
|
+
Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
|
|
49
54
|
Provides-Extra: llm-oga-igpu
|
|
50
55
|
Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
|
|
51
56
|
Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
|
|
52
57
|
Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
|
|
53
|
-
Requires-Dist: lemonade-sdk[
|
|
58
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
|
|
54
59
|
Provides-Extra: llm-oga-cuda
|
|
55
|
-
Requires-Dist: onnxruntime-genai-cuda==0.
|
|
56
|
-
Requires-Dist: onnxruntime-gpu
|
|
57
|
-
Requires-Dist: transformers
|
|
58
|
-
Requires-Dist: lemonade-sdk[
|
|
60
|
+
Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
|
|
61
|
+
Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
|
|
62
|
+
Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
|
|
63
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
|
|
59
64
|
Provides-Extra: llm-oga-npu
|
|
60
65
|
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
61
66
|
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
62
67
|
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
63
68
|
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
64
|
-
Requires-Dist: lemonade-sdk[
|
|
69
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
65
70
|
Provides-Extra: llm-oga-hybrid
|
|
66
|
-
Requires-Dist: lemonade-sdk[oga-hybrid
|
|
67
|
-
Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
|
|
71
|
+
Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
|
|
68
72
|
Provides-Extra: llm-oga-unified
|
|
69
73
|
Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
70
74
|
Dynamic: author-email
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
|
|
2
2
|
lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
|
|
3
3
|
lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
|
|
4
|
-
lemonade/cli.py,sha256=
|
|
4
|
+
lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=GImAlzwPDxsACkYFf5rTrX8QMH23tcqdm6vgjfFYD10,22
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
@@ -17,7 +17,7 @@ lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD4
|
|
|
17
17
|
lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
|
|
18
18
|
lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
|
|
19
19
|
lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
|
|
20
|
-
lemonade/profilers/profiler.py,sha256=
|
|
20
|
+
lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
|
|
21
21
|
lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
|
|
22
22
|
lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
|
|
23
23
|
lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
|
|
@@ -26,7 +26,7 @@ lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9
|
|
|
26
26
|
lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
|
|
27
27
|
lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
|
|
28
28
|
lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
|
|
29
|
-
lemonade/tools/prompt.py,sha256=
|
|
29
|
+
lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
|
|
30
30
|
lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
|
|
31
31
|
lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
|
|
32
32
|
lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
|
|
@@ -35,36 +35,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
|
|
|
35
35
|
lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
|
|
36
36
|
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
37
|
lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
|
|
38
|
-
lemonade/tools/oga/load.py,sha256=
|
|
38
|
+
lemonade/tools/oga/load.py,sha256=xSP0DWoGd5zBRozSafj1MMyIQyHJuIRj_vNlCTx8mfs,28309
|
|
39
39
|
lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
|
|
40
40
|
lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
41
|
lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
|
|
42
42
|
lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
|
|
43
43
|
lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
44
|
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
45
|
-
lemonade/tools/report/table.py,sha256=
|
|
45
|
+
lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
|
|
46
46
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
-
lemonade/tools/server/llamacpp.py,sha256=
|
|
47
|
+
lemonade/tools/server/llamacpp.py,sha256=vjFNelm_VyKBBgWmltsAwLI7ncQ9AwVFQD7krZnF42w,16199
|
|
48
48
|
lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
|
|
49
49
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
50
|
-
lemonade/tools/server/tray.py,sha256=
|
|
50
|
+
lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
|
|
51
51
|
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
52
52
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
53
53
|
lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
|
|
54
|
-
lemonade/tools/server/static/webapp.html,sha256=
|
|
54
|
+
lemonade/tools/server/static/webapp.html,sha256=kPzORaogVRdFQewXyNI_JaH2ZZCTaq5zfMSyzuoFTuA,22414
|
|
55
55
|
lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
|
|
56
56
|
lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
|
|
57
57
|
lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
|
|
58
58
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
59
59
|
lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
|
|
60
|
-
lemonade_sdk-8.0.
|
|
61
|
-
lemonade_sdk-8.0.
|
|
62
|
-
lemonade_server/cli.py,sha256=
|
|
63
|
-
lemonade_server/model_manager.py,sha256=
|
|
64
|
-
lemonade_server/pydantic_models.py,sha256=
|
|
65
|
-
lemonade_server/server_models.json,sha256=
|
|
66
|
-
lemonade_sdk-8.0.
|
|
67
|
-
lemonade_sdk-8.0.
|
|
68
|
-
lemonade_sdk-8.0.
|
|
69
|
-
lemonade_sdk-8.0.
|
|
70
|
-
lemonade_sdk-8.0.
|
|
60
|
+
lemonade_sdk-8.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
61
|
+
lemonade_sdk-8.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
62
|
+
lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
|
|
63
|
+
lemonade_server/model_manager.py,sha256=Yvlsl0wipKfryKULH5ASQ9INhLQXPq9dTGQVBXf2_h0,16167
|
|
64
|
+
lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
|
|
65
|
+
lemonade_server/server_models.json,sha256=O5zk94gH_zRq6GSwbqvi2SNwx51eY9uqgAl_kxTi0iM,7271
|
|
66
|
+
lemonade_sdk-8.0.3.dist-info/METADATA,sha256=WesWziLri9jQjZILRENliiJbggTVF8LmXKVIERInVbE,8285
|
|
67
|
+
lemonade_sdk-8.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
68
|
+
lemonade_sdk-8.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
|
|
69
|
+
lemonade_sdk-8.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
70
|
+
lemonade_sdk-8.0.3.dist-info/RECORD,,
|
lemonade_server/cli.py
CHANGED
|
@@ -4,7 +4,6 @@ import os
|
|
|
4
4
|
from typing import Tuple, Optional
|
|
5
5
|
import psutil
|
|
6
6
|
from typing import List
|
|
7
|
-
import subprocess
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
# Error codes for different CLI scenarios
|
|
@@ -88,23 +87,26 @@ def stop():
|
|
|
88
87
|
# Terminate the main process first
|
|
89
88
|
process.terminate()
|
|
90
89
|
|
|
91
|
-
# Then terminate
|
|
90
|
+
# Then terminate llama-server child process (known to be stubborn)
|
|
91
|
+
# We avoid killing other child processes, such as the installer
|
|
92
92
|
for child in children:
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
93
|
+
if "llama-server" in child.name():
|
|
94
|
+
try:
|
|
95
|
+
child.terminate()
|
|
96
|
+
except psutil.NoSuchProcess:
|
|
97
|
+
pass # Child already terminated
|
|
97
98
|
|
|
98
99
|
# Wait for main process
|
|
99
100
|
process.wait(timeout=10)
|
|
100
101
|
|
|
101
|
-
# Kill
|
|
102
|
+
# Kill llama-server child process if it didn't terminate gracefully
|
|
102
103
|
for child in children:
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
child.
|
|
106
|
-
|
|
107
|
-
|
|
104
|
+
if "llama-server" in child.name():
|
|
105
|
+
try:
|
|
106
|
+
if child.is_running():
|
|
107
|
+
child.kill()
|
|
108
|
+
except psutil.NoSuchProcess:
|
|
109
|
+
pass # Child already terminated
|
|
108
110
|
except psutil.NoSuchProcess:
|
|
109
111
|
# Process already terminated
|
|
110
112
|
pass
|
lemonade_server/model_manager.py
CHANGED
|
@@ -102,57 +102,131 @@ class ModelManager:
|
|
|
102
102
|
"""
|
|
103
103
|
return self.filter_models_by_backend(self.downloaded_models)
|
|
104
104
|
|
|
105
|
+
def identify_gguf_models(
|
|
106
|
+
self, checkpoint: str, variant: str, mmproj: str
|
|
107
|
+
) -> tuple[dict, list[str]]:
|
|
108
|
+
"""
|
|
109
|
+
Identifies the GGUF model files in the repository that match the variant.
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
hint = """
|
|
113
|
+
The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
|
|
114
|
+
|
|
115
|
+
The VARIANT format can be one of several types:
|
|
116
|
+
1. Full filename: exact file to download
|
|
117
|
+
2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
|
|
118
|
+
3. Quantization variant: find a single file ending with the variant name (case insensitive)
|
|
119
|
+
4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
|
|
120
|
+
|
|
121
|
+
Examples:
|
|
122
|
+
- "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
|
|
123
|
+
- "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
|
|
124
|
+
- "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
|
|
125
|
+
- "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
repo_files = huggingface_hub.list_repo_files(checkpoint)
|
|
129
|
+
sharded_files = []
|
|
130
|
+
|
|
131
|
+
# (case 1) If variant ends in .gguf, use it directly
|
|
132
|
+
if variant and variant.endswith(".gguf"):
|
|
133
|
+
variant_name = variant
|
|
134
|
+
if variant_name not in repo_files:
|
|
135
|
+
raise ValueError(
|
|
136
|
+
f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
|
|
137
|
+
)
|
|
138
|
+
# (case 2) If no variant is provided, get the first .gguf file in the repository
|
|
139
|
+
elif variant is None:
|
|
140
|
+
all_variants = [
|
|
141
|
+
f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
|
|
142
|
+
]
|
|
143
|
+
if len(all_variants) == 0:
|
|
144
|
+
raise ValueError(
|
|
145
|
+
f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
|
|
146
|
+
)
|
|
147
|
+
variant_name = all_variants[0]
|
|
148
|
+
else:
|
|
149
|
+
# (case 3) Find a single file ending with the variant name (case insensitive)
|
|
150
|
+
end_with_variant = [
|
|
151
|
+
f
|
|
152
|
+
for f in repo_files
|
|
153
|
+
if f.lower().endswith(f"{variant}.gguf".lower())
|
|
154
|
+
and "mmproj" not in f.lower()
|
|
155
|
+
]
|
|
156
|
+
if len(end_with_variant) == 1:
|
|
157
|
+
variant_name = end_with_variant[0]
|
|
158
|
+
elif len(end_with_variant) > 1:
|
|
159
|
+
raise ValueError(
|
|
160
|
+
f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
|
|
161
|
+
)
|
|
162
|
+
# (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
|
|
163
|
+
else:
|
|
164
|
+
sharded_files = [
|
|
165
|
+
f
|
|
166
|
+
for f in repo_files
|
|
167
|
+
if f.endswith(".gguf")
|
|
168
|
+
and f.lower().startswith(f"{variant}/".lower())
|
|
169
|
+
]
|
|
170
|
+
|
|
171
|
+
if not sharded_files:
|
|
172
|
+
raise ValueError(
|
|
173
|
+
f"No .gguf files found for variant {variant}. {hint}"
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
# Sort to ensure consistent ordering
|
|
177
|
+
sharded_files.sort()
|
|
178
|
+
|
|
179
|
+
# Use first file as primary (this is how llamacpp handles it)
|
|
180
|
+
variant_name = sharded_files[0]
|
|
181
|
+
|
|
182
|
+
core_files = {"variant": variant_name}
|
|
183
|
+
|
|
184
|
+
# If there is a mmproj file, add it to the patterns
|
|
185
|
+
if mmproj:
|
|
186
|
+
if mmproj not in repo_files:
|
|
187
|
+
raise ValueError(
|
|
188
|
+
f"The provided mmproj file {mmproj} was not found in {checkpoint}."
|
|
189
|
+
)
|
|
190
|
+
core_files["mmproj"] = mmproj
|
|
191
|
+
|
|
192
|
+
return core_files, sharded_files
|
|
193
|
+
|
|
105
194
|
def download_gguf(self, model_config: PullConfig) -> dict:
|
|
106
195
|
"""
|
|
107
196
|
Downloads the GGUF file for the given model configuration.
|
|
197
|
+
|
|
198
|
+
For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
|
|
199
|
+
will be downloaded but only the first file will be returned for loading.
|
|
108
200
|
"""
|
|
109
201
|
|
|
110
|
-
#
|
|
111
|
-
# 1. A full GGUF filename (e.g. "model-Q4_0.gguf")
|
|
112
|
-
# 2. A quantization variant (e.g. "Q4_0")
|
|
113
|
-
# This code handles both cases by constructing the appropriate filename
|
|
202
|
+
# This code handles all cases by constructing the appropriate filename or pattern
|
|
114
203
|
checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
|
|
115
|
-
hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
|
|
116
|
-
variant_name = (
|
|
117
|
-
variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
|
|
118
|
-
)
|
|
119
204
|
|
|
120
|
-
#
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
205
|
+
# Identify the GGUF model files in the repository that match the variant
|
|
206
|
+
core_files, sharded_files = self.identify_gguf_models(
|
|
207
|
+
checkpoint, variant, model_config.mmproj
|
|
208
|
+
)
|
|
124
209
|
|
|
125
210
|
# Download the files
|
|
126
211
|
snapshot_folder = huggingface_hub.snapshot_download(
|
|
127
212
|
repo_id=checkpoint,
|
|
128
|
-
allow_patterns=list(
|
|
213
|
+
allow_patterns=list(core_files.values()) + sharded_files,
|
|
129
214
|
)
|
|
130
215
|
|
|
131
|
-
#
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
raise ValueError(
|
|
136
|
-
"No patterns matched the variant parameter (CHECKPOINT:VARIANT). "
|
|
137
|
-
"Try again, providing the full filename of your target .gguf file as the variant."
|
|
138
|
-
" For example: Qwen/Qwen2.5-Coder-3B-Instruct-GGUF:"
|
|
139
|
-
"qwen2.5-coder-3b-instruct-q4_0.gguf"
|
|
140
|
-
)
|
|
141
|
-
|
|
142
|
-
# Ensure we downloaded all expected files while creating a dict of the downloaded files
|
|
143
|
-
snapshot_files = {}
|
|
144
|
-
for file in expected_files:
|
|
145
|
-
snapshot_files[file] = os.path.join(snapshot_folder, expected_files[file])
|
|
146
|
-
if expected_files[file].lower() not in [
|
|
147
|
-
name.lower() for name in os.listdir(snapshot_folder)
|
|
148
|
-
]:
|
|
216
|
+
# Ensure we downloaded all expected files
|
|
217
|
+
for file in list(core_files.values()) + sharded_files:
|
|
218
|
+
expected_path = os.path.join(snapshot_folder, file)
|
|
219
|
+
if not os.path.exists(expected_path):
|
|
149
220
|
raise ValueError(
|
|
150
221
|
f"Hugging Face snapshot download for {model_config.checkpoint} "
|
|
151
|
-
f"expected file {
|
|
222
|
+
f"expected file {file} not found at {expected_path}"
|
|
152
223
|
)
|
|
153
224
|
|
|
154
|
-
# Return a dict
|
|
155
|
-
return
|
|
225
|
+
# Return a dict of the full path of the core GGUF files
|
|
226
|
+
return {
|
|
227
|
+
file_name: os.path.join(snapshot_folder, file_path)
|
|
228
|
+
for file_name, file_path in core_files.items()
|
|
229
|
+
}
|
|
156
230
|
|
|
157
231
|
def download_models(
|
|
158
232
|
self,
|
|
@@ -249,6 +323,9 @@ class ModelManager:
|
|
|
249
323
|
|
|
250
324
|
user_models[model_name] = new_user_model
|
|
251
325
|
|
|
326
|
+
# Ensure the cache directory exists before writing the file
|
|
327
|
+
os.makedirs(os.path.dirname(USER_MODELS_FILE), exist_ok=True)
|
|
328
|
+
|
|
252
329
|
with open(USER_MODELS_FILE, mode="w", encoding="utf-8") as file:
|
|
253
330
|
json.dump(user_models, fp=file)
|
|
254
331
|
|
|
@@ -203,5 +203,13 @@
|
|
|
203
203
|
"reasoning": false,
|
|
204
204
|
"suggested": true,
|
|
205
205
|
"labels": ["vision"]
|
|
206
|
+
},
|
|
207
|
+
"Llama-4-Scout-17B-16E-Instruct-GGUF": {
|
|
208
|
+
"checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
|
|
209
|
+
"mmproj": "mmproj-F16.gguf",
|
|
210
|
+
"recipe": "llamacpp",
|
|
211
|
+
"reasoning": false,
|
|
212
|
+
"suggested": true,
|
|
213
|
+
"labels": ["vision"]
|
|
206
214
|
}
|
|
207
215
|
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|