lemonade-sdk 8.0.5__py3-none-any.whl → 8.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/cache.py +3 -1
- lemonade/common/network.py +18 -1
- lemonade/tools/adapter.py +6 -0
- lemonade/tools/huggingface/utils.py +6 -5
- lemonade/tools/llamacpp/bench.py +28 -46
- lemonade/tools/llamacpp/load.py +104 -196
- lemonade/tools/llamacpp/utils.py +612 -0
- lemonade/tools/oga/bench.py +5 -6
- lemonade/tools/oga/load.py +239 -112
- lemonade/tools/oga/utils.py +27 -9
- lemonade/tools/prompt.py +17 -25
- lemonade/tools/report/table.py +12 -9
- lemonade/tools/server/llamacpp.py +80 -92
- lemonade/tools/server/serve.py +22 -28
- lemonade/tools/server/static/styles.css +121 -26
- lemonade/tools/server/static/webapp.html +14 -6
- lemonade/tools/server/tray.py +7 -0
- lemonade/version.py +1 -1
- lemonade_install/install.py +65 -84
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/METADATA +32 -21
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/RECORD +30 -29
- lemonade_server/cli.py +1 -1
- lemonade_server/model_manager.py +8 -151
- lemonade_server/pydantic_models.py +1 -4
- lemonade_server/server_models.json +44 -9
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/top_level.txt +0 -0
lemonade_install/install.py
CHANGED
|
@@ -24,22 +24,6 @@
|
|
|
24
24
|
# In any python environment, only one set of artifacts can be installed at a time.
|
|
25
25
|
# Python environments created by Lemonade v6.1.x or earlier will need to be recreated.
|
|
26
26
|
#
|
|
27
|
-
# The Ryzen AI 1.3.0 artifact files use a different directory hierarchy.
|
|
28
|
-
# The Ryzen AI 1.3.0 hybrid artifacts directory hierarchy is:
|
|
29
|
-
#
|
|
30
|
-
# RYZEN_AI\hybrid\hybrid-llm-artifacts_1.3.0_lounge\hybrid-llm-artifacts\
|
|
31
|
-
# onnxruntime_genai\lib
|
|
32
|
-
# onnxruntime_genai\wheel
|
|
33
|
-
# onnx_utils\bin
|
|
34
|
-
# eula\eula
|
|
35
|
-
#
|
|
36
|
-
# The Ryzen AI 1.3.0 npu artifacts directory hierarchy is:
|
|
37
|
-
#
|
|
38
|
-
# RYZEN_AI\npu\amd_oga\
|
|
39
|
-
# bins\xclbin\stx
|
|
40
|
-
# libs
|
|
41
|
-
# wheels
|
|
42
|
-
#
|
|
43
27
|
|
|
44
28
|
import argparse
|
|
45
29
|
import glob
|
|
@@ -56,6 +40,13 @@ import zipfile
|
|
|
56
40
|
DEFAULT_RYZEN_AI_VERSION = "1.4.0"
|
|
57
41
|
version_info_filename = "version_info.json"
|
|
58
42
|
|
|
43
|
+
# NPU Driver configuration
|
|
44
|
+
NPU_DRIVER_DOWNLOAD_URL = (
|
|
45
|
+
"https://account.amd.com/en/forms/downloads/"
|
|
46
|
+
"ryzenai-eula-public-xef.html?filename=NPU_RAI1.5_280_WHQL.zip"
|
|
47
|
+
)
|
|
48
|
+
REQUIRED_NPU_DRIVER_VERSION = "32.0.203.280"
|
|
49
|
+
|
|
59
50
|
lemonade_install_dir = Path(__file__).parent.parent.parent
|
|
60
51
|
DEFAULT_QUARK_VERSION = "quark-0.6.0"
|
|
61
52
|
DEFAULT_QUARK_DIR = os.path.join(
|
|
@@ -66,14 +57,6 @@ DEFAULT_QUARK_DIR = os.path.join(
|
|
|
66
57
|
SUPPORTED_RYZEN_AI_SERIES = ["300"]
|
|
67
58
|
|
|
68
59
|
npu_install_data = {
|
|
69
|
-
"1.3.0": {
|
|
70
|
-
"artifacts_zipfile": "ryzen_ai_13_ga/npu-llm-artifacts_1.3.0.zip",
|
|
71
|
-
"license_file": (
|
|
72
|
-
"https://account.amd.com/content/dam/account/en/licenses/download/"
|
|
73
|
-
"amd-end-user-license-agreement.pdf"
|
|
74
|
-
),
|
|
75
|
-
"license_tag": "Beta ",
|
|
76
|
-
},
|
|
77
60
|
"1.4.0": {
|
|
78
61
|
"artifacts_zipfile": (
|
|
79
62
|
"https://www.xilinx.com/bin/public/openDownload?"
|
|
@@ -88,17 +71,6 @@ npu_install_data = {
|
|
|
88
71
|
}
|
|
89
72
|
|
|
90
73
|
hybrid_install_data = {
|
|
91
|
-
"1.3.0": {
|
|
92
|
-
"artifacts_zipfile": (
|
|
93
|
-
"https://www.xilinx.com/bin/public/openDownload?"
|
|
94
|
-
"filename=hybrid-llm-artifacts_1.3.0_012725.zip"
|
|
95
|
-
),
|
|
96
|
-
"license_file": (
|
|
97
|
-
"https://www.xilinx.com/bin/public/openDownload?"
|
|
98
|
-
"filename=AMD%20End%20User%20License%20Agreement.pdf"
|
|
99
|
-
),
|
|
100
|
-
"license_tag": "",
|
|
101
|
-
},
|
|
102
74
|
"1.4.0": {
|
|
103
75
|
"artifacts_zipfile": (
|
|
104
76
|
"https://www.xilinx.com/bin/public/openDownload?"
|
|
@@ -154,10 +126,7 @@ def get_oga_npu_dir():
|
|
|
154
126
|
version_info = get_ryzen_ai_version_info()
|
|
155
127
|
version = version_info["version"]
|
|
156
128
|
ryzen_ai_folder = get_ryzen_ai_path()
|
|
157
|
-
|
|
158
|
-
npu_dir = os.path.join(ryzen_ai_folder, "npu", "amd_oga")
|
|
159
|
-
else:
|
|
160
|
-
npu_dir = os.path.join(ryzen_ai_folder, "npu")
|
|
129
|
+
npu_dir = os.path.join(ryzen_ai_folder, "npu")
|
|
161
130
|
if not os.path.isdir(npu_dir):
|
|
162
131
|
raise RuntimeError(
|
|
163
132
|
f"The npu artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
|
|
@@ -171,15 +140,7 @@ def get_oga_hybrid_dir():
|
|
|
171
140
|
version_info = get_ryzen_ai_version_info()
|
|
172
141
|
version = version_info["version"]
|
|
173
142
|
ryzen_ai_folder = get_ryzen_ai_path()
|
|
174
|
-
|
|
175
|
-
hybrid_dir = os.path.join(
|
|
176
|
-
ryzen_ai_folder,
|
|
177
|
-
"hybrid",
|
|
178
|
-
"hybrid-llm-artifacts_1.3.0_lounge",
|
|
179
|
-
"hybrid-llm-artifacts",
|
|
180
|
-
)
|
|
181
|
-
else:
|
|
182
|
-
hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
|
|
143
|
+
hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
|
|
183
144
|
if not os.path.isdir(hybrid_dir):
|
|
184
145
|
raise RuntimeError(
|
|
185
146
|
f"The hybrid artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
|
|
@@ -189,6 +150,37 @@ def get_oga_hybrid_dir():
|
|
|
189
150
|
return hybrid_dir, version
|
|
190
151
|
|
|
191
152
|
|
|
153
|
+
def _get_ryzenai_version_info(device=None):
|
|
154
|
+
"""
|
|
155
|
+
Centralized version detection for RyzenAI installations.
|
|
156
|
+
Uses lazy imports to avoid import errors when OGA is not installed.
|
|
157
|
+
"""
|
|
158
|
+
try:
|
|
159
|
+
# Lazy import to avoid errors when OGA is not installed
|
|
160
|
+
from packaging.version import Version
|
|
161
|
+
import onnxruntime_genai as og
|
|
162
|
+
|
|
163
|
+
if Version(og.__version__) >= Version("0.7.0"):
|
|
164
|
+
oga_path = os.path.dirname(og.__file__)
|
|
165
|
+
if og.__version__ == "0.7.0.2":
|
|
166
|
+
return "1.5.0", oga_path
|
|
167
|
+
else:
|
|
168
|
+
return "1.4.0", oga_path
|
|
169
|
+
else:
|
|
170
|
+
if device == "npu":
|
|
171
|
+
oga_path, version = get_oga_npu_dir()
|
|
172
|
+
else:
|
|
173
|
+
oga_path, version = get_oga_hybrid_dir()
|
|
174
|
+
return version, oga_path
|
|
175
|
+
except ImportError as e:
|
|
176
|
+
raise ImportError(
|
|
177
|
+
f"{e}\n Please install lemonade-sdk with "
|
|
178
|
+
"one of the oga extras, for example:\n"
|
|
179
|
+
"pip install lemonade-sdk[dev,oga-cpu]\n"
|
|
180
|
+
"See https://lemonade_server.ai/install_options.html for details"
|
|
181
|
+
) from e
|
|
182
|
+
|
|
183
|
+
|
|
192
184
|
def download_lfs_file(token, file, output_filename):
|
|
193
185
|
"""Downloads a file from LFS"""
|
|
194
186
|
import requests
|
|
@@ -426,8 +418,6 @@ class Install:
|
|
|
426
418
|
"npu",
|
|
427
419
|
"hybrid",
|
|
428
420
|
"unified",
|
|
429
|
-
"npu-1.3.0",
|
|
430
|
-
"hybrid-1.3.0",
|
|
431
421
|
"npu-1.4.0",
|
|
432
422
|
"hybrid-1.4.0",
|
|
433
423
|
"unified-1.4.0",
|
|
@@ -524,25 +514,14 @@ class Install:
|
|
|
524
514
|
# Install all whl files in the specified wheels folder
|
|
525
515
|
if wheels_full_path is not None:
|
|
526
516
|
print(f"\nInstalling wheels from {wheels_full_path}\n")
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
print(f"\nInstalling {file} with command {install_cmd}\n")
|
|
536
|
-
subprocess.run(install_cmd, check=True, shell=True)
|
|
537
|
-
else:
|
|
538
|
-
# Install all the wheel files together, allowing pip to work out the dependencies
|
|
539
|
-
wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
|
|
540
|
-
install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
|
|
541
|
-
subprocess.run(
|
|
542
|
-
install_cmd,
|
|
543
|
-
check=True,
|
|
544
|
-
shell=True,
|
|
545
|
-
)
|
|
517
|
+
# Install all the wheel files together, allowing pip to work out the dependencies
|
|
518
|
+
wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
|
|
519
|
+
install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
|
|
520
|
+
subprocess.run(
|
|
521
|
+
install_cmd,
|
|
522
|
+
check=True,
|
|
523
|
+
shell=True,
|
|
524
|
+
)
|
|
546
525
|
|
|
547
526
|
# Delete the zip file
|
|
548
527
|
print(f"\nCleaning up, removing {archive_file_path}\n")
|
|
@@ -611,10 +590,7 @@ class Install:
|
|
|
611
590
|
license_file = npu_install_data[version].get("license_file", None)
|
|
612
591
|
license_tag = npu_install_data[version].get("license_tag", None)
|
|
613
592
|
install_dir = os.path.join(ryzen_ai_folder, "npu")
|
|
614
|
-
|
|
615
|
-
wheels_full_path = os.path.join(install_dir, "amd_oga/wheels")
|
|
616
|
-
else:
|
|
617
|
-
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
593
|
+
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
618
594
|
|
|
619
595
|
if license_file:
|
|
620
596
|
Install._get_license_acceptance(version, license_file, license_tag, yes)
|
|
@@ -641,17 +617,7 @@ class Install:
|
|
|
641
617
|
license_file = hybrid_install_data[version].get("license_file", None)
|
|
642
618
|
license_tag = hybrid_install_data[version].get("license_tag", None)
|
|
643
619
|
install_dir = os.path.join(ryzen_ai_folder, "hybrid")
|
|
644
|
-
|
|
645
|
-
wheels_full_path = os.path.join(
|
|
646
|
-
ryzen_ai_folder,
|
|
647
|
-
"hybrid",
|
|
648
|
-
"hybrid-llm-artifacts_1.3.0_lounge",
|
|
649
|
-
"hybrid-llm-artifacts",
|
|
650
|
-
"onnxruntime_genai",
|
|
651
|
-
"wheel",
|
|
652
|
-
)
|
|
653
|
-
else:
|
|
654
|
-
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
620
|
+
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
655
621
|
|
|
656
622
|
if license_file:
|
|
657
623
|
Install._get_license_acceptance(version, license_file, license_tag, yes)
|
|
@@ -669,6 +635,21 @@ class Install:
|
|
|
669
635
|
# Check if the processor is supported before proceeding
|
|
670
636
|
check_ryzen_ai_processor()
|
|
671
637
|
|
|
638
|
+
warning_msg = (
|
|
639
|
+
"\n" + "=" * 80 + "\n"
|
|
640
|
+
"WARNING: IMPORTANT: NEW RYZEN AI 1.5.0 INSTALLATION PROCESS\n"
|
|
641
|
+
+ "=" * 80
|
|
642
|
+
+ "\n"
|
|
643
|
+
"Starting with Ryzen AI 1.5.0, installation is now available through PyPI.\n"
|
|
644
|
+
"For new installations, consider using:\n\n"
|
|
645
|
+
"pip install lemonade-sdk[oga-ryzenai] --extra-index-url https://pypi.amd.com/simple\n\n"
|
|
646
|
+
"This legacy installation method (lemonade-install --ryzenai) is still\n"
|
|
647
|
+
"supported for version 1.4.0, but may be deprecated in future releases.\n"
|
|
648
|
+
+ "=" * 80
|
|
649
|
+
+ "\n"
|
|
650
|
+
)
|
|
651
|
+
print(warning_msg)
|
|
652
|
+
|
|
672
653
|
# Delete any previous Ryzen AI installation in this environment
|
|
673
654
|
ryzen_ai_folder = get_ryzen_ai_path(check_exists=False)
|
|
674
655
|
if os.path.exists(ryzen_ai_folder):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.0
|
|
3
|
+
Version: 8.1.0
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.13
|
|
@@ -22,16 +22,15 @@ Requires-Dist: pytz
|
|
|
22
22
|
Requires-Dist: zstandard
|
|
23
23
|
Requires-Dist: fastapi
|
|
24
24
|
Requires-Dist: uvicorn[standard]
|
|
25
|
-
Requires-Dist: openai
|
|
26
|
-
Requires-Dist: transformers<=4.
|
|
25
|
+
Requires-Dist: openai<1.97.1,>=1.81.0
|
|
26
|
+
Requires-Dist: transformers<=4.53.2
|
|
27
27
|
Requires-Dist: jinja2
|
|
28
28
|
Requires-Dist: tabulate
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
30
30
|
Requires-Dist: huggingface-hub==0.33.0
|
|
31
|
-
Provides-Extra: oga-
|
|
32
|
-
Requires-Dist:
|
|
33
|
-
Requires-Dist:
|
|
34
|
-
Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
|
|
31
|
+
Provides-Extra: oga-ryzenai
|
|
32
|
+
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
|
|
33
|
+
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
35
34
|
Provides-Extra: oga-cpu
|
|
36
35
|
Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
|
|
37
36
|
Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
|
|
@@ -43,14 +42,32 @@ Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
|
43
42
|
Requires-Dist: matplotlib; extra == "dev"
|
|
44
43
|
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
45
44
|
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
45
|
+
Provides-Extra: oga-hybrid
|
|
46
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
|
|
47
|
+
Provides-Extra: oga-unified
|
|
48
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
|
|
46
49
|
Provides-Extra: oga-hybrid-minimal
|
|
47
|
-
Requires-Dist: lemonade-sdk[oga-
|
|
50
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
|
|
48
51
|
Provides-Extra: oga-cpu-minimal
|
|
49
52
|
Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
|
|
53
|
+
Provides-Extra: oga-npu-minimal
|
|
54
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
|
|
50
55
|
Provides-Extra: llm
|
|
51
56
|
Requires-Dist: lemonade-sdk[dev]; extra == "llm"
|
|
52
57
|
Provides-Extra: llm-oga-cpu
|
|
53
58
|
Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
|
|
59
|
+
Provides-Extra: llm-oga-npu
|
|
60
|
+
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
61
|
+
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
62
|
+
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
63
|
+
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
64
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
65
|
+
Provides-Extra: llm-oga-hybrid
|
|
66
|
+
Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
|
|
67
|
+
Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
|
|
68
|
+
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
|
|
69
|
+
Provides-Extra: llm-oga-unified
|
|
70
|
+
Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
54
71
|
Provides-Extra: llm-oga-igpu
|
|
55
72
|
Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
|
|
56
73
|
Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
|
|
@@ -61,16 +78,6 @@ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
|
|
|
61
78
|
Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
|
|
62
79
|
Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
|
|
63
80
|
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
|
|
64
|
-
Provides-Extra: llm-oga-npu
|
|
65
|
-
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
66
|
-
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
67
|
-
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
68
|
-
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
69
|
-
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
70
|
-
Provides-Extra: llm-oga-hybrid
|
|
71
|
-
Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
|
|
72
|
-
Provides-Extra: llm-oga-unified
|
|
73
|
-
Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
74
81
|
Dynamic: author-email
|
|
75
82
|
Dynamic: description
|
|
76
83
|
Dynamic: description-content-type
|
|
@@ -174,7 +181,7 @@ lemonade-server list
|
|
|
174
181
|
|
|
175
182
|
## Model Library
|
|
176
183
|
|
|
177
|
-
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/
|
|
184
|
+
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
|
|
178
185
|
|
|
179
186
|
You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
|
|
180
187
|
<p align="center">
|
|
@@ -263,7 +270,7 @@ completion = client.chat.completions.create(
|
|
|
263
270
|
print(completion.choices[0].message.content)
|
|
264
271
|
```
|
|
265
272
|
|
|
266
|
-
For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
|
|
273
|
+
For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
|
|
267
274
|
|
|
268
275
|
## Beyond an LLM Server
|
|
269
276
|
|
|
@@ -272,6 +279,10 @@ The [Lemonade SDK](./docs/README.md) also includes the following components:
|
|
|
272
279
|
- 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
|
|
273
280
|
- 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
|
|
274
281
|
|
|
282
|
+
## FAQ
|
|
283
|
+
|
|
284
|
+
To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md).
|
|
285
|
+
|
|
275
286
|
## Contributing
|
|
276
287
|
|
|
277
288
|
We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
|
|
@@ -284,7 +295,7 @@ New contributors can find beginner-friendly issues tagged with "Good First Issue
|
|
|
284
295
|
|
|
285
296
|
## Maintainers
|
|
286
297
|
|
|
287
|
-
This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues),
|
|
298
|
+
This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
|
|
288
299
|
|
|
289
300
|
## License
|
|
290
301
|
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
|
|
2
2
|
lemonade/api.py,sha256=kGz8N_9TuN3peFG8fES0odN0bWR9itLNomlR-FC2z8k,5515
|
|
3
|
-
lemonade/cache.py,sha256=
|
|
3
|
+
lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
|
|
4
4
|
lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=c04nFsyfS0zYoDvZjLO-uEi12TFB5EWSD6fiWiI7OLQ,22
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
11
11
|
lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
|
|
12
12
|
lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
|
|
13
13
|
lemonade/common/inference_engines.py,sha256=lcmir_pATr71TfSBJoIZEi3G9xyxNwi2_xpPvPD8_xI,12932
|
|
14
|
-
lemonade/common/network.py,sha256=
|
|
14
|
+
lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
|
|
15
15
|
lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
|
|
16
16
|
lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
|
|
17
17
|
lemonade/common/system_info.py,sha256=dOtX8WLHCz1xmURZWnqhDbyNZv_AulrpX_bbI58eHFQ,27084
|
|
@@ -21,51 +21,52 @@ lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2E
|
|
|
21
21
|
lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
|
|
22
22
|
lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
|
|
23
23
|
lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
|
|
24
|
-
lemonade/tools/adapter.py,sha256=
|
|
24
|
+
lemonade/tools/adapter.py,sha256=Ex63Y1SPCOSV4M_QtzEn3YVd39d3yew0lpmEFgp8aH4,3169
|
|
25
25
|
lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
|
|
26
26
|
lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
|
|
27
27
|
lemonade/tools/management_tools.py,sha256=U8GaJnjdXyQ9sw8UxBQMc7glpaLciaVphASaQS4kJsA,10202
|
|
28
28
|
lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
|
|
29
29
|
lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
|
|
30
|
-
lemonade/tools/prompt.py,sha256=
|
|
30
|
+
lemonade/tools/prompt.py,sha256=PyLksp1k8jsZsU7XBRK61k1DUHhbdLa20h-AP8Noh3w,9011
|
|
31
31
|
lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
|
|
32
32
|
lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
|
|
33
33
|
lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
|
|
34
|
-
lemonade/tools/huggingface/utils.py,sha256=
|
|
35
|
-
lemonade/tools/llamacpp/bench.py,sha256=
|
|
36
|
-
lemonade/tools/llamacpp/load.py,sha256=
|
|
34
|
+
lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
|
|
35
|
+
lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
|
|
36
|
+
lemonade/tools/llamacpp/load.py,sha256=SKacK2n8LpC4DN4yALyEpV2c8_sgOv2G7t6Nlyu7XXg,6273
|
|
37
|
+
lemonade/tools/llamacpp/utils.py,sha256=vHA5kykkdHSsMGmbEA4RyOHr8wFIh1WenfhCvY8WxZs,22445
|
|
37
38
|
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
lemonade/tools/oga/bench.py,sha256=
|
|
39
|
-
lemonade/tools/oga/load.py,sha256=
|
|
40
|
-
lemonade/tools/oga/utils.py,sha256=
|
|
39
|
+
lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
|
|
40
|
+
lemonade/tools/oga/load.py,sha256=O82ezF7Jhgz3CJrxDWZYqLHyD_0NS1nsvfMWDaaUI4I,33728
|
|
41
|
+
lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
|
|
41
42
|
lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
43
|
lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
|
|
43
44
|
lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
|
|
44
45
|
lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
46
|
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
46
|
-
lemonade/tools/report/table.py,sha256=
|
|
47
|
+
lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
|
|
47
48
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
|
-
lemonade/tools/server/llamacpp.py,sha256=
|
|
49
|
-
lemonade/tools/server/serve.py,sha256=
|
|
49
|
+
lemonade/tools/server/llamacpp.py,sha256=OP0j74QcowEu3zFEcrKIsBbGDOFemBXS5F5DC6oQHaI,18853
|
|
50
|
+
lemonade/tools/server/serve.py,sha256=0-NprfsU-YrX8Qsf1atEi6wPJWemrPjHKEBHV69SwCQ,57046
|
|
50
51
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
51
|
-
lemonade/tools/server/tray.py,sha256=
|
|
52
|
+
lemonade/tools/server/tray.py,sha256=yoGCM8j_2KzPqo-AlYiauWd8QR56yp6jW6HZ9921Ydg,17525
|
|
52
53
|
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
53
54
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
54
|
-
lemonade/tools/server/static/styles.css,sha256=
|
|
55
|
-
lemonade/tools/server/static/webapp.html,sha256=
|
|
55
|
+
lemonade/tools/server/static/styles.css,sha256=8wQ5Cg4rbEh03kC8t7ALE7dB20GiD0Pfu5BAxh9hECU,26429
|
|
56
|
+
lemonade/tools/server/static/webapp.html,sha256=KZm1ZFIhQzLT2Y2wy3hFsQxcOxFzv-blaeLzc1ODhb8,36396
|
|
56
57
|
lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
|
|
57
58
|
lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
|
|
58
59
|
lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
|
|
59
60
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
60
|
-
lemonade_install/install.py,sha256=
|
|
61
|
-
lemonade_sdk-8.0.
|
|
62
|
-
lemonade_sdk-8.0.
|
|
63
|
-
lemonade_server/cli.py,sha256=
|
|
64
|
-
lemonade_server/model_manager.py,sha256=
|
|
65
|
-
lemonade_server/pydantic_models.py,sha256=
|
|
66
|
-
lemonade_server/server_models.json,sha256=
|
|
67
|
-
lemonade_sdk-8.0.
|
|
68
|
-
lemonade_sdk-8.0.
|
|
69
|
-
lemonade_sdk-8.0.
|
|
70
|
-
lemonade_sdk-8.0.
|
|
71
|
-
lemonade_sdk-8.0.
|
|
61
|
+
lemonade_install/install.py,sha256=TBX-VwEHcPo4WX0K_12pKKINnIK3o4SUo3L5XjkqEtw,27669
|
|
62
|
+
lemonade_sdk-8.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
63
|
+
lemonade_sdk-8.1.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
64
|
+
lemonade_server/cli.py,sha256=6QJ5fxNLuVUbuHauA5JHXf0H5dqJ5E4GNTo4YoMOJtg,16049
|
|
65
|
+
lemonade_server/model_manager.py,sha256=O3fIX52AqU0z10WzPmNEA3lQ_KjOqNq_G-SxjwIgEio,10781
|
|
66
|
+
lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
|
|
67
|
+
lemonade_server/server_models.json,sha256=gitKHj_VHANxjtcXeE5zFpukVO0HyEfKhu3ZaZsj2xo,8867
|
|
68
|
+
lemonade_sdk-8.1.0.dist-info/METADATA,sha256=c3JxCUYw5ujhGSb3FX3mG6UmgG5BLqik8a5j4oe8n7o,15712
|
|
69
|
+
lemonade_sdk-8.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
70
|
+
lemonade_sdk-8.1.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
|
|
71
|
+
lemonade_sdk-8.1.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
72
|
+
lemonade_sdk-8.1.0.dist-info/RECORD,,
|
lemonade_server/cli.py
CHANGED
|
@@ -57,7 +57,7 @@ def serve(
|
|
|
57
57
|
log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
|
|
58
58
|
|
|
59
59
|
# Hidden environment variable to enable input truncation (experimental feature)
|
|
60
|
-
truncate_inputs = "LEMONADE_TRUNCATE_INPUTS"
|
|
60
|
+
truncate_inputs = os.environ.get("LEMONADE_TRUNCATE_INPUTS", None)
|
|
61
61
|
|
|
62
62
|
# Start the server
|
|
63
63
|
serve_kwargs = {
|
lemonade_server/model_manager.py
CHANGED
|
@@ -6,31 +6,14 @@ import huggingface_hub
|
|
|
6
6
|
from importlib.metadata import distributions
|
|
7
7
|
from lemonade_server.pydantic_models import PullConfig
|
|
8
8
|
from lemonade.cache import DEFAULT_CACHE_DIR
|
|
9
|
+
from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
|
|
10
|
+
from lemonade.common.network import custom_snapshot_download
|
|
9
11
|
|
|
10
12
|
USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
class ModelManager:
|
|
14
16
|
|
|
15
|
-
@staticmethod
|
|
16
|
-
def parse_checkpoint(checkpoint: str) -> tuple[str, str | None]:
|
|
17
|
-
"""
|
|
18
|
-
Parse a checkpoint string that may contain a variant separated by a colon.
|
|
19
|
-
|
|
20
|
-
For GGUF models, the format is "repository:variant" (e.g., "unsloth/Qwen3-0.6B-GGUF:Q4_0").
|
|
21
|
-
For other models, there is no variant.
|
|
22
|
-
|
|
23
|
-
Args:
|
|
24
|
-
checkpoint: The checkpoint string, potentially with variant
|
|
25
|
-
|
|
26
|
-
Returns:
|
|
27
|
-
tuple: (base_checkpoint, variant) where variant is None if no colon is present
|
|
28
|
-
"""
|
|
29
|
-
if ":" in checkpoint:
|
|
30
|
-
base_checkpoint, variant = checkpoint.split(":", 1)
|
|
31
|
-
return base_checkpoint, variant
|
|
32
|
-
return checkpoint, None
|
|
33
|
-
|
|
34
17
|
@property
|
|
35
18
|
def supported_models(self) -> dict:
|
|
36
19
|
"""
|
|
@@ -98,7 +81,7 @@ class ModelManager:
|
|
|
98
81
|
downloaded_models = {}
|
|
99
82
|
downloaded_checkpoints = self.downloaded_hf_checkpoints
|
|
100
83
|
for model in self.supported_models:
|
|
101
|
-
base_checkpoint =
|
|
84
|
+
base_checkpoint = parse_checkpoint(
|
|
102
85
|
self.supported_models[model]["checkpoint"]
|
|
103
86
|
)[0]
|
|
104
87
|
if base_checkpoint in downloaded_checkpoints:
|
|
@@ -113,132 +96,6 @@ class ModelManager:
|
|
|
113
96
|
"""
|
|
114
97
|
return self.filter_models_by_backend(self.downloaded_models)
|
|
115
98
|
|
|
116
|
-
def identify_gguf_models(
|
|
117
|
-
self, checkpoint: str, variant: str, mmproj: str
|
|
118
|
-
) -> tuple[dict, list[str]]:
|
|
119
|
-
"""
|
|
120
|
-
Identifies the GGUF model files in the repository that match the variant.
|
|
121
|
-
"""
|
|
122
|
-
|
|
123
|
-
hint = """
|
|
124
|
-
The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
|
|
125
|
-
|
|
126
|
-
The VARIANT format can be one of several types:
|
|
127
|
-
1. Full filename: exact file to download
|
|
128
|
-
2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
|
|
129
|
-
3. Quantization variant: find a single file ending with the variant name (case insensitive)
|
|
130
|
-
4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
|
|
131
|
-
|
|
132
|
-
Examples:
|
|
133
|
-
- "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
|
|
134
|
-
- "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
|
|
135
|
-
- "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
|
|
136
|
-
- "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
|
|
137
|
-
"""
|
|
138
|
-
|
|
139
|
-
repo_files = huggingface_hub.list_repo_files(checkpoint)
|
|
140
|
-
sharded_files = []
|
|
141
|
-
|
|
142
|
-
# (case 1) If variant ends in .gguf, use it directly
|
|
143
|
-
if variant and variant.endswith(".gguf"):
|
|
144
|
-
variant_name = variant
|
|
145
|
-
if variant_name not in repo_files:
|
|
146
|
-
raise ValueError(
|
|
147
|
-
f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
|
|
148
|
-
)
|
|
149
|
-
# (case 2) If no variant is provided, get the first .gguf file in the repository
|
|
150
|
-
elif variant is None:
|
|
151
|
-
all_variants = [
|
|
152
|
-
f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
|
|
153
|
-
]
|
|
154
|
-
if len(all_variants) == 0:
|
|
155
|
-
raise ValueError(
|
|
156
|
-
f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
|
|
157
|
-
)
|
|
158
|
-
variant_name = all_variants[0]
|
|
159
|
-
else:
|
|
160
|
-
# (case 3) Find a single file ending with the variant name (case insensitive)
|
|
161
|
-
end_with_variant = [
|
|
162
|
-
f
|
|
163
|
-
for f in repo_files
|
|
164
|
-
if f.lower().endswith(f"{variant}.gguf".lower())
|
|
165
|
-
and "mmproj" not in f.lower()
|
|
166
|
-
]
|
|
167
|
-
if len(end_with_variant) == 1:
|
|
168
|
-
variant_name = end_with_variant[0]
|
|
169
|
-
elif len(end_with_variant) > 1:
|
|
170
|
-
raise ValueError(
|
|
171
|
-
f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
|
|
172
|
-
)
|
|
173
|
-
# (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
|
|
174
|
-
else:
|
|
175
|
-
sharded_files = [
|
|
176
|
-
f
|
|
177
|
-
for f in repo_files
|
|
178
|
-
if f.endswith(".gguf")
|
|
179
|
-
and f.lower().startswith(f"{variant}/".lower())
|
|
180
|
-
]
|
|
181
|
-
|
|
182
|
-
if not sharded_files:
|
|
183
|
-
raise ValueError(
|
|
184
|
-
f"No .gguf files found for variant {variant}. {hint}"
|
|
185
|
-
)
|
|
186
|
-
|
|
187
|
-
# Sort to ensure consistent ordering
|
|
188
|
-
sharded_files.sort()
|
|
189
|
-
|
|
190
|
-
# Use first file as primary (this is how llamacpp handles it)
|
|
191
|
-
variant_name = sharded_files[0]
|
|
192
|
-
|
|
193
|
-
core_files = {"variant": variant_name}
|
|
194
|
-
|
|
195
|
-
# If there is a mmproj file, add it to the patterns
|
|
196
|
-
if mmproj:
|
|
197
|
-
if mmproj not in repo_files:
|
|
198
|
-
raise ValueError(
|
|
199
|
-
f"The provided mmproj file {mmproj} was not found in {checkpoint}."
|
|
200
|
-
)
|
|
201
|
-
core_files["mmproj"] = mmproj
|
|
202
|
-
|
|
203
|
-
return core_files, sharded_files
|
|
204
|
-
|
|
205
|
-
def download_gguf(self, model_config: PullConfig) -> dict:
|
|
206
|
-
"""
|
|
207
|
-
Downloads the GGUF file for the given model configuration.
|
|
208
|
-
|
|
209
|
-
For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
|
|
210
|
-
will be downloaded but only the first file will be returned for loading.
|
|
211
|
-
"""
|
|
212
|
-
|
|
213
|
-
# This code handles all cases by constructing the appropriate filename or pattern
|
|
214
|
-
checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
|
|
215
|
-
|
|
216
|
-
# Identify the GGUF model files in the repository that match the variant
|
|
217
|
-
core_files, sharded_files = self.identify_gguf_models(
|
|
218
|
-
checkpoint, variant, model_config.mmproj
|
|
219
|
-
)
|
|
220
|
-
|
|
221
|
-
# Download the files
|
|
222
|
-
snapshot_folder = huggingface_hub.snapshot_download(
|
|
223
|
-
repo_id=checkpoint,
|
|
224
|
-
allow_patterns=list(core_files.values()) + sharded_files,
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
# Ensure we downloaded all expected files
|
|
228
|
-
for file in list(core_files.values()) + sharded_files:
|
|
229
|
-
expected_path = os.path.join(snapshot_folder, file)
|
|
230
|
-
if not os.path.exists(expected_path):
|
|
231
|
-
raise ValueError(
|
|
232
|
-
f"Hugging Face snapshot download for {model_config.checkpoint} "
|
|
233
|
-
f"expected file {file} not found at {expected_path}"
|
|
234
|
-
)
|
|
235
|
-
|
|
236
|
-
# Return a dict of the full path of the core GGUF files
|
|
237
|
-
return {
|
|
238
|
-
file_name: os.path.join(snapshot_folder, file_path)
|
|
239
|
-
for file_name, file_path in core_files.items()
|
|
240
|
-
}
|
|
241
|
-
|
|
242
99
|
def download_models(
|
|
243
100
|
self,
|
|
244
101
|
models: list[str],
|
|
@@ -317,9 +174,9 @@ class ModelManager:
|
|
|
317
174
|
print(f"Downloading {model} ({checkpoint_to_download})")
|
|
318
175
|
|
|
319
176
|
if "gguf" in checkpoint_to_download.lower():
|
|
320
|
-
|
|
177
|
+
download_gguf(gguf_model_config.checkpoint, gguf_model_config.mmproj)
|
|
321
178
|
else:
|
|
322
|
-
|
|
179
|
+
custom_snapshot_download(checkpoint_to_download)
|
|
323
180
|
|
|
324
181
|
# Register the model in user_models.json, creating that file if needed
|
|
325
182
|
# We do this registration after the download so that we don't register
|
|
@@ -373,12 +230,12 @@ class ModelManager:
|
|
|
373
230
|
print(f"Deleting {model_name} ({checkpoint})")
|
|
374
231
|
|
|
375
232
|
# Handle GGUF models that have the format "checkpoint:variant"
|
|
376
|
-
base_checkpoint =
|
|
233
|
+
base_checkpoint = parse_checkpoint(checkpoint)[0]
|
|
377
234
|
|
|
378
235
|
try:
|
|
379
236
|
# Get the local path using snapshot_download with local_files_only=True
|
|
380
|
-
snapshot_path =
|
|
381
|
-
|
|
237
|
+
snapshot_path = custom_snapshot_download(
|
|
238
|
+
base_checkpoint, local_files_only=True
|
|
382
239
|
)
|
|
383
240
|
|
|
384
241
|
# Navigate up to the model directory (parent of snapshots directory)
|