lemonade-sdk 8.0.6__py3-none-any.whl → 8.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/common/inference_engines.py +62 -77
- lemonade/common/network.py +18 -1
- lemonade/common/system_info.py +61 -44
- lemonade/tools/llamacpp/bench.py +3 -1
- lemonade/tools/llamacpp/load.py +13 -4
- lemonade/tools/llamacpp/utils.py +229 -61
- lemonade/tools/oga/load.py +239 -112
- lemonade/tools/oga/utils.py +19 -7
- lemonade/tools/server/llamacpp.py +30 -53
- lemonade/tools/server/serve.py +64 -123
- lemonade/tools/server/static/styles.css +208 -6
- lemonade/tools/server/static/webapp.html +510 -71
- lemonade/tools/server/tray.py +4 -2
- lemonade/tools/server/utils/thread.py +2 -4
- lemonade/version.py +1 -1
- lemonade_install/install.py +90 -86
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/METADATA +74 -24
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/RECORD +27 -27
- lemonade_server/cli.py +79 -26
- lemonade_server/model_manager.py +4 -3
- lemonade_server/pydantic_models.py +1 -4
- lemonade_server/server_models.json +60 -11
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/top_level.txt +0 -0
lemonade/tools/server/tray.py
CHANGED
|
@@ -263,8 +263,10 @@ class LemonadeTray(SystemTray):
|
|
|
263
263
|
self.server.uvicorn_server.should_exit = True
|
|
264
264
|
self.server_thread.join(timeout=2)
|
|
265
265
|
|
|
266
|
-
# Update the port
|
|
266
|
+
# Update the port in both the tray and the server instance
|
|
267
267
|
self.port = new_port
|
|
268
|
+
if self.server:
|
|
269
|
+
self.server.port = new_port
|
|
268
270
|
|
|
269
271
|
# Restart the server
|
|
270
272
|
self.server_thread = threading.Thread(target=self.start_server, daemon=True)
|
|
@@ -425,7 +427,7 @@ class LemonadeTray(SystemTray):
|
|
|
425
427
|
Start the uvicorn server.
|
|
426
428
|
"""
|
|
427
429
|
self.server = self.server_factory()
|
|
428
|
-
self.server.uvicorn_server = self.server.run_in_thread(
|
|
430
|
+
self.server.uvicorn_server = self.server.run_in_thread()
|
|
429
431
|
self.server.uvicorn_server.run()
|
|
430
432
|
|
|
431
433
|
def run(self):
|
|
@@ -26,7 +26,7 @@ class ServerRunner(threading.Thread):
|
|
|
26
26
|
def run(self):
|
|
27
27
|
try:
|
|
28
28
|
# Create the server instance
|
|
29
|
-
self.server = Server()
|
|
29
|
+
self.server = Server(port=self.port, log_level="warning")
|
|
30
30
|
|
|
31
31
|
# Configure the server with model/tokenizer
|
|
32
32
|
self.server.model = self.model
|
|
@@ -44,9 +44,7 @@ class ServerRunner(threading.Thread):
|
|
|
44
44
|
)
|
|
45
45
|
|
|
46
46
|
# Set up the server for threaded execution
|
|
47
|
-
self.uvicorn_server = self.server.run_in_thread(
|
|
48
|
-
port=self.port, host=self.host, log_level="warning"
|
|
49
|
-
)
|
|
47
|
+
self.uvicorn_server = self.server.run_in_thread(host=self.host)
|
|
50
48
|
|
|
51
49
|
# Set the ready event
|
|
52
50
|
self.ready_event.set()
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.0.6"
|
|
1
|
+
__version__ = "8.1.1"
|
lemonade_install/install.py
CHANGED
|
@@ -24,22 +24,6 @@
|
|
|
24
24
|
# In any python environment, only one set of artifacts can be installed at a time.
|
|
25
25
|
# Python environments created by Lemonade v6.1.x or earlier will need to be recreated.
|
|
26
26
|
#
|
|
27
|
-
# The Ryzen AI 1.3.0 artifact files use a different directory hierarchy.
|
|
28
|
-
# The Ryzen AI 1.3.0 hybrid artifacts directory hierarchy is:
|
|
29
|
-
#
|
|
30
|
-
# RYZEN_AI\hybrid\hybrid-llm-artifacts_1.3.0_lounge\hybrid-llm-artifacts\
|
|
31
|
-
# onnxruntime_genai\lib
|
|
32
|
-
# onnxruntime_genai\wheel
|
|
33
|
-
# onnx_utils\bin
|
|
34
|
-
# eula\eula
|
|
35
|
-
#
|
|
36
|
-
# The Ryzen AI 1.3.0 npu artifacts directory hierarchy is:
|
|
37
|
-
#
|
|
38
|
-
# RYZEN_AI\npu\amd_oga\
|
|
39
|
-
# bins\xclbin\stx
|
|
40
|
-
# libs
|
|
41
|
-
# wheels
|
|
42
|
-
#
|
|
43
27
|
|
|
44
28
|
import argparse
|
|
45
29
|
import glob
|
|
@@ -56,6 +40,13 @@ import zipfile
|
|
|
56
40
|
DEFAULT_RYZEN_AI_VERSION = "1.4.0"
|
|
57
41
|
version_info_filename = "version_info.json"
|
|
58
42
|
|
|
43
|
+
# NPU Driver configuration
|
|
44
|
+
NPU_DRIVER_DOWNLOAD_URL = (
|
|
45
|
+
"https://account.amd.com/en/forms/downloads/"
|
|
46
|
+
"ryzenai-eula-public-xef.html?filename=NPU_RAI1.5_280_WHQL.zip"
|
|
47
|
+
)
|
|
48
|
+
REQUIRED_NPU_DRIVER_VERSION = "32.0.203.280"
|
|
49
|
+
|
|
59
50
|
lemonade_install_dir = Path(__file__).parent.parent.parent
|
|
60
51
|
DEFAULT_QUARK_VERSION = "quark-0.6.0"
|
|
61
52
|
DEFAULT_QUARK_DIR = os.path.join(
|
|
@@ -66,14 +57,6 @@ DEFAULT_QUARK_DIR = os.path.join(
|
|
|
66
57
|
SUPPORTED_RYZEN_AI_SERIES = ["300"]
|
|
67
58
|
|
|
68
59
|
npu_install_data = {
|
|
69
|
-
"1.3.0": {
|
|
70
|
-
"artifacts_zipfile": "ryzen_ai_13_ga/npu-llm-artifacts_1.3.0.zip",
|
|
71
|
-
"license_file": (
|
|
72
|
-
"https://account.amd.com/content/dam/account/en/licenses/download/"
|
|
73
|
-
"amd-end-user-license-agreement.pdf"
|
|
74
|
-
),
|
|
75
|
-
"license_tag": "Beta ",
|
|
76
|
-
},
|
|
77
60
|
"1.4.0": {
|
|
78
61
|
"artifacts_zipfile": (
|
|
79
62
|
"https://www.xilinx.com/bin/public/openDownload?"
|
|
@@ -88,17 +71,6 @@ npu_install_data = {
|
|
|
88
71
|
}
|
|
89
72
|
|
|
90
73
|
hybrid_install_data = {
|
|
91
|
-
"1.3.0": {
|
|
92
|
-
"artifacts_zipfile": (
|
|
93
|
-
"https://www.xilinx.com/bin/public/openDownload?"
|
|
94
|
-
"filename=hybrid-llm-artifacts_1.3.0_012725.zip"
|
|
95
|
-
),
|
|
96
|
-
"license_file": (
|
|
97
|
-
"https://www.xilinx.com/bin/public/openDownload?"
|
|
98
|
-
"filename=AMD%20End%20User%20License%20Agreement.pdf"
|
|
99
|
-
),
|
|
100
|
-
"license_tag": "",
|
|
101
|
-
},
|
|
102
74
|
"1.4.0": {
|
|
103
75
|
"artifacts_zipfile": (
|
|
104
76
|
"https://www.xilinx.com/bin/public/openDownload?"
|
|
@@ -154,10 +126,7 @@ def get_oga_npu_dir():
|
|
|
154
126
|
version_info = get_ryzen_ai_version_info()
|
|
155
127
|
version = version_info["version"]
|
|
156
128
|
ryzen_ai_folder = get_ryzen_ai_path()
|
|
157
|
-
|
|
158
|
-
npu_dir = os.path.join(ryzen_ai_folder, "npu", "amd_oga")
|
|
159
|
-
else:
|
|
160
|
-
npu_dir = os.path.join(ryzen_ai_folder, "npu")
|
|
129
|
+
npu_dir = os.path.join(ryzen_ai_folder, "npu")
|
|
161
130
|
if not os.path.isdir(npu_dir):
|
|
162
131
|
raise RuntimeError(
|
|
163
132
|
f"The npu artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
|
|
@@ -171,15 +140,7 @@ def get_oga_hybrid_dir():
|
|
|
171
140
|
version_info = get_ryzen_ai_version_info()
|
|
172
141
|
version = version_info["version"]
|
|
173
142
|
ryzen_ai_folder = get_ryzen_ai_path()
|
|
174
|
-
|
|
175
|
-
hybrid_dir = os.path.join(
|
|
176
|
-
ryzen_ai_folder,
|
|
177
|
-
"hybrid",
|
|
178
|
-
"hybrid-llm-artifacts_1.3.0_lounge",
|
|
179
|
-
"hybrid-llm-artifacts",
|
|
180
|
-
)
|
|
181
|
-
else:
|
|
182
|
-
hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
|
|
143
|
+
hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
|
|
183
144
|
if not os.path.isdir(hybrid_dir):
|
|
184
145
|
raise RuntimeError(
|
|
185
146
|
f"The hybrid artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
|
|
@@ -189,6 +150,37 @@ def get_oga_hybrid_dir():
|
|
|
189
150
|
return hybrid_dir, version
|
|
190
151
|
|
|
191
152
|
|
|
153
|
+
def _get_ryzenai_version_info(device=None):
|
|
154
|
+
"""
|
|
155
|
+
Centralized version detection for RyzenAI installations.
|
|
156
|
+
Uses lazy imports to avoid import errors when OGA is not installed.
|
|
157
|
+
"""
|
|
158
|
+
try:
|
|
159
|
+
# Lazy import to avoid errors when OGA is not installed
|
|
160
|
+
from packaging.version import Version
|
|
161
|
+
import onnxruntime_genai as og
|
|
162
|
+
|
|
163
|
+
if Version(og.__version__) >= Version("0.7.0"):
|
|
164
|
+
oga_path = os.path.dirname(og.__file__)
|
|
165
|
+
if og.__version__ == "0.7.0.2":
|
|
166
|
+
return "1.5.0", oga_path
|
|
167
|
+
else:
|
|
168
|
+
return "1.4.0", oga_path
|
|
169
|
+
else:
|
|
170
|
+
if device == "npu":
|
|
171
|
+
oga_path, version = get_oga_npu_dir()
|
|
172
|
+
else:
|
|
173
|
+
oga_path, version = get_oga_hybrid_dir()
|
|
174
|
+
return version, oga_path
|
|
175
|
+
except ImportError as e:
|
|
176
|
+
raise ImportError(
|
|
177
|
+
f"{e}\n Please install lemonade-sdk with "
|
|
178
|
+
"one of the oga extras, for example:\n"
|
|
179
|
+
"pip install lemonade-sdk[dev,oga-cpu]\n"
|
|
180
|
+
"See https://lemonade_server.ai/install_options.html for details"
|
|
181
|
+
) from e
|
|
182
|
+
|
|
183
|
+
|
|
192
184
|
def download_lfs_file(token, file, output_filename):
|
|
193
185
|
"""Downloads a file from LFS"""
|
|
194
186
|
import requests
|
|
@@ -426,8 +418,6 @@ class Install:
|
|
|
426
418
|
"npu",
|
|
427
419
|
"hybrid",
|
|
428
420
|
"unified",
|
|
429
|
-
"npu-1.3.0",
|
|
430
|
-
"hybrid-1.3.0",
|
|
431
421
|
"npu-1.4.0",
|
|
432
422
|
"hybrid-1.4.0",
|
|
433
423
|
"unified-1.4.0",
|
|
@@ -461,6 +451,12 @@ class Install:
|
|
|
461
451
|
choices=["0.6.0"],
|
|
462
452
|
)
|
|
463
453
|
|
|
454
|
+
parser.add_argument(
|
|
455
|
+
"--llamacpp",
|
|
456
|
+
help="Install llama.cpp binaries with specified backend",
|
|
457
|
+
choices=["rocm", "vulkan"],
|
|
458
|
+
)
|
|
459
|
+
|
|
464
460
|
return parser
|
|
465
461
|
|
|
466
462
|
@staticmethod
|
|
@@ -524,25 +520,14 @@ class Install:
|
|
|
524
520
|
# Install all whl files in the specified wheels folder
|
|
525
521
|
if wheels_full_path is not None:
|
|
526
522
|
print(f"\nInstalling wheels from {wheels_full_path}\n")
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
print(f"\nInstalling {file} with command {install_cmd}\n")
|
|
536
|
-
subprocess.run(install_cmd, check=True, shell=True)
|
|
537
|
-
else:
|
|
538
|
-
# Install all the wheel files together, allowing pip to work out the dependencies
|
|
539
|
-
wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
|
|
540
|
-
install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
|
|
541
|
-
subprocess.run(
|
|
542
|
-
install_cmd,
|
|
543
|
-
check=True,
|
|
544
|
-
shell=True,
|
|
545
|
-
)
|
|
523
|
+
# Install all the wheel files together, allowing pip to work out the dependencies
|
|
524
|
+
wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
|
|
525
|
+
install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
|
|
526
|
+
subprocess.run(
|
|
527
|
+
install_cmd,
|
|
528
|
+
check=True,
|
|
529
|
+
shell=True,
|
|
530
|
+
)
|
|
546
531
|
|
|
547
532
|
# Delete the zip file
|
|
548
533
|
print(f"\nCleaning up, removing {archive_file_path}\n")
|
|
@@ -611,10 +596,7 @@ class Install:
|
|
|
611
596
|
license_file = npu_install_data[version].get("license_file", None)
|
|
612
597
|
license_tag = npu_install_data[version].get("license_tag", None)
|
|
613
598
|
install_dir = os.path.join(ryzen_ai_folder, "npu")
|
|
614
|
-
|
|
615
|
-
wheels_full_path = os.path.join(install_dir, "amd_oga/wheels")
|
|
616
|
-
else:
|
|
617
|
-
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
599
|
+
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
618
600
|
|
|
619
601
|
if license_file:
|
|
620
602
|
Install._get_license_acceptance(version, license_file, license_tag, yes)
|
|
@@ -641,17 +623,7 @@ class Install:
|
|
|
641
623
|
license_file = hybrid_install_data[version].get("license_file", None)
|
|
642
624
|
license_tag = hybrid_install_data[version].get("license_tag", None)
|
|
643
625
|
install_dir = os.path.join(ryzen_ai_folder, "hybrid")
|
|
644
|
-
|
|
645
|
-
wheels_full_path = os.path.join(
|
|
646
|
-
ryzen_ai_folder,
|
|
647
|
-
"hybrid",
|
|
648
|
-
"hybrid-llm-artifacts_1.3.0_lounge",
|
|
649
|
-
"hybrid-llm-artifacts",
|
|
650
|
-
"onnxruntime_genai",
|
|
651
|
-
"wheel",
|
|
652
|
-
)
|
|
653
|
-
else:
|
|
654
|
-
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
626
|
+
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
655
627
|
|
|
656
628
|
if license_file:
|
|
657
629
|
Install._get_license_acceptance(version, license_file, license_tag, yes)
|
|
@@ -669,6 +641,21 @@ class Install:
|
|
|
669
641
|
# Check if the processor is supported before proceeding
|
|
670
642
|
check_ryzen_ai_processor()
|
|
671
643
|
|
|
644
|
+
warning_msg = (
|
|
645
|
+
"\n" + "=" * 80 + "\n"
|
|
646
|
+
"WARNING: IMPORTANT: NEW RYZEN AI 1.5.0 INSTALLATION PROCESS\n"
|
|
647
|
+
+ "=" * 80
|
|
648
|
+
+ "\n"
|
|
649
|
+
"Starting with Ryzen AI 1.5.0, installation is now available through PyPI.\n"
|
|
650
|
+
"For new installations, consider using:\n\n"
|
|
651
|
+
"pip install lemonade-sdk[oga-ryzenai] --extra-index-url https://pypi.amd.com/simple\n\n"
|
|
652
|
+
"This legacy installation method (lemonade-install --ryzenai) is still\n"
|
|
653
|
+
"supported for version 1.4.0, but may be deprecated in future releases.\n"
|
|
654
|
+
+ "=" * 80
|
|
655
|
+
+ "\n"
|
|
656
|
+
)
|
|
657
|
+
print(warning_msg)
|
|
658
|
+
|
|
672
659
|
# Delete any previous Ryzen AI installation in this environment
|
|
673
660
|
ryzen_ai_folder = get_ryzen_ai_path(check_exists=False)
|
|
674
661
|
if os.path.exists(ryzen_ai_folder):
|
|
@@ -758,18 +745,32 @@ class Install:
|
|
|
758
745
|
|
|
759
746
|
print(f"\nQuark installed successfully at: {quark_path}")
|
|
760
747
|
|
|
748
|
+
@staticmethod
|
|
749
|
+
def _install_llamacpp(backend):
|
|
750
|
+
"""
|
|
751
|
+
Install llama.cpp binaries with the specified backend.
|
|
752
|
+
|
|
753
|
+
Args:
|
|
754
|
+
backend: The backend to use ('rocm' or 'vulkan')
|
|
755
|
+
"""
|
|
756
|
+
|
|
757
|
+
from lemonade.tools.llamacpp.utils import install_llamacpp
|
|
758
|
+
|
|
759
|
+
install_llamacpp(backend)
|
|
760
|
+
|
|
761
761
|
def run(
|
|
762
762
|
self,
|
|
763
763
|
ryzenai: Optional[str] = None,
|
|
764
764
|
build_model: Optional[str] = None,
|
|
765
765
|
quark: Optional[str] = None,
|
|
766
|
+
llamacpp: Optional[str] = None,
|
|
766
767
|
yes: bool = False,
|
|
767
768
|
token: Optional[str] = None,
|
|
768
769
|
):
|
|
769
|
-
if ryzenai is None and quark is None and
|
|
770
|
+
if ryzenai is None and quark is None and llamacpp is None:
|
|
770
771
|
raise ValueError(
|
|
771
772
|
"You must select something to install, "
|
|
772
|
-
"for example `--ryzenai`, `--quark`, or `--
|
|
773
|
+
"for example `--ryzenai`, `--quark`, or `--llamacpp`"
|
|
773
774
|
)
|
|
774
775
|
|
|
775
776
|
if ryzenai is not None:
|
|
@@ -778,6 +779,9 @@ class Install:
|
|
|
778
779
|
if quark is not None:
|
|
779
780
|
self._install_quark(quark)
|
|
780
781
|
|
|
782
|
+
if llamacpp is not None:
|
|
783
|
+
self._install_llamacpp(llamacpp)
|
|
784
|
+
|
|
781
785
|
|
|
782
786
|
def main():
|
|
783
787
|
installer = Install()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.0.6
|
|
3
|
+
Version: 8.1.1
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.13
|
|
@@ -22,16 +22,16 @@ Requires-Dist: pytz
|
|
|
22
22
|
Requires-Dist: zstandard
|
|
23
23
|
Requires-Dist: fastapi
|
|
24
24
|
Requires-Dist: uvicorn[standard]
|
|
25
|
-
Requires-Dist: openai
|
|
25
|
+
Requires-Dist: openai<1.97.1,>=1.81.0
|
|
26
26
|
Requires-Dist: transformers<=4.53.2
|
|
27
27
|
Requires-Dist: jinja2
|
|
28
28
|
Requires-Dist: tabulate
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
30
|
-
Requires-Dist: huggingface-hub==0.33.0
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
Requires-Dist:
|
|
34
|
-
Requires-Dist: protobuf>=6.30.1; extra == "oga-
|
|
30
|
+
Requires-Dist: huggingface-hub[hf_xet]==0.33.0
|
|
31
|
+
Requires-Dist: python-dotenv
|
|
32
|
+
Provides-Extra: oga-ryzenai
|
|
33
|
+
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
|
|
34
|
+
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
35
35
|
Provides-Extra: oga-cpu
|
|
36
36
|
Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
|
|
37
37
|
Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
|
|
@@ -41,16 +41,35 @@ Requires-Dist: accelerate; extra == "dev"
|
|
|
41
41
|
Requires-Dist: datasets; extra == "dev"
|
|
42
42
|
Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
43
43
|
Requires-Dist: matplotlib; extra == "dev"
|
|
44
|
+
Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
|
|
44
45
|
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
45
46
|
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
47
|
+
Provides-Extra: oga-hybrid
|
|
48
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
|
|
49
|
+
Provides-Extra: oga-unified
|
|
50
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
|
|
46
51
|
Provides-Extra: oga-hybrid-minimal
|
|
47
|
-
Requires-Dist: lemonade-sdk[oga-
|
|
52
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
|
|
48
53
|
Provides-Extra: oga-cpu-minimal
|
|
49
54
|
Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
|
|
55
|
+
Provides-Extra: oga-npu-minimal
|
|
56
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
|
|
50
57
|
Provides-Extra: llm
|
|
51
58
|
Requires-Dist: lemonade-sdk[dev]; extra == "llm"
|
|
52
59
|
Provides-Extra: llm-oga-cpu
|
|
53
60
|
Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
|
|
61
|
+
Provides-Extra: llm-oga-npu
|
|
62
|
+
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
63
|
+
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
64
|
+
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
65
|
+
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
66
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
67
|
+
Provides-Extra: llm-oga-hybrid
|
|
68
|
+
Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
|
|
69
|
+
Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
|
|
70
|
+
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
|
|
71
|
+
Provides-Extra: llm-oga-unified
|
|
72
|
+
Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
54
73
|
Provides-Extra: llm-oga-igpu
|
|
55
74
|
Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
|
|
56
75
|
Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
|
|
@@ -61,16 +80,6 @@ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
|
|
|
61
80
|
Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
|
|
62
81
|
Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
|
|
63
82
|
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
|
|
64
|
-
Provides-Extra: llm-oga-npu
|
|
65
|
-
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
66
|
-
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
67
|
-
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
68
|
-
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
69
|
-
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
70
|
-
Provides-Extra: llm-oga-hybrid
|
|
71
|
-
Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
|
|
72
|
-
Provides-Extra: llm-oga-unified
|
|
73
|
-
Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
74
83
|
Dynamic: author-email
|
|
75
84
|
Dynamic: description
|
|
76
85
|
Dynamic: description-content-type
|
|
@@ -129,7 +138,9 @@ Dynamic: summary
|
|
|
129
138
|
<a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
|
|
130
139
|
</h3>
|
|
131
140
|
|
|
132
|
-
Lemonade
|
|
141
|
+
Lemonade helps users run local LLMs with the highest performance by configuring state-of-the-art inference engines for their NPUs and GPUs.
|
|
142
|
+
|
|
143
|
+
Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-your-on-device-ai-revolution/), research teams like [Hazy Research at Stanford](https://www.amd.com/en/developer/resources/technical-articles/2025/minions--on-device-and-cloud-language-model-collaboration-on-ryz.html), and large companies like [AMD](https://www.amd.com/en/developer/resources/technical-articles/unlocking-a-wave-of-llm-apps-on-ryzen-ai-through-lemonade-server.html) use Lemonade to run LLMs.
|
|
133
144
|
|
|
134
145
|
## Getting Started
|
|
135
146
|
|
|
@@ -148,7 +159,7 @@ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus
|
|
|
148
159
|
</p>
|
|
149
160
|
|
|
150
161
|
> [!TIP]
|
|
151
|
-
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email
|
|
162
|
+
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or [email](lemonade@amd.com).
|
|
152
163
|
|
|
153
164
|
## Using the CLI
|
|
154
165
|
|
|
@@ -170,11 +181,14 @@ To check all models available, use the `list` command:
|
|
|
170
181
|
lemonade-server list
|
|
171
182
|
```
|
|
172
183
|
|
|
173
|
-
> Note
|
|
184
|
+
> **Note**: If you installed from source, use the `lemonade-server-dev` command instead.
|
|
185
|
+
|
|
186
|
+
> **Tip**: You can use `--llamacpp vulkan/rocm` to select a backend when running GGUF models.
|
|
187
|
+
|
|
174
188
|
|
|
175
189
|
## Model Library
|
|
176
190
|
|
|
177
|
-
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/
|
|
191
|
+
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
|
|
178
192
|
|
|
179
193
|
You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
|
|
180
194
|
<p align="center">
|
|
@@ -212,7 +226,7 @@ Lemonade supports the following configurations, while also making it easy to swi
|
|
|
212
226
|
<tr>
|
|
213
227
|
<td><strong>🎮 GPU</strong></td>
|
|
214
228
|
<td align="center">—</td>
|
|
215
|
-
<td align="center">Vulkan: All platforms<br
|
|
229
|
+
<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
|
|
216
230
|
<td align="center">—</td>
|
|
217
231
|
<td align="center">✅</td>
|
|
218
232
|
<td align="center">✅</td>
|
|
@@ -228,6 +242,38 @@ Lemonade supports the following configurations, while also making it easy to swi
|
|
|
228
242
|
</tbody>
|
|
229
243
|
</table>
|
|
230
244
|
|
|
245
|
+
<details>
|
|
246
|
+
<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
|
|
247
|
+
|
|
248
|
+
<br>
|
|
249
|
+
|
|
250
|
+
<table>
|
|
251
|
+
<thead>
|
|
252
|
+
<tr>
|
|
253
|
+
<th>Architecture</th>
|
|
254
|
+
<th>Platform Support</th>
|
|
255
|
+
<th>GPU Models</th>
|
|
256
|
+
</tr>
|
|
257
|
+
</thead>
|
|
258
|
+
<tbody>
|
|
259
|
+
<tr>
|
|
260
|
+
<td><b>gfx1151</b> (STX Halo)</td>
|
|
261
|
+
<td>Windows, Ubuntu</td>
|
|
262
|
+
<td>Ryzen AI MAX+ Pro 395</td>
|
|
263
|
+
</tr>
|
|
264
|
+
<tr>
|
|
265
|
+
<td><b>gfx120X</b> (RDNA4)</td>
|
|
266
|
+
<td>Windows only</td>
|
|
267
|
+
<td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
|
|
268
|
+
</tr>
|
|
269
|
+
<tr>
|
|
270
|
+
<td><b>gfx110X</b> (RDNA3)</td>
|
|
271
|
+
<td>Windows, Ubuntu</td>
|
|
272
|
+
<td>Radeon PRO W7900/W7800/W7700/V710, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT</td>
|
|
273
|
+
</tr>
|
|
274
|
+
</tbody>
|
|
275
|
+
</table>
|
|
276
|
+
</details>
|
|
231
277
|
|
|
232
278
|
## Integrate Lemonade Server with Your Application
|
|
233
279
|
|
|
@@ -263,7 +309,7 @@ completion = client.chat.completions.create(
|
|
|
263
309
|
print(completion.choices[0].message.content)
|
|
264
310
|
```
|
|
265
311
|
|
|
266
|
-
For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
|
|
312
|
+
For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
|
|
267
313
|
|
|
268
314
|
## Beyond an LLM Server
|
|
269
315
|
|
|
@@ -272,6 +318,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
|
|
|
272
318
|
- 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
|
|
273
319
|
- 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
|
|
274
320
|
|
|
321
|
+
## FAQ
|
|
322
|
+
|
|
323
|
+
To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
|
|
324
|
+
|
|
275
325
|
## Contributing
|
|
276
326
|
|
|
277
327
|
We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
|
|
@@ -4,17 +4,17 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
|
|
|
4
4
|
lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=8YlEPKK1Cm5T4dPa2BQPpPwVVTzjPLnmqAeNcTb5nOw,22
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
11
11
|
lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
|
|
12
12
|
lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
|
|
13
|
-
lemonade/common/inference_engines.py,sha256=
|
|
14
|
-
lemonade/common/network.py,sha256=
|
|
13
|
+
lemonade/common/inference_engines.py,sha256=OJQcED9P1ZeQ8d11lDMNeAoaFoUuZlsDcwEZXLbqWRg,12579
|
|
14
|
+
lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
|
|
15
15
|
lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
|
|
16
16
|
lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
|
|
17
|
-
lemonade/common/system_info.py,sha256=
|
|
17
|
+
lemonade/common/system_info.py,sha256=pn-k3zMQCbt5cu3aHXa4cENgrubOK97gs9PYdGPsFXA,28405
|
|
18
18
|
lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
|
|
19
19
|
lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
|
|
20
20
|
lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
|
|
@@ -32,13 +32,13 @@ lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
|
|
|
32
32
|
lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
|
|
33
33
|
lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
|
|
34
34
|
lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
|
|
35
|
-
lemonade/tools/llamacpp/bench.py,sha256=
|
|
36
|
-
lemonade/tools/llamacpp/load.py,sha256=
|
|
37
|
-
lemonade/tools/llamacpp/utils.py,sha256=
|
|
35
|
+
lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
|
|
36
|
+
lemonade/tools/llamacpp/load.py,sha256=DFCvQN548Ch9H8U_rHOiYviinzw6vixb5-V7xLj7XE4,6499
|
|
37
|
+
lemonade/tools/llamacpp/utils.py,sha256=CTWnzbEYGPSbOizF26yCnyNrHDY19pLusU-YyND992s,29070
|
|
38
38
|
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
39
|
lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
|
|
40
|
-
lemonade/tools/oga/load.py,sha256=
|
|
41
|
-
lemonade/tools/oga/utils.py,sha256=
|
|
40
|
+
lemonade/tools/oga/load.py,sha256=6Pf_QrHpIXDbfpTwFNRj4RmWTxI-RImhYuqRvmTVgmY,33722
|
|
41
|
+
lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
|
|
42
42
|
lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
43
|
lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
|
|
44
44
|
lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
|
|
@@ -46,27 +46,27 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
46
46
|
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
47
47
|
lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
|
|
48
48
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
-
lemonade/tools/server/llamacpp.py,sha256=
|
|
50
|
-
lemonade/tools/server/serve.py,sha256=
|
|
49
|
+
lemonade/tools/server/llamacpp.py,sha256=KZO4npzefvbaPvlZbpCYsdW0tMSfmmupT8gaK9y65I8,17962
|
|
50
|
+
lemonade/tools/server/serve.py,sha256=PAAGowj2Z5AQIW3G1l52taNyf_0U4kRFR3G735M4DsU,55513
|
|
51
51
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
52
|
-
lemonade/tools/server/tray.py,sha256=
|
|
52
|
+
lemonade/tools/server/tray.py,sha256=qlQKBkQwG9W2v9GTyycvFc12_jly6vPU1uEkrIFBGTs,17624
|
|
53
53
|
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
54
54
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
55
|
-
lemonade/tools/server/static/styles.css,sha256=
|
|
56
|
-
lemonade/tools/server/static/webapp.html,sha256=
|
|
55
|
+
lemonade/tools/server/static/styles.css,sha256=M_JrH_vML65MWun-C8XCvLOFw35qZURSa77Fk4fVngQ,30029
|
|
56
|
+
lemonade/tools/server/static/webapp.html,sha256=oU6FZHGQCq-SoT6VkWObQvYzzNS0ser5Fmqx2j_5jCI,54380
|
|
57
57
|
lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
|
|
58
58
|
lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
|
|
59
|
-
lemonade/tools/server/utils/thread.py,sha256=
|
|
59
|
+
lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
|
|
60
60
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
61
|
-
lemonade_install/install.py,sha256=
|
|
62
|
-
lemonade_sdk-8.
|
|
63
|
-
lemonade_sdk-8.
|
|
64
|
-
lemonade_server/cli.py,sha256=
|
|
65
|
-
lemonade_server/model_manager.py,sha256=
|
|
66
|
-
lemonade_server/pydantic_models.py,sha256=
|
|
67
|
-
lemonade_server/server_models.json,sha256=
|
|
68
|
-
lemonade_sdk-8.
|
|
69
|
-
lemonade_sdk-8.
|
|
70
|
-
lemonade_sdk-8.
|
|
71
|
-
lemonade_sdk-8.
|
|
72
|
-
lemonade_sdk-8.
|
|
61
|
+
lemonade_install/install.py,sha256=Zl_JtEIhbqZZTvxcqtq895IomEN-JNxp9xOZEtahMHQ,28289
|
|
62
|
+
lemonade_sdk-8.1.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
63
|
+
lemonade_sdk-8.1.1.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
64
|
+
lemonade_server/cli.py,sha256=CFfhrRgZNJCd0rDRBF3TeS3dMJgwlKGtvT5_kbsWaXk,17316
|
|
65
|
+
lemonade_server/model_manager.py,sha256=O3fIX52AqU0z10WzPmNEA3lQ_KjOqNq_G-SxjwIgEio,10781
|
|
66
|
+
lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
|
|
67
|
+
lemonade_server/server_models.json,sha256=iag_dG9S1tkHZUhkJmGAfiUJkgEazdQSv7stC1fVAsQ,9741
|
|
68
|
+
lemonade_sdk-8.1.1.dist-info/METADATA,sha256=XT9cwNUAkhwQ6kad6l7t2nj7m8S0t-9GvaFLOMxLCyE,17065
|
|
69
|
+
lemonade_sdk-8.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
70
|
+
lemonade_sdk-8.1.1.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
|
|
71
|
+
lemonade_sdk-8.1.1.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
72
|
+
lemonade_sdk-8.1.1.dist-info/RECORD,,
|