lemonade-sdk 8.0.6__py3-none-any.whl → 8.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/common/network.py +18 -1
- lemonade/tools/llamacpp/bench.py +3 -1
- lemonade/tools/llamacpp/utils.py +7 -7
- lemonade/tools/oga/load.py +239 -112
- lemonade/tools/oga/utils.py +19 -7
- lemonade/tools/server/serve.py +19 -28
- lemonade/tools/server/static/styles.css +5 -6
- lemonade/tools/server/static/webapp.html +3 -0
- lemonade/version.py +1 -1
- lemonade_install/install.py +65 -84
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/METADATA +30 -19
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/RECORD +21 -21
- lemonade_server/cli.py +1 -1
- lemonade_server/model_manager.py +4 -3
- lemonade_server/pydantic_models.py +1 -4
- lemonade_server/server_models.json +35 -11
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/top_level.txt +0 -0
lemonade/tools/server/serve.py
CHANGED
|
@@ -284,7 +284,7 @@ class Server(ManagementTool):
|
|
|
284
284
|
def _setup_server_common(
|
|
285
285
|
self,
|
|
286
286
|
port: int,
|
|
287
|
-
truncate_inputs:
|
|
287
|
+
truncate_inputs: Optional[int] = None,
|
|
288
288
|
log_level: str = DEFAULT_LOG_LEVEL,
|
|
289
289
|
tray: bool = False,
|
|
290
290
|
log_file: str = None,
|
|
@@ -295,7 +295,7 @@ class Server(ManagementTool):
|
|
|
295
295
|
|
|
296
296
|
Args:
|
|
297
297
|
port: Port number for the server
|
|
298
|
-
truncate_inputs:
|
|
298
|
+
truncate_inputs: Truncate messages to this length
|
|
299
299
|
log_level: Logging level to configure
|
|
300
300
|
threaded_mode: Whether this is being set up for threaded execution
|
|
301
301
|
"""
|
|
@@ -372,7 +372,7 @@ class Server(ManagementTool):
|
|
|
372
372
|
_=None,
|
|
373
373
|
port: int = DEFAULT_PORT,
|
|
374
374
|
log_level: str = DEFAULT_LOG_LEVEL,
|
|
375
|
-
truncate_inputs:
|
|
375
|
+
truncate_inputs: Optional[int] = None,
|
|
376
376
|
tray: bool = False,
|
|
377
377
|
log_file: str = None,
|
|
378
378
|
):
|
|
@@ -393,7 +393,7 @@ class Server(ManagementTool):
|
|
|
393
393
|
port: int = DEFAULT_PORT,
|
|
394
394
|
host: str = "localhost",
|
|
395
395
|
log_level: str = "warning",
|
|
396
|
-
truncate_inputs:
|
|
396
|
+
truncate_inputs: Optional[int] = None,
|
|
397
397
|
):
|
|
398
398
|
"""
|
|
399
399
|
Set up the server for running in a thread.
|
|
@@ -1099,29 +1099,20 @@ class Server(ManagementTool):
|
|
|
1099
1099
|
)
|
|
1100
1100
|
self.input_tokens = len(input_ids[0])
|
|
1101
1101
|
|
|
1102
|
-
if
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
self.
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
f"Input exceeded {self.llm_loaded.max_prompt_length} tokens. "
|
|
1117
|
-
f"Truncated {truncate_amount} tokens."
|
|
1118
|
-
)
|
|
1119
|
-
logging.warning(truncation_message)
|
|
1120
|
-
else:
|
|
1121
|
-
raise RuntimeError(
|
|
1122
|
-
f"Prompt tokens ({self.input_tokens}) cannot be greater "
|
|
1123
|
-
f"than the model's max prompt length ({self.llm_loaded.max_prompt_length})"
|
|
1124
|
-
)
|
|
1102
|
+
if self.truncate_inputs and self.truncate_inputs > self.input_tokens:
|
|
1103
|
+
# Truncate input ids
|
|
1104
|
+
truncate_amount = self.input_tokens - self.truncate_inputs
|
|
1105
|
+
input_ids = input_ids[: self.truncate_inputs]
|
|
1106
|
+
|
|
1107
|
+
# Update token count
|
|
1108
|
+
self.input_tokens = len(input_ids)
|
|
1109
|
+
|
|
1110
|
+
# Show warning message
|
|
1111
|
+
truncation_message = (
|
|
1112
|
+
f"Input exceeded {self.truncate_inputs} tokens. "
|
|
1113
|
+
f"Truncated {truncate_amount} tokens."
|
|
1114
|
+
)
|
|
1115
|
+
logging.warning(truncation_message)
|
|
1125
1116
|
|
|
1126
1117
|
# Log the input tokens early to avoid this not showing due to potential crashes
|
|
1127
1118
|
logging.debug(f"Input Tokens: {self.input_tokens}")
|
|
@@ -1317,7 +1308,7 @@ class Server(ManagementTool):
|
|
|
1317
1308
|
self.tokenizer = None
|
|
1318
1309
|
self.model = None
|
|
1319
1310
|
|
|
1320
|
-
default_message =
|
|
1311
|
+
default_message = "see stack trace and error message below"
|
|
1321
1312
|
if message:
|
|
1322
1313
|
detail = message
|
|
1323
1314
|
else:
|
|
@@ -27,7 +27,6 @@ body {
|
|
|
27
27
|
min-height: 100vh;
|
|
28
28
|
display: flex;
|
|
29
29
|
flex-direction: column;
|
|
30
|
-
padding-bottom: 5rem;
|
|
31
30
|
}
|
|
32
31
|
|
|
33
32
|
body::before {
|
|
@@ -102,13 +101,9 @@ body::before {
|
|
|
102
101
|
}
|
|
103
102
|
|
|
104
103
|
.site-footer {
|
|
105
|
-
position: fixed;
|
|
106
|
-
left: 0;
|
|
107
|
-
bottom: 0;
|
|
108
|
-
width: 100%;
|
|
109
104
|
background: transparent;
|
|
110
105
|
padding-top: 0.5rem;
|
|
111
|
-
|
|
106
|
+
margin-top: auto;
|
|
112
107
|
}
|
|
113
108
|
|
|
114
109
|
.dad-joke {
|
|
@@ -535,6 +530,10 @@ body::before {
|
|
|
535
530
|
background-color: #ca4747;
|
|
536
531
|
}
|
|
537
532
|
|
|
533
|
+
.model-label.coding {
|
|
534
|
+
background-color: #ff6b35;
|
|
535
|
+
}
|
|
536
|
+
|
|
538
537
|
.model-labels-container {
|
|
539
538
|
display: flex;
|
|
540
539
|
align-items: center;
|
|
@@ -109,6 +109,7 @@
|
|
|
109
109
|
</label>
|
|
110
110
|
<select id="register-recipe" name="recipe" required>
|
|
111
111
|
<option value="llamacpp">llamacpp</option>
|
|
112
|
+
<option value="oga-npu">oga-npu</option>
|
|
112
113
|
<option value="oga-hybrid">oga-hybrid</option>
|
|
113
114
|
<option value="oga-cpu">oga-cpu</option>
|
|
114
115
|
</select>
|
|
@@ -413,6 +414,8 @@
|
|
|
413
414
|
labelClass = 'reasoning';
|
|
414
415
|
} else if (labelLower === 'reranking') {
|
|
415
416
|
labelClass = 'reranking';
|
|
417
|
+
} else if (labelLower === 'coding') {
|
|
418
|
+
labelClass = 'coding';
|
|
416
419
|
}
|
|
417
420
|
labelSpan.className = `model-label ${labelClass}`;
|
|
418
421
|
labelSpan.textContent = label;
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.0
|
|
1
|
+
__version__ = "8.1.0"
|
lemonade_install/install.py
CHANGED
|
@@ -24,22 +24,6 @@
|
|
|
24
24
|
# In any python environment, only one set of artifacts can be installed at a time.
|
|
25
25
|
# Python environments created by Lemonade v6.1.x or earlier will need to be recreated.
|
|
26
26
|
#
|
|
27
|
-
# The Ryzen AI 1.3.0 artifact files use a different directory hierarchy.
|
|
28
|
-
# The Ryzen AI 1.3.0 hybrid artifacts directory hierarchy is:
|
|
29
|
-
#
|
|
30
|
-
# RYZEN_AI\hybrid\hybrid-llm-artifacts_1.3.0_lounge\hybrid-llm-artifacts\
|
|
31
|
-
# onnxruntime_genai\lib
|
|
32
|
-
# onnxruntime_genai\wheel
|
|
33
|
-
# onnx_utils\bin
|
|
34
|
-
# eula\eula
|
|
35
|
-
#
|
|
36
|
-
# The Ryzen AI 1.3.0 npu artifacts directory hierarchy is:
|
|
37
|
-
#
|
|
38
|
-
# RYZEN_AI\npu\amd_oga\
|
|
39
|
-
# bins\xclbin\stx
|
|
40
|
-
# libs
|
|
41
|
-
# wheels
|
|
42
|
-
#
|
|
43
27
|
|
|
44
28
|
import argparse
|
|
45
29
|
import glob
|
|
@@ -56,6 +40,13 @@ import zipfile
|
|
|
56
40
|
DEFAULT_RYZEN_AI_VERSION = "1.4.0"
|
|
57
41
|
version_info_filename = "version_info.json"
|
|
58
42
|
|
|
43
|
+
# NPU Driver configuration
|
|
44
|
+
NPU_DRIVER_DOWNLOAD_URL = (
|
|
45
|
+
"https://account.amd.com/en/forms/downloads/"
|
|
46
|
+
"ryzenai-eula-public-xef.html?filename=NPU_RAI1.5_280_WHQL.zip"
|
|
47
|
+
)
|
|
48
|
+
REQUIRED_NPU_DRIVER_VERSION = "32.0.203.280"
|
|
49
|
+
|
|
59
50
|
lemonade_install_dir = Path(__file__).parent.parent.parent
|
|
60
51
|
DEFAULT_QUARK_VERSION = "quark-0.6.0"
|
|
61
52
|
DEFAULT_QUARK_DIR = os.path.join(
|
|
@@ -66,14 +57,6 @@ DEFAULT_QUARK_DIR = os.path.join(
|
|
|
66
57
|
SUPPORTED_RYZEN_AI_SERIES = ["300"]
|
|
67
58
|
|
|
68
59
|
npu_install_data = {
|
|
69
|
-
"1.3.0": {
|
|
70
|
-
"artifacts_zipfile": "ryzen_ai_13_ga/npu-llm-artifacts_1.3.0.zip",
|
|
71
|
-
"license_file": (
|
|
72
|
-
"https://account.amd.com/content/dam/account/en/licenses/download/"
|
|
73
|
-
"amd-end-user-license-agreement.pdf"
|
|
74
|
-
),
|
|
75
|
-
"license_tag": "Beta ",
|
|
76
|
-
},
|
|
77
60
|
"1.4.0": {
|
|
78
61
|
"artifacts_zipfile": (
|
|
79
62
|
"https://www.xilinx.com/bin/public/openDownload?"
|
|
@@ -88,17 +71,6 @@ npu_install_data = {
|
|
|
88
71
|
}
|
|
89
72
|
|
|
90
73
|
hybrid_install_data = {
|
|
91
|
-
"1.3.0": {
|
|
92
|
-
"artifacts_zipfile": (
|
|
93
|
-
"https://www.xilinx.com/bin/public/openDownload?"
|
|
94
|
-
"filename=hybrid-llm-artifacts_1.3.0_012725.zip"
|
|
95
|
-
),
|
|
96
|
-
"license_file": (
|
|
97
|
-
"https://www.xilinx.com/bin/public/openDownload?"
|
|
98
|
-
"filename=AMD%20End%20User%20License%20Agreement.pdf"
|
|
99
|
-
),
|
|
100
|
-
"license_tag": "",
|
|
101
|
-
},
|
|
102
74
|
"1.4.0": {
|
|
103
75
|
"artifacts_zipfile": (
|
|
104
76
|
"https://www.xilinx.com/bin/public/openDownload?"
|
|
@@ -154,10 +126,7 @@ def get_oga_npu_dir():
|
|
|
154
126
|
version_info = get_ryzen_ai_version_info()
|
|
155
127
|
version = version_info["version"]
|
|
156
128
|
ryzen_ai_folder = get_ryzen_ai_path()
|
|
157
|
-
|
|
158
|
-
npu_dir = os.path.join(ryzen_ai_folder, "npu", "amd_oga")
|
|
159
|
-
else:
|
|
160
|
-
npu_dir = os.path.join(ryzen_ai_folder, "npu")
|
|
129
|
+
npu_dir = os.path.join(ryzen_ai_folder, "npu")
|
|
161
130
|
if not os.path.isdir(npu_dir):
|
|
162
131
|
raise RuntimeError(
|
|
163
132
|
f"The npu artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
|
|
@@ -171,15 +140,7 @@ def get_oga_hybrid_dir():
|
|
|
171
140
|
version_info = get_ryzen_ai_version_info()
|
|
172
141
|
version = version_info["version"]
|
|
173
142
|
ryzen_ai_folder = get_ryzen_ai_path()
|
|
174
|
-
|
|
175
|
-
hybrid_dir = os.path.join(
|
|
176
|
-
ryzen_ai_folder,
|
|
177
|
-
"hybrid",
|
|
178
|
-
"hybrid-llm-artifacts_1.3.0_lounge",
|
|
179
|
-
"hybrid-llm-artifacts",
|
|
180
|
-
)
|
|
181
|
-
else:
|
|
182
|
-
hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
|
|
143
|
+
hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
|
|
183
144
|
if not os.path.isdir(hybrid_dir):
|
|
184
145
|
raise RuntimeError(
|
|
185
146
|
f"The hybrid artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
|
|
@@ -189,6 +150,37 @@ def get_oga_hybrid_dir():
|
|
|
189
150
|
return hybrid_dir, version
|
|
190
151
|
|
|
191
152
|
|
|
153
|
+
def _get_ryzenai_version_info(device=None):
|
|
154
|
+
"""
|
|
155
|
+
Centralized version detection for RyzenAI installations.
|
|
156
|
+
Uses lazy imports to avoid import errors when OGA is not installed.
|
|
157
|
+
"""
|
|
158
|
+
try:
|
|
159
|
+
# Lazy import to avoid errors when OGA is not installed
|
|
160
|
+
from packaging.version import Version
|
|
161
|
+
import onnxruntime_genai as og
|
|
162
|
+
|
|
163
|
+
if Version(og.__version__) >= Version("0.7.0"):
|
|
164
|
+
oga_path = os.path.dirname(og.__file__)
|
|
165
|
+
if og.__version__ == "0.7.0.2":
|
|
166
|
+
return "1.5.0", oga_path
|
|
167
|
+
else:
|
|
168
|
+
return "1.4.0", oga_path
|
|
169
|
+
else:
|
|
170
|
+
if device == "npu":
|
|
171
|
+
oga_path, version = get_oga_npu_dir()
|
|
172
|
+
else:
|
|
173
|
+
oga_path, version = get_oga_hybrid_dir()
|
|
174
|
+
return version, oga_path
|
|
175
|
+
except ImportError as e:
|
|
176
|
+
raise ImportError(
|
|
177
|
+
f"{e}\n Please install lemonade-sdk with "
|
|
178
|
+
"one of the oga extras, for example:\n"
|
|
179
|
+
"pip install lemonade-sdk[dev,oga-cpu]\n"
|
|
180
|
+
"See https://lemonade_server.ai/install_options.html for details"
|
|
181
|
+
) from e
|
|
182
|
+
|
|
183
|
+
|
|
192
184
|
def download_lfs_file(token, file, output_filename):
|
|
193
185
|
"""Downloads a file from LFS"""
|
|
194
186
|
import requests
|
|
@@ -426,8 +418,6 @@ class Install:
|
|
|
426
418
|
"npu",
|
|
427
419
|
"hybrid",
|
|
428
420
|
"unified",
|
|
429
|
-
"npu-1.3.0",
|
|
430
|
-
"hybrid-1.3.0",
|
|
431
421
|
"npu-1.4.0",
|
|
432
422
|
"hybrid-1.4.0",
|
|
433
423
|
"unified-1.4.0",
|
|
@@ -524,25 +514,14 @@ class Install:
|
|
|
524
514
|
# Install all whl files in the specified wheels folder
|
|
525
515
|
if wheels_full_path is not None:
|
|
526
516
|
print(f"\nInstalling wheels from {wheels_full_path}\n")
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
print(f"\nInstalling {file} with command {install_cmd}\n")
|
|
536
|
-
subprocess.run(install_cmd, check=True, shell=True)
|
|
537
|
-
else:
|
|
538
|
-
# Install all the wheel files together, allowing pip to work out the dependencies
|
|
539
|
-
wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
|
|
540
|
-
install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
|
|
541
|
-
subprocess.run(
|
|
542
|
-
install_cmd,
|
|
543
|
-
check=True,
|
|
544
|
-
shell=True,
|
|
545
|
-
)
|
|
517
|
+
# Install all the wheel files together, allowing pip to work out the dependencies
|
|
518
|
+
wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
|
|
519
|
+
install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
|
|
520
|
+
subprocess.run(
|
|
521
|
+
install_cmd,
|
|
522
|
+
check=True,
|
|
523
|
+
shell=True,
|
|
524
|
+
)
|
|
546
525
|
|
|
547
526
|
# Delete the zip file
|
|
548
527
|
print(f"\nCleaning up, removing {archive_file_path}\n")
|
|
@@ -611,10 +590,7 @@ class Install:
|
|
|
611
590
|
license_file = npu_install_data[version].get("license_file", None)
|
|
612
591
|
license_tag = npu_install_data[version].get("license_tag", None)
|
|
613
592
|
install_dir = os.path.join(ryzen_ai_folder, "npu")
|
|
614
|
-
|
|
615
|
-
wheels_full_path = os.path.join(install_dir, "amd_oga/wheels")
|
|
616
|
-
else:
|
|
617
|
-
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
593
|
+
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
618
594
|
|
|
619
595
|
if license_file:
|
|
620
596
|
Install._get_license_acceptance(version, license_file, license_tag, yes)
|
|
@@ -641,17 +617,7 @@ class Install:
|
|
|
641
617
|
license_file = hybrid_install_data[version].get("license_file", None)
|
|
642
618
|
license_tag = hybrid_install_data[version].get("license_tag", None)
|
|
643
619
|
install_dir = os.path.join(ryzen_ai_folder, "hybrid")
|
|
644
|
-
|
|
645
|
-
wheels_full_path = os.path.join(
|
|
646
|
-
ryzen_ai_folder,
|
|
647
|
-
"hybrid",
|
|
648
|
-
"hybrid-llm-artifacts_1.3.0_lounge",
|
|
649
|
-
"hybrid-llm-artifacts",
|
|
650
|
-
"onnxruntime_genai",
|
|
651
|
-
"wheel",
|
|
652
|
-
)
|
|
653
|
-
else:
|
|
654
|
-
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
620
|
+
wheels_full_path = os.path.join(install_dir, "wheels")
|
|
655
621
|
|
|
656
622
|
if license_file:
|
|
657
623
|
Install._get_license_acceptance(version, license_file, license_tag, yes)
|
|
@@ -669,6 +635,21 @@ class Install:
|
|
|
669
635
|
# Check if the processor is supported before proceeding
|
|
670
636
|
check_ryzen_ai_processor()
|
|
671
637
|
|
|
638
|
+
warning_msg = (
|
|
639
|
+
"\n" + "=" * 80 + "\n"
|
|
640
|
+
"WARNING: IMPORTANT: NEW RYZEN AI 1.5.0 INSTALLATION PROCESS\n"
|
|
641
|
+
+ "=" * 80
|
|
642
|
+
+ "\n"
|
|
643
|
+
"Starting with Ryzen AI 1.5.0, installation is now available through PyPI.\n"
|
|
644
|
+
"For new installations, consider using:\n\n"
|
|
645
|
+
"pip install lemonade-sdk[oga-ryzenai] --extra-index-url https://pypi.amd.com/simple\n\n"
|
|
646
|
+
"This legacy installation method (lemonade-install --ryzenai) is still\n"
|
|
647
|
+
"supported for version 1.4.0, but may be deprecated in future releases.\n"
|
|
648
|
+
+ "=" * 80
|
|
649
|
+
+ "\n"
|
|
650
|
+
)
|
|
651
|
+
print(warning_msg)
|
|
652
|
+
|
|
672
653
|
# Delete any previous Ryzen AI installation in this environment
|
|
673
654
|
ryzen_ai_folder = get_ryzen_ai_path(check_exists=False)
|
|
674
655
|
if os.path.exists(ryzen_ai_folder):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.0
|
|
3
|
+
Version: 8.1.0
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.13
|
|
@@ -22,16 +22,15 @@ Requires-Dist: pytz
|
|
|
22
22
|
Requires-Dist: zstandard
|
|
23
23
|
Requires-Dist: fastapi
|
|
24
24
|
Requires-Dist: uvicorn[standard]
|
|
25
|
-
Requires-Dist: openai
|
|
25
|
+
Requires-Dist: openai<1.97.1,>=1.81.0
|
|
26
26
|
Requires-Dist: transformers<=4.53.2
|
|
27
27
|
Requires-Dist: jinja2
|
|
28
28
|
Requires-Dist: tabulate
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
30
30
|
Requires-Dist: huggingface-hub==0.33.0
|
|
31
|
-
Provides-Extra: oga-
|
|
32
|
-
Requires-Dist:
|
|
33
|
-
Requires-Dist:
|
|
34
|
-
Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
|
|
31
|
+
Provides-Extra: oga-ryzenai
|
|
32
|
+
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
|
|
33
|
+
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
35
34
|
Provides-Extra: oga-cpu
|
|
36
35
|
Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
|
|
37
36
|
Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
|
|
@@ -43,14 +42,32 @@ Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
|
43
42
|
Requires-Dist: matplotlib; extra == "dev"
|
|
44
43
|
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
45
44
|
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
45
|
+
Provides-Extra: oga-hybrid
|
|
46
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
|
|
47
|
+
Provides-Extra: oga-unified
|
|
48
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
|
|
46
49
|
Provides-Extra: oga-hybrid-minimal
|
|
47
|
-
Requires-Dist: lemonade-sdk[oga-
|
|
50
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
|
|
48
51
|
Provides-Extra: oga-cpu-minimal
|
|
49
52
|
Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
|
|
53
|
+
Provides-Extra: oga-npu-minimal
|
|
54
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
|
|
50
55
|
Provides-Extra: llm
|
|
51
56
|
Requires-Dist: lemonade-sdk[dev]; extra == "llm"
|
|
52
57
|
Provides-Extra: llm-oga-cpu
|
|
53
58
|
Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
|
|
59
|
+
Provides-Extra: llm-oga-npu
|
|
60
|
+
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
61
|
+
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
62
|
+
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
63
|
+
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
64
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
65
|
+
Provides-Extra: llm-oga-hybrid
|
|
66
|
+
Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
|
|
67
|
+
Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
|
|
68
|
+
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
|
|
69
|
+
Provides-Extra: llm-oga-unified
|
|
70
|
+
Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
54
71
|
Provides-Extra: llm-oga-igpu
|
|
55
72
|
Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
|
|
56
73
|
Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
|
|
@@ -61,16 +78,6 @@ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
|
|
|
61
78
|
Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
|
|
62
79
|
Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
|
|
63
80
|
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
|
|
64
|
-
Provides-Extra: llm-oga-npu
|
|
65
|
-
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
66
|
-
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
67
|
-
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
68
|
-
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
69
|
-
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
70
|
-
Provides-Extra: llm-oga-hybrid
|
|
71
|
-
Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
|
|
72
|
-
Provides-Extra: llm-oga-unified
|
|
73
|
-
Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
74
81
|
Dynamic: author-email
|
|
75
82
|
Dynamic: description
|
|
76
83
|
Dynamic: description-content-type
|
|
@@ -174,7 +181,7 @@ lemonade-server list
|
|
|
174
181
|
|
|
175
182
|
## Model Library
|
|
176
183
|
|
|
177
|
-
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/
|
|
184
|
+
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configurations](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
|
|
178
185
|
|
|
179
186
|
You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
|
|
180
187
|
<p align="center">
|
|
@@ -263,7 +270,7 @@ completion = client.chat.completions.create(
|
|
|
263
270
|
print(completion.choices[0].message.content)
|
|
264
271
|
```
|
|
265
272
|
|
|
266
|
-
For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
|
|
273
|
+
For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
|
|
267
274
|
|
|
268
275
|
## Beyond an LLM Server
|
|
269
276
|
|
|
@@ -272,6 +279,10 @@ The [Lemonade SDK](./docs/README.md) also includes the following components:
|
|
|
272
279
|
- 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
|
|
273
280
|
- 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
|
|
274
281
|
|
|
282
|
+
## FAQ
|
|
283
|
+
|
|
284
|
+
To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md).
|
|
285
|
+
|
|
275
286
|
## Contributing
|
|
276
287
|
|
|
277
288
|
We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
|
|
@@ -4,14 +4,14 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
|
|
|
4
4
|
lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=c04nFsyfS0zYoDvZjLO-uEi12TFB5EWSD6fiWiI7OLQ,22
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
11
11
|
lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
|
|
12
12
|
lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
|
|
13
13
|
lemonade/common/inference_engines.py,sha256=lcmir_pATr71TfSBJoIZEi3G9xyxNwi2_xpPvPD8_xI,12932
|
|
14
|
-
lemonade/common/network.py,sha256=
|
|
14
|
+
lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
|
|
15
15
|
lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
|
|
16
16
|
lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
|
|
17
17
|
lemonade/common/system_info.py,sha256=dOtX8WLHCz1xmURZWnqhDbyNZv_AulrpX_bbI58eHFQ,27084
|
|
@@ -32,13 +32,13 @@ lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
|
|
|
32
32
|
lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
|
|
33
33
|
lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
|
|
34
34
|
lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
|
|
35
|
-
lemonade/tools/llamacpp/bench.py,sha256=
|
|
35
|
+
lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
|
|
36
36
|
lemonade/tools/llamacpp/load.py,sha256=SKacK2n8LpC4DN4yALyEpV2c8_sgOv2G7t6Nlyu7XXg,6273
|
|
37
|
-
lemonade/tools/llamacpp/utils.py,sha256=
|
|
37
|
+
lemonade/tools/llamacpp/utils.py,sha256=vHA5kykkdHSsMGmbEA4RyOHr8wFIh1WenfhCvY8WxZs,22445
|
|
38
38
|
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
39
|
lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
|
|
40
|
-
lemonade/tools/oga/load.py,sha256=
|
|
41
|
-
lemonade/tools/oga/utils.py,sha256=
|
|
40
|
+
lemonade/tools/oga/load.py,sha256=O82ezF7Jhgz3CJrxDWZYqLHyD_0NS1nsvfMWDaaUI4I,33728
|
|
41
|
+
lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
|
|
42
42
|
lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
43
|
lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
|
|
44
44
|
lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
|
|
@@ -47,26 +47,26 @@ lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTgu
|
|
|
47
47
|
lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
|
|
48
48
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
49
|
lemonade/tools/server/llamacpp.py,sha256=OP0j74QcowEu3zFEcrKIsBbGDOFemBXS5F5DC6oQHaI,18853
|
|
50
|
-
lemonade/tools/server/serve.py,sha256=
|
|
50
|
+
lemonade/tools/server/serve.py,sha256=0-NprfsU-YrX8Qsf1atEi6wPJWemrPjHKEBHV69SwCQ,57046
|
|
51
51
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
52
52
|
lemonade/tools/server/tray.py,sha256=yoGCM8j_2KzPqo-AlYiauWd8QR56yp6jW6HZ9921Ydg,17525
|
|
53
53
|
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
54
54
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
55
|
-
lemonade/tools/server/static/styles.css,sha256=
|
|
56
|
-
lemonade/tools/server/static/webapp.html,sha256=
|
|
55
|
+
lemonade/tools/server/static/styles.css,sha256=8wQ5Cg4rbEh03kC8t7ALE7dB20GiD0Pfu5BAxh9hECU,26429
|
|
56
|
+
lemonade/tools/server/static/webapp.html,sha256=KZm1ZFIhQzLT2Y2wy3hFsQxcOxFzv-blaeLzc1ODhb8,36396
|
|
57
57
|
lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
|
|
58
58
|
lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
|
|
59
59
|
lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
|
|
60
60
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
61
|
-
lemonade_install/install.py,sha256=
|
|
62
|
-
lemonade_sdk-8.0.
|
|
63
|
-
lemonade_sdk-8.0.
|
|
64
|
-
lemonade_server/cli.py,sha256=
|
|
65
|
-
lemonade_server/model_manager.py,sha256=
|
|
66
|
-
lemonade_server/pydantic_models.py,sha256=
|
|
67
|
-
lemonade_server/server_models.json,sha256=
|
|
68
|
-
lemonade_sdk-8.0.
|
|
69
|
-
lemonade_sdk-8.0.
|
|
70
|
-
lemonade_sdk-8.0.
|
|
71
|
-
lemonade_sdk-8.0.
|
|
72
|
-
lemonade_sdk-8.0.
|
|
61
|
+
lemonade_install/install.py,sha256=TBX-VwEHcPo4WX0K_12pKKINnIK3o4SUo3L5XjkqEtw,27669
|
|
62
|
+
lemonade_sdk-8.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
63
|
+
lemonade_sdk-8.1.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
64
|
+
lemonade_server/cli.py,sha256=6QJ5fxNLuVUbuHauA5JHXf0H5dqJ5E4GNTo4YoMOJtg,16049
|
|
65
|
+
lemonade_server/model_manager.py,sha256=O3fIX52AqU0z10WzPmNEA3lQ_KjOqNq_G-SxjwIgEio,10781
|
|
66
|
+
lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
|
|
67
|
+
lemonade_server/server_models.json,sha256=gitKHj_VHANxjtcXeE5zFpukVO0HyEfKhu3ZaZsj2xo,8867
|
|
68
|
+
lemonade_sdk-8.1.0.dist-info/METADATA,sha256=c3JxCUYw5ujhGSb3FX3mG6UmgG5BLqik8a5j4oe8n7o,15712
|
|
69
|
+
lemonade_sdk-8.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
70
|
+
lemonade_sdk-8.1.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
|
|
71
|
+
lemonade_sdk-8.1.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
72
|
+
lemonade_sdk-8.1.0.dist-info/RECORD,,
|
lemonade_server/cli.py
CHANGED
|
@@ -57,7 +57,7 @@ def serve(
|
|
|
57
57
|
log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
|
|
58
58
|
|
|
59
59
|
# Hidden environment variable to enable input truncation (experimental feature)
|
|
60
|
-
truncate_inputs = "LEMONADE_TRUNCATE_INPUTS"
|
|
60
|
+
truncate_inputs = os.environ.get("LEMONADE_TRUNCATE_INPUTS", None)
|
|
61
61
|
|
|
62
62
|
# Start the server
|
|
63
63
|
serve_kwargs = {
|
lemonade_server/model_manager.py
CHANGED
|
@@ -7,6 +7,7 @@ from importlib.metadata import distributions
|
|
|
7
7
|
from lemonade_server.pydantic_models import PullConfig
|
|
8
8
|
from lemonade.cache import DEFAULT_CACHE_DIR
|
|
9
9
|
from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
|
|
10
|
+
from lemonade.common.network import custom_snapshot_download
|
|
10
11
|
|
|
11
12
|
USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
|
|
12
13
|
|
|
@@ -175,7 +176,7 @@ class ModelManager:
|
|
|
175
176
|
if "gguf" in checkpoint_to_download.lower():
|
|
176
177
|
download_gguf(gguf_model_config.checkpoint, gguf_model_config.mmproj)
|
|
177
178
|
else:
|
|
178
|
-
|
|
179
|
+
custom_snapshot_download(checkpoint_to_download)
|
|
179
180
|
|
|
180
181
|
# Register the model in user_models.json, creating that file if needed
|
|
181
182
|
# We do this registration after the download so that we don't register
|
|
@@ -233,8 +234,8 @@ class ModelManager:
|
|
|
233
234
|
|
|
234
235
|
try:
|
|
235
236
|
# Get the local path using snapshot_download with local_files_only=True
|
|
236
|
-
snapshot_path =
|
|
237
|
-
|
|
237
|
+
snapshot_path = custom_snapshot_download(
|
|
238
|
+
base_checkpoint, local_files_only=True
|
|
238
239
|
)
|
|
239
240
|
|
|
240
241
|
# Navigate up to the model directory (parent of snapshots directory)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Optional, Union, List
|
|
1
|
+
from typing import Optional, Union, List
|
|
2
2
|
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
|
|
@@ -18,9 +18,6 @@ class LoadConfig(BaseModel):
|
|
|
18
18
|
model_name: str
|
|
19
19
|
checkpoint: Optional[str] = None
|
|
20
20
|
recipe: Optional[str] = None
|
|
21
|
-
# Indicates the maximum prompt length allowed for that specific
|
|
22
|
-
# checkpoint + recipe combination
|
|
23
|
-
max_prompt_length: Optional[int] = None
|
|
24
21
|
# Indicates whether the model is a reasoning model, like DeepSeek
|
|
25
22
|
reasoning: Optional[bool] = False
|
|
26
23
|
# Indicates which Multimodal Projector (mmproj) file to use
|