lemonade-sdk 8.0.4__py3-none-any.whl → 8.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/api.py +50 -0
- lemonade/common/inference_engines.py +415 -0
- lemonade/common/system_info.py +493 -47
- lemonade/tools/management_tools.py +53 -7
- lemonade/tools/server/serve.py +29 -0
- lemonade/tools/server/static/styles.css +36 -53
- lemonade/tools/server/static/webapp.html +23 -2
- lemonade/version.py +1 -1
- lemonade_sdk-8.0.5.dist-info/METADATA +295 -0
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/RECORD +16 -15
- lemonade_server/cli.py +168 -22
- lemonade_sdk-8.0.4.dist-info/METADATA +0 -176
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/top_level.txt +0 -0
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import abc
|
|
3
|
+
import json
|
|
3
4
|
from typing import List
|
|
4
5
|
import lemonade.common.filesystem as fs
|
|
5
6
|
import lemonade.common.exceptions as exp
|
|
6
7
|
import lemonade.common.printing as printing
|
|
7
8
|
from lemonade.tools.tool import ToolParser
|
|
8
9
|
from lemonade.version import __version__ as lemonade_version
|
|
9
|
-
from lemonade.common.system_info import
|
|
10
|
+
from lemonade.common.system_info import (
|
|
11
|
+
get_system_info_dict,
|
|
12
|
+
get_device_info_dict,
|
|
13
|
+
get_system_info,
|
|
14
|
+
)
|
|
10
15
|
from lemonade.common.build import output_dir
|
|
11
16
|
import lemonade.cache as lemonade_cache
|
|
12
17
|
|
|
@@ -245,28 +250,69 @@ class SystemInfo(ManagementTool):
|
|
|
245
250
|
@staticmethod
|
|
246
251
|
def parser(add_help: bool = True) -> argparse.ArgumentParser:
|
|
247
252
|
parser = __class__.helpful_parser(
|
|
248
|
-
short_description="Print system information",
|
|
253
|
+
short_description="Print system and device information",
|
|
249
254
|
add_help=add_help,
|
|
250
255
|
)
|
|
251
256
|
|
|
257
|
+
parser.add_argument(
|
|
258
|
+
"--format", choices=["table", "json"], default="table", help="Output format"
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
parser.add_argument(
|
|
262
|
+
"--verbose",
|
|
263
|
+
action="store_true",
|
|
264
|
+
help="Show detailed system information",
|
|
265
|
+
)
|
|
266
|
+
|
|
252
267
|
return parser
|
|
253
268
|
|
|
254
269
|
@staticmethod
|
|
255
270
|
def pretty_print(my_dict: dict, level=0):
|
|
256
271
|
for k, v in my_dict.items():
|
|
272
|
+
if k == "available" and v is True:
|
|
273
|
+
continue
|
|
274
|
+
|
|
257
275
|
if isinstance(v, dict):
|
|
258
|
-
|
|
259
|
-
|
|
276
|
+
# Special handling for device availability
|
|
277
|
+
if v.get("available") is False:
|
|
278
|
+
error_msg = v.get("error", "Not available")
|
|
279
|
+
print(" " * level + f"{k}: {error_msg}")
|
|
280
|
+
else:
|
|
281
|
+
print(" " * level + f"{k}:")
|
|
282
|
+
SystemInfo.pretty_print(v, level + 1)
|
|
260
283
|
elif isinstance(v, list):
|
|
261
284
|
print(" " * level + f"{k}:")
|
|
262
285
|
for item in v:
|
|
263
|
-
|
|
286
|
+
if isinstance(item, dict):
|
|
287
|
+
SystemInfo.pretty_print(item, level + 1)
|
|
288
|
+
print()
|
|
289
|
+
else:
|
|
290
|
+
print(" " * (level + 1) + f"{item}")
|
|
264
291
|
else:
|
|
265
292
|
print(" " * level + f"{k}: {v}")
|
|
266
293
|
|
|
267
|
-
def run(self, _):
|
|
294
|
+
def run(self, _, format="table", verbose=False):
|
|
295
|
+
# Get basic system info
|
|
268
296
|
system_info_dict = get_system_info_dict()
|
|
269
|
-
|
|
297
|
+
|
|
298
|
+
# Always include devices
|
|
299
|
+
system_info_dict["Devices"] = get_device_info_dict()
|
|
300
|
+
|
|
301
|
+
# Filter out verbose-only information if not in verbose mode
|
|
302
|
+
if not verbose:
|
|
303
|
+
essential_keys = ["OS Version", "Processor", "Physical Memory", "Devices"]
|
|
304
|
+
system_info_dict = {
|
|
305
|
+
k: v for k, v in system_info_dict.items() if k in essential_keys
|
|
306
|
+
}
|
|
307
|
+
else:
|
|
308
|
+
# In verbose mode, add Python packages at the end
|
|
309
|
+
system_info = get_system_info()
|
|
310
|
+
system_info_dict["Python Packages"] = system_info.get_python_packages()
|
|
311
|
+
|
|
312
|
+
if format == "json":
|
|
313
|
+
print(json.dumps(system_info_dict, indent=2))
|
|
314
|
+
else:
|
|
315
|
+
self.pretty_print(system_info_dict)
|
|
270
316
|
|
|
271
317
|
|
|
272
318
|
# This file was originally licensed under Apache 2.0. It has been modified.
|
lemonade/tools/server/serve.py
CHANGED
|
@@ -228,6 +228,7 @@ class Server(ManagementTool):
|
|
|
228
228
|
self.app.get(f"{prefix}/health")(self.health)
|
|
229
229
|
self.app.get(f"{prefix}/halt")(self.halt_generation)
|
|
230
230
|
self.app.get(f"{prefix}/stats")(self.send_stats)
|
|
231
|
+
self.app.get(f"{prefix}/system-info")(self.get_system_info)
|
|
231
232
|
self.app.post(f"{prefix}/completions")(self.completions)
|
|
232
233
|
self.app.post(f"{prefix}/responses")(self.responses)
|
|
233
234
|
|
|
@@ -1276,6 +1277,34 @@ class Server(ManagementTool):
|
|
|
1276
1277
|
),
|
|
1277
1278
|
}
|
|
1278
1279
|
|
|
1280
|
+
async def get_system_info(self, request: Request):
|
|
1281
|
+
"""
|
|
1282
|
+
Return system and device enumeration information.
|
|
1283
|
+
Supports optional 'verbose' query parameter.
|
|
1284
|
+
"""
|
|
1285
|
+
from lemonade.common.system_info import (
|
|
1286
|
+
get_system_info_dict,
|
|
1287
|
+
get_device_info_dict,
|
|
1288
|
+
get_system_info as get_system_info_obj,
|
|
1289
|
+
)
|
|
1290
|
+
|
|
1291
|
+
# Get verbose parameter from query string (default to False)
|
|
1292
|
+
verbose = request.query_params.get("verbose", "false").lower() in ["true", "1"]
|
|
1293
|
+
|
|
1294
|
+
info = get_system_info_dict()
|
|
1295
|
+
info["devices"] = get_device_info_dict()
|
|
1296
|
+
|
|
1297
|
+
# Filter out verbose-only information if not in verbose mode
|
|
1298
|
+
if not verbose:
|
|
1299
|
+
essential_keys = ["OS Version", "Processor", "Physical Memory", "devices"]
|
|
1300
|
+
info = {k: v for k, v in info.items() if k in essential_keys}
|
|
1301
|
+
else:
|
|
1302
|
+
# In verbose mode, add Python packages at the end
|
|
1303
|
+
system_info_obj = get_system_info_obj()
|
|
1304
|
+
info["Python Packages"] = system_info_obj.get_python_packages()
|
|
1305
|
+
|
|
1306
|
+
return info
|
|
1307
|
+
|
|
1279
1308
|
def model_load_failure(self, model_reference: str, message: Optional[str] = None):
|
|
1280
1309
|
"""
|
|
1281
1310
|
Clean up after a model load failure, then log it and raise
|
|
@@ -90,7 +90,10 @@ body {
|
|
|
90
90
|
border-radius: 8px;
|
|
91
91
|
border: 1px solid #e0e0e0;
|
|
92
92
|
max-width: 1000px;
|
|
93
|
-
width:
|
|
93
|
+
min-width: 320px;
|
|
94
|
+
width: calc(100% - 2rem); /* Responsive width with margin */
|
|
95
|
+
margin-left: 1rem;
|
|
96
|
+
margin-right: 1rem;
|
|
94
97
|
}
|
|
95
98
|
|
|
96
99
|
.tabs {
|
|
@@ -130,8 +133,11 @@ body {
|
|
|
130
133
|
.chat-container {
|
|
131
134
|
display: flex;
|
|
132
135
|
flex-direction: column;
|
|
133
|
-
height:
|
|
136
|
+
height: calc(100vh - 650px); /* Subtract space for navbar, title, wall of logos, etc */
|
|
137
|
+
min-height: 300px;
|
|
138
|
+
max-height: 1200px;
|
|
134
139
|
max-width: 800px;
|
|
140
|
+
width: 100%;
|
|
135
141
|
margin: 0 auto;
|
|
136
142
|
border: 1px solid #e0e0e0;
|
|
137
143
|
border-radius: 8px;
|
|
@@ -638,6 +644,7 @@ body {
|
|
|
638
644
|
border-radius: 6px;
|
|
639
645
|
overflow: hidden;
|
|
640
646
|
transition: box-shadow 0.2s ease;
|
|
647
|
+
min-width: 0;
|
|
641
648
|
}
|
|
642
649
|
|
|
643
650
|
.register-model-name-group:focus-within {
|
|
@@ -686,6 +693,8 @@ body {
|
|
|
686
693
|
transition: border-color 0.2s ease, box-shadow 0.2s ease;
|
|
687
694
|
outline: none;
|
|
688
695
|
box-shadow: 0 2px 8px rgba(0,0,0,0.06);
|
|
696
|
+
box-sizing: border-box;
|
|
697
|
+
min-width: 0;
|
|
689
698
|
}
|
|
690
699
|
|
|
691
700
|
#register-model-name:focus {
|
|
@@ -709,6 +718,7 @@ body {
|
|
|
709
718
|
transition: border-color 0.2s ease, box-shadow 0.2s ease;
|
|
710
719
|
outline: none;
|
|
711
720
|
box-shadow: 0 2px 8px rgba(0,0,0,0.06);
|
|
721
|
+
box-sizing: border-box;
|
|
712
722
|
}
|
|
713
723
|
|
|
714
724
|
#register-recipe:focus {
|
|
@@ -748,6 +758,8 @@ body {
|
|
|
748
758
|
transition: border-color 0.2s ease, box-shadow 0.2s ease;
|
|
749
759
|
outline: none;
|
|
750
760
|
box-shadow: 0 2px 8px rgba(0,0,0,0.06);
|
|
761
|
+
box-sizing: border-box;
|
|
762
|
+
min-width: 0;
|
|
751
763
|
}
|
|
752
764
|
|
|
753
765
|
#register-mmproj:focus, #register-checkpoint:focus {
|
|
@@ -930,6 +942,18 @@ body {
|
|
|
930
942
|
transform: translateX(0) translateY(-2px) !important;
|
|
931
943
|
}
|
|
932
944
|
|
|
945
|
+
@media (max-width: 800px) {
|
|
946
|
+
.model-mgmt-container {
|
|
947
|
+
flex-direction: column;
|
|
948
|
+
gap: 1.5em;
|
|
949
|
+
}
|
|
950
|
+
|
|
951
|
+
.model-mgmt-pane {
|
|
952
|
+
flex: none;
|
|
953
|
+
width: 100%;
|
|
954
|
+
}
|
|
955
|
+
}
|
|
956
|
+
|
|
933
957
|
@media (max-width: 600px) {
|
|
934
958
|
.title {
|
|
935
959
|
font-size: 2rem;
|
|
@@ -942,61 +966,20 @@ body {
|
|
|
942
966
|
margin-top: 1rem;
|
|
943
967
|
}
|
|
944
968
|
.model-mgmt-container {
|
|
945
|
-
flex-direction: column;
|
|
946
969
|
gap: 1em;
|
|
947
970
|
}
|
|
948
|
-
}
|
|
949
|
-
|
|
950
|
-
@media (max-width: 800px) {
|
|
951
|
-
.model-mgmt-register-form {
|
|
952
|
-
padding: 1.2em 1em 1em 1em;
|
|
953
|
-
margin: 0 1em 1.5em 1em;
|
|
954
|
-
}
|
|
955
971
|
|
|
956
|
-
.
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
}
|
|
961
|
-
|
|
962
|
-
.register-label {
|
|
963
|
-
min-width: 0;
|
|
964
|
-
font-size: 0.9em;
|
|
965
|
-
}
|
|
966
|
-
|
|
967
|
-
.register-label.reasoning-inline {
|
|
968
|
-
margin-left: 0;
|
|
969
|
-
margin-top: 0.5em;
|
|
970
|
-
justify-content: flex-start;
|
|
971
|
-
}
|
|
972
|
-
|
|
973
|
-
#register-model-name, #register-mmproj, #register-checkpoint {
|
|
974
|
-
min-width: 0;
|
|
975
|
-
width: 100%;
|
|
976
|
-
}
|
|
977
|
-
|
|
978
|
-
#register-recipe {
|
|
979
|
-
min-width: 0;
|
|
980
|
-
width: 100%;
|
|
972
|
+
.tab-container {
|
|
973
|
+
margin-left: 0.5rem;
|
|
974
|
+
margin-right: 0.5rem;
|
|
975
|
+
width: calc(100% - 1rem);
|
|
981
976
|
}
|
|
982
977
|
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
.register-form-row:last-child {
|
|
990
|
-
flex-direction: row;
|
|
991
|
-
justify-content: space-between;
|
|
992
|
-
align-items: center;
|
|
993
|
-
flex-wrap: wrap;
|
|
994
|
-
gap: 1em;
|
|
995
|
-
}
|
|
996
|
-
|
|
997
|
-
#register-submit {
|
|
998
|
-
padding: 0.8em 1.5em;
|
|
999
|
-
font-size: 0.9em;
|
|
978
|
+
/* Ensure all input fields are properly constrained on very narrow screens */
|
|
979
|
+
#register-model-name, #register-mmproj, #register-checkpoint, #register-recipe {
|
|
980
|
+
max-width: 100%;
|
|
981
|
+
overflow: hidden;
|
|
982
|
+
text-overflow: ellipsis;
|
|
1000
983
|
}
|
|
1001
984
|
}
|
|
1002
985
|
|
|
@@ -1141,7 +1124,7 @@ body {
|
|
|
1141
1124
|
display: none;
|
|
1142
1125
|
}
|
|
1143
1126
|
|
|
1144
|
-
@media (max-width:
|
|
1127
|
+
@media (max-width: 800px) {
|
|
1145
1128
|
.app-logos-grid {
|
|
1146
1129
|
gap: 0.5rem;
|
|
1147
1130
|
padding: 0 0.5rem;
|
|
@@ -327,7 +327,11 @@
|
|
|
327
327
|
let filteredModels = [];
|
|
328
328
|
let defaultIndex = 0;
|
|
329
329
|
|
|
330
|
-
|
|
330
|
+
// Check if model is specified in URL parameters
|
|
331
|
+
const urlModel = new URLSearchParams(window.location.search).get('model');
|
|
332
|
+
let urlModelIndex = -1;
|
|
333
|
+
|
|
334
|
+
data.data.forEach(function(model, index) {
|
|
331
335
|
const modelId = model.id || model.name || model;
|
|
332
336
|
const modelInfo = allModels[modelId] || {};
|
|
333
337
|
const labels = modelInfo.labels || [];
|
|
@@ -341,9 +345,17 @@
|
|
|
341
345
|
const opt = document.createElement('option');
|
|
342
346
|
opt.value = modelId;
|
|
343
347
|
opt.textContent = modelId;
|
|
348
|
+
|
|
349
|
+
// Check if this model matches the URL parameter
|
|
350
|
+
if (urlModel && modelId === urlModel) {
|
|
351
|
+
urlModelIndex = filteredModels.length - 1;
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
// Default fallback for backwards compatibility
|
|
344
355
|
if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {
|
|
345
356
|
defaultIndex = filteredModels.length - 1;
|
|
346
357
|
}
|
|
358
|
+
|
|
347
359
|
select.appendChild(opt);
|
|
348
360
|
});
|
|
349
361
|
|
|
@@ -352,7 +364,16 @@
|
|
|
352
364
|
return;
|
|
353
365
|
}
|
|
354
366
|
|
|
355
|
-
|
|
367
|
+
// Select the URL-specified model if found, otherwise use default
|
|
368
|
+
if (urlModelIndex !== -1) {
|
|
369
|
+
select.selectedIndex = urlModelIndex;
|
|
370
|
+
console.log(`Selected model from URL parameter: ${urlModel}`);
|
|
371
|
+
} else {
|
|
372
|
+
select.selectedIndex = defaultIndex;
|
|
373
|
+
if (urlModel) {
|
|
374
|
+
console.warn(`Model '${urlModel}' specified in URL not found in available models`);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
356
377
|
} catch (e) {
|
|
357
378
|
const select = document.getElementById('model-select');
|
|
358
379
|
select.innerHTML = `<option>Error loading models: ${e.message}</option>`;
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.0.4"
|
|
1
|
+
__version__ = "8.0.5"
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lemonade-sdk
|
|
3
|
+
Version: 8.0.5
|
|
4
|
+
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
|
+
Author-email: lemonade@amd.com
|
|
6
|
+
Requires-Python: >=3.10, <3.13
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
License-File: NOTICE.md
|
|
10
|
+
Requires-Dist: invoke>=2.0.0
|
|
11
|
+
Requires-Dist: onnx<1.18.0,>=1.11.0
|
|
12
|
+
Requires-Dist: pyyaml>=5.4
|
|
13
|
+
Requires-Dist: typeguard>=2.3.13
|
|
14
|
+
Requires-Dist: packaging>=20.9
|
|
15
|
+
Requires-Dist: numpy<2.0.0
|
|
16
|
+
Requires-Dist: fasteners
|
|
17
|
+
Requires-Dist: GitPython>=3.1.40
|
|
18
|
+
Requires-Dist: psutil>=6.1.1
|
|
19
|
+
Requires-Dist: wmi
|
|
20
|
+
Requires-Dist: py-cpuinfo
|
|
21
|
+
Requires-Dist: pytz
|
|
22
|
+
Requires-Dist: zstandard
|
|
23
|
+
Requires-Dist: fastapi
|
|
24
|
+
Requires-Dist: uvicorn[standard]
|
|
25
|
+
Requires-Dist: openai>=1.81.0
|
|
26
|
+
Requires-Dist: transformers<=4.51.3
|
|
27
|
+
Requires-Dist: jinja2
|
|
28
|
+
Requires-Dist: tabulate
|
|
29
|
+
Requires-Dist: sentencepiece
|
|
30
|
+
Requires-Dist: huggingface-hub==0.33.0
|
|
31
|
+
Provides-Extra: oga-hybrid
|
|
32
|
+
Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
|
|
33
|
+
Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
|
|
34
|
+
Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
|
|
35
|
+
Provides-Extra: oga-cpu
|
|
36
|
+
Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
|
|
37
|
+
Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
|
|
38
|
+
Provides-Extra: dev
|
|
39
|
+
Requires-Dist: torch>=2.6.0; extra == "dev"
|
|
40
|
+
Requires-Dist: accelerate; extra == "dev"
|
|
41
|
+
Requires-Dist: datasets; extra == "dev"
|
|
42
|
+
Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
43
|
+
Requires-Dist: matplotlib; extra == "dev"
|
|
44
|
+
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
45
|
+
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
46
|
+
Provides-Extra: oga-hybrid-minimal
|
|
47
|
+
Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
|
|
48
|
+
Provides-Extra: oga-cpu-minimal
|
|
49
|
+
Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
|
|
50
|
+
Provides-Extra: llm
|
|
51
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm"
|
|
52
|
+
Provides-Extra: llm-oga-cpu
|
|
53
|
+
Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
|
|
54
|
+
Provides-Extra: llm-oga-igpu
|
|
55
|
+
Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
|
|
56
|
+
Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
|
|
57
|
+
Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
|
|
58
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
|
|
59
|
+
Provides-Extra: llm-oga-cuda
|
|
60
|
+
Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
|
|
61
|
+
Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
|
|
62
|
+
Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
|
|
63
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
|
|
64
|
+
Provides-Extra: llm-oga-npu
|
|
65
|
+
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
66
|
+
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
67
|
+
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
68
|
+
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
69
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
70
|
+
Provides-Extra: llm-oga-hybrid
|
|
71
|
+
Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
|
|
72
|
+
Provides-Extra: llm-oga-unified
|
|
73
|
+
Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
74
|
+
Dynamic: author-email
|
|
75
|
+
Dynamic: description
|
|
76
|
+
Dynamic: description-content-type
|
|
77
|
+
Dynamic: license-file
|
|
78
|
+
Dynamic: provides-extra
|
|
79
|
+
Dynamic: requires-dist
|
|
80
|
+
Dynamic: requires-python
|
|
81
|
+
Dynamic: summary
|
|
82
|
+
|
|
83
|
+
## 🍋 Lemonade: Local LLM Serving with GPU and NPU acceleration
|
|
84
|
+
|
|
85
|
+
<p align="center">
|
|
86
|
+
<a href="https://discord.gg/5xXzkMu8Zk">
|
|
87
|
+
<img src="https://img.shields.io/badge/Discord-7289DA?logo=discord&logoColor=white" alt="Discord" />
|
|
88
|
+
</a>
|
|
89
|
+
<a href="https://github.com/lemonade-sdk/lemonade/tree/main/test" title="Check out our tests">
|
|
90
|
+
<img src="https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg" alt="Lemonade tests" />
|
|
91
|
+
</a>
|
|
92
|
+
<a href="docs/README.md#installation" title="Check out our instructions">
|
|
93
|
+
<img src="https://img.shields.io/badge/Windows-11-0078D6?logo=windows&logoColor=white" alt="Windows 11" />
|
|
94
|
+
</a>
|
|
95
|
+
<a href="https://lemonade-server.ai/#linux" title="Ubuntu 24.04 & 25.04 Supported">
|
|
96
|
+
<img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
|
|
97
|
+
</a>
|
|
98
|
+
<a href="docs/README.md#installation" title="Check out our instructions">
|
|
99
|
+
<img src="https://img.shields.io/badge/Python-3.10%20%7C%203.12-blue?logo=python&logoColor=white" alt="Made with Python" />
|
|
100
|
+
</a>
|
|
101
|
+
<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
|
|
102
|
+
<img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
|
|
103
|
+
</a>
|
|
104
|
+
<a href="https://github.com/lemonade-sdk/lemonade/releases/latest" title="Download the latest release">
|
|
105
|
+
<img src="https://img.shields.io/github/v/release/lemonade-sdk/lemonade?include_prereleases" alt="Latest Release" />
|
|
106
|
+
</a>
|
|
107
|
+
<a href="https://tooomm.github.io/github-release-stats/?username=lemonade-sdk&repository=lemonade">
|
|
108
|
+
<img src="https://img.shields.io/github/downloads/lemonade-sdk/lemonade/total.svg" alt="GitHub downloads" />
|
|
109
|
+
</a>
|
|
110
|
+
<a href="https://github.com/lemonade-sdk/lemonade/issues">
|
|
111
|
+
<img src="https://img.shields.io/github/issues/lemonade-sdk/lemonade" alt="GitHub issues" />
|
|
112
|
+
</a>
|
|
113
|
+
<a href="https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE">
|
|
114
|
+
<img src="https://img.shields.io/badge/License-Apache-yellow.svg" alt="License: Apache" />
|
|
115
|
+
</a>
|
|
116
|
+
<a href="https://github.com/psf/black">
|
|
117
|
+
<img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style: black" />
|
|
118
|
+
</a>
|
|
119
|
+
<a href="https://star-history.com/#lemonade-sdk/lemonade">
|
|
120
|
+
<img src="https://img.shields.io/badge/Star%20History-View-brightgreen" alt="Star History Chart" />
|
|
121
|
+
</a>
|
|
122
|
+
</p>
|
|
123
|
+
<p align="center">
|
|
124
|
+
<img src="https://github.com/lemonade-sdk/assets/blob/main/docs/banner.png?raw=true" alt="Lemonade Banner" />
|
|
125
|
+
</p>
|
|
126
|
+
<h3 align="center">
|
|
127
|
+
<a href="https://lemonade-server.ai">Download</a> |
|
|
128
|
+
<a href="https://lemonade-server.ai/docs/">Documentation</a> |
|
|
129
|
+
<a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
|
|
130
|
+
</h3>
|
|
131
|
+
|
|
132
|
+
Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
|
|
133
|
+
|
|
134
|
+
## Getting Started
|
|
135
|
+
|
|
136
|
+
<div align="center">
|
|
137
|
+
|
|
138
|
+
| Step 1: Download & Install | Step 2: Launch and Pull Models | Step 3: Start chatting! |
|
|
139
|
+
|:---------------------------:|:-------------------------------:|:------------------------:|
|
|
140
|
+
| <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/install.gif?raw=true" alt="Download & Install" width="245" /> | <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/launch_and_pull.gif?raw=true" alt="Launch and Pull Models" width="245" /> | <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/chat.gif?raw=true" alt="Start chatting!" width="245" /> |
|
|
141
|
+
|Install using a [GUI](https://github.com/lemonade-sdk/lemonade/releases/latest/download/Lemonade_Server_Installer.exe) (Windows only), [pip](https://lemonade-server.ai/install_options.html), or [from source](https://lemonade-server.ai/install_options.html). |Use the [Model Manager](#model-library) to install models|A built-in chat interface is available!|
|
|
142
|
+
</div>
|
|
143
|
+
|
|
144
|
+
### Use it with your favorite OpenAI-compatible app!
|
|
145
|
+
|
|
146
|
+
<p align="center">
|
|
147
|
+
<a href="https://lemonade-server.ai/docs/server/apps/open-webui/" title="Open WebUI" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/openwebui.jpg" alt="Open WebUI" width="60" /></a> <a href="https://lemonade-server.ai/docs/server/apps/continue/" title="Continue" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/continue_dev.png" alt="Continue" width="60" /></a> <a href="https://github.com/amd/gaia" title="Gaia" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/gaia.ico" alt="Gaia" width="60" /></a> <a href="https://lemonade-server.ai/docs/server/apps/anythingLLM/" title="AnythingLLM" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/anything_llm.png" alt="AnythingLLM" width="60" /></a> <a href="https://lemonade-server.ai/docs/server/apps/ai-dev-gallery/" title="AI Dev Gallery" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_dev_gallery.webp" alt="AI Dev Gallery" width="60" /></a> <a href="https://lemonade-server.ai/docs/server/apps/lm-eval/" title="LM-Eval" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" width="60" /></a> <a href="https://lemonade-server.ai/docs/server/apps/codeGPT/" title="CodeGPT" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/codegpt.jpg" alt="CodeGPT" width="60" /></a> <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" title="AI Toolkit" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI Toolkit" width="60" /></a>
|
|
148
|
+
</p>
|
|
149
|
+
|
|
150
|
+
> [!TIP]
|
|
151
|
+
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email lemonade@amd.com.
|
|
152
|
+
|
|
153
|
+
## Using the CLI
|
|
154
|
+
|
|
155
|
+
To run and chat with Gemma 3:
|
|
156
|
+
|
|
157
|
+
```
|
|
158
|
+
lemonade-server run Gemma-3-4b-it-GGUF
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
To install models ahead of time, use the `pull` command:
|
|
162
|
+
|
|
163
|
+
```
|
|
164
|
+
lemonade-server pull Gemma-3-4b-it-GGUF
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
To check all models available, use the `list` command:
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
lemonade-server list
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
> Note: If you installed from source, use the `lemonade-server-dev` command instead.
|
|
174
|
+
|
|
175
|
+
## Model Library
|
|
176
|
+
|
|
177
|
+
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configurations](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
|
|
178
|
+
|
|
179
|
+
You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
|
|
180
|
+
<p align="center">
|
|
181
|
+
<img src="https://github.com/lemonade-sdk/assets/blob/main/docs/model_manager.png?raw=true" alt="Model Manager" width="650" />
|
|
182
|
+
</p>
|
|
183
|
+
|
|
184
|
+
## Supported Configurations
|
|
185
|
+
|
|
186
|
+
Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about them [here](./docs/README.md#software-and-hardware-overview).
|
|
187
|
+
|
|
188
|
+
<table>
|
|
189
|
+
<thead>
|
|
190
|
+
<tr>
|
|
191
|
+
<th rowspan="2">Hardware</th>
|
|
192
|
+
<th colspan="3" align="center">🛠️ Engine Support</th>
|
|
193
|
+
<th colspan="2" align="center">🖥️ OS (x86/x64)</th>
|
|
194
|
+
</tr>
|
|
195
|
+
<tr>
|
|
196
|
+
<th align="center">OGA</th>
|
|
197
|
+
<th align="center">llamacpp</th>
|
|
198
|
+
<th align="center">HF</th>
|
|
199
|
+
<th align="center">Windows</th>
|
|
200
|
+
<th align="center">Linux</th>
|
|
201
|
+
</tr>
|
|
202
|
+
</thead>
|
|
203
|
+
<tbody>
|
|
204
|
+
<tr>
|
|
205
|
+
<td><strong>🧠 CPU</strong></td>
|
|
206
|
+
<td align="center">All platforms</td>
|
|
207
|
+
<td align="center">All platforms</td>
|
|
208
|
+
<td align="center">All platforms</td>
|
|
209
|
+
<td align="center">✅</td>
|
|
210
|
+
<td align="center">✅</td>
|
|
211
|
+
</tr>
|
|
212
|
+
<tr>
|
|
213
|
+
<td><strong>🎮 GPU</strong></td>
|
|
214
|
+
<td align="center">—</td>
|
|
215
|
+
<td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
|
|
216
|
+
<td align="center">—</td>
|
|
217
|
+
<td align="center">✅</td>
|
|
218
|
+
<td align="center">✅</td>
|
|
219
|
+
</tr>
|
|
220
|
+
<tr>
|
|
221
|
+
<td><strong>🤖 NPU</strong></td>
|
|
222
|
+
<td align="center">AMD Ryzen™ AI 300 series</td>
|
|
223
|
+
<td align="center">—</td>
|
|
224
|
+
<td align="center">—</td>
|
|
225
|
+
<td align="center">✅</td>
|
|
226
|
+
<td align="center">—</td>
|
|
227
|
+
</tr>
|
|
228
|
+
</tbody>
|
|
229
|
+
</table>
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
## Integrate Lemonade Server with Your Application
|
|
233
|
+
|
|
234
|
+
You can use any OpenAI-compatible client library by configuring it to use `http://localhost:8000/api/v1` as the base URL. A table containing official and popular OpenAI clients for different languages is shown below.
|
|
235
|
+
|
|
236
|
+
Feel free to pick and choose your preferred language.
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
| Python | C++ | Java | C# | Node.js | Go | Ruby | Rust | PHP |
|
|
240
|
+
|--------|-----|------|----|---------|----|-------|------|-----|
|
|
241
|
+
| [openai-python](https://github.com/openai/openai-python) | [openai-cpp](https://github.com/olrea/openai-cpp) | [openai-java](https://github.com/openai/openai-java) | [openai-dotnet](https://github.com/openai/openai-dotnet) | [openai-node](https://github.com/openai/openai-node) | [go-openai](https://github.com/sashabaranov/go-openai) | [ruby-openai](https://github.com/alexrudall/ruby-openai) | [async-openai](https://github.com/64bit/async-openai) | [openai-php](https://github.com/openai-php/client) |
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
### Python Client Example
|
|
245
|
+
```python
|
|
246
|
+
from openai import OpenAI
|
|
247
|
+
|
|
248
|
+
# Initialize the client to use Lemonade Server
|
|
249
|
+
client = OpenAI(
|
|
250
|
+
base_url="http://localhost:8000/api/v1",
|
|
251
|
+
api_key="lemonade" # required but unused
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
# Create a chat completion
|
|
255
|
+
completion = client.chat.completions.create(
|
|
256
|
+
model="Llama-3.2-1B-Instruct-Hybrid", # or any other available model
|
|
257
|
+
messages=[
|
|
258
|
+
{"role": "user", "content": "What is the capital of France?"}
|
|
259
|
+
]
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
# Print the response
|
|
263
|
+
print(completion.choices[0].message.content)
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
|
|
267
|
+
|
|
268
|
+
## Beyond an LLM Server
|
|
269
|
+
|
|
270
|
+
The [Lemonade SDK](./docs/README.md) also includes the following components:
|
|
271
|
+
|
|
272
|
+
- 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
|
|
273
|
+
- 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
|
|
274
|
+
|
|
275
|
+
## Contributing
|
|
276
|
+
|
|
277
|
+
We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
|
|
278
|
+
|
|
279
|
+
New contributors can find beginner-friendly issues tagged with "Good First Issue" to get started.
|
|
280
|
+
|
|
281
|
+
<a href="https://github.com/lemonade-sdk/lemonade/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22">
|
|
282
|
+
<img src="https://img.shields.io/badge/🍋Lemonade-Good%20First%20Issue-yellowgreen?colorA=38b000&colorB=cccccc" alt="Good First Issue" />
|
|
283
|
+
</a>
|
|
284
|
+
|
|
285
|
+
## Maintainers
|
|
286
|
+
|
|
287
|
+
This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
|
|
288
|
+
|
|
289
|
+
## License
|
|
290
|
+
|
|
291
|
+
This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
|
|
292
|
+
|
|
293
|
+
<!--This file was originally licensed under Apache 2.0. It has been modified.
|
|
294
|
+
Modifications Copyright (c) 2025 AMD-->
|
|
295
|
+
|