lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/api.py +50 -0
- lemonade/common/inference_engines.py +415 -0
- lemonade/common/system_info.py +493 -47
- lemonade/tools/humaneval.py +1 -1
- lemonade/tools/management_tools.py +53 -7
- lemonade/tools/mmlu.py +1 -1
- lemonade/tools/oga/load.py +1 -1
- lemonade/tools/perplexity.py +2 -2
- lemonade/tools/quark/quark_load.py +1 -1
- lemonade/tools/quark/quark_quantize.py +2 -2
- lemonade/tools/server/llamacpp.py +130 -9
- lemonade/tools/server/serve.py +102 -0
- lemonade/tools/server/static/styles.css +458 -55
- lemonade/tools/server/static/webapp.html +322 -35
- lemonade/version.py +1 -1
- lemonade_sdk-8.0.5.dist-info/METADATA +295 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/RECORD +26 -25
- lemonade_server/cli.py +168 -22
- lemonade_server/model_manager.py +12 -2
- lemonade_server/pydantic_models.py +25 -1
- lemonade_server/server_models.json +46 -44
- lemonade_sdk-8.0.3.dist-info/METADATA +0 -183
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/top_level.txt +0 -0
lemonade_server/model_manager.py
CHANGED
|
@@ -54,6 +54,17 @@ class ModelManager:
|
|
|
54
54
|
for model_name, model_info in user_models.items()
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
# Backwards compatibility for user models that were created before version 8.0.4
|
|
58
|
+
# "reasoning" was a boolean, but as of 8.0.4 it became a label
|
|
59
|
+
for _, model_info in user_models.items():
|
|
60
|
+
if "reasoning" in model_info:
|
|
61
|
+
model_info["labels"] = (
|
|
62
|
+
["reasoning"]
|
|
63
|
+
if not model_info["labels"]
|
|
64
|
+
else model_info["labels"] + ["reasoning"]
|
|
65
|
+
)
|
|
66
|
+
del model_info["reasoning"]
|
|
67
|
+
|
|
57
68
|
models.update(user_models)
|
|
58
69
|
|
|
59
70
|
# Add the model name as a key in each entry, to make it easier
|
|
@@ -268,9 +279,8 @@ class ModelManager:
|
|
|
268
279
|
new_user_model = {
|
|
269
280
|
"checkpoint": checkpoint,
|
|
270
281
|
"recipe": recipe,
|
|
271
|
-
"reasoning": reasoning,
|
|
272
282
|
"suggested": True,
|
|
273
|
-
"labels": ["custom"],
|
|
283
|
+
"labels": ["custom"] + (["reasoning"] if reasoning else []),
|
|
274
284
|
}
|
|
275
285
|
|
|
276
286
|
if mmproj:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import Optional, Union, List, Any
|
|
2
2
|
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
|
|
@@ -65,6 +65,30 @@ class ChatCompletionRequest(BaseModel):
|
|
|
65
65
|
response_format: dict | None = None
|
|
66
66
|
|
|
67
67
|
|
|
68
|
+
class EmbeddingsRequest(BaseModel):
|
|
69
|
+
"""
|
|
70
|
+
Request model for embeddings API endpoint.
|
|
71
|
+
|
|
72
|
+
Generates embeddings for the provided input text or tokens.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
input: Union[str, List]
|
|
76
|
+
model: Optional[str] = None
|
|
77
|
+
encoding_format: Optional[str] = "float" # "float" or "base64"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class RerankingRequest(BaseModel):
|
|
81
|
+
"""
|
|
82
|
+
Request model for reranking API endpoint.
|
|
83
|
+
|
|
84
|
+
Reranks a list of documents based on their relevance to a query.
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
query: str
|
|
88
|
+
documents: List[str]
|
|
89
|
+
model: str
|
|
90
|
+
|
|
91
|
+
|
|
68
92
|
class ResponsesRequest(BaseModel):
|
|
69
93
|
"""
|
|
70
94
|
Request model for responses API endpoint.
|
|
@@ -2,197 +2,177 @@
|
|
|
2
2
|
"Qwen2.5-0.5B-Instruct-CPU": {
|
|
3
3
|
"checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
|
|
4
4
|
"recipe": "oga-cpu",
|
|
5
|
-
"reasoning": false,
|
|
6
5
|
"suggested": true
|
|
7
6
|
},
|
|
8
7
|
"Llama-3.2-1B-Instruct-CPU": {
|
|
9
8
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
|
|
10
9
|
"recipe": "oga-cpu",
|
|
11
|
-
"reasoning": false,
|
|
12
10
|
"suggested": false
|
|
13
11
|
},
|
|
14
12
|
"Llama-3.2-3B-Instruct-CPU": {
|
|
15
13
|
"checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
|
|
16
14
|
"recipe": "oga-cpu",
|
|
17
|
-
"reasoning": false,
|
|
18
15
|
"suggested": false
|
|
19
16
|
},
|
|
20
17
|
"Phi-3-Mini-Instruct-CPU": {
|
|
21
18
|
"checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
|
|
22
19
|
"recipe": "oga-cpu",
|
|
23
|
-
"reasoning": false,
|
|
24
20
|
"suggested": true
|
|
25
21
|
},
|
|
26
22
|
"Qwen-1.5-7B-Chat-CPU": {
|
|
27
23
|
"checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
|
|
28
24
|
"recipe": "oga-cpu",
|
|
29
|
-
"reasoning": false,
|
|
30
25
|
"suggested": true
|
|
31
26
|
},
|
|
32
27
|
"DeepSeek-R1-Distill-Llama-8B-CPU": {
|
|
33
28
|
"checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
|
|
34
29
|
"recipe": "oga-cpu",
|
|
35
|
-
"reasoning": true,
|
|
36
|
-
"suggested": true
|
|
30
|
+
"suggested": true,
|
|
31
|
+
"labels": ["reasoning"]
|
|
37
32
|
},
|
|
38
33
|
"DeepSeek-R1-Distill-Qwen-7B-CPU": {
|
|
39
34
|
"checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
|
|
40
35
|
"recipe": "oga-cpu",
|
|
41
|
-
"reasoning": true,
|
|
42
|
-
"suggested": true
|
|
36
|
+
"suggested": true,
|
|
37
|
+
"labels": ["reasoning"]
|
|
43
38
|
},
|
|
44
39
|
"Llama-3.2-1B-Instruct-Hybrid": {
|
|
45
40
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
46
41
|
"recipe": "oga-hybrid",
|
|
47
|
-
"reasoning": false,
|
|
48
42
|
"max_prompt_length": 3000,
|
|
49
43
|
"suggested": true
|
|
50
44
|
},
|
|
51
45
|
"Llama-3.2-3B-Instruct-Hybrid": {
|
|
52
46
|
"checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
53
47
|
"recipe": "oga-hybrid",
|
|
54
|
-
"reasoning": false,
|
|
55
48
|
"max_prompt_length": 2000,
|
|
56
49
|
"suggested": true
|
|
57
50
|
},
|
|
58
51
|
"Phi-3-Mini-Instruct-Hybrid": {
|
|
59
52
|
"checkpoint": "amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
60
53
|
"recipe": "oga-hybrid",
|
|
61
|
-
"reasoning": false,
|
|
62
54
|
"max_prompt_length": 2000,
|
|
63
55
|
"suggested": true
|
|
64
56
|
},
|
|
65
57
|
"Phi-3.5-Mini-Instruct-Hybrid": {
|
|
66
58
|
"checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
67
59
|
"recipe": "oga-hybrid",
|
|
68
|
-
"reasoning": false,
|
|
69
60
|
"suggested": false
|
|
70
61
|
},
|
|
71
62
|
"Qwen-1.5-7B-Chat-Hybrid": {
|
|
72
63
|
"checkpoint": "amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
73
64
|
"recipe": "oga-hybrid",
|
|
74
|
-
"reasoning": false,
|
|
75
65
|
"max_prompt_length": 3000,
|
|
76
66
|
"suggested": true
|
|
77
67
|
},
|
|
78
68
|
"DeepSeek-R1-Distill-Llama-8B-Hybrid": {
|
|
79
69
|
"checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
|
|
80
70
|
"recipe": "oga-hybrid",
|
|
81
|
-
"reasoning": true,
|
|
82
71
|
"max_prompt_length": 2000,
|
|
83
|
-
"suggested": true
|
|
72
|
+
"suggested": true,
|
|
73
|
+
"labels": ["reasoning"]
|
|
84
74
|
},
|
|
85
75
|
"DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
|
|
86
76
|
"checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
|
|
87
77
|
"recipe": "oga-hybrid",
|
|
88
|
-
"reasoning": true,
|
|
89
78
|
"max_prompt_length": 2000,
|
|
90
|
-
"suggested": true
|
|
79
|
+
"suggested": true,
|
|
80
|
+
"labels": ["reasoning"]
|
|
91
81
|
},
|
|
92
82
|
"Mistral-7B-v0.3-Instruct-Hybrid": {
|
|
93
83
|
"checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
94
84
|
"recipe": "oga-hybrid",
|
|
95
|
-
"reasoning": false,
|
|
96
85
|
"max_prompt_length": 2000,
|
|
97
86
|
"suggested": true
|
|
98
87
|
},
|
|
99
88
|
"Llama-3.1-8B-Instruct-Hybrid": {
|
|
100
89
|
"checkpoint": "amd/Llama-3.1-8B-Instruct-awq-asym-uint4-g128-lmhead-onnx-hybrid",
|
|
101
90
|
"recipe": "oga-hybrid",
|
|
102
|
-
"reasoning": false,
|
|
103
91
|
"max_prompt_length": 2000,
|
|
104
92
|
"suggested": true
|
|
105
93
|
},
|
|
106
94
|
"Llama-xLAM-2-8b-fc-r-Hybrid": {
|
|
107
95
|
"checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
|
|
108
96
|
"recipe": "oga-hybrid",
|
|
109
|
-
"reasoning": false,
|
|
110
97
|
"max_prompt_length": 2000,
|
|
111
98
|
"suggested": true
|
|
112
99
|
},
|
|
113
100
|
"Llama-3.2-1B-Instruct-DirectML": {
|
|
114
101
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
|
|
115
102
|
"recipe": "oga-igpu",
|
|
116
|
-
"reasoning": false,
|
|
117
103
|
"suggested": false
|
|
118
104
|
},
|
|
119
105
|
"Llama-3.2-3B-Instruct-DirectML": {
|
|
120
106
|
"checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
|
|
121
107
|
"recipe": "oga-igpu",
|
|
122
|
-
"reasoning": false,
|
|
123
108
|
"suggested": false
|
|
124
109
|
},
|
|
125
110
|
"Phi-3.5-Mini-Instruct-DirectML": {
|
|
126
111
|
"checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
|
|
127
112
|
"recipe": "oga-igpu",
|
|
128
|
-
"reasoning": false,
|
|
129
113
|
"suggested": false
|
|
130
114
|
},
|
|
131
115
|
"Qwen-1.5-7B-Chat-DirectML": {
|
|
132
116
|
"checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
|
|
133
117
|
"recipe": "oga-igpu",
|
|
134
|
-
"reasoning": false,
|
|
135
118
|
"suggested": false
|
|
136
119
|
},
|
|
137
120
|
"Mistral-7B-v0.1-Instruct-DirectML": {
|
|
138
121
|
"checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
|
|
139
122
|
"recipe": "oga-igpu",
|
|
140
|
-
"reasoning": false,
|
|
141
123
|
"suggested": false
|
|
142
124
|
},
|
|
143
125
|
"Llama-3-8B-Instruct-DirectML": {
|
|
144
126
|
"checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
|
|
145
127
|
"recipe": "oga-igpu",
|
|
146
|
-
"reasoning": false,
|
|
147
128
|
"suggested": false
|
|
148
129
|
},
|
|
149
130
|
"Qwen3-0.6B-GGUF": {
|
|
150
131
|
"checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
|
|
151
132
|
"recipe": "llamacpp",
|
|
152
|
-
"reasoning": true,
|
|
153
|
-
"suggested": true
|
|
133
|
+
"suggested": true,
|
|
134
|
+
"labels": ["reasoning"]
|
|
154
135
|
},
|
|
155
136
|
"Qwen3-1.7B-GGUF": {
|
|
156
137
|
"checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
|
|
157
138
|
"recipe": "llamacpp",
|
|
158
|
-
"reasoning": true,
|
|
159
|
-
"suggested": true
|
|
139
|
+
"suggested": true,
|
|
140
|
+
"labels": ["reasoning"]
|
|
160
141
|
},
|
|
161
142
|
"Qwen3-4B-GGUF": {
|
|
162
143
|
"checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
|
|
163
144
|
"recipe": "llamacpp",
|
|
164
|
-
"reasoning": true,
|
|
165
|
-
"suggested": true
|
|
145
|
+
"suggested": true,
|
|
146
|
+
"labels": ["reasoning"]
|
|
166
147
|
},
|
|
167
148
|
"Qwen3-8B-GGUF": {
|
|
168
149
|
"checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
|
|
169
150
|
"recipe": "llamacpp",
|
|
170
|
-
"reasoning": true,
|
|
171
|
-
"suggested": true
|
|
151
|
+
"suggested": true,
|
|
152
|
+
"labels": ["reasoning"]
|
|
172
153
|
},
|
|
173
154
|
"DeepSeek-Qwen3-8B-GGUF": {
|
|
174
155
|
"checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
|
|
175
156
|
"recipe": "llamacpp",
|
|
176
|
-
"reasoning": true,
|
|
177
|
-
"suggested": true
|
|
157
|
+
"suggested": true,
|
|
158
|
+
"labels": ["reasoning"]
|
|
178
159
|
},
|
|
179
160
|
"Qwen3-14B-GGUF": {
|
|
180
161
|
"checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
|
|
181
162
|
"recipe": "llamacpp",
|
|
182
|
-
"reasoning": true,
|
|
183
|
-
"suggested": true
|
|
163
|
+
"suggested": true,
|
|
164
|
+
"labels": ["reasoning"]
|
|
184
165
|
},
|
|
185
166
|
"Qwen3-30B-A3B-GGUF": {
|
|
186
167
|
"checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
|
|
187
168
|
"recipe": "llamacpp",
|
|
188
|
-
"reasoning": true,
|
|
189
|
-
"suggested": true
|
|
169
|
+
"suggested": true,
|
|
170
|
+
"labels": ["reasoning"]
|
|
190
171
|
},
|
|
191
172
|
"Gemma-3-4b-it-GGUF": {
|
|
192
173
|
"checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
|
193
174
|
"mmproj": "mmproj-model-f16.gguf",
|
|
194
175
|
"recipe": "llamacpp",
|
|
195
|
-
"reasoning": false,
|
|
196
176
|
"suggested": true,
|
|
197
177
|
"labels": ["vision"]
|
|
198
178
|
},
|
|
@@ -200,7 +180,6 @@
|
|
|
200
180
|
"checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
|
|
201
181
|
"mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
|
|
202
182
|
"recipe": "llamacpp",
|
|
203
|
-
"reasoning": false,
|
|
204
183
|
"suggested": true,
|
|
205
184
|
"labels": ["vision"]
|
|
206
185
|
},
|
|
@@ -208,8 +187,31 @@
|
|
|
208
187
|
"checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
|
|
209
188
|
"mmproj": "mmproj-F16.gguf",
|
|
210
189
|
"recipe": "llamacpp",
|
|
211
|
-
"reasoning": false,
|
|
212
190
|
"suggested": true,
|
|
213
191
|
"labels": ["vision"]
|
|
192
|
+
},
|
|
193
|
+
"nomic-embed-text-v1-GGUF": {
|
|
194
|
+
"checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
|
|
195
|
+
"recipe": "llamacpp",
|
|
196
|
+
"suggested": true,
|
|
197
|
+
"labels": ["embeddings"]
|
|
198
|
+
},
|
|
199
|
+
"nomic-embed-text-v2-moe-GGUF": {
|
|
200
|
+
"checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
|
|
201
|
+
"recipe": "llamacpp",
|
|
202
|
+
"suggested": true,
|
|
203
|
+
"labels": ["embeddings"]
|
|
204
|
+
},
|
|
205
|
+
"bge-reranker-v2-m3-GGUF": {
|
|
206
|
+
"checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
|
|
207
|
+
"recipe": "llamacpp",
|
|
208
|
+
"suggested": true,
|
|
209
|
+
"labels": ["reranking"]
|
|
210
|
+
},
|
|
211
|
+
"jina-reranker-v1-tiny-en-GGUF": {
|
|
212
|
+
"checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
|
|
213
|
+
"recipe": "llamacpp",
|
|
214
|
+
"suggested": false,
|
|
215
|
+
"labels": ["reranking"]
|
|
214
216
|
}
|
|
215
217
|
}
|
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: lemonade-sdk
|
|
3
|
-
Version: 8.0.3
|
|
4
|
-
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
|
-
Author-email: lemonade@amd.com
|
|
6
|
-
Requires-Python: >=3.10, <3.12
|
|
7
|
-
Description-Content-Type: text/markdown
|
|
8
|
-
License-File: LICENSE
|
|
9
|
-
License-File: NOTICE.md
|
|
10
|
-
Requires-Dist: invoke>=2.0.0
|
|
11
|
-
Requires-Dist: onnx<1.18.0,>=1.11.0
|
|
12
|
-
Requires-Dist: pyyaml>=5.4
|
|
13
|
-
Requires-Dist: typeguard>=2.3.13
|
|
14
|
-
Requires-Dist: packaging>=20.9
|
|
15
|
-
Requires-Dist: numpy<2.0.0
|
|
16
|
-
Requires-Dist: fasteners
|
|
17
|
-
Requires-Dist: GitPython>=3.1.40
|
|
18
|
-
Requires-Dist: psutil>=6.1.1
|
|
19
|
-
Requires-Dist: wmi
|
|
20
|
-
Requires-Dist: py-cpuinfo
|
|
21
|
-
Requires-Dist: pytz
|
|
22
|
-
Requires-Dist: zstandard
|
|
23
|
-
Requires-Dist: fastapi
|
|
24
|
-
Requires-Dist: uvicorn[standard]
|
|
25
|
-
Requires-Dist: openai>=1.81.0
|
|
26
|
-
Requires-Dist: transformers<=4.51.3
|
|
27
|
-
Requires-Dist: jinja2
|
|
28
|
-
Requires-Dist: tabulate
|
|
29
|
-
Requires-Dist: sentencepiece
|
|
30
|
-
Requires-Dist: huggingface-hub==0.33.0
|
|
31
|
-
Provides-Extra: oga-hybrid
|
|
32
|
-
Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
|
|
33
|
-
Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
|
|
34
|
-
Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
|
|
35
|
-
Provides-Extra: oga-cpu
|
|
36
|
-
Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
|
|
37
|
-
Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
|
|
38
|
-
Provides-Extra: dev
|
|
39
|
-
Requires-Dist: torch>=2.6.0; extra == "dev"
|
|
40
|
-
Requires-Dist: accelerate; extra == "dev"
|
|
41
|
-
Requires-Dist: datasets; extra == "dev"
|
|
42
|
-
Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
43
|
-
Requires-Dist: matplotlib; extra == "dev"
|
|
44
|
-
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
45
|
-
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
46
|
-
Provides-Extra: oga-hybrid-minimal
|
|
47
|
-
Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
|
|
48
|
-
Provides-Extra: oga-cpu-minimal
|
|
49
|
-
Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
|
|
50
|
-
Provides-Extra: llm
|
|
51
|
-
Requires-Dist: lemonade-sdk[dev]; extra == "llm"
|
|
52
|
-
Provides-Extra: llm-oga-cpu
|
|
53
|
-
Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
|
|
54
|
-
Provides-Extra: llm-oga-igpu
|
|
55
|
-
Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
|
|
56
|
-
Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
|
|
57
|
-
Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
|
|
58
|
-
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
|
|
59
|
-
Provides-Extra: llm-oga-cuda
|
|
60
|
-
Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
|
|
61
|
-
Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
|
|
62
|
-
Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
|
|
63
|
-
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
|
|
64
|
-
Provides-Extra: llm-oga-npu
|
|
65
|
-
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
66
|
-
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
67
|
-
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
68
|
-
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
69
|
-
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
70
|
-
Provides-Extra: llm-oga-hybrid
|
|
71
|
-
Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
|
|
72
|
-
Provides-Extra: llm-oga-unified
|
|
73
|
-
Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
74
|
-
Dynamic: author-email
|
|
75
|
-
Dynamic: description
|
|
76
|
-
Dynamic: description-content-type
|
|
77
|
-
Dynamic: license-file
|
|
78
|
-
Dynamic: provides-extra
|
|
79
|
-
Dynamic: requires-dist
|
|
80
|
-
Dynamic: requires-python
|
|
81
|
-
Dynamic: summary
|
|
82
|
-
|
|
83
|
-
[](https://github.com/lemonade-sdk/lemonade/tree/main/test "Check out our tests")
|
|
84
|
-
[](docs/README.md#installation "Check out our instructions")
|
|
85
|
-
[](docs/README.md#installation "Check out our instructions")
|
|
86
|
-
|
|
87
|
-
## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
|
|
88
|
-
|
|
89
|
-
The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
|
|
90
|
-
|
|
91
|
-
<div align="center">
|
|
92
|
-
<img src="https://download.amd.com/images/lemonade_640x480_1.gif" alt="Lemonade Demo" title="Lemonade in Action">
|
|
93
|
-
</div>
|
|
94
|
-
|
|
95
|
-
### Features
|
|
96
|
-
|
|
97
|
-
The [Lemonade SDK](./docs/README.md) is comprised of the following:
|
|
98
|
-
|
|
99
|
-
- 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
|
|
100
|
-
- 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
|
|
101
|
-
- 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
|
|
102
|
-
- Prompting with templates.
|
|
103
|
-
- Measuring accuracy with a variety of tests.
|
|
104
|
-
- Benchmarking to get the time-to-first-token and tokens per second.
|
|
105
|
-
- Profiling the memory utilization.
|
|
106
|
-
|
|
107
|
-
### [Click here to get started with Lemonade.](./docs/README.md)
|
|
108
|
-
|
|
109
|
-
### Supported Configurations
|
|
110
|
-
|
|
111
|
-
Maximum LLM performance requires the right hardware accelerator with the right inference engine for your scenario. Lemonade supports the following configurations, while also making it easy to switch between them at runtime.
|
|
112
|
-
|
|
113
|
-
<table border="1" cellpadding="6" cellspacing="0">
|
|
114
|
-
<thead>
|
|
115
|
-
<tr>
|
|
116
|
-
<th rowspan="2">Hardware</th>
|
|
117
|
-
<th colspan="3" align="center">🛠️ Engine Support</th>
|
|
118
|
-
<th colspan="2" align="center">🖥️ OS (x86/x64)</th>
|
|
119
|
-
</tr>
|
|
120
|
-
<tr>
|
|
121
|
-
<th align="center">OGA</th>
|
|
122
|
-
<th align="center">llamacpp</th>
|
|
123
|
-
<th align="center">HF</th>
|
|
124
|
-
<th align="center">Windows</th>
|
|
125
|
-
<th align="center">Linux</th>
|
|
126
|
-
</tr>
|
|
127
|
-
</thead>
|
|
128
|
-
<tbody>
|
|
129
|
-
<tr>
|
|
130
|
-
<td>🧠 CPU</td>
|
|
131
|
-
<td align="center">All platforms</td>
|
|
132
|
-
<td align="center">All platforms</td>
|
|
133
|
-
<td align="center">All platforms</td>
|
|
134
|
-
<td align="center">✅</td>
|
|
135
|
-
<td align="center">✅</td>
|
|
136
|
-
</tr>
|
|
137
|
-
<tr>
|
|
138
|
-
<td>🎮 GPU</td>
|
|
139
|
-
<td align="center">—</td>
|
|
140
|
-
<td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
|
|
141
|
-
<td align="center">—</td>
|
|
142
|
-
<td align="center">✅</td>
|
|
143
|
-
<td align="center">✅</td>
|
|
144
|
-
</tr>
|
|
145
|
-
<tr>
|
|
146
|
-
<td>🤖 NPU</td>
|
|
147
|
-
<td align="center">AMD Ryzen™ AI 300 series</td>
|
|
148
|
-
<td align="center">—</td>
|
|
149
|
-
<td align="center">—</td>
|
|
150
|
-
<td align="center">✅</td>
|
|
151
|
-
<td align="center">—</td>
|
|
152
|
-
</tr>
|
|
153
|
-
</tbody>
|
|
154
|
-
</table>
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
#### Inference Engines Overview
|
|
159
|
-
| Engine | Description |
|
|
160
|
-
| :--- | :--- |
|
|
161
|
-
| **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
|
|
162
|
-
| **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
|
|
163
|
-
| **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
|
|
164
|
-
|
|
165
|
-
## Integrate Lemonade Server with Your Application
|
|
166
|
-
|
|
167
|
-
Lemonade Server enables languages including Python, C++, Java, C#, Node.js, Go, Ruby, Rust, and PHP. For the full list and integration details, see [docs/server/README.md](./docs/server/README.md).
|
|
168
|
-
|
|
169
|
-
## Contributing
|
|
170
|
-
|
|
171
|
-
We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
|
|
172
|
-
|
|
173
|
-
## Maintainers
|
|
174
|
-
|
|
175
|
-
This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues) or email [lemonade@amd.com](mailto:lemonade@amd.com).
|
|
176
|
-
|
|
177
|
-
## License
|
|
178
|
-
|
|
179
|
-
This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
|
|
180
|
-
|
|
181
|
-
<!--This file was originally licensed under Apache 2.0. It has been modified.
|
|
182
|
-
Modifications Copyright (c) 2025 AMD-->
|
|
183
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|