lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

@@ -54,6 +54,17 @@ class ModelManager:
54
54
  for model_name, model_info in user_models.items()
55
55
  }
56
56
 
57
+ # Backwards compatibility for user models that were created before version 8.0.4
58
+ # "reasoning" was a boolean, but as of 8.0.4 it became a label
59
+ for _, model_info in user_models.items():
60
+ if "reasoning" in model_info:
61
+ model_info["labels"] = (
62
+ ["reasoning"]
63
+ if not model_info["labels"]
64
+ else model_info["labels"] + ["reasoning"]
65
+ )
66
+ del model_info["reasoning"]
67
+
57
68
  models.update(user_models)
58
69
 
59
70
  # Add the model name as a key in each entry, to make it easier
@@ -268,9 +279,8 @@ class ModelManager:
268
279
  new_user_model = {
269
280
  "checkpoint": checkpoint,
270
281
  "recipe": recipe,
271
- "reasoning": reasoning,
272
282
  "suggested": True,
273
- "labels": ["custom"],
283
+ "labels": ["custom"] + (["reasoning"] if reasoning else []),
274
284
  }
275
285
 
276
286
  if mmproj:
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Optional, Union, List, Any
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
@@ -65,6 +65,30 @@ class ChatCompletionRequest(BaseModel):
65
65
  response_format: dict | None = None
66
66
 
67
67
 
68
+ class EmbeddingsRequest(BaseModel):
69
+ """
70
+ Request model for embeddings API endpoint.
71
+
72
+ Generates embeddings for the provided input text or tokens.
73
+ """
74
+
75
+ input: Union[str, List]
76
+ model: Optional[str] = None
77
+ encoding_format: Optional[str] = "float" # "float" or "base64"
78
+
79
+
80
+ class RerankingRequest(BaseModel):
81
+ """
82
+ Request model for reranking API endpoint.
83
+
84
+ Reranks a list of documents based on their relevance to a query.
85
+ """
86
+
87
+ query: str
88
+ documents: List[str]
89
+ model: str
90
+
91
+
68
92
  class ResponsesRequest(BaseModel):
69
93
  """
70
94
  Request model for responses API endpoint.
@@ -2,197 +2,177 @@
2
2
  "Qwen2.5-0.5B-Instruct-CPU": {
3
3
  "checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
4
4
  "recipe": "oga-cpu",
5
- "reasoning": false,
6
5
  "suggested": true
7
6
  },
8
7
  "Llama-3.2-1B-Instruct-CPU": {
9
8
  "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
10
9
  "recipe": "oga-cpu",
11
- "reasoning": false,
12
10
  "suggested": false
13
11
  },
14
12
  "Llama-3.2-3B-Instruct-CPU": {
15
13
  "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
16
14
  "recipe": "oga-cpu",
17
- "reasoning": false,
18
15
  "suggested": false
19
16
  },
20
17
  "Phi-3-Mini-Instruct-CPU": {
21
18
  "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
22
19
  "recipe": "oga-cpu",
23
- "reasoning": false,
24
20
  "suggested": true
25
21
  },
26
22
  "Qwen-1.5-7B-Chat-CPU": {
27
23
  "checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
28
24
  "recipe": "oga-cpu",
29
- "reasoning": false,
30
25
  "suggested": true
31
26
  },
32
27
  "DeepSeek-R1-Distill-Llama-8B-CPU": {
33
28
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
34
29
  "recipe": "oga-cpu",
35
- "reasoning": true,
36
- "suggested": true
30
+ "suggested": true,
31
+ "labels": ["reasoning"]
37
32
  },
38
33
  "DeepSeek-R1-Distill-Qwen-7B-CPU": {
39
34
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
40
35
  "recipe": "oga-cpu",
41
- "reasoning": true,
42
- "suggested": true
36
+ "suggested": true,
37
+ "labels": ["reasoning"]
43
38
  },
44
39
  "Llama-3.2-1B-Instruct-Hybrid": {
45
40
  "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
46
41
  "recipe": "oga-hybrid",
47
- "reasoning": false,
48
42
  "max_prompt_length": 3000,
49
43
  "suggested": true
50
44
  },
51
45
  "Llama-3.2-3B-Instruct-Hybrid": {
52
46
  "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
53
47
  "recipe": "oga-hybrid",
54
- "reasoning": false,
55
48
  "max_prompt_length": 2000,
56
49
  "suggested": true
57
50
  },
58
51
  "Phi-3-Mini-Instruct-Hybrid": {
59
52
  "checkpoint": "amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
60
53
  "recipe": "oga-hybrid",
61
- "reasoning": false,
62
54
  "max_prompt_length": 2000,
63
55
  "suggested": true
64
56
  },
65
57
  "Phi-3.5-Mini-Instruct-Hybrid": {
66
58
  "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
67
59
  "recipe": "oga-hybrid",
68
- "reasoning": false,
69
60
  "suggested": false
70
61
  },
71
62
  "Qwen-1.5-7B-Chat-Hybrid": {
72
63
  "checkpoint": "amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid",
73
64
  "recipe": "oga-hybrid",
74
- "reasoning": false,
75
65
  "max_prompt_length": 3000,
76
66
  "suggested": true
77
67
  },
78
68
  "DeepSeek-R1-Distill-Llama-8B-Hybrid": {
79
69
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
80
70
  "recipe": "oga-hybrid",
81
- "reasoning": true,
82
71
  "max_prompt_length": 2000,
83
- "suggested": true
72
+ "suggested": true,
73
+ "labels": ["reasoning"]
84
74
  },
85
75
  "DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
86
76
  "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
87
77
  "recipe": "oga-hybrid",
88
- "reasoning": true,
89
78
  "max_prompt_length": 2000,
90
- "suggested": true
79
+ "suggested": true,
80
+ "labels": ["reasoning"]
91
81
  },
92
82
  "Mistral-7B-v0.3-Instruct-Hybrid": {
93
83
  "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid",
94
84
  "recipe": "oga-hybrid",
95
- "reasoning": false,
96
85
  "max_prompt_length": 2000,
97
86
  "suggested": true
98
87
  },
99
88
  "Llama-3.1-8B-Instruct-Hybrid": {
100
89
  "checkpoint": "amd/Llama-3.1-8B-Instruct-awq-asym-uint4-g128-lmhead-onnx-hybrid",
101
90
  "recipe": "oga-hybrid",
102
- "reasoning": false,
103
91
  "max_prompt_length": 2000,
104
92
  "suggested": true
105
93
  },
106
94
  "Llama-xLAM-2-8b-fc-r-Hybrid": {
107
95
  "checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
108
96
  "recipe": "oga-hybrid",
109
- "reasoning": false,
110
97
  "max_prompt_length": 2000,
111
98
  "suggested": true
112
99
  },
113
100
  "Llama-3.2-1B-Instruct-DirectML": {
114
101
  "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
115
102
  "recipe": "oga-igpu",
116
- "reasoning": false,
117
103
  "suggested": false
118
104
  },
119
105
  "Llama-3.2-3B-Instruct-DirectML": {
120
106
  "checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
121
107
  "recipe": "oga-igpu",
122
- "reasoning": false,
123
108
  "suggested": false
124
109
  },
125
110
  "Phi-3.5-Mini-Instruct-DirectML": {
126
111
  "checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
127
112
  "recipe": "oga-igpu",
128
- "reasoning": false,
129
113
  "suggested": false
130
114
  },
131
115
  "Qwen-1.5-7B-Chat-DirectML": {
132
116
  "checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
133
117
  "recipe": "oga-igpu",
134
- "reasoning": false,
135
118
  "suggested": false
136
119
  },
137
120
  "Mistral-7B-v0.1-Instruct-DirectML": {
138
121
  "checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
139
122
  "recipe": "oga-igpu",
140
- "reasoning": false,
141
123
  "suggested": false
142
124
  },
143
125
  "Llama-3-8B-Instruct-DirectML": {
144
126
  "checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
145
127
  "recipe": "oga-igpu",
146
- "reasoning": false,
147
128
  "suggested": false
148
129
  },
149
130
  "Qwen3-0.6B-GGUF": {
150
131
  "checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
151
132
  "recipe": "llamacpp",
152
- "reasoning": true,
153
- "suggested": true
133
+ "suggested": true,
134
+ "labels": ["reasoning"]
154
135
  },
155
136
  "Qwen3-1.7B-GGUF": {
156
137
  "checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
157
138
  "recipe": "llamacpp",
158
- "reasoning": true,
159
- "suggested": true
139
+ "suggested": true,
140
+ "labels": ["reasoning"]
160
141
  },
161
142
  "Qwen3-4B-GGUF": {
162
143
  "checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
163
144
  "recipe": "llamacpp",
164
- "reasoning": true,
165
- "suggested": true
145
+ "suggested": true,
146
+ "labels": ["reasoning"]
166
147
  },
167
148
  "Qwen3-8B-GGUF": {
168
149
  "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
169
150
  "recipe": "llamacpp",
170
- "reasoning": true,
171
- "suggested": true
151
+ "suggested": true,
152
+ "labels": ["reasoning"]
172
153
  },
173
154
  "DeepSeek-Qwen3-8B-GGUF": {
174
155
  "checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
175
156
  "recipe": "llamacpp",
176
- "reasoning": true,
177
- "suggested": true
157
+ "suggested": true,
158
+ "labels": ["reasoning"]
178
159
  },
179
160
  "Qwen3-14B-GGUF": {
180
161
  "checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
181
162
  "recipe": "llamacpp",
182
- "reasoning": true,
183
- "suggested": true
163
+ "suggested": true,
164
+ "labels": ["reasoning"]
184
165
  },
185
166
  "Qwen3-30B-A3B-GGUF": {
186
167
  "checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
187
168
  "recipe": "llamacpp",
188
- "reasoning": true,
189
- "suggested": true
169
+ "suggested": true,
170
+ "labels": ["reasoning"]
190
171
  },
191
172
  "Gemma-3-4b-it-GGUF": {
192
173
  "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
193
174
  "mmproj": "mmproj-model-f16.gguf",
194
175
  "recipe": "llamacpp",
195
- "reasoning": false,
196
176
  "suggested": true,
197
177
  "labels": ["vision"]
198
178
  },
@@ -200,7 +180,6 @@
200
180
  "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
201
181
  "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
202
182
  "recipe": "llamacpp",
203
- "reasoning": false,
204
183
  "suggested": true,
205
184
  "labels": ["vision"]
206
185
  },
@@ -208,8 +187,31 @@
208
187
  "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
209
188
  "mmproj": "mmproj-F16.gguf",
210
189
  "recipe": "llamacpp",
211
- "reasoning": false,
212
190
  "suggested": true,
213
191
  "labels": ["vision"]
192
+ },
193
+ "nomic-embed-text-v1-GGUF": {
194
+ "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
195
+ "recipe": "llamacpp",
196
+ "suggested": true,
197
+ "labels": ["embeddings"]
198
+ },
199
+ "nomic-embed-text-v2-moe-GGUF": {
200
+ "checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
201
+ "recipe": "llamacpp",
202
+ "suggested": true,
203
+ "labels": ["embeddings"]
204
+ },
205
+ "bge-reranker-v2-m3-GGUF": {
206
+ "checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
207
+ "recipe": "llamacpp",
208
+ "suggested": true,
209
+ "labels": ["reranking"]
210
+ },
211
+ "jina-reranker-v1-tiny-en-GGUF": {
212
+ "checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
213
+ "recipe": "llamacpp",
214
+ "suggested": false,
215
+ "labels": ["reranking"]
214
216
  }
215
217
  }
@@ -1,183 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: lemonade-sdk
3
- Version: 8.0.3
4
- Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
- Author-email: lemonade@amd.com
6
- Requires-Python: >=3.10, <3.12
7
- Description-Content-Type: text/markdown
8
- License-File: LICENSE
9
- License-File: NOTICE.md
10
- Requires-Dist: invoke>=2.0.0
11
- Requires-Dist: onnx<1.18.0,>=1.11.0
12
- Requires-Dist: pyyaml>=5.4
13
- Requires-Dist: typeguard>=2.3.13
14
- Requires-Dist: packaging>=20.9
15
- Requires-Dist: numpy<2.0.0
16
- Requires-Dist: fasteners
17
- Requires-Dist: GitPython>=3.1.40
18
- Requires-Dist: psutil>=6.1.1
19
- Requires-Dist: wmi
20
- Requires-Dist: py-cpuinfo
21
- Requires-Dist: pytz
22
- Requires-Dist: zstandard
23
- Requires-Dist: fastapi
24
- Requires-Dist: uvicorn[standard]
25
- Requires-Dist: openai>=1.81.0
26
- Requires-Dist: transformers<=4.51.3
27
- Requires-Dist: jinja2
28
- Requires-Dist: tabulate
29
- Requires-Dist: sentencepiece
30
- Requires-Dist: huggingface-hub==0.33.0
31
- Provides-Extra: oga-hybrid
32
- Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
33
- Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
34
- Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
35
- Provides-Extra: oga-cpu
36
- Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
37
- Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
38
- Provides-Extra: dev
39
- Requires-Dist: torch>=2.6.0; extra == "dev"
40
- Requires-Dist: accelerate; extra == "dev"
41
- Requires-Dist: datasets; extra == "dev"
42
- Requires-Dist: pandas>=1.5.3; extra == "dev"
43
- Requires-Dist: matplotlib; extra == "dev"
44
- Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
45
- Requires-Dist: lm-eval[api]; extra == "dev"
46
- Provides-Extra: oga-hybrid-minimal
47
- Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
48
- Provides-Extra: oga-cpu-minimal
49
- Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
50
- Provides-Extra: llm
51
- Requires-Dist: lemonade-sdk[dev]; extra == "llm"
52
- Provides-Extra: llm-oga-cpu
53
- Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
54
- Provides-Extra: llm-oga-igpu
55
- Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
56
- Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
57
- Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
58
- Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
59
- Provides-Extra: llm-oga-cuda
60
- Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
61
- Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
62
- Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
63
- Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
64
- Provides-Extra: llm-oga-npu
65
- Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
66
- Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
67
- Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
68
- Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
69
- Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
70
- Provides-Extra: llm-oga-hybrid
71
- Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
72
- Provides-Extra: llm-oga-unified
73
- Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
74
- Dynamic: author-email
75
- Dynamic: description
76
- Dynamic: description-content-type
77
- Dynamic: license-file
78
- Dynamic: provides-extra
79
- Dynamic: requires-dist
80
- Dynamic: requires-python
81
- Dynamic: summary
82
-
83
- [![Lemonade tests](https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg)](https://github.com/lemonade-sdk/lemonade/tree/main/test "Check out our tests")
84
- [![OS - Windows | Linux](https://img.shields.io/badge/OS-windows%20%7C%20linux-blue)](docs/README.md#installation "Check out our instructions")
85
- [![Made with Python](https://img.shields.io/badge/Python-3.8,3.10-blue?logo=python&logoColor=white)](docs/README.md#installation "Check out our instructions")
86
-
87
- ## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
88
-
89
- The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
90
-
91
- <div align="center">
92
- <img src="https://download.amd.com/images/lemonade_640x480_1.gif" alt="Lemonade Demo" title="Lemonade in Action">
93
- </div>
94
-
95
- ### Features
96
-
97
- The [Lemonade SDK](./docs/README.md) is comprised of the following:
98
-
99
- - 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
100
- - 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
101
- - 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
102
- - Prompting with templates.
103
- - Measuring accuracy with a variety of tests.
104
- - Benchmarking to get the time-to-first-token and tokens per second.
105
- - Profiling the memory utilization.
106
-
107
- ### [Click here to get started with Lemonade.](./docs/README.md)
108
-
109
- ### Supported Configurations
110
-
111
- Maximum LLM performance requires the right hardware accelerator with the right inference engine for your scenario. Lemonade supports the following configurations, while also making it easy to switch between them at runtime.
112
-
113
- <table border="1" cellpadding="6" cellspacing="0">
114
- <thead>
115
- <tr>
116
- <th rowspan="2">Hardware</th>
117
- <th colspan="3" align="center">🛠️ Engine Support</th>
118
- <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
119
- </tr>
120
- <tr>
121
- <th align="center">OGA</th>
122
- <th align="center">llamacpp</th>
123
- <th align="center">HF</th>
124
- <th align="center">Windows</th>
125
- <th align="center">Linux</th>
126
- </tr>
127
- </thead>
128
- <tbody>
129
- <tr>
130
- <td>🧠 CPU</td>
131
- <td align="center">All platforms</td>
132
- <td align="center">All platforms</td>
133
- <td align="center">All platforms</td>
134
- <td align="center">✅</td>
135
- <td align="center">✅</td>
136
- </tr>
137
- <tr>
138
- <td>🎮 GPU</td>
139
- <td align="center">—</td>
140
- <td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
141
- <td align="center">—</td>
142
- <td align="center">✅</td>
143
- <td align="center">✅</td>
144
- </tr>
145
- <tr>
146
- <td>🤖 NPU</td>
147
- <td align="center">AMD Ryzen™ AI 300 series</td>
148
- <td align="center">—</td>
149
- <td align="center">—</td>
150
- <td align="center">✅</td>
151
- <td align="center">—</td>
152
- </tr>
153
- </tbody>
154
- </table>
155
-
156
-
157
-
158
- #### Inference Engines Overview
159
- | Engine | Description |
160
- | :--- | :--- |
161
- | **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
162
- | **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
163
- | **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
164
-
165
- ## Integrate Lemonade Server with Your Application
166
-
167
- Lemonade Server enables languages including Python, C++, Java, C#, Node.js, Go, Ruby, Rust, and PHP. For the full list and integration details, see [docs/server/README.md](./docs/server/README.md).
168
-
169
- ## Contributing
170
-
171
- We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
172
-
173
- ## Maintainers
174
-
175
- This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues) or email [lemonade@amd.com](mailto:lemonade@amd.com).
176
-
177
- ## License
178
-
179
- This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
180
-
181
- <!--This file was originally licensed under Apache 2.0. It has been modified.
182
- Modifications Copyright (c) 2025 AMD-->
183
-