lemonade-sdk 8.1.0__tar.gz → 8.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- {lemonade_sdk-8.1.0/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.1}/PKG-INFO +45 -6
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/README.md +41 -4
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/setup.py +3 -1
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/inference_engines.py +62 -77
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/system_info.py +61 -44
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/llamacpp/load.py +13 -4
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/llamacpp/utils.py +222 -54
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/load.py +3 -3
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/llamacpp.py +30 -53
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/serve.py +54 -104
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/static/styles.css +203 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/static/webapp.html +507 -71
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/tray.py +4 -2
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/utils/thread.py +2 -4
- lemonade_sdk-8.1.1/src/lemonade/version.py +1 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_install/install.py +25 -2
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1/src/lemonade_sdk.egg-info}/PKG-INFO +45 -6
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/requires.txt +5 -1
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_server/cli.py +79 -26
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_server/server_models.json +26 -1
- lemonade_sdk-8.1.0/src/lemonade/version.py +0 -1
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/LICENSE +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/NOTICE.md +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/setup.cfg +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/__init__.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/api.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/cache.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/cli.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/__init__.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/build.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/cli_helpers.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/exceptions.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/filesystem.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/network.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/printing.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/status.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/test_helpers.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/profilers/__init__.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/profilers/memory_tracker.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/profilers/profiler.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/sequence.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/state.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/__init__.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/accuracy.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/adapter.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/bench.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/huggingface/bench.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/huggingface/load.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/huggingface/utils.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/humaneval.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/llamacpp/bench.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/management_tools.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/mmlu.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/__init__.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/bench.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/utils.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/perplexity.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/prompt.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/quark/__init__.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/quark/quark_load.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/quark/quark_quantize.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/report/__init__.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/report/llm_report.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/report/table.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/__init__.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/static/favicon.ico +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/tool_calls.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/utils/port.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/utils/system_tray.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/webapp.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/tool.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_install/__init__.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/SOURCES.txt +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_server/model_manager.py +0 -0
- {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_server/pydantic_models.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.1.0
|
|
3
|
+
Version: 8.1.1
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.13
|
|
@@ -27,7 +27,8 @@ Requires-Dist: transformers<=4.53.2
|
|
|
27
27
|
Requires-Dist: jinja2
|
|
28
28
|
Requires-Dist: tabulate
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
30
|
-
Requires-Dist: huggingface-hub==0.33.0
|
|
30
|
+
Requires-Dist: huggingface-hub[hf_xet]==0.33.0
|
|
31
|
+
Requires-Dist: python-dotenv
|
|
31
32
|
Provides-Extra: oga-ryzenai
|
|
32
33
|
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
|
|
33
34
|
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
@@ -40,6 +41,7 @@ Requires-Dist: accelerate; extra == "dev"
|
|
|
40
41
|
Requires-Dist: datasets; extra == "dev"
|
|
41
42
|
Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
42
43
|
Requires-Dist: matplotlib; extra == "dev"
|
|
44
|
+
Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
|
|
43
45
|
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
44
46
|
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
45
47
|
Provides-Extra: oga-hybrid
|
|
@@ -136,7 +138,9 @@ Dynamic: summary
|
|
|
136
138
|
<a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
|
|
137
139
|
</h3>
|
|
138
140
|
|
|
139
|
-
Lemonade
|
|
141
|
+
Lemonade helps users run local LLMs with the highest performance by configuring state-of-the-art inference engines for their NPUs and GPUs.
|
|
142
|
+
|
|
143
|
+
Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-your-on-device-ai-revolution/), research teams like [Hazy Research at Stanford](https://www.amd.com/en/developer/resources/technical-articles/2025/minions--on-device-and-cloud-language-model-collaboration-on-ryz.html), and large companies like [AMD](https://www.amd.com/en/developer/resources/technical-articles/unlocking-a-wave-of-llm-apps-on-ryzen-ai-through-lemonade-server.html) use Lemonade to run LLMs.
|
|
140
144
|
|
|
141
145
|
## Getting Started
|
|
142
146
|
|
|
@@ -155,7 +159,7 @@ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus
|
|
|
155
159
|
</p>
|
|
156
160
|
|
|
157
161
|
> [!TIP]
|
|
158
|
-
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email
|
|
162
|
+
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or [email](lemonade@amd.com).
|
|
159
163
|
|
|
160
164
|
## Using the CLI
|
|
161
165
|
|
|
@@ -177,7 +181,10 @@ To check all models available, use the `list` command:
|
|
|
177
181
|
lemonade-server list
|
|
178
182
|
```
|
|
179
183
|
|
|
180
|
-
> Note
|
|
184
|
+
> **Note**: If you installed from source, use the `lemonade-server-dev` command instead.
|
|
185
|
+
|
|
186
|
+
> **Tip**: You can use `--llamacpp vulkan/rocm` to select a backend when running GGUF models.
|
|
187
|
+
|
|
181
188
|
|
|
182
189
|
## Model Library
|
|
183
190
|
|
|
@@ -219,7 +226,7 @@ Lemonade supports the following configurations, while also making it easy to swi
|
|
|
219
226
|
<tr>
|
|
220
227
|
<td><strong>🎮 GPU</strong></td>
|
|
221
228
|
<td align="center">—</td>
|
|
222
|
-
<td align="center">Vulkan: All platforms<br
|
|
229
|
+
<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
|
|
223
230
|
<td align="center">—</td>
|
|
224
231
|
<td align="center">✅</td>
|
|
225
232
|
<td align="center">✅</td>
|
|
@@ -235,6 +242,38 @@ Lemonade supports the following configurations, while also making it easy to swi
|
|
|
235
242
|
</tbody>
|
|
236
243
|
</table>
|
|
237
244
|
|
|
245
|
+
<details>
|
|
246
|
+
<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
|
|
247
|
+
|
|
248
|
+
<br>
|
|
249
|
+
|
|
250
|
+
<table>
|
|
251
|
+
<thead>
|
|
252
|
+
<tr>
|
|
253
|
+
<th>Architecture</th>
|
|
254
|
+
<th>Platform Support</th>
|
|
255
|
+
<th>GPU Models</th>
|
|
256
|
+
</tr>
|
|
257
|
+
</thead>
|
|
258
|
+
<tbody>
|
|
259
|
+
<tr>
|
|
260
|
+
<td><b>gfx1151</b> (STX Halo)</td>
|
|
261
|
+
<td>Windows, Ubuntu</td>
|
|
262
|
+
<td>Ryzen AI MAX+ Pro 395</td>
|
|
263
|
+
</tr>
|
|
264
|
+
<tr>
|
|
265
|
+
<td><b>gfx120X</b> (RDNA4)</td>
|
|
266
|
+
<td>Windows only</td>
|
|
267
|
+
<td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
|
|
268
|
+
</tr>
|
|
269
|
+
<tr>
|
|
270
|
+
<td><b>gfx110X</b> (RDNA3)</td>
|
|
271
|
+
<td>Windows, Ubuntu</td>
|
|
272
|
+
<td>Radeon PRO W7900/W7800/W7700/V710, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT</td>
|
|
273
|
+
</tr>
|
|
274
|
+
</tbody>
|
|
275
|
+
</table>
|
|
276
|
+
</details>
|
|
238
277
|
|
|
239
278
|
## Integrate Lemonade Server with Your Application
|
|
240
279
|
|
|
@@ -47,7 +47,9 @@
|
|
|
47
47
|
<a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
|
|
48
48
|
</h3>
|
|
49
49
|
|
|
50
|
-
Lemonade
|
|
50
|
+
Lemonade helps users run local LLMs with the highest performance by configuring state-of-the-art inference engines for their NPUs and GPUs.
|
|
51
|
+
|
|
52
|
+
Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-your-on-device-ai-revolution/), research teams like [Hazy Research at Stanford](https://www.amd.com/en/developer/resources/technical-articles/2025/minions--on-device-and-cloud-language-model-collaboration-on-ryz.html), and large companies like [AMD](https://www.amd.com/en/developer/resources/technical-articles/unlocking-a-wave-of-llm-apps-on-ryzen-ai-through-lemonade-server.html) use Lemonade to run LLMs.
|
|
51
53
|
|
|
52
54
|
## Getting Started
|
|
53
55
|
|
|
@@ -66,7 +68,7 @@ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus
|
|
|
66
68
|
</p>
|
|
67
69
|
|
|
68
70
|
> [!TIP]
|
|
69
|
-
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email
|
|
71
|
+
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or [email](lemonade@amd.com).
|
|
70
72
|
|
|
71
73
|
## Using the CLI
|
|
72
74
|
|
|
@@ -88,7 +90,10 @@ To check all models available, use the `list` command:
|
|
|
88
90
|
lemonade-server list
|
|
89
91
|
```
|
|
90
92
|
|
|
91
|
-
> Note
|
|
93
|
+
> **Note**: If you installed from source, use the `lemonade-server-dev` command instead.
|
|
94
|
+
|
|
95
|
+
> **Tip**: You can use `--llamacpp vulkan/rocm` to select a backend when running GGUF models.
|
|
96
|
+
|
|
92
97
|
|
|
93
98
|
## Model Library
|
|
94
99
|
|
|
@@ -130,7 +135,7 @@ Lemonade supports the following configurations, while also making it easy to swi
|
|
|
130
135
|
<tr>
|
|
131
136
|
<td><strong>🎮 GPU</strong></td>
|
|
132
137
|
<td align="center">—</td>
|
|
133
|
-
<td align="center">Vulkan: All platforms<br
|
|
138
|
+
<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
|
|
134
139
|
<td align="center">—</td>
|
|
135
140
|
<td align="center">✅</td>
|
|
136
141
|
<td align="center">✅</td>
|
|
@@ -146,6 +151,38 @@ Lemonade supports the following configurations, while also making it easy to swi
|
|
|
146
151
|
</tbody>
|
|
147
152
|
</table>
|
|
148
153
|
|
|
154
|
+
<details>
|
|
155
|
+
<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
|
|
156
|
+
|
|
157
|
+
<br>
|
|
158
|
+
|
|
159
|
+
<table>
|
|
160
|
+
<thead>
|
|
161
|
+
<tr>
|
|
162
|
+
<th>Architecture</th>
|
|
163
|
+
<th>Platform Support</th>
|
|
164
|
+
<th>GPU Models</th>
|
|
165
|
+
</tr>
|
|
166
|
+
</thead>
|
|
167
|
+
<tbody>
|
|
168
|
+
<tr>
|
|
169
|
+
<td><b>gfx1151</b> (STX Halo)</td>
|
|
170
|
+
<td>Windows, Ubuntu</td>
|
|
171
|
+
<td>Ryzen AI MAX+ Pro 395</td>
|
|
172
|
+
</tr>
|
|
173
|
+
<tr>
|
|
174
|
+
<td><b>gfx120X</b> (RDNA4)</td>
|
|
175
|
+
<td>Windows only</td>
|
|
176
|
+
<td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
|
|
177
|
+
</tr>
|
|
178
|
+
<tr>
|
|
179
|
+
<td><b>gfx110X</b> (RDNA3)</td>
|
|
180
|
+
<td>Windows, Ubuntu</td>
|
|
181
|
+
<td>Radeon PRO W7900/W7800/W7700/V710, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT</td>
|
|
182
|
+
</tr>
|
|
183
|
+
</tbody>
|
|
184
|
+
</table>
|
|
185
|
+
</details>
|
|
149
186
|
|
|
150
187
|
## Integrate Lemonade Server with Your Application
|
|
151
188
|
|
|
@@ -49,7 +49,8 @@ setup(
|
|
|
49
49
|
"jinja2",
|
|
50
50
|
"tabulate",
|
|
51
51
|
"sentencepiece",
|
|
52
|
-
"huggingface-hub==0.33.0",
|
|
52
|
+
"huggingface-hub[hf_xet]==0.33.0",
|
|
53
|
+
"python-dotenv",
|
|
53
54
|
],
|
|
54
55
|
extras_require={
|
|
55
56
|
# The non-dev extras are meant to deploy specific backends into end-user
|
|
@@ -73,6 +74,7 @@ setup(
|
|
|
73
74
|
"datasets",
|
|
74
75
|
"pandas>=1.5.3",
|
|
75
76
|
"matplotlib",
|
|
77
|
+
"model-generate==1.5.0; platform_system=='Windows' and python_version=='3.10'",
|
|
76
78
|
# Install human-eval from a forked repo with Windows support until the
|
|
77
79
|
# PR (https://github.com/openai/human-eval/pull/53) is merged
|
|
78
80
|
"human-eval-windows==1.0.4",
|
|
@@ -2,7 +2,6 @@ import os
|
|
|
2
2
|
import sys
|
|
3
3
|
import importlib.util
|
|
4
4
|
import importlib.metadata
|
|
5
|
-
import platform
|
|
6
5
|
import subprocess
|
|
7
6
|
from abc import ABC, abstractmethod
|
|
8
7
|
from typing import Dict, Optional
|
|
@@ -19,7 +18,9 @@ class InferenceEngineDetector:
|
|
|
19
18
|
self.llamacpp_detector = LlamaCppDetector()
|
|
20
19
|
self.transformers_detector = TransformersDetector()
|
|
21
20
|
|
|
22
|
-
def detect_engines_for_device(self, device_type: str) -> Dict[str, Dict]:
|
|
21
|
+
def detect_engines_for_device(
|
|
22
|
+
self, device_type: str, device_name: str
|
|
23
|
+
) -> Dict[str, Dict]:
|
|
23
24
|
"""
|
|
24
25
|
Detect all available inference engines for a specific device type.
|
|
25
26
|
|
|
@@ -36,10 +37,19 @@ class InferenceEngineDetector:
|
|
|
36
37
|
if oga_info:
|
|
37
38
|
engines["oga"] = oga_info
|
|
38
39
|
|
|
39
|
-
# Detect llama.cpp availability
|
|
40
|
-
llamacpp_info = self.llamacpp_detector.detect_for_device(device_type)
|
|
40
|
+
# Detect llama.cpp vulkan availability
|
|
41
|
+
llamacpp_info = self.llamacpp_detector.detect_for_device(
|
|
42
|
+
device_type, device_name, "vulkan"
|
|
43
|
+
)
|
|
44
|
+
if llamacpp_info:
|
|
45
|
+
engines["llamacpp-vulkan"] = llamacpp_info
|
|
46
|
+
|
|
47
|
+
# Detect llama.cpp rocm availability
|
|
48
|
+
llamacpp_info = self.llamacpp_detector.detect_for_device(
|
|
49
|
+
device_type, device_name, "rocm"
|
|
50
|
+
)
|
|
41
51
|
if llamacpp_info:
|
|
42
|
-
engines["llamacpp"] = llamacpp_info
|
|
52
|
+
engines["llamacpp-rocm"] = llamacpp_info
|
|
43
53
|
|
|
44
54
|
# Detect Transformers availability
|
|
45
55
|
transformers_info = self.transformers_detector.detect_for_device(device_type)
|
|
@@ -206,57 +216,40 @@ class LlamaCppDetector(BaseEngineDetector):
|
|
|
206
216
|
Detector for llama.cpp.
|
|
207
217
|
"""
|
|
208
218
|
|
|
209
|
-
def detect_for_device(self, device_type: str) -> Optional[Dict]:
|
|
219
|
+
def detect_for_device(
|
|
220
|
+
self, device_type: str, device_name: str, backend: str
|
|
221
|
+
) -> Optional[Dict]:
|
|
210
222
|
"""
|
|
211
223
|
Detect llama.cpp availability for specific device.
|
|
212
224
|
"""
|
|
213
225
|
try:
|
|
214
|
-
# Map device types to llama.cpp backends
|
|
215
|
-
device_backend_map = {
|
|
216
|
-
"cpu": "cpu",
|
|
217
|
-
"amd_igpu": "vulkan",
|
|
218
|
-
"amd_dgpu": "vulkan",
|
|
219
|
-
}
|
|
220
226
|
|
|
221
|
-
if device_type not in device_backend_map:
|
|
227
|
+
if device_type not in ["cpu", "amd_igpu", "amd_dgpu"]:
|
|
222
228
|
return None
|
|
223
229
|
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
# CPU backend
|
|
248
|
-
if is_installed:
|
|
249
|
-
result = {
|
|
250
|
-
"available": True,
|
|
251
|
-
"version": self._get_llamacpp_version(),
|
|
252
|
-
"backend": backend,
|
|
253
|
-
}
|
|
254
|
-
return result
|
|
255
|
-
else:
|
|
256
|
-
return {
|
|
257
|
-
"available": False,
|
|
258
|
-
"error": "llama.cpp binaries not installed",
|
|
259
|
-
}
|
|
230
|
+
# Check if the device is supported by the backend
|
|
231
|
+
if device_type == "cpu":
|
|
232
|
+
device_supported = True
|
|
233
|
+
elif device_type == "amd_igpu" or device_type == "amd_dgpu":
|
|
234
|
+
if backend == "vulkan":
|
|
235
|
+
device_supported = self._check_vulkan_support()
|
|
236
|
+
elif backend == "rocm":
|
|
237
|
+
device_supported = self._check_rocm_support(device_name.lower())
|
|
238
|
+
if not device_supported:
|
|
239
|
+
return {"available": False, "error": f"{backend} not available"}
|
|
240
|
+
|
|
241
|
+
is_installed = self.is_installed(backend)
|
|
242
|
+
if not is_installed:
|
|
243
|
+
return {
|
|
244
|
+
"available": False,
|
|
245
|
+
"error": f"{backend} binaries not installed",
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
return {
|
|
249
|
+
"available": True,
|
|
250
|
+
"version": self._get_llamacpp_version(backend),
|
|
251
|
+
"backend": backend,
|
|
252
|
+
}
|
|
260
253
|
|
|
261
254
|
except (ImportError, OSError, subprocess.SubprocessError) as e:
|
|
262
255
|
return {
|
|
@@ -264,35 +257,17 @@ class LlamaCppDetector(BaseEngineDetector):
|
|
|
264
257
|
"error": f"llama.cpp detection failed: {str(e)}",
|
|
265
258
|
}
|
|
266
259
|
|
|
267
|
-
def is_installed(self) -> bool:
|
|
260
|
+
def is_installed(self, backend: str) -> bool:
|
|
268
261
|
"""
|
|
269
|
-
Check if llama.cpp binaries are available.
|
|
262
|
+
Check if llama.cpp binaries are available for any backend.
|
|
270
263
|
"""
|
|
264
|
+
from lemonade.tools.llamacpp.utils import get_llama_server_exe_path
|
|
271
265
|
|
|
272
|
-
# Check lemonade-managed binary locations
|
|
273
266
|
try:
|
|
274
|
-
|
|
275
|
-
# Check lemonade server directory
|
|
276
|
-
server_base_dir = os.path.join(
|
|
277
|
-
os.path.dirname(sys.executable), "llama_server"
|
|
278
|
-
)
|
|
279
|
-
|
|
280
|
-
if platform.system().lower() == "windows":
|
|
281
|
-
server_exe_path = os.path.join(server_base_dir, "llama-server.exe")
|
|
282
|
-
else:
|
|
283
|
-
# Check both build/bin and root directory locations
|
|
284
|
-
build_bin_path = os.path.join(
|
|
285
|
-
server_base_dir, "build", "bin", "llama-server"
|
|
286
|
-
)
|
|
287
|
-
root_path = os.path.join(server_base_dir, "llama-server")
|
|
288
|
-
server_exe_path = (
|
|
289
|
-
build_bin_path if os.path.exists(build_bin_path) else root_path
|
|
290
|
-
)
|
|
291
|
-
|
|
267
|
+
server_exe_path = get_llama_server_exe_path(backend)
|
|
292
268
|
if os.path.exists(server_exe_path):
|
|
293
269
|
return True
|
|
294
|
-
|
|
295
|
-
except (ImportError, OSError):
|
|
270
|
+
except (ImportError, OSError, ValueError):
|
|
296
271
|
pass
|
|
297
272
|
|
|
298
273
|
return False
|
|
@@ -334,13 +309,22 @@ class LlamaCppDetector(BaseEngineDetector):
|
|
|
334
309
|
except OSError:
|
|
335
310
|
return False
|
|
336
311
|
|
|
337
|
-
def _get_llamacpp_version(self) -> str:
|
|
312
|
+
def _check_rocm_support(self, device_name: str) -> bool:
|
|
313
|
+
"""
|
|
314
|
+
Check if ROCM is available for GPU acceleration.
|
|
315
|
+
"""
|
|
316
|
+
from lemonade.tools.llamacpp.utils import identify_rocm_arch_from_name
|
|
317
|
+
|
|
318
|
+
return identify_rocm_arch_from_name(device_name) is not None
|
|
319
|
+
|
|
320
|
+
def _get_llamacpp_version(self, backend: str) -> str:
|
|
338
321
|
"""
|
|
339
|
-
Get llama.cpp version from lemonade's managed installation.
|
|
322
|
+
Get llama.cpp version from lemonade's managed installation for specific backend.
|
|
340
323
|
"""
|
|
341
324
|
try:
|
|
325
|
+
# Use backend-specific path - same logic as get_llama_folder_path in utils.py
|
|
342
326
|
server_base_dir = os.path.join(
|
|
343
|
-
os.path.dirname(sys.executable), "llama_server"
|
|
327
|
+
os.path.dirname(sys.executable), backend, "llama_server"
|
|
344
328
|
)
|
|
345
329
|
version_file = os.path.join(server_base_dir, "version.txt")
|
|
346
330
|
|
|
@@ -401,15 +385,16 @@ class TransformersDetector(BaseEngineDetector):
|
|
|
401
385
|
)
|
|
402
386
|
|
|
403
387
|
|
|
404
|
-
def detect_inference_engines(device_type: str) -> Dict[str, Dict]:
|
|
388
|
+
def detect_inference_engines(device_type: str, device_name: str) -> Dict[str, Dict]:
|
|
405
389
|
"""
|
|
406
390
|
Helper function to detect inference engines for a device type.
|
|
407
391
|
|
|
408
392
|
Args:
|
|
409
393
|
device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
|
|
394
|
+
device_name: device name
|
|
410
395
|
|
|
411
396
|
Returns:
|
|
412
397
|
dict: Engine availability information.
|
|
413
398
|
"""
|
|
414
399
|
detector = InferenceEngineDetector()
|
|
415
|
-
return detector.detect_engines_for_device(device_type)
|
|
400
|
+
return detector.detect_engines_for_device(device_type, device_name)
|