lemonade-sdk 8.1.0__tar.gz → 8.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (78)
  1. {lemonade_sdk-8.1.0/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.1}/PKG-INFO +45 -6
  2. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/README.md +41 -4
  3. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/setup.py +3 -1
  4. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/inference_engines.py +62 -77
  5. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/system_info.py +61 -44
  6. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/llamacpp/load.py +13 -4
  7. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/llamacpp/utils.py +222 -54
  8. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/load.py +3 -3
  9. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/llamacpp.py +30 -53
  10. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/serve.py +54 -104
  11. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/static/styles.css +203 -0
  12. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/static/webapp.html +507 -71
  13. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/tray.py +4 -2
  14. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/utils/thread.py +2 -4
  15. lemonade_sdk-8.1.1/src/lemonade/version.py +1 -0
  16. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_install/install.py +25 -2
  17. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1/src/lemonade_sdk.egg-info}/PKG-INFO +45 -6
  18. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/requires.txt +5 -1
  19. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_server/cli.py +79 -26
  20. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_server/server_models.json +26 -1
  21. lemonade_sdk-8.1.0/src/lemonade/version.py +0 -1
  22. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/LICENSE +0 -0
  23. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/NOTICE.md +0 -0
  24. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/setup.cfg +0 -0
  25. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/__init__.py +0 -0
  26. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/api.py +0 -0
  27. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/cache.py +0 -0
  28. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/cli.py +0 -0
  29. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/__init__.py +0 -0
  30. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/build.py +0 -0
  31. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/cli_helpers.py +0 -0
  32. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/exceptions.py +0 -0
  33. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/filesystem.py +0 -0
  34. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/network.py +0 -0
  35. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/printing.py +0 -0
  36. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/status.py +0 -0
  37. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/test_helpers.py +0 -0
  38. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/profilers/__init__.py +0 -0
  39. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/profilers/memory_tracker.py +0 -0
  40. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/profilers/profiler.py +0 -0
  41. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/sequence.py +0 -0
  42. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/state.py +0 -0
  43. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/__init__.py +0 -0
  44. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/accuracy.py +0 -0
  45. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/adapter.py +0 -0
  46. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/bench.py +0 -0
  47. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/huggingface/bench.py +0 -0
  48. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/huggingface/load.py +0 -0
  49. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/huggingface/utils.py +0 -0
  50. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/humaneval.py +0 -0
  51. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/llamacpp/bench.py +0 -0
  52. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/management_tools.py +0 -0
  53. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/mmlu.py +0 -0
  54. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/__init__.py +0 -0
  55. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/bench.py +0 -0
  56. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/utils.py +0 -0
  57. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/perplexity.py +0 -0
  58. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/prompt.py +0 -0
  59. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/quark/__init__.py +0 -0
  60. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/quark/quark_load.py +0 -0
  61. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/quark/quark_quantize.py +0 -0
  62. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/report/__init__.py +0 -0
  63. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/report/llm_report.py +0 -0
  64. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/report/table.py +0 -0
  65. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/__init__.py +0 -0
  66. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/static/favicon.ico +0 -0
  67. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/tool_calls.py +0 -0
  68. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/utils/port.py +0 -0
  69. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/utils/system_tray.py +0 -0
  70. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/webapp.py +0 -0
  71. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/tools/tool.py +0 -0
  72. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_install/__init__.py +0 -0
  73. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/SOURCES.txt +0 -0
  74. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
  75. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
  76. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
  77. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_server/model_manager.py +0 -0
  78. {lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade_server/pydantic_models.py +0 -0
{lemonade_sdk-8.1.0/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.1.0
+Version: 8.1.1
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.13
@@ -27,7 +27,8 @@ Requires-Dist: transformers<=4.53.2
 Requires-Dist: jinja2
 Requires-Dist: tabulate
 Requires-Dist: sentencepiece
-Requires-Dist: huggingface-hub==0.33.0
+Requires-Dist: huggingface-hub[hf_xet]==0.33.0
+Requires-Dist: python-dotenv
 Provides-Extra: oga-ryzenai
 Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
 Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
@@ -40,6 +41,7 @@ Requires-Dist: accelerate; extra == "dev"
 Requires-Dist: datasets; extra == "dev"
 Requires-Dist: pandas>=1.5.3; extra == "dev"
 Requires-Dist: matplotlib; extra == "dev"
+Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
 Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
 Requires-Dist: lm-eval[api]; extra == "dev"
 Provides-Extra: oga-hybrid
@@ -136,7 +138,9 @@ Dynamic: summary
 <a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
 </h3>
 
-Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
+Lemonade helps users run local LLMs with the highest performance by configuring state-of-the-art inference engines for their NPUs and GPUs.
+
+Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-your-on-device-ai-revolution/), research teams like [Hazy Research at Stanford](https://www.amd.com/en/developer/resources/technical-articles/2025/minions--on-device-and-cloud-language-model-collaboration-on-ryz.html), and large companies like [AMD](https://www.amd.com/en/developer/resources/technical-articles/unlocking-a-wave-of-llm-apps-on-ryzen-ai-through-lemonade-server.html) use Lemonade to run LLMs.
 
 ## Getting Started
 
@@ -155,7 +159,7 @@ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus
 </p>
 
 > [!TIP]
-> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email lemonade@amd.com.
+> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or [email](lemonade@amd.com).
 
 ## Using the CLI
 
@@ -177,7 +181,10 @@ To check all models available, use the `list` command:
 lemonade-server list
 ```
 
-> Note: If you installed from source, use the `lemonade-server-dev` command instead.
+> **Note**: If you installed from source, use the `lemonade-server-dev` command instead.
+
+> **Tip**: You can use `--llamacpp vulkan/rocm` to select a backend when running GGUF models.
+
 
 ## Model Library
 
@@ -219,7 +226,7 @@ Lemonade supports the following configurations, while also making it easy to swi
 <tr>
 <td><strong>🎮 GPU</strong></td>
 <td align="center">—</td>
-<td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
+<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
 <td align="center">—</td>
 <td align="center">✅</td>
 <td align="center">✅</td>
@@ -235,6 +242,38 @@ Lemonade supports the following configurations, while also making it easy to swi
 </tbody>
 </table>
 
+<details>
+<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
+
+<br>
+
+<table>
+<thead>
+<tr>
+<th>Architecture</th>
+<th>Platform Support</th>
+<th>GPU Models</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><b>gfx1151</b> (STX Halo)</td>
+<td>Windows, Ubuntu</td>
+<td>Ryzen AI MAX+ Pro 395</td>
+</tr>
+<tr>
+<td><b>gfx120X</b> (RDNA4)</td>
+<td>Windows only</td>
+<td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
+</tr>
+<tr>
+<td><b>gfx110X</b> (RDNA3)</td>
+<td>Windows, Ubuntu</td>
+<td>Radeon PRO W7900/W7800/W7700/V710, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT</td>
+</tr>
+</tbody>
+</table>
+</details>
 
 ## Integrate Lemonade Server with Your Application
 
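One dependency note worth calling out: `huggingface-hub` is now pinned with the `hf_xet` extra. A minimal way to verify the Xet backend landed after install, assuming (as the extra's name suggests) that it simply makes the `hf_xet` package importable for `huggingface_hub` to pick up:

```python
# Sketch: confirm the hf_xet extra pulled in the Xet download backend.
# Assumption: huggingface_hub opportunistically uses the `hf_xet` package
# for Xet-backed repos whenever it is importable.
import importlib.util

if importlib.util.find_spec("hf_xet") is not None:
    print("hf_xet present: Xet-accelerated downloads available")
else:
    print("hf_xet missing: huggingface_hub falls back to plain HTTP downloads")
```

The same changes repeat in README.md, which PKG-INFO embeds as the long description: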
{lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/README.md

@@ -47,7 +47,9 @@
 <a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
 </h3>
 
-Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
+Lemonade helps users run local LLMs with the highest performance by configuring state-of-the-art inference engines for their NPUs and GPUs.
+
+Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-your-on-device-ai-revolution/), research teams like [Hazy Research at Stanford](https://www.amd.com/en/developer/resources/technical-articles/2025/minions--on-device-and-cloud-language-model-collaboration-on-ryz.html), and large companies like [AMD](https://www.amd.com/en/developer/resources/technical-articles/unlocking-a-wave-of-llm-apps-on-ryzen-ai-through-lemonade-server.html) use Lemonade to run LLMs.
 
 ## Getting Started
 
@@ -66,7 +68,7 @@ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus
 </p>
 
 > [!TIP]
-> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email lemonade@amd.com.
+> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or [email](lemonade@amd.com).
 
 ## Using the CLI
 
@@ -88,7 +90,10 @@ To check all models available, use the `list` command:
 lemonade-server list
 ```
 
-> Note: If you installed from source, use the `lemonade-server-dev` command instead.
+> **Note**: If you installed from source, use the `lemonade-server-dev` command instead.
+
+> **Tip**: You can use `--llamacpp vulkan/rocm` to select a backend when running GGUF models.
+
 
 ## Model Library
 
@@ -130,7 +135,7 @@ Lemonade supports the following configurations, while also making it easy to swi
 <tr>
 <td><strong>🎮 GPU</strong></td>
 <td align="center">—</td>
-<td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
+<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
 <td align="center">—</td>
 <td align="center">✅</td>
 <td align="center">✅</td>
@@ -146,6 +151,38 @@ Lemonade supports the following configurations, while also making it easy to swi
 </tbody>
 </table>
 
+<details>
+<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
+
+<br>
+
+<table>
+<thead>
+<tr>
+<th>Architecture</th>
+<th>Platform Support</th>
+<th>GPU Models</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><b>gfx1151</b> (STX Halo)</td>
+<td>Windows, Ubuntu</td>
+<td>Ryzen AI MAX+ Pro 395</td>
+</tr>
+<tr>
+<td><b>gfx120X</b> (RDNA4)</td>
+<td>Windows only</td>
+<td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
+</tr>
+<tr>
+<td><b>gfx110X</b> (RDNA3)</td>
+<td>Windows, Ubuntu</td>
+<td>Radeon PRO W7900/W7800/W7700/V710, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT</td>
+</tr>
+</tbody>
+</table>
+</details>
 
 ## Integrate Lemonade Server with Your Application
 
{lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/setup.py

@@ -49,7 +49,8 @@ setup(
        "jinja2",
        "tabulate",
        "sentencepiece",
-       "huggingface-hub==0.33.0",
+       "huggingface-hub[hf_xet]==0.33.0",
+       "python-dotenv",
    ],
    extras_require={
        # The non-dev extras are meant to deploy specific backends into end-user
@@ -73,6 +74,7 @@ setup(
            "datasets",
            "pandas>=1.5.3",
            "matplotlib",
+           "model-generate==1.5.0; platform_system=='Windows' and python_version=='3.10'",
            # Install human-eval from a forked repo with Windows support until the
            # PR (https://github.com/openai/human-eval/pull/53) is merged
            "human-eval-windows==1.0.4",
{lemonade_sdk-8.1.0 → lemonade_sdk-8.1.1}/src/lemonade/common/inference_engines.py

@@ -2,7 +2,6 @@ import os
 import sys
 import importlib.util
 import importlib.metadata
-import platform
 import subprocess
 from abc import ABC, abstractmethod
 from typing import Dict, Optional
@@ -19,7 +18,9 @@ class InferenceEngineDetector:
         self.llamacpp_detector = LlamaCppDetector()
         self.transformers_detector = TransformersDetector()
 
-    def detect_engines_for_device(self, device_type: str) -> Dict[str, Dict]:
+    def detect_engines_for_device(
+        self, device_type: str, device_name: str
+    ) -> Dict[str, Dict]:
         """
         Detect all available inference engines for a specific device type.
 
@@ -36,10 +37,19 @@
         if oga_info:
             engines["oga"] = oga_info
 
-        # Detect llama.cpp availability
-        llamacpp_info = self.llamacpp_detector.detect_for_device(device_type)
+        # Detect llama.cpp vulkan availability
+        llamacpp_info = self.llamacpp_detector.detect_for_device(
+            device_type, device_name, "vulkan"
+        )
+        if llamacpp_info:
+            engines["llamacpp-vulkan"] = llamacpp_info
+
+        # Detect llama.cpp rocm availability
+        llamacpp_info = self.llamacpp_detector.detect_for_device(
+            device_type, device_name, "rocm"
+        )
         if llamacpp_info:
-            engines["llamacpp"] = llamacpp_info
+            engines["llamacpp-rocm"] = llamacpp_info
 
         # Detect Transformers availability
         transformers_info = self.transformers_detector.detect_for_device(device_type)
@@ -206,57 +216,40 @@ class LlamaCppDetector(BaseEngineDetector):
     Detector for llama.cpp.
     """
 
-    def detect_for_device(self, device_type: str) -> Optional[Dict]:
+    def detect_for_device(
+        self, device_type: str, device_name: str, backend: str
+    ) -> Optional[Dict]:
         """
         Detect llama.cpp availability for specific device.
         """
         try:
-            # Map device types to llama.cpp backends
-            device_backend_map = {
-                "cpu": "cpu",
-                "amd_igpu": "vulkan",
-                "amd_dgpu": "vulkan",
-            }
 
-            if device_type not in device_backend_map:
+            if device_type not in ["cpu", "amd_igpu", "amd_dgpu"]:
                 return None
 
-            backend = device_backend_map[device_type]
-            is_installed = self.is_installed()
-
-            # Check requirements based on backend
-            if backend == "vulkan":
-                vulkan_available = self._check_vulkan_support()
-                if not vulkan_available:
-                    return {"available": False, "error": "Vulkan not available"}
-
-                # Vulkan is available
-                if is_installed:
-                    result = {
-                        "available": True,
-                        "version": self._get_llamacpp_version(),
-                        "backend": backend,
-                    }
-                    return result
-                else:
-                    return {
-                        "available": False,
-                        "error": "llama.cpp binaries not installed",
-                    }
-            else:
-                # CPU backend
-                if is_installed:
-                    result = {
-                        "available": True,
-                        "version": self._get_llamacpp_version(),
-                        "backend": backend,
-                    }
-                    return result
-                else:
-                    return {
-                        "available": False,
-                        "error": "llama.cpp binaries not installed",
-                    }
+            # Check if the device is supported by the backend
+            if device_type == "cpu":
+                device_supported = True
+            elif device_type == "amd_igpu" or device_type == "amd_dgpu":
+                if backend == "vulkan":
+                    device_supported = self._check_vulkan_support()
+                elif backend == "rocm":
+                    device_supported = self._check_rocm_support(device_name.lower())
+            if not device_supported:
+                return {"available": False, "error": f"{backend} not available"}
+
+            is_installed = self.is_installed(backend)
+            if not is_installed:
+                return {
+                    "available": False,
+                    "error": f"{backend} binaries not installed",
+                }
+
+            return {
+                "available": True,
+                "version": self._get_llamacpp_version(backend),
+                "backend": backend,
+            }
 
         except (ImportError, OSError, subprocess.SubprocessError) as e:
             return {
@@ -264,35 +257,17 @@
                 "error": f"llama.cpp detection failed: {str(e)}",
             }
 
-    def is_installed(self) -> bool:
+    def is_installed(self, backend: str) -> bool:
         """
-        Check if llama.cpp binaries are available.
+        Check if llama.cpp binaries are available for any backend.
        """
+        from lemonade.tools.llamacpp.utils import get_llama_server_exe_path
 
-        # Check lemonade-managed binary locations
        try:
-
-            # Check lemonade server directory
-            server_base_dir = os.path.join(
-                os.path.dirname(sys.executable), "llama_server"
-            )
-
-            if platform.system().lower() == "windows":
-                server_exe_path = os.path.join(server_base_dir, "llama-server.exe")
-            else:
-                # Check both build/bin and root directory locations
-                build_bin_path = os.path.join(
-                    server_base_dir, "build", "bin", "llama-server"
-                )
-                root_path = os.path.join(server_base_dir, "llama-server")
-                server_exe_path = (
-                    build_bin_path if os.path.exists(build_bin_path) else root_path
-                )
-
+            server_exe_path = get_llama_server_exe_path(backend)
             if os.path.exists(server_exe_path):
                 return True
-
-        except (ImportError, OSError):
+        except (ImportError, OSError, ValueError):
            pass
 
        return False
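The inline path probing removed here now lives in `get_llama_server_exe_path` inside `lemonade.tools.llamacpp.utils` (one of the files changed in this release, not shown in this excerpt). Judging from the deleted code and the backend-prefixed `version.txt` path in the next hunk, the helper plausibly behaves like this sketch; the function below is a reconstruction, not the shipped implementation:

```python
# Hypothetical reconstruction of get_llama_server_exe_path(backend),
# inferred from the deleted inline logic above plus the backend-prefixed
# version path in the following hunk. Names/paths are assumptions.
import os
import platform
import sys


def get_llama_server_exe_path_sketch(backend: str) -> str:
    if backend not in ("vulkan", "rocm"):
        # The caller's except tuple includes ValueError, hinting the real
        # helper raises on an unknown backend.
        raise ValueError(f"Unknown llama.cpp backend: {backend}")

    # Backend-specific install root, e.g. <python_dir>/vulkan/llama_server
    server_base_dir = os.path.join(
        os.path.dirname(sys.executable), backend, "llama_server"
    )

    if platform.system().lower() == "windows":
        return os.path.join(server_base_dir, "llama-server.exe")

    # On other platforms, prefer the build/bin layout, else the root dir
    build_bin_path = os.path.join(server_base_dir, "build", "bin", "llama-server")
    root_path = os.path.join(server_base_dir, "llama-server")
    return build_bin_path if os.path.exists(build_bin_path) else root_path
```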
@@ -334,13 +309,22 @@
         except OSError:
             return False
 
-    def _get_llamacpp_version(self) -> str:
+    def _check_rocm_support(self, device_name: str) -> bool:
+        """
+        Check if ROCM is available for GPU acceleration.
+        """
+        from lemonade.tools.llamacpp.utils import identify_rocm_arch_from_name
+
+        return identify_rocm_arch_from_name(device_name) is not None
+
+    def _get_llamacpp_version(self, backend: str) -> str:
         """
-        Get llama.cpp version from lemonade's managed installation.
+        Get llama.cpp version from lemonade's managed installation for specific backend.
         """
         try:
+            # Use backend-specific path - same logic as get_llama_folder_path in utils.py
             server_base_dir = os.path.join(
-                os.path.dirname(sys.executable), "llama_server"
+                os.path.dirname(sys.executable), backend, "llama_server"
             )
             version_file = os.path.join(server_base_dir, "version.txt")
 
@@ -401,15 +385,16 @@
             )
 
 
-def detect_inference_engines(device_type: str) -> Dict[str, Dict]:
+def detect_inference_engines(device_type: str, device_name: str) -> Dict[str, Dict]:
     """
     Helper function to detect inference engines for a device type.
 
     Args:
         device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
+        device_name: device name
 
     Returns:
         dict: Engine availability information.
     """
     detector = InferenceEngineDetector()
-    return detector.detect_engines_for_device(device_type)
+    return detector.detect_engines_for_device(device_type, device_name)
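Net effect of this file's changes: callers now thread the device name down to the ROCm architecture check, and the result dict reports `llamacpp-vulkan` and `llamacpp-rocm` as separate engines instead of a single `llamacpp` entry. A short illustration of the reworked interface; the device name and printed results are hypothetical, while the keys and dict shape come from the hunks above:

```python
# Illustrative call into the reworked detector (hypothetical device name).
from lemonade.common.inference_engines import detect_inference_engines

engines = detect_inference_engines("amd_dgpu", "AMD Radeon RX 7900 XTX")

# Each llama.cpp backend now reports independently, e.g.:
#   engines["llamacpp-vulkan"] -> {"available": True, "version": "...", "backend": "vulkan"}
#   engines["llamacpp-rocm"]   -> {"available": False, "error": "rocm binaries not installed"}
for name, info in engines.items():
    status = "ok" if info.get("available") else info.get("error", "unavailable")
    print(f"{name}: {status}")
```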