lemonade-sdk 8.0.6__tar.gz → 8.1.1__tar.gz

This diff represents the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in that registry.

This version of lemonade-sdk has been flagged as potentially problematic.

Files changed (78)
  1. {lemonade_sdk-8.0.6/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.1}/PKG-INFO +74 -24
  2. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/README.md +47 -6
  3. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/setup.py +28 -19
  4. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/inference_engines.py +62 -77
  5. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/network.py +18 -1
  6. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/system_info.py +61 -44
  7. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/llamacpp/bench.py +3 -1
  8. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/llamacpp/load.py +13 -4
  9. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/llamacpp/utils.py +229 -61
  10. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/load.py +239 -112
  11. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/utils.py +19 -7
  12. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/llamacpp.py +30 -53
  13. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/serve.py +64 -123
  14. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/static/styles.css +208 -6
  15. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/static/webapp.html +510 -71
  16. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/tray.py +4 -2
  17. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/utils/thread.py +2 -4
  18. lemonade_sdk-8.1.1/src/lemonade/version.py +1 -0
  19. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_install/install.py +90 -86
  20. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1/src/lemonade_sdk.egg-info}/PKG-INFO +74 -24
  21. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/requires.txt +22 -8
  22. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_server/cli.py +79 -26
  23. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_server/model_manager.py +4 -3
  24. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_server/pydantic_models.py +1 -4
  25. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_server/server_models.json +60 -11
  26. lemonade_sdk-8.0.6/src/lemonade/version.py +0 -1
  27. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/LICENSE +0 -0
  28. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/NOTICE.md +0 -0
  29. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/setup.cfg +0 -0
  30. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/__init__.py +0 -0
  31. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/api.py +0 -0
  32. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/cache.py +0 -0
  33. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/cli.py +0 -0
  34. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/__init__.py +0 -0
  35. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/build.py +0 -0
  36. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/cli_helpers.py +0 -0
  37. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/exceptions.py +0 -0
  38. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/filesystem.py +0 -0
  39. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/printing.py +0 -0
  40. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/status.py +0 -0
  41. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/common/test_helpers.py +0 -0
  42. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/profilers/__init__.py +0 -0
  43. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/profilers/memory_tracker.py +0 -0
  44. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/profilers/profiler.py +0 -0
  45. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/sequence.py +0 -0
  46. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/state.py +0 -0
  47. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/__init__.py +0 -0
  48. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/accuracy.py +0 -0
  49. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/adapter.py +0 -0
  50. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/bench.py +0 -0
  51. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/huggingface/bench.py +0 -0
  52. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/huggingface/load.py +0 -0
  53. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/huggingface/utils.py +0 -0
  54. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/humaneval.py +0 -0
  55. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/management_tools.py +0 -0
  56. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/mmlu.py +0 -0
  57. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/__init__.py +0 -0
  58. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/oga/bench.py +0 -0
  59. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/perplexity.py +0 -0
  60. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/prompt.py +0 -0
  61. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/quark/__init__.py +0 -0
  62. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/quark/quark_load.py +0 -0
  63. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/quark/quark_quantize.py +0 -0
  64. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/report/__init__.py +0 -0
  65. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/report/llm_report.py +0 -0
  66. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/report/table.py +0 -0
  67. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/__init__.py +0 -0
  68. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/static/favicon.ico +0 -0
  69. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/tool_calls.py +0 -0
  70. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/utils/port.py +0 -0
  71. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/utils/system_tray.py +0 -0
  72. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/server/webapp.py +0 -0
  73. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade/tools/tool.py +0 -0
  74. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_install/__init__.py +0 -0
  75. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/SOURCES.txt +0 -0
  76. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
  77. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
  78. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.1}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.0.6
+Version: 8.1.1
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.13
@@ -22,16 +22,16 @@ Requires-Dist: pytz
 Requires-Dist: zstandard
 Requires-Dist: fastapi
 Requires-Dist: uvicorn[standard]
-Requires-Dist: openai>=1.81.0
+Requires-Dist: openai<1.97.1,>=1.81.0
 Requires-Dist: transformers<=4.53.2
 Requires-Dist: jinja2
 Requires-Dist: tabulate
 Requires-Dist: sentencepiece
-Requires-Dist: huggingface-hub==0.33.0
-Provides-Extra: oga-hybrid
-Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
-Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
-Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
+Requires-Dist: huggingface-hub[hf_xet]==0.33.0
+Requires-Dist: python-dotenv
+Provides-Extra: oga-ryzenai
+Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
+Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
 Provides-Extra: oga-cpu
 Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
 Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
@@ -41,16 +41,35 @@ Requires-Dist: accelerate; extra == "dev"
 Requires-Dist: datasets; extra == "dev"
 Requires-Dist: pandas>=1.5.3; extra == "dev"
 Requires-Dist: matplotlib; extra == "dev"
+Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
 Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
 Requires-Dist: lm-eval[api]; extra == "dev"
+Provides-Extra: oga-hybrid
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
+Provides-Extra: oga-unified
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
 Provides-Extra: oga-hybrid-minimal
-Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
 Provides-Extra: oga-cpu-minimal
 Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
+Provides-Extra: oga-npu-minimal
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
 Provides-Extra: llm
 Requires-Dist: lemonade-sdk[dev]; extra == "llm"
 Provides-Extra: llm-oga-cpu
 Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
+Provides-Extra: llm-oga-npu
+Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
+Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
+Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
+Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
+Provides-Extra: llm-oga-hybrid
+Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
+Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
+Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
+Provides-Extra: llm-oga-unified
+Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
 Provides-Extra: llm-oga-igpu
 Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
 Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
@@ -61,16 +80,6 @@ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
 Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
 Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
 Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
-Provides-Extra: llm-oga-npu
-Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
-Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
-Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
-Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
-Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
-Provides-Extra: llm-oga-hybrid
-Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
-Provides-Extra: llm-oga-unified
-Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
 Dynamic: author-email
 Dynamic: description
 Dynamic: description-content-type
@@ -129,7 +138,9 @@ Dynamic: summary
 <a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
 </h3>
 
-Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
+Lemonade helps users run local LLMs with the highest performance by configuring state-of-the-art inference engines for their NPUs and GPUs.
+
+Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-your-on-device-ai-revolution/), research teams like [Hazy Research at Stanford](https://www.amd.com/en/developer/resources/technical-articles/2025/minions--on-device-and-cloud-language-model-collaboration-on-ryz.html), and large companies like [AMD](https://www.amd.com/en/developer/resources/technical-articles/unlocking-a-wave-of-llm-apps-on-ryzen-ai-through-lemonade-server.html) use Lemonade to run LLMs.
 
 ## Getting Started
 
@@ -148,7 +159,7 @@ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus
 </p>
 
 > [!TIP]
-> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email lemonade@amd.com.
+> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or [email](lemonade@amd.com).
 
 ## Using the CLI
 
@@ -170,11 +181,14 @@ To check all models available, use the `list` command:
 lemonade-server list
 ```
 
-> Note: If you installed from source, use the `lemonade-server-dev` command instead.
+> **Note**: If you installed from source, use the `lemonade-server-dev` command instead.
+
+> **Tip**: You can use `--llamacpp vulkan/rocm` to select a backend when running GGUF models.
+
 
 ## Model Library
 
-Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
+Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
 
 You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
 <p align="center">
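The `--llamacpp` backend selector mentioned in the tip above is new in this release, but the diff shows no invocation of it. As a sketch only, assuming the flag hangs off the `serve` subcommand (the subcommand name is an assumption; the flag and its `vulkan`/`rocm` values come straight from the tip):

```bash
# Sketch: `serve` is an assumed subcommand name; --llamacpp and its
# vulkan/rocm values are taken from the tip in the diff above.
lemonade-server serve --llamacpp vulkan   # GGUF models on the Vulkan backend
lemonade-server serve --llamacpp rocm     # GGUF models on the ROCm backend
```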
@@ -212,7 +226,7 @@ Lemonade supports the following configurations, while also making it easy to swi
 <tr>
 <td><strong>🎮 GPU</strong></td>
 <td align="center">—</td>
-<td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
+<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
 <td align="center">—</td>
 <td align="center">✅</td>
 <td align="center">✅</td>
@@ -228,6 +242,38 @@
 </tbody>
 </table>
 
+<details>
+<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
+
+<br>
+
+<table>
+<thead>
+<tr>
+<th>Architecture</th>
+<th>Platform Support</th>
+<th>GPU Models</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><b>gfx1151</b> (STX Halo)</td>
+<td>Windows, Ubuntu</td>
+<td>Ryzen AI MAX+ Pro 395</td>
+</tr>
+<tr>
+<td><b>gfx120X</b> (RDNA4)</td>
+<td>Windows only</td>
+<td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
+</tr>
+<tr>
+<td><b>gfx110X</b> (RDNA3)</td>
+<td>Windows, Ubuntu</td>
+<td>Radeon PRO W7900/W7800/W7700/V710, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT</td>
+</tr>
+</tbody>
+</table>
+</details>
 
 ## Integrate Lemonade Server with Your Application
 
@@ -263,7 +309,7 @@ completion = client.chat.completions.create(
 print(completion.choices[0].message.content)
 ```
 
-For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
+For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
 
 ## Beyond an LLM Server
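The hunk above shows only the tail of the README's OpenAI-client example. For orientation, a self-contained version of such a call could look like the sketch below; port 8000 matches the Model Manager link earlier in this diff, while the `/api/v1` route, the API key, and the model name are illustrative assumptions rather than values taken from the diff:

```python
from openai import OpenAI

# Port 8000 matches the Model Manager URL above; the /api/v1 route, key,
# and model name are assumptions for illustration.
client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="lemonade")

completion = client.chat.completions.create(
    model="Llama-3.2-1B-Instruct-Hybrid",  # hypothetical; check `lemonade-server list`
    messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message.content)
```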
 
@@ -272,6 +318,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
 - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
 - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
 
+## FAQ
+
+To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
+
 
 ## Contributing
 
 We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
README.md

@@ -47,7 +47,9 @@
 <a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
 </h3>
 
-Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
+Lemonade helps users run local LLMs with the highest performance by configuring state-of-the-art inference engines for their NPUs and GPUs.
+
+Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-your-on-device-ai-revolution/), research teams like [Hazy Research at Stanford](https://www.amd.com/en/developer/resources/technical-articles/2025/minions--on-device-and-cloud-language-model-collaboration-on-ryz.html), and large companies like [AMD](https://www.amd.com/en/developer/resources/technical-articles/unlocking-a-wave-of-llm-apps-on-ryzen-ai-through-lemonade-server.html) use Lemonade to run LLMs.
 
 ## Getting Started
 
@@ -66,7 +68,7 @@ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus
 </p>
 
 > [!TIP]
-> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email lemonade@amd.com.
+> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or [email](lemonade@amd.com).
 
 ## Using the CLI
 
@@ -88,11 +90,14 @@ To check all models available, use the `list` command:
 lemonade-server list
 ```
 
-> Note: If you installed from source, use the `lemonade-server-dev` command instead.
+> **Note**: If you installed from source, use the `lemonade-server-dev` command instead.
+
+> **Tip**: You can use `--llamacpp vulkan/rocm` to select a backend when running GGUF models.
+
 
 ## Model Library
 
-Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
+Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
 
 You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
 <p align="center">
@@ -130,7 +135,7 @@ Lemonade supports the following configurations, while also making it easy to swi
 <tr>
 <td><strong>🎮 GPU</strong></td>
 <td align="center">—</td>
-<td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
+<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
 <td align="center">—</td>
 <td align="center">✅</td>
 <td align="center">✅</td>
@@ -146,6 +151,38 @@
 </tbody>
 </table>
 
+<details>
+<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
+
+<br>
+
+<table>
+<thead>
+<tr>
+<th>Architecture</th>
+<th>Platform Support</th>
+<th>GPU Models</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><b>gfx1151</b> (STX Halo)</td>
+<td>Windows, Ubuntu</td>
+<td>Ryzen AI MAX+ Pro 395</td>
+</tr>
+<tr>
+<td><b>gfx120X</b> (RDNA4)</td>
+<td>Windows only</td>
+<td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
+</tr>
+<tr>
+<td><b>gfx110X</b> (RDNA3)</td>
+<td>Windows, Ubuntu</td>
+<td>Radeon PRO W7900/W7800/W7700/V710, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT</td>
+</tr>
+</tbody>
+</table>
+</details>
 
 ## Integrate Lemonade Server with Your Application
 
@@ -181,7 +218,7 @@ completion = client.chat.completions.create(
 print(completion.choices[0].message.content)
 ```
 
-For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
+For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
 
 ## Beyond an LLM Server
 
@@ -190,6 +227,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
 - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
 - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
 
+## FAQ
+
+To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
+
 
 ## Contributing
 
 We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
setup.py

@@ -44,21 +44,20 @@ setup(
         "zstandard",
         "fastapi",
         "uvicorn[standard]",
-        "openai>=1.81.0",
+        "openai>=1.81.0,<1.97.1",
         "transformers<=4.53.2",
         "jinja2",
         "tabulate",
         "sentencepiece",
-        "huggingface-hub==0.33.0",
+        "huggingface-hub[hf_xet]==0.33.0",
+        "python-dotenv",
     ],
     extras_require={
         # The non-dev extras are meant to deploy specific backends into end-user
         # applications, without including developer-focused tools
-        "oga-hybrid": [
-            # Note: `lemonade-install --ryzenai hybrid` is necessary
-            # to complete installation
-            "onnx==1.16.1",
-            "numpy==1.26.4",
+        # Primary NPU extra using unified PyPI package
+        "oga-ryzenai": [
+            "onnxruntime-genai-directml-ryzenai==0.7.0.2",
            "protobuf>=6.30.1",
         ],
         "oga-cpu": [
@@ -75,17 +74,38 @@ setup(
            "datasets",
            "pandas>=1.5.3",
            "matplotlib",
+           "model-generate==1.5.0; platform_system=='Windows' and python_version=='3.10'",
            # Install human-eval from a forked repo with Windows support until the
            # PR (https://github.com/openai/human-eval/pull/53) is merged
            "human-eval-windows==1.0.4",
            "lm-eval[api]",
         ],
         # Keep backwards compatibility for old extras names
-        "oga-hybrid-minimal": ["lemonade-sdk[oga-hybrid]"],
+        "oga-hybrid": ["lemonade-sdk[oga-ryzenai]"],
+        "oga-unified": ["lemonade-sdk[oga-ryzenai]"],
+        "oga-hybrid-minimal": ["lemonade-sdk[oga-ryzenai]"],
         "oga-cpu-minimal": ["lemonade-sdk[oga-cpu]"],
+        "oga-npu-minimal": ["lemonade-sdk[oga-ryzenai]"],
         "llm": ["lemonade-sdk[dev]"],
         "llm-oga-cpu": ["lemonade-sdk[dev,oga-cpu]"],
         # The following extras are deprecated and/or not commonly used
+        "llm-oga-npu": [
+            "onnx==1.16.0",
+            # NPU requires specific onnxruntime version for Ryzen AI compatibility
+            # This may conflict with other OGA extras that require >=1.22.0
+            "onnxruntime==1.18.0",
+            "numpy==1.26.4",
+            "protobuf>=6.30.1",
+            "lemonade-sdk[dev]",
+        ],
+        "llm-oga-hybrid": [
+            # Note: `lemonade-install --ryzenai hybrid` is necessary
+            # to complete installation for RAI 1.4.0.
+            "onnx==1.16.1",
+            "numpy==1.26.4",
+            "protobuf>=6.30.1",
+        ],
+        "llm-oga-unified": ["lemonade-sdk[dev, llm-oga-hybrid]"],
         "llm-oga-igpu": [
            "onnxruntime-genai-directml==0.6.0",
            "onnxruntime-directml>=1.19.0,<1.22.0",
@@ -98,17 +118,6 @@ setup(
            "transformers<=4.51.3",
            "lemonade-sdk[dev]",
         ],
-        "llm-oga-npu": [
-            "onnx==1.16.0",
-            # NPU requires specific onnxruntime version for Ryzen AI compatibility
-            # This may conflict with other OGA extras that require >=1.22.0
-            "onnxruntime==1.18.0",
-            "numpy==1.26.4",
-            "protobuf>=6.30.1",
-            "lemonade-sdk[dev]",
-        ],
-        "llm-oga-hybrid": ["lemonade-sdk[dev,oga-hybrid]"],
-        "llm-oga-unified": ["lemonade-sdk[llm-oga-hybrid]"],
     },
     classifiers=[],
     entry_points={
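The net effect of this extras reshuffle is that `oga-ryzenai` becomes the single NPU extra, with the old names kept as aliases. Based purely on the mapping above, the install commands resolve like this:

```bash
# Primary extra: pulls onnxruntime-genai-directml-ryzenai==0.7.0.2 and protobuf
pip install "lemonade-sdk[oga-ryzenai]"

# Legacy names now resolve to the same oga-ryzenai package set
pip install "lemonade-sdk[oga-hybrid]"
pip install "lemonade-sdk[oga-npu-minimal]"
```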
src/lemonade/common/inference_engines.py

@@ -2,7 +2,6 @@ import os
 import sys
 import importlib.util
 import importlib.metadata
-import platform
 import subprocess
 from abc import ABC, abstractmethod
 from typing import Dict, Optional
@@ -19,7 +18,9 @@ class InferenceEngineDetector:
         self.llamacpp_detector = LlamaCppDetector()
         self.transformers_detector = TransformersDetector()
 
-    def detect_engines_for_device(self, device_type: str) -> Dict[str, Dict]:
+    def detect_engines_for_device(
+        self, device_type: str, device_name: str
+    ) -> Dict[str, Dict]:
         """
         Detect all available inference engines for a specific device type.
 
@@ -36,10 +37,19 @@
         if oga_info:
             engines["oga"] = oga_info
 
-        # Detect llama.cpp availability
-        llamacpp_info = self.llamacpp_detector.detect_for_device(device_type)
+        # Detect llama.cpp vulkan availability
+        llamacpp_info = self.llamacpp_detector.detect_for_device(
+            device_type, device_name, "vulkan"
+        )
+        if llamacpp_info:
+            engines["llamacpp-vulkan"] = llamacpp_info
+
+        # Detect llama.cpp rocm availability
+        llamacpp_info = self.llamacpp_detector.detect_for_device(
+            device_type, device_name, "rocm"
+        )
         if llamacpp_info:
-            engines["llamacpp"] = llamacpp_info
+            engines["llamacpp-rocm"] = llamacpp_info
 
         # Detect Transformers availability
         transformers_info = self.transformers_detector.detect_for_device(device_type)
@@ -206,57 +216,40 @@
     Detector for llama.cpp.
     """
 
-    def detect_for_device(self, device_type: str) -> Optional[Dict]:
+    def detect_for_device(
+        self, device_type: str, device_name: str, backend: str
+    ) -> Optional[Dict]:
         """
         Detect llama.cpp availability for specific device.
         """
         try:
-            # Map device types to llama.cpp backends
-            device_backend_map = {
-                "cpu": "cpu",
-                "amd_igpu": "vulkan",
-                "amd_dgpu": "vulkan",
-            }
 
-            if device_type not in device_backend_map:
+            if device_type not in ["cpu", "amd_igpu", "amd_dgpu"]:
                 return None
 
-            backend = device_backend_map[device_type]
-            is_installed = self.is_installed()
-
-            # Check requirements based on backend
-            if backend == "vulkan":
-                vulkan_available = self._check_vulkan_support()
-                if not vulkan_available:
-                    return {"available": False, "error": "Vulkan not available"}
-
-                # Vulkan is available
-                if is_installed:
-                    result = {
-                        "available": True,
-                        "version": self._get_llamacpp_version(),
-                        "backend": backend,
-                    }
-                    return result
-                else:
-                    return {
-                        "available": False,
-                        "error": "llama.cpp binaries not installed",
-                    }
-            else:
-                # CPU backend
-                if is_installed:
-                    result = {
-                        "available": True,
-                        "version": self._get_llamacpp_version(),
-                        "backend": backend,
-                    }
-                    return result
-                else:
-                    return {
-                        "available": False,
-                        "error": "llama.cpp binaries not installed",
-                    }
+            # Check if the device is supported by the backend
+            if device_type == "cpu":
+                device_supported = True
+            elif device_type == "amd_igpu" or device_type == "amd_dgpu":
+                if backend == "vulkan":
+                    device_supported = self._check_vulkan_support()
+                elif backend == "rocm":
+                    device_supported = self._check_rocm_support(device_name.lower())
+            if not device_supported:
+                return {"available": False, "error": f"{backend} not available"}
+
+            is_installed = self.is_installed(backend)
+            if not is_installed:
+                return {
+                    "available": False,
+                    "error": f"{backend} binaries not installed",
+                }
+
+            return {
+                "available": True,
+                "version": self._get_llamacpp_version(backend),
+                "backend": backend,
+            }
 
         except (ImportError, OSError, subprocess.SubprocessError) as e:
             return {
@@ -264,35 +257,17 @@
                 "error": f"llama.cpp detection failed: {str(e)}",
             }
 
-    def is_installed(self) -> bool:
+    def is_installed(self, backend: str) -> bool:
         """
-        Check if llama.cpp binaries are available.
+        Check if llama.cpp binaries are available for any backend.
         """
+        from lemonade.tools.llamacpp.utils import get_llama_server_exe_path
 
-        # Check lemonade-managed binary locations
         try:
-
-            # Check lemonade server directory
-            server_base_dir = os.path.join(
-                os.path.dirname(sys.executable), "llama_server"
-            )
-
-            if platform.system().lower() == "windows":
-                server_exe_path = os.path.join(server_base_dir, "llama-server.exe")
-            else:
-                # Check both build/bin and root directory locations
-                build_bin_path = os.path.join(
-                    server_base_dir, "build", "bin", "llama-server"
-                )
-                root_path = os.path.join(server_base_dir, "llama-server")
-                server_exe_path = (
-                    build_bin_path if os.path.exists(build_bin_path) else root_path
-                )
-
+            server_exe_path = get_llama_server_exe_path(backend)
             if os.path.exists(server_exe_path):
                 return True
-
-        except (ImportError, OSError):
+        except (ImportError, OSError, ValueError):
            pass
 
         return False
@@ -334,13 +309,22 @@
         except OSError:
             return False
 
-    def _get_llamacpp_version(self) -> str:
+    def _check_rocm_support(self, device_name: str) -> bool:
+        """
+        Check if ROCM is available for GPU acceleration.
+        """
+        from lemonade.tools.llamacpp.utils import identify_rocm_arch_from_name
+
+        return identify_rocm_arch_from_name(device_name) is not None
+
+    def _get_llamacpp_version(self, backend: str) -> str:
         """
-        Get llama.cpp version from lemonade's managed installation.
+        Get llama.cpp version from lemonade's managed installation for specific backend.
         """
         try:
+            # Use backend-specific path - same logic as get_llama_folder_path in utils.py
             server_base_dir = os.path.join(
-                os.path.dirname(sys.executable), "llama_server"
+                os.path.dirname(sys.executable), backend, "llama_server"
             )
             version_file = os.path.join(server_base_dir, "version.txt")
 
@@ -401,15 +385,16 @@
         )
 
 
-def detect_inference_engines(device_type: str) -> Dict[str, Dict]:
+def detect_inference_engines(device_type: str, device_name: str) -> Dict[str, Dict]:
     """
     Helper function to detect inference engines for a device type.
 
     Args:
         device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
+        device_name: device name
 
     Returns:
         dict: Engine availability information.
     """
     detector = InferenceEngineDetector()
-    return detector.detect_engines_for_device(device_type)
+    return detector.detect_engines_for_device(device_type, device_name)
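Callers of this module must now pass a device name along with the device type, and they get backend-qualified engine keys back. A minimal sketch of the new call shape, using a made-up GPU name (for the ROCm check the name is lowercased and matched against the known ROCm architectures):

```python
from lemonade.common.inference_engines import detect_inference_engines

# "Radeon RX 7900 XTX" is an illustrative device name, not a required value.
engines = detect_inference_engines("amd_dgpu", "Radeon RX 7900 XTX")

# 8.1.1 reports "llamacpp-vulkan" and "llamacpp-rocm" keys instead of one "llamacpp".
rocm = engines.get("llamacpp-rocm")
if rocm and rocm["available"]:
    print(f"llama.cpp ROCm {rocm['version']} is ready")
elif rocm:
    print(f"ROCm backend unavailable: {rocm['error']}")
```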
src/lemonade/common/network.py

@@ -1,7 +1,7 @@
 import os
 from typing import Optional
 import socket
-from huggingface_hub import model_info
+from huggingface_hub import model_info, snapshot_download
 
 
 def is_offline():
@@ -48,3 +48,20 @@ def get_base_model(checkpoint: str) -> Optional[str]:
     except Exception:  # pylint: disable=broad-except
         pass
     return None
+
+
+def custom_snapshot_download(repo_id, **kwargs):
+    """
+    Custom snapshot download with retry logic for Windows symlink privilege errors.
+    """
+    for attempt in range(2):
+        try:
+            return snapshot_download(repo_id=repo_id, **kwargs)
+        except OSError as e:
+            if (
+                hasattr(e, "winerror")
+                and e.winerror == 1314  # pylint: disable=no-member
+                and attempt < 1
+            ):
+                continue
+            raise
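WinError 1314 is `ERROR_PRIVILEGE_NOT_HELD`, which Windows raises when a process tries to create a symlink without the required privilege, so the helper retries the download once before re-raising. All keyword arguments are forwarded to `snapshot_download` unchanged; a minimal usage sketch (the repo id and target directory are illustrative):

```python
from lemonade.common.network import custom_snapshot_download

# Illustrative repo id and local_dir; kwargs pass straight through to
# huggingface_hub.snapshot_download.
path = custom_snapshot_download(
    "Qwen/Qwen2.5-0.5B-Instruct",
    local_dir="./models/qwen",
)
print(path)  # local folder containing the downloaded snapshot
```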