lemonade-sdk 8.1.2__tar.gz → 8.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (85)
  1. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/NOTICE.md +27 -1
  2. {lemonade_sdk-8.1.2/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.4}/PKG-INFO +26 -52
  3. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/README.md +19 -46
  4. lemonade_sdk-8.1.4/pyproject.toml +8 -0
  5. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/setup.py +8 -8
  6. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/api.py +5 -0
  7. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/network.py +27 -1
  8. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/llamacpp/utils.py +2 -1
  9. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/oga/load.py +2 -1
  10. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/oga/utils.py +54 -33
  11. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/llamacpp.py +104 -6
  12. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/serve.py +117 -9
  13. lemonade_sdk-8.1.4/src/lemonade/tools/server/static/js/chat.js +735 -0
  14. lemonade_sdk-8.1.4/src/lemonade/tools/server/static/js/model-settings.js +162 -0
  15. lemonade_sdk-8.1.4/src/lemonade/tools/server/static/js/models.js +865 -0
  16. lemonade_sdk-8.1.4/src/lemonade/tools/server/static/js/shared.js +491 -0
  17. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/static/styles.css +652 -26
  18. lemonade_sdk-8.1.4/src/lemonade/tools/server/static/webapp.html +257 -0
  19. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/tray.py +51 -3
  20. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/utils/port.py +3 -2
  21. lemonade_sdk-8.1.4/src/lemonade/version.py +1 -0
  22. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_install/install.py +1 -1
  23. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4/src/lemonade_sdk.egg-info}/PKG-INFO +26 -52
  24. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_sdk.egg-info/SOURCES.txt +7 -1
  25. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_sdk.egg-info/requires.txt +8 -6
  26. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_server/cli.py +36 -18
  27. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_server/model_manager.py +12 -2
  28. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_server/pydantic_models.py +23 -3
  29. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_server/server_models.json +9 -3
  30. lemonade_sdk-8.1.4/src/lemonade_server/settings.py +39 -0
  31. lemonade_sdk-8.1.2/src/lemonade/tools/server/static/webapp.html +0 -1204
  32. lemonade_sdk-8.1.2/src/lemonade/version.py +0 -1
  33. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/LICENSE +0 -0
  34. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/setup.cfg +0 -0
  35. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/__init__.py +0 -0
  36. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/cache.py +0 -0
  37. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/cli.py +0 -0
  38. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/__init__.py +0 -0
  39. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/build.py +0 -0
  40. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/cli_helpers.py +0 -0
  41. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/exceptions.py +0 -0
  42. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/filesystem.py +0 -0
  43. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/inference_engines.py +0 -0
  44. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/printing.py +0 -0
  45. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/status.py +0 -0
  46. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/system_info.py +0 -0
  47. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/test_helpers.py +0 -0
  48. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/profilers/__init__.py +0 -0
  49. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/profilers/memory_tracker.py +0 -0
  50. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/profilers/profiler.py +0 -0
  51. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/sequence.py +0 -0
  52. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/state.py +0 -0
  53. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/__init__.py +0 -0
  54. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/accuracy.py +0 -0
  55. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/adapter.py +0 -0
  56. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/bench.py +0 -0
  57. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/huggingface/bench.py +0 -0
  58. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/huggingface/load.py +0 -0
  59. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/huggingface/utils.py +0 -0
  60. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/humaneval.py +0 -0
  61. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/llamacpp/bench.py +0 -0
  62. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/llamacpp/load.py +0 -0
  63. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/management_tools.py +0 -0
  64. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/mmlu.py +0 -0
  65. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/oga/__init__.py +0 -0
  66. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/oga/bench.py +0 -0
  67. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/perplexity.py +0 -0
  68. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/prompt.py +0 -0
  69. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/quark/__init__.py +0 -0
  70. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/quark/quark_load.py +0 -0
  71. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/quark/quark_quantize.py +0 -0
  72. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/report/__init__.py +0 -0
  73. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/report/llm_report.py +0 -0
  74. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/report/table.py +0 -0
  75. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/__init__.py +0 -0
  76. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/static/favicon.ico +0 -0
  77. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/tool_calls.py +0 -0
  78. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/utils/system_tray.py +0 -0
  79. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/utils/thread.py +0 -0
  80. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/webapp.py +0 -0
  81. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/tool.py +0 -0
  82. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_install/__init__.py +0 -0
  83. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
  84. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
  85. {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
@@ -1,7 +1,33 @@
1
1
  PORTIONS LICENSED AS FOLLOWS
2
2
 
3
+ ## llama.cpp
4
+
5
+ Binaries for llama.cpp are downloaded under the MIT license from https://github.com/ggml-org/llama.cpp, as well as https://github.com/lemonade-sdk/llamacpp-rocm (which uses https://github.com/ggml-org/llama.cpp to build them).
6
+
3
7
  Lemonade SDK used the [ONNX TurnkeyML](https://github.com/onnx/turnkeyml) project as a starting point under the [Apache 2.0 license](./LICENSE).
4
8
 
9
+ > MIT License
10
+ >
11
+ > Copyright (c) 2023-2024 The ggml authors
12
+ >
13
+ > Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ > of this software and associated documentation files (the "Software"), to deal
15
+ > in the Software without restriction, including without limitation the rights
16
+ > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ > copies of the Software, and to permit persons to whom the Software is
18
+ > furnished to do so, subject to the following conditions:
19
+ >
20
+ > The above copyright notice and this permission notice shall be included in all
21
+ > copies or substantial portions of the Software.
22
+ >
23
+ > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ > SOFTWARE.
30
+
5
31
  ## TurnkeyML Attribution
6
32
 
7
33
  TurnkeyML used code from other open source projects as a starting point (see [NOTICE.md](NOTICE.md)). Thank you Philip Colangelo, Derek Elkins, Jeremy Fowers, Dan Gard, Victoria Godsoe, Mark Heaps, Daniel Holanda, Brian Kurtz, Mariah Larwood, Philip Lassen, Andrew Ling, Adrian Macias, Gary Malik, Sarah Massengill, Ashwin Murthy, Hatice Ozen, Tim Sears, Sean Settle, Krishna Sivakumar, Aviv Weinstein, Xueli Xao, Bill Xing, and Lev Zlotnik for your contributions to that work.
@@ -18,4 +44,4 @@ TurnkeyML used code from other open source projects as a starting point (see [NO
18
44
  >
19
45
  >The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
20
46
  >
21
- >THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
47
+ >THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,18 +1,18 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.1.2
3
+ Version: 8.1.4
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
- Requires-Python: >=3.10, <3.13
6
+ Requires-Python: >=3.10, <3.14
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  License-File: NOTICE.md
10
10
  Requires-Dist: invoke>=2.0.0
11
- Requires-Dist: onnx<1.18.0,>=1.11.0
11
+ Requires-Dist: onnx==1.18.0
12
12
  Requires-Dist: pyyaml>=5.4
13
13
  Requires-Dist: typeguard>=2.3.13
14
14
  Requires-Dist: packaging>=20.9
15
- Requires-Dist: numpy<2.0.0
15
+ Requires-Dist: numpy
16
16
  Requires-Dist: fasteners
17
17
  Requires-Dist: GitPython>=3.1.40
18
18
  Requires-Dist: psutil>=6.1.1
@@ -30,7 +30,7 @@ Requires-Dist: sentencepiece
30
30
  Requires-Dist: huggingface-hub[hf_xet]==0.33.0
31
31
  Requires-Dist: python-dotenv
32
32
  Provides-Extra: oga-ryzenai
33
- Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
33
+ Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2.1; extra == "oga-ryzenai"
34
34
  Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
35
35
  Provides-Extra: oga-cpu
36
36
  Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
@@ -41,9 +41,10 @@ Requires-Dist: accelerate; extra == "dev"
41
41
  Requires-Dist: datasets; extra == "dev"
42
42
  Requires-Dist: pandas>=1.5.3; extra == "dev"
43
43
  Requires-Dist: matplotlib; extra == "dev"
44
- Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
45
44
  Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
46
45
  Requires-Dist: lm-eval[api]; extra == "dev"
46
+ Provides-Extra: model-generate
47
+ Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "model-generate"
47
48
  Provides-Extra: oga-hybrid
48
49
  Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
49
50
  Provides-Extra: oga-unified
@@ -105,7 +106,7 @@ Dynamic: summary
105
106
  <img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
106
107
  </a>
107
108
  <a href="docs/README.md#installation" title="Check out our instructions">
108
- <img src="https://img.shields.io/badge/Python-3.10%20%7C%203.12-blue?logo=python&logoColor=white" alt="Made with Python" />
109
+ <img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" />
109
110
  </a>
110
111
  <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
111
112
  <img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
@@ -199,48 +200,11 @@ You can also import custom GGUF and ONNX models from Hugging Face by using our [
199
200
 
200
201
  Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about it [here](./docs/README.md#software-and-hardware-overview).
201
202
 
202
- <table>
203
- <thead>
204
- <tr>
205
- <th rowspan="2">Hardware</th>
206
- <th colspan="3" align="center">🛠️ Engine Support</th>
207
- <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
208
- </tr>
209
- <tr>
210
- <th align="center">OGA</th>
211
- <th align="center">llamacpp</th>
212
- <th align="center">HF</th>
213
- <th align="center">Windows</th>
214
- <th align="center">Linux</th>
215
- </tr>
216
- </thead>
217
- <tbody>
218
- <tr>
219
- <td><strong>🧠 CPU</strong></td>
220
- <td align="center">All platforms</td>
221
- <td align="center">All platforms</td>
222
- <td align="center">All platforms</td>
223
- <td align="center">✅</td>
224
- <td align="center">✅</td>
225
- </tr>
226
- <tr>
227
- <td><strong>🎮 GPU</strong></td>
228
- <td align="center">—</td>
229
- <td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
230
- <td align="center">—</td>
231
- <td align="center">✅</td>
232
- <td align="center">✅</td>
233
- </tr>
234
- <tr>
235
- <td><strong>🤖 NPU</strong></td>
236
- <td align="center">AMD Ryzen™ AI 300 series</td>
237
- <td align="center">—</td>
238
- <td align="center">—</td>
239
- <td align="center">✅</td>
240
- <td align="center">—</td>
241
- </tr>
242
- </tbody>
243
- </table>
203
+ | Hardware | Engine: OGA | Engine: llamacpp | Engine: HF | Windows | Linux |
204
+ |----------|-------------|------------------|------------|---------|-------|
205
+ | **🧠 CPU** | All platforms | All platforms | All platforms | ✅ | ✅ |
206
+ | **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms* | — | ✅ | ✅ |
207
+ | **🤖 NPU** | AMD Ryzen™ AI 300 series | — | — | ✅ | — |
244
208
 
245
209
  <details>
246
210
  <summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
@@ -336,9 +300,19 @@ New contributors can find beginner-friendly issues tagged with "Good First Issue
336
300
 
337
301
  This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
338
302
 
339
- ## License
340
-
341
- This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
303
+ ## License and Attribution
304
+
305
+ This project is:
306
+ - [Built with Python](https://www.amd.com/en/developer/resources/technical-articles/2025/rethinking-local-ai-lemonade-servers-python-advantage.html) with ❤️ for the open source community,
307
+ - Standing on the shoulders of great tools from:
308
+ - [ggml/llama.cpp](https://github.com/ggml-org/llama.cpp)
309
+ - [OnnxRuntime GenAI](https://github.com/microsoft/onnxruntime-genai)
310
+ - [Hugging Face Hub](https://github.com/huggingface/huggingface_hub)
311
+ - [OpenAI API](https://github.com/openai/openai-python)
312
+ - and more...
313
+ - Accelerated by mentorship from the OCV Catalyst program.
314
+ - Licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE).
315
+ - Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
342
316
 
343
317
  <!--This file was originally licensed under Apache 2.0. It has been modified.
344
318
  Modifications Copyright (c) 2025 AMD-->
@@ -14,7 +14,7 @@
14
14
  <img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
15
15
  </a>
16
16
  <a href="docs/README.md#installation" title="Check out our instructions">
17
- <img src="https://img.shields.io/badge/Python-3.10%20%7C%203.12-blue?logo=python&logoColor=white" alt="Made with Python" />
17
+ <img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" />
18
18
  </a>
19
19
  <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
20
20
  <img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
@@ -108,48 +108,11 @@ You can also import custom GGUF and ONNX models from Hugging Face by using our [
108
108
 
109
109
  Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about it [here](./docs/README.md#software-and-hardware-overview).
110
110
 
111
- <table>
112
- <thead>
113
- <tr>
114
- <th rowspan="2">Hardware</th>
115
- <th colspan="3" align="center">🛠️ Engine Support</th>
116
- <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
117
- </tr>
118
- <tr>
119
- <th align="center">OGA</th>
120
- <th align="center">llamacpp</th>
121
- <th align="center">HF</th>
122
- <th align="center">Windows</th>
123
- <th align="center">Linux</th>
124
- </tr>
125
- </thead>
126
- <tbody>
127
- <tr>
128
- <td><strong>🧠 CPU</strong></td>
129
- <td align="center">All platforms</td>
130
- <td align="center">All platforms</td>
131
- <td align="center">All platforms</td>
132
- <td align="center">✅</td>
133
- <td align="center">✅</td>
134
- </tr>
135
- <tr>
136
- <td><strong>🎮 GPU</strong></td>
137
- <td align="center">—</td>
138
- <td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
139
- <td align="center">—</td>
140
- <td align="center">✅</td>
141
- <td align="center">✅</td>
142
- </tr>
143
- <tr>
144
- <td><strong>🤖 NPU</strong></td>
145
- <td align="center">AMD Ryzen™ AI 300 series</td>
146
- <td align="center">—</td>
147
- <td align="center">—</td>
148
- <td align="center">✅</td>
149
- <td align="center">—</td>
150
- </tr>
151
- </tbody>
152
- </table>
111
+ | Hardware | Engine: OGA | Engine: llamacpp | Engine: HF | Windows | Linux |
112
+ |----------|-------------|------------------|------------|---------|-------|
113
+ | **🧠 CPU** | All platforms | All platforms | All platforms | ✅ | ✅ |
114
+ | **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms* | — | ✅ | ✅ |
115
+ | **🤖 NPU** | AMD Ryzen™ AI 300 series | — | — | ✅ | — |
153
116
 
154
117
  <details>
155
118
  <summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
@@ -245,9 +208,19 @@ New contributors can find beginner-friendly issues tagged with "Good First Issue
245
208
 
246
209
  This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
247
210
 
248
- ## License
249
-
250
- This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
211
+ ## License and Attribution
212
+
213
+ This project is:
214
+ - [Built with Python](https://www.amd.com/en/developer/resources/technical-articles/2025/rethinking-local-ai-lemonade-servers-python-advantage.html) with ❤️ for the open source community,
215
+ - Standing on the shoulders of great tools from:
216
+ - [ggml/llama.cpp](https://github.com/ggml-org/llama.cpp)
217
+ - [OnnxRuntime GenAI](https://github.com/microsoft/onnxruntime-genai)
218
+ - [Hugging Face Hub](https://github.com/huggingface/huggingface_hub)
219
+ - [OpenAI API](https://github.com/openai/openai-python)
220
+ - and more...
221
+ - Accelerated by mentorship from the OCV Catalyst program.
222
+ - Licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE).
223
+ - Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
251
224
 
252
225
  <!--This file was originally licensed under Apache 2.0. It has been modified.
253
226
  Modifications Copyright (c) 2025 AMD-->
@@ -0,0 +1,8 @@
1
+ [build-system]
2
+ requires = [
3
+ "setuptools>=68",
4
+ "wheel"
5
+ ]
6
+ build-backend = "setuptools.build_meta"
7
+
8
+
@@ -28,13 +28,11 @@ setup(
28
28
  # Minimal dependencies required for end-users who are running
29
29
  # apps deployed on Lemonade SDK
30
30
  "invoke>=2.0.0",
31
- "onnx>=1.11.0,<1.18.0",
31
+ "onnx==1.18.0",
32
32
  "pyyaml>=5.4",
33
33
  "typeguard>=2.3.13",
34
34
  "packaging>=20.9",
35
- # Necessary until upstream packages account for the breaking
36
- # change to numpy
37
- "numpy<2.0.0",
35
+ "numpy",
38
36
  "fasteners",
39
37
  "GitPython>=3.1.40",
40
38
  "psutil>=6.1.1",
@@ -57,7 +55,7 @@ setup(
57
55
  # applications, without including developer-focused tools
58
56
  # Primary NPU extra using unified PyPI package
59
57
  "oga-ryzenai": [
60
- "onnxruntime-genai-directml-ryzenai==0.7.0.2",
58
+ "onnxruntime-genai-directml-ryzenai==0.7.0.2.1",
61
59
  "protobuf>=6.30.1",
62
60
  ],
63
61
  "oga-cpu": [
@@ -74,12 +72,14 @@ setup(
74
72
  "datasets",
75
73
  "pandas>=1.5.3",
76
74
  "matplotlib",
77
- "model-generate==1.5.0; platform_system=='Windows' and python_version=='3.10'",
78
75
  # Install human-eval from a forked repo with Windows support until the
79
76
  # PR (https://github.com/openai/human-eval/pull/53) is merged
80
77
  "human-eval-windows==1.0.4",
81
78
  "lm-eval[api]",
82
79
  ],
80
+ "model-generate": [
81
+ "model-generate==1.5.0; platform_system=='Windows' and python_version=='3.10'",
82
+ ],
83
83
  # Keep backwards compatibility for old extras names
84
84
  "oga-hybrid": ["lemonade-sdk[oga-ryzenai]"],
85
85
  "oga-unified": ["lemonade-sdk[oga-ryzenai]"],
@@ -128,13 +128,13 @@ setup(
128
128
  "lsdev=lemonade_server.cli:developer_entrypoint",
129
129
  ]
130
130
  },
131
- python_requires=">=3.10, <3.13",
131
+ python_requires=">=3.10, <3.14",
132
132
  long_description=open("README.md", "r", encoding="utf-8").read(),
133
133
  long_description_content_type="text/markdown",
134
134
  include_package_data=True,
135
135
  package_data={
136
136
  "lemonade_server": ["server_models.json"],
137
- "lemonade": ["tools/server/static/*"],
137
+ "lemonade": ["tools/server/static/**/*"],
138
138
  },
139
139
  )
140
140
 
@@ -36,6 +36,7 @@ def _make_state(recipe, checkpoint) -> Dict:
36
36
  def from_pretrained(
37
37
  checkpoint: str,
38
38
  recipe: str = "hf-cpu",
39
+ do_not_upgrade: bool = True,
39
40
  ) -> Tuple[ModelAdapter, TokenizerAdapter]:
40
41
  """
41
42
  Load an LLM and the corresponding tokenizer using a lemonade recipe.
@@ -43,6 +44,9 @@ def from_pretrained(
43
44
  Args:
44
45
  - checkpoint: huggingface checkpoint that defines the LLM
45
46
  - recipe: defines the implementation and hardware used for the LLM
47
+ - do_not_upgrade: prioritize the local copy of the model, if available,
48
+ even if an upgraded copy is available on the server (note: applies only
49
+ to oga-* recipes)
46
50
 
47
51
  Recipe choices:
48
52
  - hf-cpu: Huggingface Transformers implementation for CPU with max-perf settings
@@ -118,6 +122,7 @@ def from_pretrained(
118
122
  input=checkpoint,
119
123
  device=user_backend,
120
124
  dtype=backend_to_dtype[user_backend],
125
+ do_not_upgrade=do_not_upgrade,
121
126
  )
122
127
 
123
128
  return state.model, state.tokenizer
@@ -2,6 +2,7 @@ import os
2
2
  from typing import Optional
3
3
  import socket
4
4
  from huggingface_hub import model_info, snapshot_download
5
+ from huggingface_hub.errors import LocalEntryNotFoundError
5
6
 
6
7
 
7
8
  def is_offline():
@@ -50,10 +51,11 @@ def get_base_model(checkpoint: str) -> Optional[str]:
50
51
  return None
51
52
 
52
53
 
53
- def custom_snapshot_download(repo_id, **kwargs):
54
+ def _symlink_safe_snapshot_download(repo_id, **kwargs):
54
55
  """
55
56
  Custom snapshot download with retry logic for Windows symlink privilege errors.
56
57
  """
58
+
57
59
  for attempt in range(2):
58
60
  try:
59
61
  return snapshot_download(repo_id=repo_id, **kwargs)
@@ -65,3 +67,27 @@ def custom_snapshot_download(repo_id, **kwargs):
65
67
  ):
66
68
  continue
67
69
  raise
70
+
71
+
72
+ def custom_snapshot_download(repo_id, do_not_upgrade=False, **kwargs):
73
+ """
74
+ Custom snapshot download with:
75
+ 1) retry logic for Windows symlink privilege errors.
76
+ 2) do_not_upgrade allows the caller to prioritize a local copy
77
+ of the model over an upgraded remote copy.
78
+ """
79
+
80
+ if do_not_upgrade:
81
+ try:
82
+ # Prioritize the local model, if available
83
+ return _symlink_safe_snapshot_download(
84
+ repo_id, local_files_only=True, **kwargs
85
+ )
86
+ except LocalEntryNotFoundError:
87
+ # LocalEntryNotFoundError means there was no local model, at this point
88
+ # we'll accept a remote model
89
+ return _symlink_safe_snapshot_download(
90
+ repo_id, local_files_only=False, **kwargs
91
+ )
92
+ else:
93
+ return _symlink_safe_snapshot_download(repo_id, **kwargs)
@@ -585,7 +585,7 @@ def identify_gguf_models(
585
585
  return core_files, sharded_files
586
586
 
587
587
 
588
- def download_gguf(config_checkpoint, config_mmproj=None) -> dict:
588
+ def download_gguf(config_checkpoint, config_mmproj=None, do_not_upgrade=False) -> dict:
589
589
  """
590
590
  Downloads the GGUF file for the given model configuration.
591
591
 
@@ -605,6 +605,7 @@ def download_gguf(config_checkpoint, config_mmproj=None) -> dict:
605
605
  snapshot_folder = custom_snapshot_download(
606
606
  checkpoint,
607
607
  allow_patterns=list(core_files.values()) + sharded_files,
608
+ do_not_upgrade=do_not_upgrade,
608
609
  )
609
610
 
610
611
  # Ensure we downloaded all expected files
@@ -654,6 +654,7 @@ class OgaLoad(FirstTool):
654
654
  download_only: bool = False,
655
655
  trust_remote_code=False,
656
656
  subfolder: str = None,
657
+ do_not_upgrade: bool = False,
657
658
  ) -> State:
658
659
  from lemonade.common.network import (
659
660
  custom_snapshot_download,
@@ -744,7 +745,7 @@ class OgaLoad(FirstTool):
744
745
  input_model_path = custom_snapshot_download(
745
746
  checkpoint,
746
747
  ignore_patterns=["*.md", "*.txt"],
747
- local_files_only=offline,
748
+ local_files_only=offline or do_not_upgrade,
748
749
  )
749
750
  # Check if model is ONNX or safetensors
750
751
  is_onnx_model = any(
@@ -100,9 +100,10 @@ class OrtGenaiModel(ModelAdapter):
100
100
  max_new_tokens=512,
101
101
  min_new_tokens=0,
102
102
  do_sample=True,
103
- top_k=50,
104
- top_p=1.0,
105
- temperature=0.7,
103
+ top_k=None,
104
+ top_p=None,
105
+ temperature=None,
106
+ repeat_penalty=None,
106
107
  streamer: OrtGenaiStreamer = None,
107
108
  pad_token_id=None,
108
109
  stopping_criteria=None,
@@ -154,38 +155,58 @@ class OrtGenaiModel(ModelAdapter):
154
155
  if random_seed is None:
155
156
  random_seed = -1 # In og.Generator, -1 = seed with random device
156
157
 
158
+ # Get search config if available, otherwise use empty dict
159
+ # Thanks to the empty dict, if the model doesn't have a built-in search
160
+ # config, the .get() calls will all just use the default values
161
+ search_config = {}
157
162
  if self.config and "search" in self.config:
158
163
  search_config = self.config["search"]
159
- params.set_search_options(
160
- do_sample=search_config.get("do_sample", do_sample),
161
- top_k=search_config.get("top_k", top_k),
162
- top_p=search_config.get("top_p", top_p),
163
- temperature=search_config.get("temperature", temperature),
164
- max_length=max_length_to_use,
165
- min_length=min_length,
166
- early_stopping=search_config.get("early_stopping", False),
167
- length_penalty=search_config.get("length_penalty", 1.0),
168
- num_beams=search_config.get("num_beams", 1),
169
- num_return_sequences=search_config.get("num_return_sequences", 1),
170
- repetition_penalty=search_config.get("repetition_penalty", 1.0),
171
- past_present_share_buffer=search_config.get(
172
- "past_present_share_buffer", True
173
- ),
174
- random_seed=random_seed,
175
- # Not currently supported by OGA
176
- # diversity_penalty=search_config.get('diversity_penalty', 0.0),
177
- # no_repeat_ngram_size=search_config.get('no_repeat_ngram_size', 0),
178
- )
179
- else:
180
- params.set_search_options(
181
- do_sample=do_sample,
182
- top_k=top_k,
183
- top_p=top_p,
184
- temperature=temperature,
185
- max_length=max_length_to_use,
186
- min_length=min_length,
187
- random_seed=random_seed,
188
- )
164
+
165
+ # Apply parameter hierarchy: user provided > search config > defaults
166
+ default_top_k = 50
167
+ default_top_p = 1.0
168
+ default_temperature = 0.7
169
+ default_repetition_penalty = 1.0
170
+
171
+ top_k_to_use = (
172
+ top_k if top_k is not None else search_config.get("top_k", default_top_k)
173
+ )
174
+ top_p_to_use = (
175
+ top_p if top_p is not None else search_config.get("top_p", default_top_p)
176
+ )
177
+ temperature_to_use = (
178
+ temperature
179
+ if temperature is not None
180
+ else search_config.get("temperature", default_temperature)
181
+ )
182
+ # Map the llamacpp name, `repeat_penalty`, to the OGA name, `repetition_penalty`
183
+ repetition_penalty_to_use = (
184
+ repeat_penalty
185
+ if repeat_penalty is not None
186
+ else search_config.get("repetition_penalty", default_repetition_penalty)
187
+ )
188
+
189
+ # Set search options once with all parameters
190
+ params.set_search_options(
191
+ do_sample=search_config.get("do_sample", do_sample),
192
+ top_k=top_k_to_use,
193
+ top_p=top_p_to_use,
194
+ temperature=temperature_to_use,
195
+ repetition_penalty=repetition_penalty_to_use,
196
+ max_length=max_length_to_use,
197
+ min_length=min_length,
198
+ early_stopping=search_config.get("early_stopping", False),
199
+ length_penalty=search_config.get("length_penalty", 1.0),
200
+ num_beams=search_config.get("num_beams", 1),
201
+ num_return_sequences=search_config.get("num_return_sequences", 1),
202
+ past_present_share_buffer=search_config.get(
203
+ "past_present_share_buffer", True
204
+ ),
205
+ random_seed=random_seed,
206
+ # Not currently supported by OGA
207
+ # diversity_penalty=search_config.get('diversity_penalty', 0.0),
208
+ # no_repeat_ngram_size=search_config.get('no_repeat_ngram_size', 0),
209
+ )
189
210
  params.try_graph_capture_with_max_batch_size(1)
190
211
 
191
212
  generator = og.Generator(self.model, params)