lemonade-sdk 8.1.2__tar.gz → 8.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/NOTICE.md +27 -1
- {lemonade_sdk-8.1.2/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.4}/PKG-INFO +26 -52
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/README.md +19 -46
- lemonade_sdk-8.1.4/pyproject.toml +8 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/setup.py +8 -8
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/api.py +5 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/network.py +27 -1
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/llamacpp/utils.py +2 -1
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/oga/load.py +2 -1
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/oga/utils.py +54 -33
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/llamacpp.py +104 -6
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/serve.py +117 -9
- lemonade_sdk-8.1.4/src/lemonade/tools/server/static/js/chat.js +735 -0
- lemonade_sdk-8.1.4/src/lemonade/tools/server/static/js/model-settings.js +162 -0
- lemonade_sdk-8.1.4/src/lemonade/tools/server/static/js/models.js +865 -0
- lemonade_sdk-8.1.4/src/lemonade/tools/server/static/js/shared.js +491 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/static/styles.css +652 -26
- lemonade_sdk-8.1.4/src/lemonade/tools/server/static/webapp.html +257 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/tray.py +51 -3
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/utils/port.py +3 -2
- lemonade_sdk-8.1.4/src/lemonade/version.py +1 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_install/install.py +1 -1
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4/src/lemonade_sdk.egg-info}/PKG-INFO +26 -52
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_sdk.egg-info/SOURCES.txt +7 -1
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_sdk.egg-info/requires.txt +8 -6
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_server/cli.py +36 -18
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_server/model_manager.py +12 -2
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_server/pydantic_models.py +23 -3
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_server/server_models.json +9 -3
- lemonade_sdk-8.1.4/src/lemonade_server/settings.py +39 -0
- lemonade_sdk-8.1.2/src/lemonade/tools/server/static/webapp.html +0 -1204
- lemonade_sdk-8.1.2/src/lemonade/version.py +0 -1
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/LICENSE +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/setup.cfg +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/__init__.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/cache.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/cli.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/__init__.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/build.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/cli_helpers.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/exceptions.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/filesystem.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/inference_engines.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/printing.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/status.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/system_info.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/common/test_helpers.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/profilers/__init__.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/profilers/memory_tracker.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/profilers/profiler.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/sequence.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/state.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/__init__.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/accuracy.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/adapter.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/bench.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/huggingface/bench.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/huggingface/load.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/huggingface/utils.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/humaneval.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/llamacpp/bench.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/llamacpp/load.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/management_tools.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/mmlu.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/oga/__init__.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/oga/bench.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/perplexity.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/prompt.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/quark/__init__.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/quark/quark_load.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/quark/quark_quantize.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/report/__init__.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/report/llm_report.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/report/table.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/__init__.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/static/favicon.ico +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/tool_calls.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/utils/system_tray.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/utils/thread.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/server/webapp.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade/tools/tool.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_install/__init__.py +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
- {lemonade_sdk-8.1.2 → lemonade_sdk-8.1.4}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
|
@@ -1,7 +1,33 @@
|
|
|
1
1
|
PORTIONS LICENSED AS FOLLOWS
|
|
2
2
|
|
|
3
|
+
## llama.cpp
|
|
4
|
+
|
|
5
|
+
Binaries for llama.cpp are downloaded under the MIT license from https://github.com/ggml-org/llama.cpp, as well as https://github.com/lemonade-sdk/llamacpp-rocm (which uses https://github.com/ggml-org/llama.cpp to build them.)
|
|
6
|
+
|
|
3
7
|
Lemonade SDK used the [ONNX TurnkeyML](https://github.com/onnx/turnkeyml) project as a starting point under the [Apache 2.0 license](./LICENSE).
|
|
4
8
|
|
|
9
|
+
> MIT License
|
|
10
|
+
>
|
|
11
|
+
> Copyright (c) 2023-2024 The ggml authors
|
|
12
|
+
>
|
|
13
|
+
> Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
> of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
> in the Software without restriction, including without limitation the rights
|
|
16
|
+
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
> copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
> furnished to do so, subject to the following conditions:
|
|
19
|
+
>
|
|
20
|
+
> The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
> copies or substantial portions of the Software.
|
|
22
|
+
>
|
|
23
|
+
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
> SOFTWARE.
|
|
30
|
+
|
|
5
31
|
## TurnkeyML Attribution
|
|
6
32
|
|
|
7
33
|
TurnkeyML used code from other open source projects as a starting point (see [NOTICE.md](NOTICE.md)). Thank you Philip Colangelo, Derek Elkins, Jeremy Fowers, Dan Gard, Victoria Godsoe, Mark Heaps, Daniel Holanda, Brian Kurtz, Mariah Larwood, Philip Lassen, Andrew Ling, Adrian Macias, Gary Malik, Sarah Massengill, Ashwin Murthy, Hatice Ozen, Tim Sears, Sean Settle, Krishna Sivakumar, Aviv Weinstein, Xueli Xao, Bill Xing, and Lev Zlotnik for your contributions to that work.
|
|
@@ -18,4 +44,4 @@ TurnkeyML used code from other open source projects as a starting point (see [NO
|
|
|
18
44
|
>
|
|
19
45
|
>The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
20
46
|
>
|
|
21
|
-
>THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
47
|
+
>THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.1.2
|
|
3
|
+
Version: 8.1.4
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
|
-
Requires-Python: >=3.10, <3.
|
|
6
|
+
Requires-Python: >=3.10, <3.14
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
License-File: LICENSE
|
|
9
9
|
License-File: NOTICE.md
|
|
10
10
|
Requires-Dist: invoke>=2.0.0
|
|
11
|
-
Requires-Dist: onnx
|
|
11
|
+
Requires-Dist: onnx==1.18.0
|
|
12
12
|
Requires-Dist: pyyaml>=5.4
|
|
13
13
|
Requires-Dist: typeguard>=2.3.13
|
|
14
14
|
Requires-Dist: packaging>=20.9
|
|
15
|
-
Requires-Dist: numpy<2.0.0
|
|
15
|
+
Requires-Dist: numpy
|
|
16
16
|
Requires-Dist: fasteners
|
|
17
17
|
Requires-Dist: GitPython>=3.1.40
|
|
18
18
|
Requires-Dist: psutil>=6.1.1
|
|
@@ -30,7 +30,7 @@ Requires-Dist: sentencepiece
|
|
|
30
30
|
Requires-Dist: huggingface-hub[hf_xet]==0.33.0
|
|
31
31
|
Requires-Dist: python-dotenv
|
|
32
32
|
Provides-Extra: oga-ryzenai
|
|
33
|
-
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
|
|
33
|
+
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2.1; extra == "oga-ryzenai"
|
|
34
34
|
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
35
35
|
Provides-Extra: oga-cpu
|
|
36
36
|
Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
|
|
@@ -41,9 +41,10 @@ Requires-Dist: accelerate; extra == "dev"
|
|
|
41
41
|
Requires-Dist: datasets; extra == "dev"
|
|
42
42
|
Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
43
43
|
Requires-Dist: matplotlib; extra == "dev"
|
|
44
|
-
Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
|
|
45
44
|
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
46
45
|
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
46
|
+
Provides-Extra: model-generate
|
|
47
|
+
Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "model-generate"
|
|
47
48
|
Provides-Extra: oga-hybrid
|
|
48
49
|
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
|
|
49
50
|
Provides-Extra: oga-unified
|
|
@@ -105,7 +106,7 @@ Dynamic: summary
|
|
|
105
106
|
<img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
|
|
106
107
|
</a>
|
|
107
108
|
<a href="docs/README.md#installation" title="Check out our instructions">
|
|
108
|
-
<img src="https://img.shields.io/badge/Python-3.10
|
|
109
|
+
<img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" />
|
|
109
110
|
</a>
|
|
110
111
|
<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
|
|
111
112
|
<img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
|
|
@@ -199,48 +200,11 @@ You can also import custom GGUF and ONNX models from Hugging Face by using our [
|
|
|
199
200
|
|
|
200
201
|
Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about it [here](./docs/README.md#software-and-hardware-overview).
|
|
201
202
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
<th colspan="2" align="center">🖥️ OS (x86/x64)</th>
|
|
208
|
-
</tr>
|
|
209
|
-
<tr>
|
|
210
|
-
<th align="center">OGA</th>
|
|
211
|
-
<th align="center">llamacpp</th>
|
|
212
|
-
<th align="center">HF</th>
|
|
213
|
-
<th align="center">Windows</th>
|
|
214
|
-
<th align="center">Linux</th>
|
|
215
|
-
</tr>
|
|
216
|
-
</thead>
|
|
217
|
-
<tbody>
|
|
218
|
-
<tr>
|
|
219
|
-
<td><strong>🧠 CPU</strong></td>
|
|
220
|
-
<td align="center">All platforms</td>
|
|
221
|
-
<td align="center">All platforms</td>
|
|
222
|
-
<td align="center">All platforms</td>
|
|
223
|
-
<td align="center">✅</td>
|
|
224
|
-
<td align="center">✅</td>
|
|
225
|
-
</tr>
|
|
226
|
-
<tr>
|
|
227
|
-
<td><strong>🎮 GPU</strong></td>
|
|
228
|
-
<td align="center">—</td>
|
|
229
|
-
<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
|
|
230
|
-
<td align="center">—</td>
|
|
231
|
-
<td align="center">✅</td>
|
|
232
|
-
<td align="center">✅</td>
|
|
233
|
-
</tr>
|
|
234
|
-
<tr>
|
|
235
|
-
<td><strong>🤖 NPU</strong></td>
|
|
236
|
-
<td align="center">AMD Ryzen™ AI 300 series</td>
|
|
237
|
-
<td align="center">—</td>
|
|
238
|
-
<td align="center">—</td>
|
|
239
|
-
<td align="center">✅</td>
|
|
240
|
-
<td align="center">—</td>
|
|
241
|
-
</tr>
|
|
242
|
-
</tbody>
|
|
243
|
-
</table>
|
|
203
|
+
| Hardware | Engine: OGA | Engine: llamacpp | Engine: HF | Windows | Linux |
|
|
204
|
+
|----------|-------------|------------------|------------|---------|-------|
|
|
205
|
+
| **🧠 CPU** | All platforms | All platforms | All platforms | ✅ | ✅ |
|
|
206
|
+
| **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms* | — | ✅ | ✅ |
|
|
207
|
+
| **🤖 NPU** | AMD Ryzen™ AI 300 series | — | — | ✅ | — |
|
|
244
208
|
|
|
245
209
|
<details>
|
|
246
210
|
<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
|
|
@@ -336,9 +300,19 @@ New contributors can find beginner-friendly issues tagged with "Good First Issue
|
|
|
336
300
|
|
|
337
301
|
This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
|
|
338
302
|
|
|
339
|
-
## License
|
|
340
|
-
|
|
341
|
-
This project is
|
|
303
|
+
## License and Attribution
|
|
304
|
+
|
|
305
|
+
This project is:
|
|
306
|
+
- [Built with Python](https://www.amd.com/en/developer/resources/technical-articles/2025/rethinking-local-ai-lemonade-servers-python-advantage.html) with ❤️ for the open source community,
|
|
307
|
+
- Standing on the shoulders of great tools from:
|
|
308
|
+
- [ggml/llama.cpp](https://github.com/ggml-org/llama.cpp)
|
|
309
|
+
- [OnnxRuntime GenAI](https://github.com/microsoft/onnxruntime-genai)
|
|
310
|
+
- [Hugging Face Hub](https://github.com/huggingface/huggingface_hub)
|
|
311
|
+
- [OpenAI API](https://github.com/openai/openai-python)
|
|
312
|
+
- and more...
|
|
313
|
+
- Accelerated by mentorship from the OCV Catalyst program.
|
|
314
|
+
- Licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE).
|
|
315
|
+
- Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
|
|
342
316
|
|
|
343
317
|
<!--This file was originally licensed under Apache 2.0. It has been modified.
|
|
344
318
|
Modifications Copyright (c) 2025 AMD-->
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
<img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
|
|
15
15
|
</a>
|
|
16
16
|
<a href="docs/README.md#installation" title="Check out our instructions">
|
|
17
|
-
<img src="https://img.shields.io/badge/Python-3.10
|
|
17
|
+
<img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" />
|
|
18
18
|
</a>
|
|
19
19
|
<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
|
|
20
20
|
<img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
|
|
@@ -108,48 +108,11 @@ You can also import custom GGUF and ONNX models from Hugging Face by using our [
|
|
|
108
108
|
|
|
109
109
|
Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about it [here](./docs/README.md#software-and-hardware-overview).
|
|
110
110
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
<th colspan="2" align="center">🖥️ OS (x86/x64)</th>
|
|
117
|
-
</tr>
|
|
118
|
-
<tr>
|
|
119
|
-
<th align="center">OGA</th>
|
|
120
|
-
<th align="center">llamacpp</th>
|
|
121
|
-
<th align="center">HF</th>
|
|
122
|
-
<th align="center">Windows</th>
|
|
123
|
-
<th align="center">Linux</th>
|
|
124
|
-
</tr>
|
|
125
|
-
</thead>
|
|
126
|
-
<tbody>
|
|
127
|
-
<tr>
|
|
128
|
-
<td><strong>🧠 CPU</strong></td>
|
|
129
|
-
<td align="center">All platforms</td>
|
|
130
|
-
<td align="center">All platforms</td>
|
|
131
|
-
<td align="center">All platforms</td>
|
|
132
|
-
<td align="center">✅</td>
|
|
133
|
-
<td align="center">✅</td>
|
|
134
|
-
</tr>
|
|
135
|
-
<tr>
|
|
136
|
-
<td><strong>🎮 GPU</strong></td>
|
|
137
|
-
<td align="center">—</td>
|
|
138
|
-
<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
|
|
139
|
-
<td align="center">—</td>
|
|
140
|
-
<td align="center">✅</td>
|
|
141
|
-
<td align="center">✅</td>
|
|
142
|
-
</tr>
|
|
143
|
-
<tr>
|
|
144
|
-
<td><strong>🤖 NPU</strong></td>
|
|
145
|
-
<td align="center">AMD Ryzen™ AI 300 series</td>
|
|
146
|
-
<td align="center">—</td>
|
|
147
|
-
<td align="center">—</td>
|
|
148
|
-
<td align="center">✅</td>
|
|
149
|
-
<td align="center">—</td>
|
|
150
|
-
</tr>
|
|
151
|
-
</tbody>
|
|
152
|
-
</table>
|
|
111
|
+
| Hardware | Engine: OGA | Engine: llamacpp | Engine: HF | Windows | Linux |
|
|
112
|
+
|----------|-------------|------------------|------------|---------|-------|
|
|
113
|
+
| **🧠 CPU** | All platforms | All platforms | All platforms | ✅ | ✅ |
|
|
114
|
+
| **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms* | — | ✅ | ✅ |
|
|
115
|
+
| **🤖 NPU** | AMD Ryzen™ AI 300 series | — | — | ✅ | — |
|
|
153
116
|
|
|
154
117
|
<details>
|
|
155
118
|
<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
|
|
@@ -245,9 +208,19 @@ New contributors can find beginner-friendly issues tagged with "Good First Issue
|
|
|
245
208
|
|
|
246
209
|
This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
|
|
247
210
|
|
|
248
|
-
## License
|
|
249
|
-
|
|
250
|
-
This project is
|
|
211
|
+
## License and Attribution
|
|
212
|
+
|
|
213
|
+
This project is:
|
|
214
|
+
- [Built with Python](https://www.amd.com/en/developer/resources/technical-articles/2025/rethinking-local-ai-lemonade-servers-python-advantage.html) with ❤️ for the open source community,
|
|
215
|
+
- Standing on the shoulders of great tools from:
|
|
216
|
+
- [ggml/llama.cpp](https://github.com/ggml-org/llama.cpp)
|
|
217
|
+
- [OnnxRuntime GenAI](https://github.com/microsoft/onnxruntime-genai)
|
|
218
|
+
- [Hugging Face Hub](https://github.com/huggingface/huggingface_hub)
|
|
219
|
+
- [OpenAI API](https://github.com/openai/openai-python)
|
|
220
|
+
- and more...
|
|
221
|
+
- Accelerated by mentorship from the OCV Catalyst program.
|
|
222
|
+
- Licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE).
|
|
223
|
+
- Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
|
|
251
224
|
|
|
252
225
|
<!--This file was originally licensed under Apache 2.0. It has been modified.
|
|
253
226
|
Modifications Copyright (c) 2025 AMD-->
|
|
@@ -28,13 +28,11 @@ setup(
|
|
|
28
28
|
# Minimal dependencies required for end-users who are running
|
|
29
29
|
# apps deployed on Lemonade SDK
|
|
30
30
|
"invoke>=2.0.0",
|
|
31
|
-
"onnx
|
|
31
|
+
"onnx==1.18.0",
|
|
32
32
|
"pyyaml>=5.4",
|
|
33
33
|
"typeguard>=2.3.13",
|
|
34
34
|
"packaging>=20.9",
|
|
35
|
-
|
|
36
|
-
# change to numpy
|
|
37
|
-
"numpy<2.0.0",
|
|
35
|
+
"numpy",
|
|
38
36
|
"fasteners",
|
|
39
37
|
"GitPython>=3.1.40",
|
|
40
38
|
"psutil>=6.1.1",
|
|
@@ -57,7 +55,7 @@ setup(
|
|
|
57
55
|
# applications, without including developer-focused tools
|
|
58
56
|
# Primary NPU extra using unified PyPI package
|
|
59
57
|
"oga-ryzenai": [
|
|
60
|
-
"onnxruntime-genai-directml-ryzenai==0.7.0.2",
|
|
58
|
+
"onnxruntime-genai-directml-ryzenai==0.7.0.2.1",
|
|
61
59
|
"protobuf>=6.30.1",
|
|
62
60
|
],
|
|
63
61
|
"oga-cpu": [
|
|
@@ -74,12 +72,14 @@ setup(
|
|
|
74
72
|
"datasets",
|
|
75
73
|
"pandas>=1.5.3",
|
|
76
74
|
"matplotlib",
|
|
77
|
-
"model-generate==1.5.0; platform_system=='Windows' and python_version=='3.10'",
|
|
78
75
|
# Install human-eval from a forked repo with Windows support until the
|
|
79
76
|
# PR (https://github.com/openai/human-eval/pull/53) is merged
|
|
80
77
|
"human-eval-windows==1.0.4",
|
|
81
78
|
"lm-eval[api]",
|
|
82
79
|
],
|
|
80
|
+
"model-generate": [
|
|
81
|
+
"model-generate==1.5.0; platform_system=='Windows' and python_version=='3.10'",
|
|
82
|
+
],
|
|
83
83
|
# Keep backwards compatibility for old extras names
|
|
84
84
|
"oga-hybrid": ["lemonade-sdk[oga-ryzenai]"],
|
|
85
85
|
"oga-unified": ["lemonade-sdk[oga-ryzenai]"],
|
|
@@ -128,13 +128,13 @@ setup(
|
|
|
128
128
|
"lsdev=lemonade_server.cli:developer_entrypoint",
|
|
129
129
|
]
|
|
130
130
|
},
|
|
131
|
-
python_requires=">=3.10, <3.
|
|
131
|
+
python_requires=">=3.10, <3.14",
|
|
132
132
|
long_description=open("README.md", "r", encoding="utf-8").read(),
|
|
133
133
|
long_description_content_type="text/markdown",
|
|
134
134
|
include_package_data=True,
|
|
135
135
|
package_data={
|
|
136
136
|
"lemonade_server": ["server_models.json"],
|
|
137
|
-
"lemonade": ["tools/server/static
|
|
137
|
+
"lemonade": ["tools/server/static/**/*"],
|
|
138
138
|
},
|
|
139
139
|
)
|
|
140
140
|
|
|
@@ -36,6 +36,7 @@ def _make_state(recipe, checkpoint) -> Dict:
|
|
|
36
36
|
def from_pretrained(
|
|
37
37
|
checkpoint: str,
|
|
38
38
|
recipe: str = "hf-cpu",
|
|
39
|
+
do_not_upgrade: bool = True,
|
|
39
40
|
) -> Tuple[ModelAdapter, TokenizerAdapter]:
|
|
40
41
|
"""
|
|
41
42
|
Load an LLM and the corresponding tokenizer using a lemonade recipe.
|
|
@@ -43,6 +44,9 @@ def from_pretrained(
|
|
|
43
44
|
Args:
|
|
44
45
|
- checkpoint: huggingface checkpoint that defines the LLM
|
|
45
46
|
- recipe: defines the implementation and hardware used for the LLM
|
|
47
|
+
- do_not_upgrade: prioritize the local copy of the model, if available,
|
|
48
|
+
even if an upgraded copy is available on the server (note: only applies
|
|
49
|
+
for oga-* recipes)
|
|
46
50
|
|
|
47
51
|
Recipe choices:
|
|
48
52
|
- hf-cpu: Huggingface Transformers implementation for CPU with max-perf settings
|
|
@@ -118,6 +122,7 @@ def from_pretrained(
|
|
|
118
122
|
input=checkpoint,
|
|
119
123
|
device=user_backend,
|
|
120
124
|
dtype=backend_to_dtype[user_backend],
|
|
125
|
+
do_not_upgrade=do_not_upgrade,
|
|
121
126
|
)
|
|
122
127
|
|
|
123
128
|
return state.model, state.tokenizer
|
|
@@ -2,6 +2,7 @@ import os
|
|
|
2
2
|
from typing import Optional
|
|
3
3
|
import socket
|
|
4
4
|
from huggingface_hub import model_info, snapshot_download
|
|
5
|
+
from huggingface_hub.errors import LocalEntryNotFoundError
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
def is_offline():
|
|
@@ -50,10 +51,11 @@ def get_base_model(checkpoint: str) -> Optional[str]:
|
|
|
50
51
|
return None
|
|
51
52
|
|
|
52
53
|
|
|
53
|
-
def custom_snapshot_download(repo_id, **kwargs):
|
|
54
|
+
def _symlink_safe_snapshot_download(repo_id, **kwargs):
|
|
54
55
|
"""
|
|
55
56
|
Custom snapshot download with retry logic for Windows symlink privilege errors.
|
|
56
57
|
"""
|
|
58
|
+
|
|
57
59
|
for attempt in range(2):
|
|
58
60
|
try:
|
|
59
61
|
return snapshot_download(repo_id=repo_id, **kwargs)
|
|
@@ -65,3 +67,27 @@ def custom_snapshot_download(repo_id, **kwargs):
|
|
|
65
67
|
):
|
|
66
68
|
continue
|
|
67
69
|
raise
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def custom_snapshot_download(repo_id, do_not_upgrade=False, **kwargs):
|
|
73
|
+
"""
|
|
74
|
+
Custom snapshot download with:
|
|
75
|
+
1) retry logic for Windows symlink privilege errors.
|
|
76
|
+
2) do_not_upgrade allows the caller to prioritize a local copy
|
|
77
|
+
of the model over an upgraded remote copy.
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
if do_not_upgrade:
|
|
81
|
+
try:
|
|
82
|
+
# Prioritize the local model, if available
|
|
83
|
+
return _symlink_safe_snapshot_download(
|
|
84
|
+
repo_id, local_files_only=True, **kwargs
|
|
85
|
+
)
|
|
86
|
+
except LocalEntryNotFoundError:
|
|
87
|
+
# LocalEntryNotFoundError means there was no local model, at this point
|
|
88
|
+
# we'll accept a remote model
|
|
89
|
+
return _symlink_safe_snapshot_download(
|
|
90
|
+
repo_id, local_files_only=False, **kwargs
|
|
91
|
+
)
|
|
92
|
+
else:
|
|
93
|
+
return _symlink_safe_snapshot_download(repo_id, **kwargs)
|
|
@@ -585,7 +585,7 @@ def identify_gguf_models(
|
|
|
585
585
|
return core_files, sharded_files
|
|
586
586
|
|
|
587
587
|
|
|
588
|
-
def download_gguf(config_checkpoint, config_mmproj=None) -> dict:
|
|
588
|
+
def download_gguf(config_checkpoint, config_mmproj=None, do_not_upgrade=False) -> dict:
|
|
589
589
|
"""
|
|
590
590
|
Downloads the GGUF file for the given model configuration.
|
|
591
591
|
|
|
@@ -605,6 +605,7 @@ def download_gguf(config_checkpoint, config_mmproj=None) -> dict:
|
|
|
605
605
|
snapshot_folder = custom_snapshot_download(
|
|
606
606
|
checkpoint,
|
|
607
607
|
allow_patterns=list(core_files.values()) + sharded_files,
|
|
608
|
+
do_not_upgrade=do_not_upgrade,
|
|
608
609
|
)
|
|
609
610
|
|
|
610
611
|
# Ensure we downloaded all expected files
|
|
@@ -654,6 +654,7 @@ class OgaLoad(FirstTool):
|
|
|
654
654
|
download_only: bool = False,
|
|
655
655
|
trust_remote_code=False,
|
|
656
656
|
subfolder: str = None,
|
|
657
|
+
do_not_upgrade: bool = False,
|
|
657
658
|
) -> State:
|
|
658
659
|
from lemonade.common.network import (
|
|
659
660
|
custom_snapshot_download,
|
|
@@ -744,7 +745,7 @@ class OgaLoad(FirstTool):
|
|
|
744
745
|
input_model_path = custom_snapshot_download(
|
|
745
746
|
checkpoint,
|
|
746
747
|
ignore_patterns=["*.md", "*.txt"],
|
|
747
|
-
local_files_only=offline,
|
|
748
|
+
local_files_only=offline or do_not_upgrade,
|
|
748
749
|
)
|
|
749
750
|
# Check if model is ONNX or safetensors
|
|
750
751
|
is_onnx_model = any(
|
|
@@ -100,9 +100,10 @@ class OrtGenaiModel(ModelAdapter):
|
|
|
100
100
|
max_new_tokens=512,
|
|
101
101
|
min_new_tokens=0,
|
|
102
102
|
do_sample=True,
|
|
103
|
-
top_k=
|
|
104
|
-
top_p=
|
|
105
|
-
temperature=
|
|
103
|
+
top_k=None,
|
|
104
|
+
top_p=None,
|
|
105
|
+
temperature=None,
|
|
106
|
+
repeat_penalty=None,
|
|
106
107
|
streamer: OrtGenaiStreamer = None,
|
|
107
108
|
pad_token_id=None,
|
|
108
109
|
stopping_criteria=None,
|
|
@@ -154,38 +155,58 @@ class OrtGenaiModel(ModelAdapter):
|
|
|
154
155
|
if random_seed is None:
|
|
155
156
|
random_seed = -1 # In og.Generator, -1 = seed with random device
|
|
156
157
|
|
|
158
|
+
# Get search config if available, otherwise use empty dict
|
|
159
|
+
# Thanks to the empty dict, if the model doesn't have a built-in search
|
|
160
|
+
# config, the .get() calls will all just use the default values
|
|
161
|
+
search_config = {}
|
|
157
162
|
if self.config and "search" in self.config:
|
|
158
163
|
search_config = self.config["search"]
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
164
|
+
|
|
165
|
+
# Apply parameter hierarchy: user provided > search config > defaults
|
|
166
|
+
default_top_k = 50
|
|
167
|
+
default_top_p = 1.0
|
|
168
|
+
default_temperature = 0.7
|
|
169
|
+
default_repetition_penalty = 1.0
|
|
170
|
+
|
|
171
|
+
top_k_to_use = (
|
|
172
|
+
top_k if top_k is not None else search_config.get("top_k", default_top_k)
|
|
173
|
+
)
|
|
174
|
+
top_p_to_use = (
|
|
175
|
+
top_p if top_p is not None else search_config.get("top_p", default_top_p)
|
|
176
|
+
)
|
|
177
|
+
temperature_to_use = (
|
|
178
|
+
temperature
|
|
179
|
+
if temperature is not None
|
|
180
|
+
else search_config.get("temperature", default_temperature)
|
|
181
|
+
)
|
|
182
|
+
# Map the llamacpp name, `repeat_penalty`, to the OGA name, `repetition_penalty`
|
|
183
|
+
repetition_penalty_to_use = (
|
|
184
|
+
repeat_penalty
|
|
185
|
+
if repeat_penalty is not None
|
|
186
|
+
else search_config.get("repetition_penalty", default_repetition_penalty)
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
# Set search options once with all parameters
|
|
190
|
+
params.set_search_options(
|
|
191
|
+
do_sample=search_config.get("do_sample", do_sample),
|
|
192
|
+
top_k=top_k_to_use,
|
|
193
|
+
top_p=top_p_to_use,
|
|
194
|
+
temperature=temperature_to_use,
|
|
195
|
+
repetition_penalty=repetition_penalty_to_use,
|
|
196
|
+
max_length=max_length_to_use,
|
|
197
|
+
min_length=min_length,
|
|
198
|
+
early_stopping=search_config.get("early_stopping", False),
|
|
199
|
+
length_penalty=search_config.get("length_penalty", 1.0),
|
|
200
|
+
num_beams=search_config.get("num_beams", 1),
|
|
201
|
+
num_return_sequences=search_config.get("num_return_sequences", 1),
|
|
202
|
+
past_present_share_buffer=search_config.get(
|
|
203
|
+
"past_present_share_buffer", True
|
|
204
|
+
),
|
|
205
|
+
random_seed=random_seed,
|
|
206
|
+
# Not currently supported by OGA
|
|
207
|
+
# diversity_penalty=search_config.get('diversity_penalty', 0.0),
|
|
208
|
+
# no_repeat_ngram_size=search_config.get('no_repeat_ngram_size', 0),
|
|
209
|
+
)
|
|
189
210
|
params.try_graph_capture_with_max_batch_size(1)
|
|
190
211
|
|
|
191
212
|
generator = og.Generator(self.model, params)
|