lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

@@ -0,0 +1,295 @@
1
+ Metadata-Version: 2.4
2
+ Name: lemonade-sdk
3
+ Version: 8.0.5
4
+ Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
+ Author-email: lemonade@amd.com
6
+ Requires-Python: >=3.10, <3.13
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ License-File: NOTICE.md
10
+ Requires-Dist: invoke>=2.0.0
11
+ Requires-Dist: onnx<1.18.0,>=1.11.0
12
+ Requires-Dist: pyyaml>=5.4
13
+ Requires-Dist: typeguard>=2.3.13
14
+ Requires-Dist: packaging>=20.9
15
+ Requires-Dist: numpy<2.0.0
16
+ Requires-Dist: fasteners
17
+ Requires-Dist: GitPython>=3.1.40
18
+ Requires-Dist: psutil>=6.1.1
19
+ Requires-Dist: wmi
20
+ Requires-Dist: py-cpuinfo
21
+ Requires-Dist: pytz
22
+ Requires-Dist: zstandard
23
+ Requires-Dist: fastapi
24
+ Requires-Dist: uvicorn[standard]
25
+ Requires-Dist: openai>=1.81.0
26
+ Requires-Dist: transformers<=4.51.3
27
+ Requires-Dist: jinja2
28
+ Requires-Dist: tabulate
29
+ Requires-Dist: sentencepiece
30
+ Requires-Dist: huggingface-hub==0.33.0
31
+ Provides-Extra: oga-hybrid
32
+ Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
33
+ Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
34
+ Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
35
+ Provides-Extra: oga-cpu
36
+ Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
37
+ Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
38
+ Provides-Extra: dev
39
+ Requires-Dist: torch>=2.6.0; extra == "dev"
40
+ Requires-Dist: accelerate; extra == "dev"
41
+ Requires-Dist: datasets; extra == "dev"
42
+ Requires-Dist: pandas>=1.5.3; extra == "dev"
43
+ Requires-Dist: matplotlib; extra == "dev"
44
+ Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
45
+ Requires-Dist: lm-eval[api]; extra == "dev"
46
+ Provides-Extra: oga-hybrid-minimal
47
+ Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
48
+ Provides-Extra: oga-cpu-minimal
49
+ Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
50
+ Provides-Extra: llm
51
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm"
52
+ Provides-Extra: llm-oga-cpu
53
+ Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
54
+ Provides-Extra: llm-oga-igpu
55
+ Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
56
+ Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
57
+ Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
58
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
59
+ Provides-Extra: llm-oga-cuda
60
+ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
61
+ Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
62
+ Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
63
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
64
+ Provides-Extra: llm-oga-npu
65
+ Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
66
+ Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
67
+ Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
68
+ Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
69
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
70
+ Provides-Extra: llm-oga-hybrid
71
+ Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
72
+ Provides-Extra: llm-oga-unified
73
+ Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
74
+ Dynamic: author-email
75
+ Dynamic: description
76
+ Dynamic: description-content-type
77
+ Dynamic: license-file
78
+ Dynamic: provides-extra
79
+ Dynamic: requires-dist
80
+ Dynamic: requires-python
81
+ Dynamic: summary
82
+
83
+ ## 🍋 Lemonade: Local LLM Serving with GPU and NPU acceleration
84
+
85
+ <p align="center">
86
+ <a href="https://discord.gg/5xXzkMu8Zk">
87
+ <img src="https://img.shields.io/badge/Discord-7289DA?logo=discord&logoColor=white" alt="Discord" />
88
+ </a>
89
+ <a href="https://github.com/lemonade-sdk/lemonade/tree/main/test" title="Check out our tests">
90
+ <img src="https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg" alt="Lemonade tests" />
91
+ </a>
92
+ <a href="docs/README.md#installation" title="Check out our instructions">
93
+ <img src="https://img.shields.io/badge/Windows-11-0078D6?logo=windows&logoColor=white" alt="Windows 11" />
94
+ </a>
95
+ <a href="https://lemonade-server.ai/#linux" title="Ubuntu 24.04 & 25.04 Supported">
96
+ <img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
97
+ </a>
98
+ <a href="docs/README.md#installation" title="Check out our instructions">
99
+ <img src="https://img.shields.io/badge/Python-3.10%20%7C%203.12-blue?logo=python&logoColor=white" alt="Made with Python" />
100
+ </a>
101
+ <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
102
+ <img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
103
+ </a>
104
+ <a href="https://github.com/lemonade-sdk/lemonade/releases/latest" title="Download the latest release">
105
+ <img src="https://img.shields.io/github/v/release/lemonade-sdk/lemonade?include_prereleases" alt="Latest Release" />
106
+ </a>
107
+ <a href="https://tooomm.github.io/github-release-stats/?username=lemonade-sdk&repository=lemonade">
108
+ <img src="https://img.shields.io/github/downloads/lemonade-sdk/lemonade/total.svg" alt="GitHub downloads" />
109
+ </a>
110
+ <a href="https://github.com/lemonade-sdk/lemonade/issues">
111
+ <img src="https://img.shields.io/github/issues/lemonade-sdk/lemonade" alt="GitHub issues" />
112
+ </a>
113
+ <a href="https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE">
114
+ <img src="https://img.shields.io/badge/License-Apache-yellow.svg" alt="License: Apache" />
115
+ </a>
116
+ <a href="https://github.com/psf/black">
117
+ <img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style: black" />
118
+ </a>
119
+ <a href="https://star-history.com/#lemonade-sdk/lemonade">
120
+ <img src="https://img.shields.io/badge/Star%20History-View-brightgreen" alt="Star History Chart" />
121
+ </a>
122
+ </p>
123
+ <p align="center">
124
+ <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/banner.png?raw=true" alt="Lemonade Banner" />
125
+ </p>
126
+ <h3 align="center">
127
+ <a href="https://lemonade-server.ai">Download</a> |
128
+ <a href="https://lemonade-server.ai/docs/">Documentation</a> |
129
+ <a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
130
+ </h3>
131
+
132
+ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
133
+
134
+ ## Getting Started
135
+
136
+ <div align="center">
137
+
138
+ | Step 1: Download & Install | Step 2: Launch and Pull Models | Step 3: Start chatting! |
139
+ |:---------------------------:|:-------------------------------:|:------------------------:|
140
+ | <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/install.gif?raw=true" alt="Download & Install" width="245" /> | <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/launch_and_pull.gif?raw=true" alt="Launch and Pull Models" width="245" /> | <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/chat.gif?raw=true" alt="Start chatting!" width="245" /> |
141
+ |Install using a [GUI](https://github.com/lemonade-sdk/lemonade/releases/latest/download/Lemonade_Server_Installer.exe) (Windows only), [pip](https://lemonade-server.ai/install_options.html), or [from source](https://lemonade-server.ai/install_options.html). |Use the [Model Manager](#model-library) to install models|A built-in chat interface is available!|
142
+ </div>
143
+
144
+ ### Use it with your favorite OpenAI-compatible app!
145
+
146
+ <p align="center">
147
+ <a href="https://lemonade-server.ai/docs/server/apps/open-webui/" title="Open WebUI" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/openwebui.jpg" alt="Open WebUI" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/continue/" title="Continue" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/continue_dev.png" alt="Continue" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/amd/gaia" title="Gaia" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/gaia.ico" alt="Gaia" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/anythingLLM/" title="AnythingLLM" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/anything_llm.png" alt="AnythingLLM" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/ai-dev-gallery/" title="AI Dev Gallery" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_dev_gallery.webp" alt="AI Dev Gallery" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/lm-eval/" title="LM-Eval" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/codeGPT/" title="CodeGPT" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/codegpt.jpg" alt="CodeGPT" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" title="AI Toolkit" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI 
Toolkit" width="60" /></a>
148
+ </p>
149
+
150
+ > [!TIP]
151
+ > Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email lemonade@amd.com.
152
+
153
+ ## Using the CLI
154
+
155
+ To run and chat with Gemma 3:
156
+
157
+ ```
158
+ lemonade-server run Gemma-3-4b-it-GGUF
159
+ ```
160
+
161
+ To install models ahead of time, use the `pull` command:
162
+
163
+ ```
164
+ lemonade-server pull Gemma-3-4b-it-GGUF
165
+ ```
166
+
167
+ To check all models available, use the `list` command:
168
+
169
+ ```
170
+ lemonade-server list
171
+ ```
172
+
173
+ > Note: If you installed from source, use the `lemonade-server-dev` command instead.
174
+
175
+ ## Model Library
176
+
177
+ Lemonade supports both GGUF and ONNX models, as detailed in the [Supported Configurations](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
178
+
179
+ You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires the server to be running).
180
+ <p align="center">
181
+ <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/model_manager.png?raw=true" alt="Model Manager" width="650" />
182
+ </p>
183
+
184
+ ## Supported Configurations
185
+
186
+ Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about them [here](./docs/README.md#software-and-hardware-overview).
187
+
188
+ <table>
189
+ <thead>
190
+ <tr>
191
+ <th rowspan="2">Hardware</th>
192
+ <th colspan="3" align="center">🛠️ Engine Support</th>
193
+ <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
194
+ </tr>
195
+ <tr>
196
+ <th align="center">OGA</th>
197
+ <th align="center">llamacpp</th>
198
+ <th align="center">HF</th>
199
+ <th align="center">Windows</th>
200
+ <th align="center">Linux</th>
201
+ </tr>
202
+ </thead>
203
+ <tbody>
204
+ <tr>
205
+ <td><strong>🧠 CPU</strong></td>
206
+ <td align="center">All platforms</td>
207
+ <td align="center">All platforms</td>
208
+ <td align="center">All platforms</td>
209
+ <td align="center">✅</td>
210
+ <td align="center">✅</td>
211
+ </tr>
212
+ <tr>
213
+ <td><strong>🎮 GPU</strong></td>
214
+ <td align="center">—</td>
215
+ <td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
216
+ <td align="center">—</td>
217
+ <td align="center">✅</td>
218
+ <td align="center">✅</td>
219
+ </tr>
220
+ <tr>
221
+ <td><strong>🤖 NPU</strong></td>
222
+ <td align="center">AMD Ryzen™ AI 300 series</td>
223
+ <td align="center">—</td>
224
+ <td align="center">—</td>
225
+ <td align="center">✅</td>
226
+ <td align="center">—</td>
227
+ </tr>
228
+ </tbody>
229
+ </table>
230
+
231
+
232
+ ## Integrate Lemonade Server with Your Application
233
+
234
+ You can use any OpenAI-compatible client library by configuring it to use `http://localhost:8000/api/v1` as the base URL. A table of official and popular OpenAI clients for different languages is shown below.
235
+
236
+ Feel free to pick and choose your preferred language.
237
+
238
+
239
+ | Python | C++ | Java | C# | Node.js | Go | Ruby | Rust | PHP |
240
+ |--------|-----|------|----|---------|----|-------|------|-----|
241
+ | [openai-python](https://github.com/openai/openai-python) | [openai-cpp](https://github.com/olrea/openai-cpp) | [openai-java](https://github.com/openai/openai-java) | [openai-dotnet](https://github.com/openai/openai-dotnet) | [openai-node](https://github.com/openai/openai-node) | [go-openai](https://github.com/sashabaranov/go-openai) | [ruby-openai](https://github.com/alexrudall/ruby-openai) | [async-openai](https://github.com/64bit/async-openai) | [openai-php](https://github.com/openai-php/client) |
242
+
243
+
244
+ ### Python Client Example
245
+ ```python
246
+ from openai import OpenAI
247
+
248
+ # Initialize the client to use Lemonade Server
249
+ client = OpenAI(
250
+ base_url="http://localhost:8000/api/v1",
251
+ api_key="lemonade" # required but unused
252
+ )
253
+
254
+ # Create a chat completion
255
+ completion = client.chat.completions.create(
256
+ model="Llama-3.2-1B-Instruct-Hybrid", # or any other available model
257
+ messages=[
258
+ {"role": "user", "content": "What is the capital of France?"}
259
+ ]
260
+ )
261
+
262
+ # Print the response
263
+ print(completion.choices[0].message.content)
264
+ ```
265
+
266
+ For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
267
+
268
+ ## Beyond an LLM Server
269
+
270
+ The [Lemonade SDK](./docs/README.md) also includes the following components:
271
+
272
+ - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
273
+ - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
274
+
275
+ ## Contributing
276
+
277
+ We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
278
+
279
+ New contributors can find beginner-friendly issues tagged with "Good First Issue" to get started.
280
+
281
+ <a href="https://github.com/lemonade-sdk/lemonade/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22">
282
+ <img src="https://img.shields.io/badge/🍋Lemonade-Good%20First%20Issue-yellowgreen?colorA=38b000&colorB=cccccc" alt="Good First Issue" />
283
+ </a>
284
+
285
+ ## Maintainers
286
+
287
+ This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
288
+
289
+ ## License
290
+
291
+ This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
292
+
293
+ <!--This file was originally licensed under Apache 2.0. It has been modified.
294
+ Modifications Copyright (c) 2025 AMD-->
295
+
@@ -1,19 +1,20 @@
1
1
  lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
2
- lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
2
+ lemonade/api.py,sha256=kGz8N_9TuN3peFG8fES0odN0bWR9itLNomlR-FC2z8k,5515
3
3
  lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
4
4
  lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=GImAlzwPDxsACkYFf5rTrX8QMH23tcqdm6vgjfFYD10,22
7
+ lemonade/version.py,sha256=obOXkQD52zgzH-mM2spS6LQ-gEWkuaiGpNTM_ISH0D8,22
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
10
  lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
11
11
  lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
12
12
  lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
13
+ lemonade/common/inference_engines.py,sha256=lcmir_pATr71TfSBJoIZEi3G9xyxNwi2_xpPvPD8_xI,12932
13
14
  lemonade/common/network.py,sha256=EkWTxjJ-UM469nEJdzEl-5UNO7dPfVfFzU2SVMMPaD0,1425
14
15
  lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
15
16
  lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
16
- lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD40,12234
17
+ lemonade/common/system_info.py,sha256=dOtX8WLHCz1xmURZWnqhDbyNZv_AulrpX_bbI58eHFQ,27084
17
18
  lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
18
19
  lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
19
20
  lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
@@ -22,10 +23,10 @@ lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
22
23
  lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
23
24
  lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
24
25
  lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
25
- lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9532
26
- lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
27
- lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
28
- lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
26
+ lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
27
+ lemonade/tools/management_tools.py,sha256=U8GaJnjdXyQ9sw8UxBQMc7glpaLciaVphASaQS4kJsA,10202
28
+ lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
29
+ lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
29
30
  lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
30
31
  lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
31
32
  lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
@@ -35,36 +36,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
35
36
  lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
36
37
  lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
38
  lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
38
- lemonade/tools/oga/load.py,sha256=xSP0DWoGd5zBRozSafj1MMyIQyHJuIRj_vNlCTx8mfs,28309
39
+ lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
39
40
  lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
40
41
  lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
42
- lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
42
+ lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
43
+ lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
43
44
  lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
45
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
45
46
  lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
46
47
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
- lemonade/tools/server/llamacpp.py,sha256=vjFNelm_VyKBBgWmltsAwLI7ncQ9AwVFQD7krZnF42w,16199
48
- lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
48
+ lemonade/tools/server/llamacpp.py,sha256=e1MYKSJBu-jlOE5GQSBsC9CUPAeqw5wXXxoxBKA5zb8,20038
49
+ lemonade/tools/server/serve.py,sha256=Pp_w4iuRMkpJLF-XrTsBIBrSNBQIOl8PRZC_Cj4URnU,57334
49
50
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
50
51
  lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
51
52
  lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
52
53
  lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
53
- lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
54
- lemonade/tools/server/static/webapp.html,sha256=kPzORaogVRdFQewXyNI_JaH2ZZCTaq5zfMSyzuoFTuA,22414
54
+ lemonade/tools/server/static/styles.css,sha256=jXFPIHPrhRz_CJyRJrYusAECSDTO00sKUu7ajrQgFuA,24655
55
+ lemonade/tools/server/static/webapp.html,sha256=tmwASvULb3d2_NfHEH9rKbEEJl3D7ygXjaCLVYkyWbg,35969
55
56
  lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
56
57
  lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
57
58
  lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
58
59
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
59
60
  lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
60
- lemonade_sdk-8.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
- lemonade_sdk-8.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
62
- lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
63
- lemonade_server/model_manager.py,sha256=Yvlsl0wipKfryKULH5ASQ9INhLQXPq9dTGQVBXf2_h0,16167
64
- lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
65
- lemonade_server/server_models.json,sha256=O5zk94gH_zRq6GSwbqvi2SNwx51eY9uqgAl_kxTi0iM,7271
66
- lemonade_sdk-8.0.3.dist-info/METADATA,sha256=WesWziLri9jQjZILRENliiJbggTVF8LmXKVIERInVbE,8285
67
- lemonade_sdk-8.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
- lemonade_sdk-8.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
- lemonade_sdk-8.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
- lemonade_sdk-8.0.3.dist-info/RECORD,,
61
+ lemonade_sdk-8.0.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
62
+ lemonade_sdk-8.0.5.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
63
+ lemonade_server/cli.py,sha256=2Un5uLK04fIxlfcTiZ0T_EWbbaq2tYymkUHNFeuvB7g,16041
64
+ lemonade_server/model_manager.py,sha256=0HqLR38uOu_hxRWVYQ_P6YmwaR-jkDuaAqGYo60X8C0,16702
65
+ lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
66
+ lemonade_server/server_models.json,sha256=Y-j9KAvHmfv77welC0rfRao4inLBce6AVySb-oy_uNE,7519
67
+ lemonade_sdk-8.0.5.dist-info/METADATA,sha256=e2w0jPyEnyk-SeLAbYZgeGldq-2CQHm9Hly_mQgZ8uo,15224
68
+ lemonade_sdk-8.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
69
+ lemonade_sdk-8.0.5.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
70
+ lemonade_sdk-8.0.5.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
71
+ lemonade_sdk-8.0.5.dist-info/RECORD,,
lemonade_server/cli.py CHANGED
@@ -27,43 +27,75 @@ class DeleteError(Exception):
27
27
  """
28
28
 
29
29
 
30
+ class ServerTimeoutError(Exception):
31
+ """
32
+ The server failed to start within the timeout period
33
+ """
34
+
35
+
36
+ class ModelNotAvailableError(Exception):
37
+ """
38
+ The specified model is not available on the server
39
+ """
40
+
41
+
30
42
  def serve(
31
- port: int,
43
+ port: int = None,
32
44
  log_level: str = None,
33
45
  tray: bool = False,
46
+ use_thread: bool = False,
34
47
  ):
35
48
  """
36
49
  Execute the serve command
37
50
  """
38
51
 
39
- # Check if Lemonade Server is already running
40
- _, running_port = get_server_info()
41
- if running_port is not None:
42
- print(
43
- (
44
- f"Lemonade Server is already running on port {running_port}\n"
45
- "Please stop the existing server before starting a new instance."
46
- ),
47
- )
48
- sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
49
-
50
52
  # Otherwise, start the server
51
53
  print("Starting Lemonade Server...")
52
54
  from lemonade.tools.server.serve import Server, DEFAULT_PORT, DEFAULT_LOG_LEVEL
53
55
 
54
- server = Server()
55
56
  port = port if port is not None else DEFAULT_PORT
56
57
  log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
57
58
 
58
59
  # Hidden environment variable to enable input truncation (experimental feature)
59
60
  truncate_inputs = "LEMONADE_TRUNCATE_INPUTS" in os.environ
60
61
 
61
- server.run(
62
- port=port,
63
- log_level=log_level,
64
- truncate_inputs=truncate_inputs,
65
- tray=tray,
66
- )
62
+ # Start the server
63
+ serve_kwargs = {
64
+ "log_level": log_level,
65
+ "truncate_inputs": truncate_inputs,
66
+ "tray": tray,
67
+ }
68
+ server = Server()
69
+ if not use_thread:
70
+ server.run(
71
+ port=port,
72
+ **serve_kwargs,
73
+ )
74
+ else:
75
+ from threading import Thread
76
+ import time
77
+
78
+ # Start a background thread to run the server
79
+ server_thread = Thread(
80
+ target=server.run,
81
+ args=(port,),
82
+ kwargs=serve_kwargs,
83
+ daemon=True,
84
+ )
85
+ server_thread.start()
86
+
87
+ # Wait for the server to be ready
88
+ max_wait_time = 30
89
+ wait_interval = 0.5
90
+ waited = 0
91
+ while waited < max_wait_time:
92
+ time.sleep(wait_interval)
93
+ _, running_port = get_server_info()
94
+ if running_port is not None:
95
+ break
96
+ waited += wait_interval
97
+
98
+ return port, server_thread
67
99
 
68
100
 
69
101
  def stop():
@@ -161,9 +193,8 @@ def pull(
161
193
  if pull_response.status_code != 200:
162
194
  raise PullError(
163
195
  f"Failed to install {model_name}. Check the "
164
- "Lemonade Server log for more information. A list of supported models "
165
- "is provided at "
166
- "https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/server_models.md"
196
+ "Lemonade Server log for more information. You can list "
197
+ "supported models with `lemonade-server list`"
167
198
  )
168
199
  else:
169
200
  from lemonade_server.model_manager import ModelManager
@@ -212,6 +243,53 @@ def delete(model_names: List[str]):
212
243
  ModelManager().delete_model(model_name)
213
244
 
214
245
 
246
+ def run(model_name: str):
247
+ """
248
+ Start the server if not running and open the webapp with the specified model
249
+ """
250
+ import webbrowser
251
+ import time
252
+
253
+ # Start the server if not running
254
+ _, port = get_server_info()
255
+ server_previously_running = port is not None
256
+ if not server_previously_running:
257
+ port, server_thread = serve(use_thread=True, tray=True, log_level="info")
258
+
259
+ # Pull model
260
+ pull([model_name])
261
+
262
+ # Load model
263
+ load(model_name, port)
264
+
265
+ # Open the webapp with the specified model
266
+ url = f"http://localhost:{port}/?model={model_name}#llm-chat"
267
+ print(f"You can now chat with {model_name} at {url}")
268
+ webbrowser.open(url)
269
+
270
+ # Keep the server running if we started it
271
+ if not server_previously_running:
272
+ while server_thread.is_alive():
273
+ time.sleep(0.5)
274
+
275
+
276
+ def load(model_name: str, port: int):
277
+ """
278
+ Load a model using the endpoint
279
+ """
280
+ import requests
281
+
282
+ base_url = f"http://localhost:{port}/api/v1"
283
+
284
+ # Load the model
285
+ load_response = requests.post(f"{base_url}/load", json={"model_name": model_name})
286
+ if load_response.status_code != 200:
287
+ raise ModelLoadError(
288
+ f"Failed to load {model_name}. Check the "
289
+ "Lemonade Server log for more information."
290
+ )
291
+
292
+
215
293
  def version():
216
294
  """
217
295
  Print the version number
@@ -294,6 +372,46 @@ def get_server_info() -> Tuple[int | None, int | None]:
294
372
  return None, None
295
373
 
296
374
 
375
+ def list_models():
376
+ """
377
+ List recommended models and their download status
378
+ """
379
+ from tabulate import tabulate
380
+ from lemonade_server.model_manager import ModelManager
381
+
382
+ model_manager = ModelManager()
383
+
384
+ # Get all supported models and downloaded models
385
+ supported_models = model_manager.supported_models
386
+ downloaded_models = model_manager.downloaded_models
387
+
388
+ # Filter to only show recommended models
389
+ recommended_models = {
390
+ model_name: model_info
391
+ for model_name, model_info in supported_models.items()
392
+ if model_info.get("suggested", False)
393
+ }
394
+
395
+ # Create table data
396
+ table_data = []
397
+ for model_name, model_info in recommended_models.items():
398
+ downloaded_status = "Yes" if model_name in downloaded_models else "No"
399
+
400
+ # Get model labels/type
401
+ labels = model_info.get("labels", [])
402
+ model_type = ", ".join(labels) if labels else "-"
403
+
404
+ table_data.append([model_name, downloaded_status, model_type])
405
+
406
+ # Sort by model name for consistent display
407
+ # Show downloaded models first
408
+ table_data.sort(key=lambda x: (x[1] == "No", x[0].lower()))
409
+
410
+ # Display table
411
+ headers = ["Model Name", "Downloaded", "Details"]
412
+ print(tabulate(table_data, headers=headers, tablefmt="simple"))
413
+
414
+
297
415
  def main():
298
416
  parser = argparse.ArgumentParser(
299
417
  description="Serve LLMs on CPU, GPU, and NPU.",
@@ -333,6 +451,11 @@ def main():
333
451
  # Stop command
334
452
  stop_parser = subparsers.add_parser("stop", help="Stop the server")
335
453
 
454
+ # List command
455
+ list_parser = subparsers.add_parser(
456
+ "list", help="List recommended models and their download status"
457
+ )
458
+
336
459
  # Pull command
337
460
  pull_parser = subparsers.add_parser(
338
461
  "pull",
@@ -381,6 +504,16 @@ def main():
381
504
  nargs="+",
382
505
  )
383
506
 
507
+ # Run command
508
+ run_parser = subparsers.add_parser(
509
+ "run",
510
+ help="Chat with specified model (starts server if needed)",
511
+ )
512
+ run_parser.add_argument(
513
+ "model",
514
+ help="Lemonade Server model name to run",
515
+ )
516
+
384
517
  args = parser.parse_args()
385
518
 
386
519
  if os.name != "nt":
@@ -389,6 +522,15 @@ def main():
389
522
  if args.version:
390
523
  version()
391
524
  elif args.command == "serve":
525
+ _, running_port = get_server_info()
526
+ if running_port is not None:
527
+ print(
528
+ (
529
+ f"Lemonade Server is already running on port {running_port}\n"
530
+ "Please stop the existing server before starting a new instance."
531
+ ),
532
+ )
533
+ sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
392
534
  serve(
393
535
  port=args.port,
394
536
  log_level=args.log_level,
@@ -396,6 +538,8 @@ def main():
396
538
  )
397
539
  elif args.command == "status":
398
540
  status()
541
+ elif args.command == "list":
542
+ list_models()
399
543
  elif args.command == "pull":
400
544
  pull(
401
545
  args.model,
@@ -408,6 +552,8 @@ def main():
408
552
  delete(args.model)
409
553
  elif args.command == "stop":
410
554
  stop()
555
+ elif args.command == "run":
556
+ run(args.model)
411
557
  elif args.command == "help" or not args.command:
412
558
  parser.print_help()
413
559