lemonade-sdk 8.1.0__py3-none-any.whl → 8.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic; consult the registry's security advisory for more details.
- lemonade/common/inference_engines.py +62 -77
- lemonade/common/system_info.py +61 -44
- lemonade/tools/llamacpp/load.py +13 -4
- lemonade/tools/llamacpp/utils.py +222 -54
- lemonade/tools/oga/load.py +3 -3
- lemonade/tools/server/llamacpp.py +30 -53
- lemonade/tools/server/serve.py +54 -104
- lemonade/tools/server/static/styles.css +203 -0
- lemonade/tools/server/static/webapp.html +507 -71
- lemonade/tools/server/tray.py +4 -2
- lemonade/tools/server/utils/thread.py +2 -4
- lemonade/version.py +1 -1
- lemonade_install/install.py +25 -2
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.1.dist-info}/METADATA +45 -6
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.1.dist-info}/RECORD +22 -22
- lemonade_server/cli.py +79 -26
- lemonade_server/server_models.json +26 -1
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.1.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.1.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.1.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.1.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.1.dist-info}/top_level.txt +0 -0
lemonade/tools/server/tray.py
CHANGED
|
@@ -263,8 +263,10 @@ class LemonadeTray(SystemTray):
|
|
|
263
263
|
self.server.uvicorn_server.should_exit = True
|
|
264
264
|
self.server_thread.join(timeout=2)
|
|
265
265
|
|
|
266
|
-
# Update the port
|
|
266
|
+
# Update the port in both the tray and the server instance
|
|
267
267
|
self.port = new_port
|
|
268
|
+
if self.server:
|
|
269
|
+
self.server.port = new_port
|
|
268
270
|
|
|
269
271
|
# Restart the server
|
|
270
272
|
self.server_thread = threading.Thread(target=self.start_server, daemon=True)
|
|
@@ -425,7 +427,7 @@ class LemonadeTray(SystemTray):
|
|
|
425
427
|
Start the uvicorn server.
|
|
426
428
|
"""
|
|
427
429
|
self.server = self.server_factory()
|
|
428
|
-
self.server.uvicorn_server = self.server.run_in_thread(
|
|
430
|
+
self.server.uvicorn_server = self.server.run_in_thread()
|
|
429
431
|
self.server.uvicorn_server.run()
|
|
430
432
|
|
|
431
433
|
def run(self):
|
|
@@ -26,7 +26,7 @@ class ServerRunner(threading.Thread):
|
|
|
26
26
|
def run(self):
|
|
27
27
|
try:
|
|
28
28
|
# Create the server instance
|
|
29
|
-
self.server = Server()
|
|
29
|
+
self.server = Server(port=self.port, log_level="warning")
|
|
30
30
|
|
|
31
31
|
# Configure the server with model/tokenizer
|
|
32
32
|
self.server.model = self.model
|
|
@@ -44,9 +44,7 @@ class ServerRunner(threading.Thread):
|
|
|
44
44
|
)
|
|
45
45
|
|
|
46
46
|
# Set up the server for threaded execution
|
|
47
|
-
self.uvicorn_server = self.server.run_in_thread(
|
|
48
|
-
port=self.port, host=self.host, log_level="warning"
|
|
49
|
-
)
|
|
47
|
+
self.uvicorn_server = self.server.run_in_thread(host=self.host)
|
|
50
48
|
|
|
51
49
|
# Set the ready event
|
|
52
50
|
self.ready_event.set()
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.1.0"
|
|
1
|
+
__version__ = "8.1.1"
|
lemonade_install/install.py
CHANGED
|
@@ -451,6 +451,12 @@ class Install:
|
|
|
451
451
|
choices=["0.6.0"],
|
|
452
452
|
)
|
|
453
453
|
|
|
454
|
+
parser.add_argument(
|
|
455
|
+
"--llamacpp",
|
|
456
|
+
help="Install llama.cpp binaries with specified backend",
|
|
457
|
+
choices=["rocm", "vulkan"],
|
|
458
|
+
)
|
|
459
|
+
|
|
454
460
|
return parser
|
|
455
461
|
|
|
456
462
|
@staticmethod
|
|
@@ -739,18 +745,32 @@ class Install:
|
|
|
739
745
|
|
|
740
746
|
print(f"\nQuark installed successfully at: {quark_path}")
|
|
741
747
|
|
|
748
|
+
@staticmethod
|
|
749
|
+
def _install_llamacpp(backend):
|
|
750
|
+
"""
|
|
751
|
+
Install llama.cpp binaries with the specified backend.
|
|
752
|
+
|
|
753
|
+
Args:
|
|
754
|
+
backend: The backend to use ('rocm' or 'vulkan')
|
|
755
|
+
"""
|
|
756
|
+
|
|
757
|
+
from lemonade.tools.llamacpp.utils import install_llamacpp
|
|
758
|
+
|
|
759
|
+
install_llamacpp(backend)
|
|
760
|
+
|
|
742
761
|
def run(
|
|
743
762
|
self,
|
|
744
763
|
ryzenai: Optional[str] = None,
|
|
745
764
|
build_model: Optional[str] = None,
|
|
746
765
|
quark: Optional[str] = None,
|
|
766
|
+
llamacpp: Optional[str] = None,
|
|
747
767
|
yes: bool = False,
|
|
748
768
|
token: Optional[str] = None,
|
|
749
769
|
):
|
|
750
|
-
if ryzenai is None and quark is None and build_model is None:
|
|
770
|
+
if ryzenai is None and quark is None and llamacpp is None:
|
|
751
771
|
raise ValueError(
|
|
752
772
|
"You must select something to install, "
|
|
753
|
-
"for example `--ryzenai`, `--quark`, or `--build-model`"
|
|
773
|
+
"for example `--ryzenai`, `--quark`, or `--llamacpp`"
|
|
754
774
|
)
|
|
755
775
|
|
|
756
776
|
if ryzenai is not None:
|
|
@@ -759,6 +779,9 @@ class Install:
|
|
|
759
779
|
if quark is not None:
|
|
760
780
|
self._install_quark(quark)
|
|
761
781
|
|
|
782
|
+
if llamacpp is not None:
|
|
783
|
+
self._install_llamacpp(llamacpp)
|
|
784
|
+
|
|
762
785
|
|
|
763
786
|
def main():
|
|
764
787
|
installer = Install()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.1.0
|
|
3
|
+
Version: 8.1.1
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.13
|
|
@@ -27,7 +27,8 @@ Requires-Dist: transformers<=4.53.2
|
|
|
27
27
|
Requires-Dist: jinja2
|
|
28
28
|
Requires-Dist: tabulate
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
30
|
-
Requires-Dist: huggingface-hub==0.33.0
|
|
30
|
+
Requires-Dist: huggingface-hub[hf_xet]==0.33.0
|
|
31
|
+
Requires-Dist: python-dotenv
|
|
31
32
|
Provides-Extra: oga-ryzenai
|
|
32
33
|
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
|
|
33
34
|
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
@@ -40,6 +41,7 @@ Requires-Dist: accelerate; extra == "dev"
|
|
|
40
41
|
Requires-Dist: datasets; extra == "dev"
|
|
41
42
|
Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
42
43
|
Requires-Dist: matplotlib; extra == "dev"
|
|
44
|
+
Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
|
|
43
45
|
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
44
46
|
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
45
47
|
Provides-Extra: oga-hybrid
|
|
@@ -136,7 +138,9 @@ Dynamic: summary
|
|
|
136
138
|
<a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
|
|
137
139
|
</h3>
|
|
138
140
|
|
|
139
|
-
Lemonade
|
|
141
|
+
Lemonade helps users run local LLMs with the highest performance by configuring state-of-the-art inference engines for their NPUs and GPUs.
|
|
142
|
+
|
|
143
|
+
Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-your-on-device-ai-revolution/), research teams like [Hazy Research at Stanford](https://www.amd.com/en/developer/resources/technical-articles/2025/minions--on-device-and-cloud-language-model-collaboration-on-ryz.html), and large companies like [AMD](https://www.amd.com/en/developer/resources/technical-articles/unlocking-a-wave-of-llm-apps-on-ryzen-ai-through-lemonade-server.html) use Lemonade to run LLMs.
|
|
140
144
|
|
|
141
145
|
## Getting Started
|
|
142
146
|
|
|
@@ -155,7 +159,7 @@ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus
|
|
|
155
159
|
</p>
|
|
156
160
|
|
|
157
161
|
> [!TIP]
|
|
158
|
-
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email lemonade@amd.com.
|
|
162
|
+
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or [email](lemonade@amd.com).
|
|
159
163
|
|
|
160
164
|
## Using the CLI
|
|
161
165
|
|
|
@@ -177,7 +181,10 @@ To check all models available, use the `list` command:
|
|
|
177
181
|
lemonade-server list
|
|
178
182
|
```
|
|
179
183
|
|
|
180
|
-
> Note: If you installed from source, use the `lemonade-server-dev` command instead.
|
|
184
|
+
> **Note**: If you installed from source, use the `lemonade-server-dev` command instead.
|
|
185
|
+
|
|
186
|
+
> **Tip**: You can use `--llamacpp vulkan/rocm` to select a backend when running GGUF models.
|
|
187
|
+
|
|
181
188
|
|
|
182
189
|
## Model Library
|
|
183
190
|
|
|
@@ -219,7 +226,7 @@ Lemonade supports the following configurations, while also making it easy to swi
|
|
|
219
226
|
<tr>
|
|
220
227
|
<td><strong>🎮 GPU</strong></td>
|
|
221
228
|
<td align="center">—</td>
|
|
222
|
-
<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms</td>
|
|
229
|
+
<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
|
|
223
230
|
<td align="center">—</td>
|
|
224
231
|
<td align="center">✅</td>
|
|
225
232
|
<td align="center">✅</td>
|
|
@@ -235,6 +242,38 @@ Lemonade supports the following configurations, while also making it easy to swi
|
|
|
235
242
|
</tbody>
|
|
236
243
|
</table>
|
|
237
244
|
|
|
245
|
+
<details>
|
|
246
|
+
<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
|
|
247
|
+
|
|
248
|
+
<br>
|
|
249
|
+
|
|
250
|
+
<table>
|
|
251
|
+
<thead>
|
|
252
|
+
<tr>
|
|
253
|
+
<th>Architecture</th>
|
|
254
|
+
<th>Platform Support</th>
|
|
255
|
+
<th>GPU Models</th>
|
|
256
|
+
</tr>
|
|
257
|
+
</thead>
|
|
258
|
+
<tbody>
|
|
259
|
+
<tr>
|
|
260
|
+
<td><b>gfx1151</b> (STX Halo)</td>
|
|
261
|
+
<td>Windows, Ubuntu</td>
|
|
262
|
+
<td>Ryzen AI MAX+ Pro 395</td>
|
|
263
|
+
</tr>
|
|
264
|
+
<tr>
|
|
265
|
+
<td><b>gfx120X</b> (RDNA4)</td>
|
|
266
|
+
<td>Windows only</td>
|
|
267
|
+
<td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
|
|
268
|
+
</tr>
|
|
269
|
+
<tr>
|
|
270
|
+
<td><b>gfx110X</b> (RDNA3)</td>
|
|
271
|
+
<td>Windows, Ubuntu</td>
|
|
272
|
+
<td>Radeon PRO W7900/W7800/W7700/V710, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT</td>
|
|
273
|
+
</tr>
|
|
274
|
+
</tbody>
|
|
275
|
+
</table>
|
|
276
|
+
</details>
|
|
238
277
|
|
|
239
278
|
## Integrate Lemonade Server with Your Application
|
|
240
279
|
|
|
@@ -4,17 +4,17 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
|
|
|
4
4
|
lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=8YlEPKK1Cm5T4dPa2BQPpPwVVTzjPLnmqAeNcTb5nOw,22
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
11
11
|
lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
|
|
12
12
|
lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
|
|
13
|
-
lemonade/common/inference_engines.py,sha256=
|
|
13
|
+
lemonade/common/inference_engines.py,sha256=OJQcED9P1ZeQ8d11lDMNeAoaFoUuZlsDcwEZXLbqWRg,12579
|
|
14
14
|
lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
|
|
15
15
|
lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
|
|
16
16
|
lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
|
|
17
|
-
lemonade/common/system_info.py,sha256=
|
|
17
|
+
lemonade/common/system_info.py,sha256=pn-k3zMQCbt5cu3aHXa4cENgrubOK97gs9PYdGPsFXA,28405
|
|
18
18
|
lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
|
|
19
19
|
lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
|
|
20
20
|
lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
|
|
@@ -33,11 +33,11 @@ lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnS
|
|
|
33
33
|
lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
|
|
34
34
|
lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
|
|
35
35
|
lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
|
|
36
|
-
lemonade/tools/llamacpp/load.py,sha256=
|
|
37
|
-
lemonade/tools/llamacpp/utils.py,sha256=
|
|
36
|
+
lemonade/tools/llamacpp/load.py,sha256=DFCvQN548Ch9H8U_rHOiYviinzw6vixb5-V7xLj7XE4,6499
|
|
37
|
+
lemonade/tools/llamacpp/utils.py,sha256=CTWnzbEYGPSbOizF26yCnyNrHDY19pLusU-YyND992s,29070
|
|
38
38
|
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
39
|
lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
|
|
40
|
-
lemonade/tools/oga/load.py,sha256=
|
|
40
|
+
lemonade/tools/oga/load.py,sha256=6Pf_QrHpIXDbfpTwFNRj4RmWTxI-RImhYuqRvmTVgmY,33722
|
|
41
41
|
lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
|
|
42
42
|
lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
43
|
lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
|
|
@@ -46,27 +46,27 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
46
46
|
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
47
47
|
lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
|
|
48
48
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
-
lemonade/tools/server/llamacpp.py,sha256=
|
|
50
|
-
lemonade/tools/server/serve.py,sha256=
|
|
49
|
+
lemonade/tools/server/llamacpp.py,sha256=KZO4npzefvbaPvlZbpCYsdW0tMSfmmupT8gaK9y65I8,17962
|
|
50
|
+
lemonade/tools/server/serve.py,sha256=PAAGowj2Z5AQIW3G1l52taNyf_0U4kRFR3G735M4DsU,55513
|
|
51
51
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
52
|
-
lemonade/tools/server/tray.py,sha256=
|
|
52
|
+
lemonade/tools/server/tray.py,sha256=qlQKBkQwG9W2v9GTyycvFc12_jly6vPU1uEkrIFBGTs,17624
|
|
53
53
|
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
54
54
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
55
|
-
lemonade/tools/server/static/styles.css,sha256=
|
|
56
|
-
lemonade/tools/server/static/webapp.html,sha256=
|
|
55
|
+
lemonade/tools/server/static/styles.css,sha256=M_JrH_vML65MWun-C8XCvLOFw35qZURSa77Fk4fVngQ,30029
|
|
56
|
+
lemonade/tools/server/static/webapp.html,sha256=oU6FZHGQCq-SoT6VkWObQvYzzNS0ser5Fmqx2j_5jCI,54380
|
|
57
57
|
lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
|
|
58
58
|
lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
|
|
59
|
-
lemonade/tools/server/utils/thread.py,sha256=
|
|
59
|
+
lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
|
|
60
60
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
61
|
-
lemonade_install/install.py,sha256=
|
|
62
|
-
lemonade_sdk-8.1.
|
|
63
|
-
lemonade_sdk-8.1.
|
|
64
|
-
lemonade_server/cli.py,sha256=
|
|
61
|
+
lemonade_install/install.py,sha256=Zl_JtEIhbqZZTvxcqtq895IomEN-JNxp9xOZEtahMHQ,28289
|
|
62
|
+
lemonade_sdk-8.1.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
63
|
+
lemonade_sdk-8.1.1.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
64
|
+
lemonade_server/cli.py,sha256=CFfhrRgZNJCd0rDRBF3TeS3dMJgwlKGtvT5_kbsWaXk,17316
|
|
65
65
|
lemonade_server/model_manager.py,sha256=O3fIX52AqU0z10WzPmNEA3lQ_KjOqNq_G-SxjwIgEio,10781
|
|
66
66
|
lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
|
|
67
|
-
lemonade_server/server_models.json,sha256=
|
|
68
|
-
lemonade_sdk-8.1.
|
|
69
|
-
lemonade_sdk-8.1.
|
|
70
|
-
lemonade_sdk-8.1.
|
|
71
|
-
lemonade_sdk-8.1.
|
|
72
|
-
lemonade_sdk-8.1.
|
|
67
|
+
lemonade_server/server_models.json,sha256=iag_dG9S1tkHZUhkJmGAfiUJkgEazdQSv7stC1fVAsQ,9741
|
|
68
|
+
lemonade_sdk-8.1.1.dist-info/METADATA,sha256=XT9cwNUAkhwQ6kad6l7t2nj7m8S0t-9GvaFLOMxLCyE,17065
|
|
69
|
+
lemonade_sdk-8.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
70
|
+
lemonade_sdk-8.1.1.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
|
|
71
|
+
lemonade_sdk-8.1.1.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
72
|
+
lemonade_sdk-8.1.1.dist-info/RECORD,,
|
lemonade_server/cli.py
CHANGED
|
@@ -39,11 +39,19 @@ class ModelNotAvailableError(Exception):
|
|
|
39
39
|
"""
|
|
40
40
|
|
|
41
41
|
|
|
42
|
+
class ModelLoadError(Exception):
|
|
43
|
+
"""
|
|
44
|
+
The model failed to load on the server
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
|
|
42
48
|
def serve(
|
|
43
49
|
port: int = None,
|
|
44
50
|
log_level: str = None,
|
|
45
51
|
tray: bool = False,
|
|
46
52
|
use_thread: bool = False,
|
|
53
|
+
llamacpp_backend: str = None,
|
|
54
|
+
ctx_size: int = None,
|
|
47
55
|
):
|
|
48
56
|
"""
|
|
49
57
|
Execute the serve command
|
|
@@ -51,26 +59,33 @@ def serve(
|
|
|
51
59
|
|
|
52
60
|
# Otherwise, start the server
|
|
53
61
|
print("Starting Lemonade Server...")
|
|
54
|
-
from lemonade.tools.server.serve import Server, DEFAULT_PORT, DEFAULT_LOG_LEVEL
|
|
62
|
+
from lemonade.tools.server.serve import (
|
|
63
|
+
Server,
|
|
64
|
+
DEFAULT_PORT,
|
|
65
|
+
DEFAULT_LOG_LEVEL,
|
|
66
|
+
DEFAULT_LLAMACPP_BACKEND,
|
|
67
|
+
DEFAULT_CTX_SIZE,
|
|
68
|
+
)
|
|
55
69
|
|
|
56
70
|
port = port if port is not None else DEFAULT_PORT
|
|
57
71
|
log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
|
|
72
|
+
llamacpp_backend = (
|
|
73
|
+
llamacpp_backend if llamacpp_backend is not None else DEFAULT_LLAMACPP_BACKEND
|
|
74
|
+
)
|
|
58
75
|
|
|
59
|
-
#
|
|
60
|
-
|
|
76
|
+
# Use ctx_size if provided, otherwise use default
|
|
77
|
+
ctx_size = ctx_size if ctx_size is not None else DEFAULT_CTX_SIZE
|
|
61
78
|
|
|
62
79
|
# Start the server
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
80
|
+
server = Server(
|
|
81
|
+
port=port,
|
|
82
|
+
log_level=log_level,
|
|
83
|
+
ctx_size=ctx_size,
|
|
84
|
+
tray=tray,
|
|
85
|
+
llamacpp_backend=llamacpp_backend,
|
|
86
|
+
)
|
|
69
87
|
if not use_thread:
|
|
70
|
-
server.run(
|
|
71
|
-
port=port,
|
|
72
|
-
**serve_kwargs,
|
|
73
|
-
)
|
|
88
|
+
server.run()
|
|
74
89
|
else:
|
|
75
90
|
from threading import Thread
|
|
76
91
|
import time
|
|
@@ -78,8 +93,6 @@ def serve(
|
|
|
78
93
|
# Start a background thread to run the server
|
|
79
94
|
server_thread = Thread(
|
|
80
95
|
target=server.run,
|
|
81
|
-
args=(port,),
|
|
82
|
-
kwargs=serve_kwargs,
|
|
83
96
|
daemon=True,
|
|
84
97
|
)
|
|
85
98
|
server_thread.start()
|
|
@@ -243,7 +256,13 @@ def delete(model_names: List[str]):
|
|
|
243
256
|
ModelManager().delete_model(model_name)
|
|
244
257
|
|
|
245
258
|
|
|
246
|
-
def run(
|
|
259
|
+
def run(
|
|
260
|
+
model_name: str,
|
|
261
|
+
port: int = None,
|
|
262
|
+
log_level: str = None,
|
|
263
|
+
llamacpp_backend: str = None,
|
|
264
|
+
ctx_size: int = None,
|
|
265
|
+
):
|
|
247
266
|
"""
|
|
248
267
|
Start the server if not running and open the webapp with the specified model
|
|
249
268
|
"""
|
|
@@ -254,7 +273,16 @@ def run(model_name: str):
|
|
|
254
273
|
_, port = get_server_info()
|
|
255
274
|
server_previously_running = port is not None
|
|
256
275
|
if not server_previously_running:
|
|
257
|
-
port, server_thread = serve(
|
|
276
|
+
port, server_thread = serve(
|
|
277
|
+
port=port,
|
|
278
|
+
log_level=log_level,
|
|
279
|
+
tray=True,
|
|
280
|
+
use_thread=True,
|
|
281
|
+
llamacpp_backend=llamacpp_backend,
|
|
282
|
+
ctx_size=ctx_size,
|
|
283
|
+
)
|
|
284
|
+
else:
|
|
285
|
+
port = running_port
|
|
258
286
|
|
|
259
287
|
# Pull model
|
|
260
288
|
pull([model_name])
|
|
@@ -412,6 +440,29 @@ def list_models():
|
|
|
412
440
|
print(tabulate(table_data, headers=headers, tablefmt="simple"))
|
|
413
441
|
|
|
414
442
|
|
|
443
|
+
def _add_server_arguments(parser):
|
|
444
|
+
"""Add common server arguments to a parser"""
|
|
445
|
+
parser.add_argument("--port", type=int, help="Port number to serve on")
|
|
446
|
+
parser.add_argument(
|
|
447
|
+
"--log-level",
|
|
448
|
+
type=str,
|
|
449
|
+
help="Log level for the server",
|
|
450
|
+
choices=["critical", "error", "warning", "info", "debug", "trace"],
|
|
451
|
+
default="info",
|
|
452
|
+
)
|
|
453
|
+
parser.add_argument(
|
|
454
|
+
"--llamacpp",
|
|
455
|
+
type=str,
|
|
456
|
+
help=f"LlamaCpp backend to use",
|
|
457
|
+
choices=["vulkan", "rocm"],
|
|
458
|
+
)
|
|
459
|
+
parser.add_argument(
|
|
460
|
+
"--ctx-size",
|
|
461
|
+
type=int,
|
|
462
|
+
help="Context size for the model (default: 4096 for llamacpp, truncates prompts for other recipes)",
|
|
463
|
+
)
|
|
464
|
+
|
|
465
|
+
|
|
415
466
|
def main():
|
|
416
467
|
parser = argparse.ArgumentParser(
|
|
417
468
|
description="Serve LLMs on CPU, GPU, and NPU.",
|
|
@@ -430,14 +481,7 @@ def main():
|
|
|
430
481
|
|
|
431
482
|
# Serve command
|
|
432
483
|
serve_parser = subparsers.add_parser("serve", help="Start server")
|
|
433
|
-
serve_parser.add_argument("--port", type=int, help="Port number to serve on")
|
|
434
|
-
serve_parser.add_argument(
|
|
435
|
-
"--log-level",
|
|
436
|
-
type=str,
|
|
437
|
-
help="Log level for the server",
|
|
438
|
-
choices=["critical", "error", "warning", "info", "debug", "trace"],
|
|
439
|
-
default="info",
|
|
440
|
-
)
|
|
484
|
+
_add_server_arguments(serve_parser)
|
|
441
485
|
if os.name == "nt":
|
|
442
486
|
serve_parser.add_argument(
|
|
443
487
|
"--no-tray",
|
|
@@ -513,6 +557,7 @@ def main():
|
|
|
513
557
|
"model",
|
|
514
558
|
help="Lemonade Server model name to run",
|
|
515
559
|
)
|
|
560
|
+
_add_server_arguments(run_parser)
|
|
516
561
|
|
|
517
562
|
args = parser.parse_args()
|
|
518
563
|
|
|
@@ -535,6 +580,8 @@ def main():
|
|
|
535
580
|
port=args.port,
|
|
536
581
|
log_level=args.log_level,
|
|
537
582
|
tray=not args.no_tray,
|
|
583
|
+
llamacpp_backend=args.llamacpp,
|
|
584
|
+
ctx_size=args.ctx_size,
|
|
538
585
|
)
|
|
539
586
|
elif args.command == "status":
|
|
540
587
|
status()
|
|
@@ -553,7 +600,13 @@ def main():
|
|
|
553
600
|
elif args.command == "stop":
|
|
554
601
|
stop()
|
|
555
602
|
elif args.command == "run":
|
|
556
|
-
run(
|
|
603
|
+
run(
|
|
604
|
+
args.model,
|
|
605
|
+
port=args.port,
|
|
606
|
+
log_level=args.log_level,
|
|
607
|
+
llamacpp_backend=args.llamacpp,
|
|
608
|
+
ctx_size=args.ctx_size,
|
|
609
|
+
)
|
|
557
610
|
elif args.command == "help" or not args.command:
|
|
558
611
|
parser.print_help()
|
|
559
612
|
|
|
@@ -190,7 +190,13 @@
|
|
|
190
190
|
"checkpoint": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Qwen3-30B-A3B-Instruct-2507-Q4_0.gguf",
|
|
191
191
|
"recipe": "llamacpp",
|
|
192
192
|
"suggested": true,
|
|
193
|
-
"labels": ["
|
|
193
|
+
"labels": ["hot"]
|
|
194
|
+
},
|
|
195
|
+
"Qwen3-Coder-30B-A3B-Instruct-GGUF": {
|
|
196
|
+
"checkpoint": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
|
|
197
|
+
"recipe": "llamacpp",
|
|
198
|
+
"suggested": true,
|
|
199
|
+
"labels": ["coding","hot"]
|
|
194
200
|
},
|
|
195
201
|
"Gemma-3-4b-it-GGUF": {
|
|
196
202
|
"checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
|
@@ -213,6 +219,13 @@
|
|
|
213
219
|
"suggested": true,
|
|
214
220
|
"labels": ["vision"]
|
|
215
221
|
},
|
|
222
|
+
"Cogito-v2-llama-109B-MoE-GGUF": {
|
|
223
|
+
"checkpoint": "unsloth/cogito-v2-preview-llama-109B-MoE-GGUF:Q4_K_M",
|
|
224
|
+
"mmproj": "mmproj-F16.gguf",
|
|
225
|
+
"recipe": "llamacpp",
|
|
226
|
+
"suggested": true,
|
|
227
|
+
"labels": ["vision","hot"]
|
|
228
|
+
},
|
|
216
229
|
"nomic-embed-text-v1-GGUF": {
|
|
217
230
|
"checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
|
|
218
231
|
"recipe": "llamacpp",
|
|
@@ -248,5 +261,17 @@
|
|
|
248
261
|
"recipe": "llamacpp",
|
|
249
262
|
"suggested": true,
|
|
250
263
|
"labels": ["reasoning", "coding"]
|
|
264
|
+
},
|
|
265
|
+
"gpt-oss-120b-GGUF": {
|
|
266
|
+
"checkpoint": "unsloth/gpt-oss-120b-GGUF:Q4_K_M",
|
|
267
|
+
"recipe": "llamacpp",
|
|
268
|
+
"suggested": true,
|
|
269
|
+
"labels": ["hot", "reasoning"]
|
|
270
|
+
},
|
|
271
|
+
"gpt-oss-20b-GGUF": {
|
|
272
|
+
"checkpoint": "unsloth/gpt-oss-20b-GGUF:Q4_K_M",
|
|
273
|
+
"recipe": "llamacpp",
|
|
274
|
+
"suggested": true,
|
|
275
|
+
"labels": ["hot", "reasoning"]
|
|
251
276
|
}
|
|
252
277
|
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|