lemonade-sdk 8.1.0__py3-none-any.whl → 8.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/common/inference_engines.py +63 -78
- lemonade/common/system_info.py +61 -44
- lemonade/tools/llamacpp/load.py +13 -4
- lemonade/tools/llamacpp/utils.py +322 -54
- lemonade/tools/management_tools.py +1 -1
- lemonade/tools/oga/load.py +3 -3
- lemonade/tools/server/llamacpp.py +30 -53
- lemonade/tools/server/serve.py +58 -104
- lemonade/tools/server/static/styles.css +203 -0
- lemonade/tools/server/static/webapp.html +509 -72
- lemonade/tools/server/tray.py +4 -2
- lemonade/tools/server/utils/port.py +2 -2
- lemonade/tools/server/utils/thread.py +2 -4
- lemonade/version.py +1 -1
- lemonade_install/install.py +25 -2
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.2.dist-info}/METADATA +45 -6
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.2.dist-info}/RECORD +25 -25
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.2.dist-info}/entry_points.txt +1 -0
- lemonade_server/cli.py +115 -27
- lemonade_server/model_manager.py +1 -1
- lemonade_server/server_models.json +71 -1
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.2.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.2.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.2.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.0.dist-info → lemonade_sdk-8.1.2.dist-info}/top_level.txt +0 -0
lemonade/tools/server/tray.py
CHANGED
|
@@ -263,8 +263,10 @@ class LemonadeTray(SystemTray):
|
|
|
263
263
|
self.server.uvicorn_server.should_exit = True
|
|
264
264
|
self.server_thread.join(timeout=2)
|
|
265
265
|
|
|
266
|
-
# Update the port
|
|
266
|
+
# Update the port in both the tray and the server instance
|
|
267
267
|
self.port = new_port
|
|
268
|
+
if self.server:
|
|
269
|
+
self.server.port = new_port
|
|
268
270
|
|
|
269
271
|
# Restart the server
|
|
270
272
|
self.server_thread = threading.Thread(target=self.start_server, daemon=True)
|
|
@@ -425,7 +427,7 @@ class LemonadeTray(SystemTray):
|
|
|
425
427
|
Start the uvicorn server.
|
|
426
428
|
"""
|
|
427
429
|
self.server = self.server_factory()
|
|
428
|
-
self.server.uvicorn_server = self.server.run_in_thread(
|
|
430
|
+
self.server.uvicorn_server = self.server.run_in_thread(self.server.host)
|
|
429
431
|
self.server.uvicorn_server.run()
|
|
430
432
|
|
|
431
433
|
def run(self):
|
|
@@ -43,7 +43,7 @@ async def lifespan(app: FastAPI):
|
|
|
43
43
|
"\n"
|
|
44
44
|
"\n"
|
|
45
45
|
"🍋 Lemonade Server Ready!\n"
|
|
46
|
-
f"🍋 Open http://
|
|
46
|
+
f"🍋 Open http://{app.host_}:{app.port} in your browser for:\n"
|
|
47
47
|
"🍋 💬 chat\n"
|
|
48
48
|
"🍋 💻 model management\n"
|
|
49
49
|
"🍋 📄 docs\n"
|
|
@@ -53,7 +53,7 @@ async def lifespan(app: FastAPI):
|
|
|
53
53
|
"\n"
|
|
54
54
|
"\n"
|
|
55
55
|
"[Lemonade] Lemonade Server Ready!\n"
|
|
56
|
-
f"[Lemonade] Open http://
|
|
56
|
+
f"[Lemonade] Open http://{app.host_}:{app.port} in your browser for:\n"
|
|
57
57
|
"[Lemonade] chat\n"
|
|
58
58
|
"[Lemonade] model management\n"
|
|
59
59
|
"[Lemonade] docs\n"
|
|
@@ -26,7 +26,7 @@ class ServerRunner(threading.Thread):
|
|
|
26
26
|
def run(self):
|
|
27
27
|
try:
|
|
28
28
|
# Create the server instance
|
|
29
|
-
self.server = Server()
|
|
29
|
+
self.server = Server(port=self.port, log_level="warning")
|
|
30
30
|
|
|
31
31
|
# Configure the server with model/tokenizer
|
|
32
32
|
self.server.model = self.model
|
|
@@ -44,9 +44,7 @@ class ServerRunner(threading.Thread):
|
|
|
44
44
|
)
|
|
45
45
|
|
|
46
46
|
# Set up the server for threaded execution
|
|
47
|
-
self.uvicorn_server = self.server.run_in_thread(
|
|
48
|
-
port=self.port, host=self.host, log_level="warning"
|
|
49
|
-
)
|
|
47
|
+
self.uvicorn_server = self.server.run_in_thread(host=self.host)
|
|
50
48
|
|
|
51
49
|
# Set the ready event
|
|
52
50
|
self.ready_event.set()
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.1.0"
|
|
1
|
+
__version__ = "8.1.2"
|
lemonade_install/install.py
CHANGED
|
@@ -451,6 +451,12 @@ class Install:
|
|
|
451
451
|
choices=["0.6.0"],
|
|
452
452
|
)
|
|
453
453
|
|
|
454
|
+
parser.add_argument(
|
|
455
|
+
"--llamacpp",
|
|
456
|
+
help="Install llama.cpp binaries with specified backend",
|
|
457
|
+
choices=["rocm", "vulkan"],
|
|
458
|
+
)
|
|
459
|
+
|
|
454
460
|
return parser
|
|
455
461
|
|
|
456
462
|
@staticmethod
|
|
@@ -739,18 +745,32 @@ class Install:
|
|
|
739
745
|
|
|
740
746
|
print(f"\nQuark installed successfully at: {quark_path}")
|
|
741
747
|
|
|
748
|
+
@staticmethod
|
|
749
|
+
def _install_llamacpp(backend):
|
|
750
|
+
"""
|
|
751
|
+
Install llama.cpp binaries with the specified backend.
|
|
752
|
+
|
|
753
|
+
Args:
|
|
754
|
+
backend: The backend to use ('rocm' or 'vulkan')
|
|
755
|
+
"""
|
|
756
|
+
|
|
757
|
+
from lemonade.tools.llamacpp.utils import install_llamacpp
|
|
758
|
+
|
|
759
|
+
install_llamacpp(backend)
|
|
760
|
+
|
|
742
761
|
def run(
|
|
743
762
|
self,
|
|
744
763
|
ryzenai: Optional[str] = None,
|
|
745
764
|
build_model: Optional[str] = None,
|
|
746
765
|
quark: Optional[str] = None,
|
|
766
|
+
llamacpp: Optional[str] = None,
|
|
747
767
|
yes: bool = False,
|
|
748
768
|
token: Optional[str] = None,
|
|
749
769
|
):
|
|
750
|
-
if ryzenai is None and quark is None and
|
|
770
|
+
if ryzenai is None and quark is None and llamacpp is None:
|
|
751
771
|
raise ValueError(
|
|
752
772
|
"You must select something to install, "
|
|
753
|
-
"for example `--ryzenai`, `--quark`, or `--
|
|
773
|
+
"for example `--ryzenai`, `--quark`, or `--llamacpp`"
|
|
754
774
|
)
|
|
755
775
|
|
|
756
776
|
if ryzenai is not None:
|
|
@@ -759,6 +779,9 @@ class Install:
|
|
|
759
779
|
if quark is not None:
|
|
760
780
|
self._install_quark(quark)
|
|
761
781
|
|
|
782
|
+
if llamacpp is not None:
|
|
783
|
+
self._install_llamacpp(llamacpp)
|
|
784
|
+
|
|
762
785
|
|
|
763
786
|
def main():
|
|
764
787
|
installer = Install()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.1.0
|
|
3
|
+
Version: 8.1.2
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.13
|
|
@@ -27,7 +27,8 @@ Requires-Dist: transformers<=4.53.2
|
|
|
27
27
|
Requires-Dist: jinja2
|
|
28
28
|
Requires-Dist: tabulate
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
30
|
-
Requires-Dist: huggingface-hub==0.33.0
|
|
30
|
+
Requires-Dist: huggingface-hub[hf_xet]==0.33.0
|
|
31
|
+
Requires-Dist: python-dotenv
|
|
31
32
|
Provides-Extra: oga-ryzenai
|
|
32
33
|
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
|
|
33
34
|
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
@@ -40,6 +41,7 @@ Requires-Dist: accelerate; extra == "dev"
|
|
|
40
41
|
Requires-Dist: datasets; extra == "dev"
|
|
41
42
|
Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
42
43
|
Requires-Dist: matplotlib; extra == "dev"
|
|
44
|
+
Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
|
|
43
45
|
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
44
46
|
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
45
47
|
Provides-Extra: oga-hybrid
|
|
@@ -136,7 +138,9 @@ Dynamic: summary
|
|
|
136
138
|
<a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
|
|
137
139
|
</h3>
|
|
138
140
|
|
|
139
|
-
Lemonade
|
|
141
|
+
Lemonade helps users run local LLMs with the highest performance by configuring state-of-the-art inference engines for their NPUs and GPUs.
|
|
142
|
+
|
|
143
|
+
Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-your-on-device-ai-revolution/), research teams like [Hazy Research at Stanford](https://www.amd.com/en/developer/resources/technical-articles/2025/minions--on-device-and-cloud-language-model-collaboration-on-ryz.html), and large companies like [AMD](https://www.amd.com/en/developer/resources/technical-articles/unlocking-a-wave-of-llm-apps-on-ryzen-ai-through-lemonade-server.html) use Lemonade to run LLMs.
|
|
140
144
|
|
|
141
145
|
## Getting Started
|
|
142
146
|
|
|
@@ -155,7 +159,7 @@ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus
|
|
|
155
159
|
</p>
|
|
156
160
|
|
|
157
161
|
> [!TIP]
|
|
158
|
-
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email
|
|
162
|
+
> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or [email](lemonade@amd.com).
|
|
159
163
|
|
|
160
164
|
## Using the CLI
|
|
161
165
|
|
|
@@ -177,7 +181,10 @@ To check all models available, use the `list` command:
|
|
|
177
181
|
lemonade-server list
|
|
178
182
|
```
|
|
179
183
|
|
|
180
|
-
> Note
|
|
184
|
+
> **Note**: If you installed from source, use the `lemonade-server-dev` command instead.
|
|
185
|
+
|
|
186
|
+
> **Tip**: You can use `--llamacpp vulkan/rocm` to select a backend when running GGUF models.
|
|
187
|
+
|
|
181
188
|
|
|
182
189
|
## Model Library
|
|
183
190
|
|
|
@@ -219,7 +226,7 @@ Lemonade supports the following configurations, while also making it easy to swi
|
|
|
219
226
|
<tr>
|
|
220
227
|
<td><strong>🎮 GPU</strong></td>
|
|
221
228
|
<td align="center">—</td>
|
|
222
|
-
<td align="center">Vulkan: All platforms<br
|
|
229
|
+
<td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
|
|
223
230
|
<td align="center">—</td>
|
|
224
231
|
<td align="center">✅</td>
|
|
225
232
|
<td align="center">✅</td>
|
|
@@ -235,6 +242,38 @@ Lemonade supports the following configurations, while also making it easy to swi
|
|
|
235
242
|
</tbody>
|
|
236
243
|
</table>
|
|
237
244
|
|
|
245
|
+
<details>
|
|
246
|
+
<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
|
|
247
|
+
|
|
248
|
+
<br>
|
|
249
|
+
|
|
250
|
+
<table>
|
|
251
|
+
<thead>
|
|
252
|
+
<tr>
|
|
253
|
+
<th>Architecture</th>
|
|
254
|
+
<th>Platform Support</th>
|
|
255
|
+
<th>GPU Models</th>
|
|
256
|
+
</tr>
|
|
257
|
+
</thead>
|
|
258
|
+
<tbody>
|
|
259
|
+
<tr>
|
|
260
|
+
<td><b>gfx1151</b> (STX Halo)</td>
|
|
261
|
+
<td>Windows, Ubuntu</td>
|
|
262
|
+
<td>Ryzen AI MAX+ Pro 395</td>
|
|
263
|
+
</tr>
|
|
264
|
+
<tr>
|
|
265
|
+
<td><b>gfx120X</b> (RDNA4)</td>
|
|
266
|
+
<td>Windows only</td>
|
|
267
|
+
<td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
|
|
268
|
+
</tr>
|
|
269
|
+
<tr>
|
|
270
|
+
<td><b>gfx110X</b> (RDNA3)</td>
|
|
271
|
+
<td>Windows, Ubuntu</td>
|
|
272
|
+
<td>Radeon PRO W7900/W7800/W7700/V710, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT</td>
|
|
273
|
+
</tr>
|
|
274
|
+
</tbody>
|
|
275
|
+
</table>
|
|
276
|
+
</details>
|
|
238
277
|
|
|
239
278
|
## Integrate Lemonade Server with Your Application
|
|
240
279
|
|
|
@@ -4,17 +4,17 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
|
|
|
4
4
|
lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=EsLdncrDNNSYcAkb6bacgIFA7Oh4EZnzpp9alKokT5A,22
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
11
11
|
lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
|
|
12
12
|
lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
|
|
13
|
-
lemonade/common/inference_engines.py,sha256=
|
|
13
|
+
lemonade/common/inference_engines.py,sha256=pJxn0zOf3gEmjGAIWXNdCibfzarzc7LRbZjoQyygkcU,12591
|
|
14
14
|
lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
|
|
15
15
|
lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
|
|
16
16
|
lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
|
|
17
|
-
lemonade/common/system_info.py,sha256=
|
|
17
|
+
lemonade/common/system_info.py,sha256=pn-k3zMQCbt5cu3aHXa4cENgrubOK97gs9PYdGPsFXA,28405
|
|
18
18
|
lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
|
|
19
19
|
lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
|
|
20
20
|
lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
|
|
@@ -24,7 +24,7 @@ lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11
|
|
|
24
24
|
lemonade/tools/adapter.py,sha256=Ex63Y1SPCOSV4M_QtzEn3YVd39d3yew0lpmEFgp8aH4,3169
|
|
25
25
|
lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
|
|
26
26
|
lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
|
|
27
|
-
lemonade/tools/management_tools.py,sha256=
|
|
27
|
+
lemonade/tools/management_tools.py,sha256=HQBcr7LYuMqVRYQtvnkNpfutBTA7lblszyoAjjVGu1Y,10201
|
|
28
28
|
lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
|
|
29
29
|
lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
|
|
30
30
|
lemonade/tools/prompt.py,sha256=PyLksp1k8jsZsU7XBRK61k1DUHhbdLa20h-AP8Noh3w,9011
|
|
@@ -33,11 +33,11 @@ lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnS
|
|
|
33
33
|
lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
|
|
34
34
|
lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
|
|
35
35
|
lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
|
|
36
|
-
lemonade/tools/llamacpp/load.py,sha256=
|
|
37
|
-
lemonade/tools/llamacpp/utils.py,sha256=
|
|
36
|
+
lemonade/tools/llamacpp/load.py,sha256=DFCvQN548Ch9H8U_rHOiYviinzw6vixb5-V7xLj7XE4,6499
|
|
37
|
+
lemonade/tools/llamacpp/utils.py,sha256=Auid9FepxwLIgDahaDNIxwz8kP_ap8Opd3eSF6t637g,32336
|
|
38
38
|
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
39
|
lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
|
|
40
|
-
lemonade/tools/oga/load.py,sha256=
|
|
40
|
+
lemonade/tools/oga/load.py,sha256=6Pf_QrHpIXDbfpTwFNRj4RmWTxI-RImhYuqRvmTVgmY,33722
|
|
41
41
|
lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
|
|
42
42
|
lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
43
|
lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
|
|
@@ -46,27 +46,27 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
46
46
|
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
47
47
|
lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
|
|
48
48
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
-
lemonade/tools/server/llamacpp.py,sha256=
|
|
50
|
-
lemonade/tools/server/serve.py,sha256=
|
|
49
|
+
lemonade/tools/server/llamacpp.py,sha256=KZO4npzefvbaPvlZbpCYsdW0tMSfmmupT8gaK9y65I8,17962
|
|
50
|
+
lemonade/tools/server/serve.py,sha256=jk343MlY9fdWbsw3JaD5CO11r6LUKJUHyp2f--6t5i4,55729
|
|
51
51
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
52
|
-
lemonade/tools/server/tray.py,sha256=
|
|
52
|
+
lemonade/tools/server/tray.py,sha256=YJ4-vJlM6tJ0ojY_wVM6COuNscETFkQPt-BaNqYa9YQ,17640
|
|
53
53
|
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
54
54
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
55
|
-
lemonade/tools/server/static/styles.css,sha256=
|
|
56
|
-
lemonade/tools/server/static/webapp.html,sha256=
|
|
57
|
-
lemonade/tools/server/utils/port.py,sha256=
|
|
55
|
+
lemonade/tools/server/static/styles.css,sha256=M_JrH_vML65MWun-C8XCvLOFw35qZURSa77Fk4fVngQ,30029
|
|
56
|
+
lemonade/tools/server/static/webapp.html,sha256=xPjqQgZVp_JJednxJ0rgXRwNd8fomqIav7Ap0pWlhd4,54440
|
|
57
|
+
lemonade/tools/server/utils/port.py,sha256=df1gQ-W5BgDSbOOEY548xTBqRDENtDLRmCvF-iP9JPk,2190
|
|
58
58
|
lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
|
|
59
|
-
lemonade/tools/server/utils/thread.py,sha256=
|
|
59
|
+
lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
|
|
60
60
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
61
|
-
lemonade_install/install.py,sha256=
|
|
62
|
-
lemonade_sdk-8.1.
|
|
63
|
-
lemonade_sdk-8.1.
|
|
64
|
-
lemonade_server/cli.py,sha256=
|
|
65
|
-
lemonade_server/model_manager.py,sha256=
|
|
61
|
+
lemonade_install/install.py,sha256=Zl_JtEIhbqZZTvxcqtq895IomEN-JNxp9xOZEtahMHQ,28289
|
|
62
|
+
lemonade_sdk-8.1.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
63
|
+
lemonade_sdk-8.1.2.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
64
|
+
lemonade_server/cli.py,sha256=vFdqFB3E5ivlVM-ywcnKsEy0vKFutExDnVCjO7akXcw,18486
|
|
65
|
+
lemonade_server/model_manager.py,sha256=cFaHJVOsabwekAPryXAPdo6qrXYBD_yht7XPg2QImqc,10791
|
|
66
66
|
lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
|
|
67
|
-
lemonade_server/server_models.json,sha256=
|
|
68
|
-
lemonade_sdk-8.1.
|
|
69
|
-
lemonade_sdk-8.1.
|
|
70
|
-
lemonade_sdk-8.1.
|
|
71
|
-
lemonade_sdk-8.1.
|
|
72
|
-
lemonade_sdk-8.1.
|
|
67
|
+
lemonade_server/server_models.json,sha256=pphygJLu_kfWod6Afws9DG1CwhTKFo8rL_3cz8m9kzg,11483
|
|
68
|
+
lemonade_sdk-8.1.2.dist-info/METADATA,sha256=wUJMAvKRZlDpSYYAYkB-c0hGC0-O9rPp1N39htmxuQo,17065
|
|
69
|
+
lemonade_sdk-8.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
70
|
+
lemonade_sdk-8.1.2.dist-info/entry_points.txt,sha256=7sRvpNhi1E7amnM7RZo57e8yFF9iA5uuRaIeJ1Xre6w,193
|
|
71
|
+
lemonade_sdk-8.1.2.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
72
|
+
lemonade_sdk-8.1.2.dist-info/RECORD,,
|
lemonade_server/cli.py
CHANGED
|
@@ -39,11 +39,20 @@ class ModelNotAvailableError(Exception):
|
|
|
39
39
|
"""
|
|
40
40
|
|
|
41
41
|
|
|
42
|
+
class ModelLoadError(Exception):
|
|
43
|
+
"""
|
|
44
|
+
The model failed to load on the server
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
|
|
42
48
|
def serve(
|
|
43
49
|
port: int = None,
|
|
50
|
+
host: str = "localhost",
|
|
44
51
|
log_level: str = None,
|
|
45
52
|
tray: bool = False,
|
|
46
53
|
use_thread: bool = False,
|
|
54
|
+
llamacpp_backend: str = None,
|
|
55
|
+
ctx_size: int = None,
|
|
47
56
|
):
|
|
48
57
|
"""
|
|
49
58
|
Execute the serve command
|
|
@@ -51,26 +60,34 @@ def serve(
|
|
|
51
60
|
|
|
52
61
|
# Otherwise, start the server
|
|
53
62
|
print("Starting Lemonade Server...")
|
|
54
|
-
from lemonade.tools.server.serve import
|
|
63
|
+
from lemonade.tools.server.serve import (
|
|
64
|
+
Server,
|
|
65
|
+
DEFAULT_PORT,
|
|
66
|
+
DEFAULT_LOG_LEVEL,
|
|
67
|
+
DEFAULT_LLAMACPP_BACKEND,
|
|
68
|
+
DEFAULT_CTX_SIZE,
|
|
69
|
+
)
|
|
55
70
|
|
|
56
71
|
port = port if port is not None else DEFAULT_PORT
|
|
57
72
|
log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
|
|
73
|
+
llamacpp_backend = (
|
|
74
|
+
llamacpp_backend if llamacpp_backend is not None else DEFAULT_LLAMACPP_BACKEND
|
|
75
|
+
)
|
|
58
76
|
|
|
59
|
-
#
|
|
60
|
-
|
|
77
|
+
# Use ctx_size if provided, otherwise use default
|
|
78
|
+
ctx_size = ctx_size if ctx_size is not None else DEFAULT_CTX_SIZE
|
|
61
79
|
|
|
62
80
|
# Start the server
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
81
|
+
server = Server(
|
|
82
|
+
port=port,
|
|
83
|
+
host=host,
|
|
84
|
+
log_level=log_level,
|
|
85
|
+
ctx_size=ctx_size,
|
|
86
|
+
tray=tray,
|
|
87
|
+
llamacpp_backend=llamacpp_backend,
|
|
88
|
+
)
|
|
69
89
|
if not use_thread:
|
|
70
|
-
server.run(
|
|
71
|
-
port=port,
|
|
72
|
-
**serve_kwargs,
|
|
73
|
-
)
|
|
90
|
+
server.run()
|
|
74
91
|
else:
|
|
75
92
|
from threading import Thread
|
|
76
93
|
import time
|
|
@@ -78,8 +95,6 @@ def serve(
|
|
|
78
95
|
# Start a background thread to run the server
|
|
79
96
|
server_thread = Thread(
|
|
80
97
|
target=server.run,
|
|
81
|
-
args=(port,),
|
|
82
|
-
kwargs=serve_kwargs,
|
|
83
98
|
daemon=True,
|
|
84
99
|
)
|
|
85
100
|
server_thread.start()
|
|
@@ -243,7 +258,15 @@ def delete(model_names: List[str]):
|
|
|
243
258
|
ModelManager().delete_model(model_name)
|
|
244
259
|
|
|
245
260
|
|
|
246
|
-
def run(model_name: str):
|
|
261
|
+
def run(
|
|
262
|
+
model_name: str,
|
|
263
|
+
port: int = None,
|
|
264
|
+
host: str = "localhost",
|
|
265
|
+
log_level: str = None,
|
|
266
|
+
tray: bool = False,
|
|
267
|
+
llamacpp_backend: str = None,
|
|
268
|
+
ctx_size: int = None,
|
|
269
|
+
):
|
|
247
270
|
"""
|
|
248
271
|
Start the server if not running and open the webapp with the specified model
|
|
249
272
|
"""
|
|
@@ -254,7 +277,17 @@ def run(model_name: str):
|
|
|
254
277
|
_, port = get_server_info()
|
|
255
278
|
server_previously_running = port is not None
|
|
256
279
|
if not server_previously_running:
|
|
257
|
-
port, server_thread = serve(
|
|
280
|
+
port, server_thread = serve(
|
|
281
|
+
port=port,
|
|
282
|
+
host=host,
|
|
283
|
+
log_level=log_level,
|
|
284
|
+
tray=tray,
|
|
285
|
+
use_thread=True,
|
|
286
|
+
llamacpp_backend=llamacpp_backend,
|
|
287
|
+
ctx_size=ctx_size,
|
|
288
|
+
)
|
|
289
|
+
else:
|
|
290
|
+
port = running_port
|
|
258
291
|
|
|
259
292
|
# Pull model
|
|
260
293
|
pull([model_name])
|
|
@@ -263,7 +296,7 @@ def run(model_name: str):
|
|
|
263
296
|
load(model_name, port)
|
|
264
297
|
|
|
265
298
|
# Open the webapp with the specified model
|
|
266
|
-
url = f"http://
|
|
299
|
+
url = f"http://{host}:{port}/?model={model_name}#llm-chat"
|
|
267
300
|
print(f"You can now chat with {model_name} at {url}")
|
|
268
301
|
webbrowser.open(url)
|
|
269
302
|
|
|
@@ -412,6 +445,56 @@ def list_models():
|
|
|
412
445
|
print(tabulate(table_data, headers=headers, tablefmt="simple"))
|
|
413
446
|
|
|
414
447
|
|
|
448
|
+
def developer_entrypoint():
|
|
449
|
+
"""
|
|
450
|
+
Developer entry point that starts the server with debug logging
|
|
451
|
+
Equivalent to running: lemonade-server-dev serve --log-level debug [additional args]
|
|
452
|
+
|
|
453
|
+
This function automatically prepends "serve --log-level debug" to any arguments
|
|
454
|
+
passed to the lsdev command.
|
|
455
|
+
"""
|
|
456
|
+
# Save original sys.argv
|
|
457
|
+
original_argv = sys.argv.copy()
|
|
458
|
+
|
|
459
|
+
try:
|
|
460
|
+
# Take any additional arguments passed to lsdev and append them
|
|
461
|
+
# after "serve --log-level debug"
|
|
462
|
+
additional_args = sys.argv[1:] if len(sys.argv) > 1 else []
|
|
463
|
+
|
|
464
|
+
# Set sys.argv to simulate "serve --log-level debug" + additional args
|
|
465
|
+
sys.argv = [sys.argv[0], "serve", "--log-level", "debug"] + additional_args
|
|
466
|
+
main()
|
|
467
|
+
finally:
|
|
468
|
+
# Restore original sys.argv
|
|
469
|
+
sys.argv = original_argv
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def _add_server_arguments(parser):
|
|
473
|
+
"""Add common server arguments to a parser"""
|
|
474
|
+
parser.add_argument("--port", type=int, help="Port number to serve on")
|
|
475
|
+
parser.add_argument(
|
|
476
|
+
"--host", type=str, help="Address to bind for connections", default="localhost"
|
|
477
|
+
)
|
|
478
|
+
parser.add_argument(
|
|
479
|
+
"--log-level",
|
|
480
|
+
type=str,
|
|
481
|
+
help="Log level for the server",
|
|
482
|
+
choices=["critical", "error", "warning", "info", "debug", "trace"],
|
|
483
|
+
default="info",
|
|
484
|
+
)
|
|
485
|
+
parser.add_argument(
|
|
486
|
+
"--llamacpp",
|
|
487
|
+
type=str,
|
|
488
|
+
help=f"LlamaCpp backend to use",
|
|
489
|
+
choices=["vulkan", "rocm"],
|
|
490
|
+
)
|
|
491
|
+
parser.add_argument(
|
|
492
|
+
"--ctx-size",
|
|
493
|
+
type=int,
|
|
494
|
+
help="Context size for the model (default: 4096 for llamacpp, truncates prompts for other recipes)",
|
|
495
|
+
)
|
|
496
|
+
|
|
497
|
+
|
|
415
498
|
def main():
|
|
416
499
|
parser = argparse.ArgumentParser(
|
|
417
500
|
description="Serve LLMs on CPU, GPU, and NPU.",
|
|
@@ -430,14 +513,7 @@ def main():
|
|
|
430
513
|
|
|
431
514
|
# Serve command
|
|
432
515
|
serve_parser = subparsers.add_parser("serve", help="Start server")
|
|
433
|
-
serve_parser
|
|
434
|
-
serve_parser.add_argument(
|
|
435
|
-
"--log-level",
|
|
436
|
-
type=str,
|
|
437
|
-
help="Log level for the server",
|
|
438
|
-
choices=["critical", "error", "warning", "info", "debug", "trace"],
|
|
439
|
-
default="info",
|
|
440
|
-
)
|
|
516
|
+
_add_server_arguments(serve_parser)
|
|
441
517
|
if os.name == "nt":
|
|
442
518
|
serve_parser.add_argument(
|
|
443
519
|
"--no-tray",
|
|
@@ -513,6 +589,7 @@ def main():
|
|
|
513
589
|
"model",
|
|
514
590
|
help="Lemonade Server model name to run",
|
|
515
591
|
)
|
|
592
|
+
_add_server_arguments(run_parser)
|
|
516
593
|
|
|
517
594
|
args = parser.parse_args()
|
|
518
595
|
|
|
@@ -533,8 +610,11 @@ def main():
|
|
|
533
610
|
sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
|
|
534
611
|
serve(
|
|
535
612
|
port=args.port,
|
|
613
|
+
host=args.host,
|
|
536
614
|
log_level=args.log_level,
|
|
537
615
|
tray=not args.no_tray,
|
|
616
|
+
llamacpp_backend=args.llamacpp,
|
|
617
|
+
ctx_size=args.ctx_size,
|
|
538
618
|
)
|
|
539
619
|
elif args.command == "status":
|
|
540
620
|
status()
|
|
@@ -553,7 +633,15 @@ def main():
|
|
|
553
633
|
elif args.command == "stop":
|
|
554
634
|
stop()
|
|
555
635
|
elif args.command == "run":
|
|
556
|
-
run(
|
|
636
|
+
run(
|
|
637
|
+
args.model,
|
|
638
|
+
port=args.port,
|
|
639
|
+
host=args.host,
|
|
640
|
+
log_level=args.log_level,
|
|
641
|
+
tray=not args.no_tray,
|
|
642
|
+
llamacpp_backend=args.llamacpp,
|
|
643
|
+
ctx_size=args.ctx_size,
|
|
644
|
+
)
|
|
557
645
|
elif args.command == "help" or not args.command:
|
|
558
646
|
parser.print_help()
|
|
559
647
|
|
lemonade_server/model_manager.py
CHANGED
|
@@ -114,6 +114,51 @@
|
|
|
114
114
|
"recipe": "oga-npu",
|
|
115
115
|
"suggested": true
|
|
116
116
|
},
|
|
117
|
+
"DeepSeek-R1-Distill-Llama-8B-NPU": {
|
|
118
|
+
"checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
119
|
+
"recipe": "oga-npu",
|
|
120
|
+
"suggested": true
|
|
121
|
+
},
|
|
122
|
+
"DeepSeek-R1-Distill-Qwen-7B-NPU": {
|
|
123
|
+
"checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
124
|
+
"recipe": "oga-npu",
|
|
125
|
+
"suggested": false
|
|
126
|
+
},
|
|
127
|
+
"DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
|
|
128
|
+
"checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
129
|
+
"recipe": "oga-npu",
|
|
130
|
+
"suggested": false
|
|
131
|
+
},
|
|
132
|
+
"Llama-3.2-3B-Instruct-NPU": {
|
|
133
|
+
"checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
134
|
+
"recipe": "oga-npu",
|
|
135
|
+
"suggested": false
|
|
136
|
+
},
|
|
137
|
+
"Llama-3.2-1B-Instruct-NPU": {
|
|
138
|
+
"checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
139
|
+
"recipe": "oga-npu",
|
|
140
|
+
"suggested": false
|
|
141
|
+
},
|
|
142
|
+
"Mistral-7B-v0.3-Instruct-NPU": {
|
|
143
|
+
"checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
144
|
+
"recipe": "oga-npu",
|
|
145
|
+
"suggested": true
|
|
146
|
+
},
|
|
147
|
+
"Phi-3.5-Mini-Instruct-NPU": {
|
|
148
|
+
"checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
149
|
+
"recipe": "oga-npu",
|
|
150
|
+
"suggested": true
|
|
151
|
+
},
|
|
152
|
+
"ChatGLM-3-6b-Instruct-NPU": {
|
|
153
|
+
"checkpoint": "amd/chatglm3-6b-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
154
|
+
"recipe": "oga-npu",
|
|
155
|
+
"suggested": false
|
|
156
|
+
},
|
|
157
|
+
"AMD-OLMo-1B-Instruct-NPU": {
|
|
158
|
+
"checkpoint": "amd/AMD-OLMo-1B-SFT-DPO-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
159
|
+
"recipe": "oga-npu",
|
|
160
|
+
"suggested": false
|
|
161
|
+
},
|
|
117
162
|
"Llama-3.2-1B-Instruct-DirectML": {
|
|
118
163
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
|
|
119
164
|
"recipe": "oga-igpu",
|
|
@@ -190,7 +235,13 @@
|
|
|
190
235
|
"checkpoint": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Qwen3-30B-A3B-Instruct-2507-Q4_0.gguf",
|
|
191
236
|
"recipe": "llamacpp",
|
|
192
237
|
"suggested": true,
|
|
193
|
-
"labels": ["
|
|
238
|
+
"labels": ["hot"]
|
|
239
|
+
},
|
|
240
|
+
"Qwen3-Coder-30B-A3B-Instruct-GGUF": {
|
|
241
|
+
"checkpoint": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
|
|
242
|
+
"recipe": "llamacpp",
|
|
243
|
+
"suggested": true,
|
|
244
|
+
"labels": ["coding","hot"]
|
|
194
245
|
},
|
|
195
246
|
"Gemma-3-4b-it-GGUF": {
|
|
196
247
|
"checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
|
@@ -213,6 +264,13 @@
|
|
|
213
264
|
"suggested": true,
|
|
214
265
|
"labels": ["vision"]
|
|
215
266
|
},
|
|
267
|
+
"Cogito-v2-llama-109B-MoE-GGUF": {
|
|
268
|
+
"checkpoint": "unsloth/cogito-v2-preview-llama-109B-MoE-GGUF:Q4_K_M",
|
|
269
|
+
"mmproj": "mmproj-F16.gguf",
|
|
270
|
+
"recipe": "llamacpp",
|
|
271
|
+
"suggested": true,
|
|
272
|
+
"labels": ["vision","hot"]
|
|
273
|
+
},
|
|
216
274
|
"nomic-embed-text-v1-GGUF": {
|
|
217
275
|
"checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
|
|
218
276
|
"recipe": "llamacpp",
|
|
@@ -248,5 +306,17 @@
|
|
|
248
306
|
"recipe": "llamacpp",
|
|
249
307
|
"suggested": true,
|
|
250
308
|
"labels": ["reasoning", "coding"]
|
|
309
|
+
},
|
|
310
|
+
"gpt-oss-120b-GGUF": {
|
|
311
|
+
"checkpoint": "unsloth/gpt-oss-120b-GGUF:Q4_K_M",
|
|
312
|
+
"recipe": "llamacpp",
|
|
313
|
+
"suggested": true,
|
|
314
|
+
"labels": ["hot", "reasoning"]
|
|
315
|
+
},
|
|
316
|
+
"gpt-oss-20b-GGUF": {
|
|
317
|
+
"checkpoint": "unsloth/gpt-oss-20b-GGUF:Q4_K_M",
|
|
318
|
+
"recipe": "llamacpp",
|
|
319
|
+
"suggested": true,
|
|
320
|
+
"labels": ["hot", "reasoning"]
|
|
251
321
|
}
|
|
252
322
|
}
|
|
File without changes
|