lemonade-sdk 8.1.0__py3-none-any.whl → 8.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic; see the package's registry page for more details.

@@ -263,8 +263,10 @@ class LemonadeTray(SystemTray):
263
263
  self.server.uvicorn_server.should_exit = True
264
264
  self.server_thread.join(timeout=2)
265
265
 
266
- # Update the port
266
+ # Update the port in both the tray and the server instance
267
267
  self.port = new_port
268
+ if self.server:
269
+ self.server.port = new_port
268
270
 
269
271
  # Restart the server
270
272
  self.server_thread = threading.Thread(target=self.start_server, daemon=True)
@@ -425,7 +427,7 @@ class LemonadeTray(SystemTray):
425
427
  Start the uvicorn server.
426
428
  """
427
429
  self.server = self.server_factory()
428
- self.server.uvicorn_server = self.server.run_in_thread(port=self.port)
430
+ self.server.uvicorn_server = self.server.run_in_thread()
429
431
  self.server.uvicorn_server.run()
430
432
 
431
433
  def run(self):
@@ -26,7 +26,7 @@ class ServerRunner(threading.Thread):
26
26
  def run(self):
27
27
  try:
28
28
  # Create the server instance
29
- self.server = Server()
29
+ self.server = Server(port=self.port, log_level="warning")
30
30
 
31
31
  # Configure the server with model/tokenizer
32
32
  self.server.model = self.model
@@ -44,9 +44,7 @@ class ServerRunner(threading.Thread):
44
44
  )
45
45
 
46
46
  # Set up the server for threaded execution
47
- self.uvicorn_server = self.server.run_in_thread(
48
- port=self.port, host=self.host, log_level="warning"
49
- )
47
+ self.uvicorn_server = self.server.run_in_thread(host=self.host)
50
48
 
51
49
  # Set the ready event
52
50
  self.ready_event.set()
lemonade/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "8.1.0"
1
+ __version__ = "8.1.1"
@@ -451,6 +451,12 @@ class Install:
451
451
  choices=["0.6.0"],
452
452
  )
453
453
 
454
+ parser.add_argument(
455
+ "--llamacpp",
456
+ help="Install llama.cpp binaries with specified backend",
457
+ choices=["rocm", "vulkan"],
458
+ )
459
+
454
460
  return parser
455
461
 
456
462
  @staticmethod
@@ -739,18 +745,32 @@ class Install:
739
745
 
740
746
  print(f"\nQuark installed successfully at: {quark_path}")
741
747
 
748
+ @staticmethod
749
+ def _install_llamacpp(backend):
750
+ """
751
+ Install llama.cpp binaries with the specified backend.
752
+
753
+ Args:
754
+ backend: The backend to use ('rocm' or 'vulkan')
755
+ """
756
+
757
+ from lemonade.tools.llamacpp.utils import install_llamacpp
758
+
759
+ install_llamacpp(backend)
760
+
742
761
  def run(
743
762
  self,
744
763
  ryzenai: Optional[str] = None,
745
764
  build_model: Optional[str] = None,
746
765
  quark: Optional[str] = None,
766
+ llamacpp: Optional[str] = None,
747
767
  yes: bool = False,
748
768
  token: Optional[str] = None,
749
769
  ):
750
- if ryzenai is None and quark is None and models is None:
770
+ if ryzenai is None and quark is None and llamacpp is None:
751
771
  raise ValueError(
752
772
  "You must select something to install, "
753
- "for example `--ryzenai`, `--quark`, or `--models`"
773
+ "for example `--ryzenai`, `--quark`, or `--llamacpp`"
754
774
  )
755
775
 
756
776
  if ryzenai is not None:
@@ -759,6 +779,9 @@ class Install:
759
779
  if quark is not None:
760
780
  self._install_quark(quark)
761
781
 
782
+ if llamacpp is not None:
783
+ self._install_llamacpp(llamacpp)
784
+
762
785
 
763
786
  def main():
764
787
  installer = Install()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.1.0
3
+ Version: 8.1.1
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.13
@@ -27,7 +27,8 @@ Requires-Dist: transformers<=4.53.2
27
27
  Requires-Dist: jinja2
28
28
  Requires-Dist: tabulate
29
29
  Requires-Dist: sentencepiece
30
- Requires-Dist: huggingface-hub==0.33.0
30
+ Requires-Dist: huggingface-hub[hf_xet]==0.33.0
31
+ Requires-Dist: python-dotenv
31
32
  Provides-Extra: oga-ryzenai
32
33
  Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
33
34
  Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
@@ -40,6 +41,7 @@ Requires-Dist: accelerate; extra == "dev"
40
41
  Requires-Dist: datasets; extra == "dev"
41
42
  Requires-Dist: pandas>=1.5.3; extra == "dev"
42
43
  Requires-Dist: matplotlib; extra == "dev"
44
+ Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
43
45
  Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
44
46
  Requires-Dist: lm-eval[api]; extra == "dev"
45
47
  Provides-Extra: oga-hybrid
@@ -136,7 +138,9 @@ Dynamic: summary
136
138
  <a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
137
139
  </h3>
138
140
 
139
- Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
141
+ Lemonade helps users run local LLMs with the highest performance by configuring state-of-the-art inference engines for their NPUs and GPUs.
142
+
143
+ Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-your-on-device-ai-revolution/), research teams like [Hazy Research at Stanford](https://www.amd.com/en/developer/resources/technical-articles/2025/minions--on-device-and-cloud-language-model-collaboration-on-ryz.html), and large companies like [AMD](https://www.amd.com/en/developer/resources/technical-articles/unlocking-a-wave-of-llm-apps-on-ryzen-ai-through-lemonade-server.html) use Lemonade to run LLMs.
140
144
 
141
145
  ## Getting Started
142
146
 
@@ -155,7 +159,7 @@ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus
155
159
  </p>
156
160
 
157
161
  > [!TIP]
158
- > Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email lemonade@amd.com.
162
+ > Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or [email](lemonade@amd.com).
159
163
 
160
164
  ## Using the CLI
161
165
 
@@ -177,7 +181,10 @@ To check all models available, use the `list` command:
177
181
  lemonade-server list
178
182
  ```
179
183
 
180
- > Note: If you installed from source, use the `lemonade-server-dev` command instead.
184
+ > **Note**: If you installed from source, use the `lemonade-server-dev` command instead.
185
+
186
+ > **Tip**: You can use `--llamacpp vulkan/rocm` to select a backend when running GGUF models.
187
+
181
188
 
182
189
  ## Model Library
183
190
 
@@ -219,7 +226,7 @@ Lemonade supports the following configurations, while also making it easy to swi
219
226
  <tr>
220
227
  <td><strong>🎮 GPU</strong></td>
221
228
  <td align="center">—</td>
222
- <td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
229
+ <td align="center">Vulkan: All platforms<br>ROCm: Selected AMD platforms*</td>
223
230
  <td align="center">—</td>
224
231
  <td align="center">✅</td>
225
232
  <td align="center">✅</td>
@@ -235,6 +242,38 @@ Lemonade supports the following configurations, while also making it easy to swi
235
242
  </tbody>
236
243
  </table>
237
244
 
245
+ <details>
246
+ <summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
247
+
248
+ <br>
249
+
250
+ <table>
251
+ <thead>
252
+ <tr>
253
+ <th>Architecture</th>
254
+ <th>Platform Support</th>
255
+ <th>GPU Models</th>
256
+ </tr>
257
+ </thead>
258
+ <tbody>
259
+ <tr>
260
+ <td><b>gfx1151</b> (STX Halo)</td>
261
+ <td>Windows, Ubuntu</td>
262
+ <td>Ryzen AI MAX+ Pro 395</td>
263
+ </tr>
264
+ <tr>
265
+ <td><b>gfx120X</b> (RDNA4)</td>
266
+ <td>Windows only</td>
267
+ <td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
268
+ </tr>
269
+ <tr>
270
+ <td><b>gfx110X</b> (RDNA3)</td>
271
+ <td>Windows, Ubuntu</td>
272
+ <td>Radeon PRO W7900/W7800/W7700/V710, RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT</td>
273
+ </tr>
274
+ </tbody>
275
+ </table>
276
+ </details>
238
277
 
239
278
  ## Integrate Lemonade Server with Your Application
240
279
 
@@ -4,17 +4,17 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
4
4
  lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=c04nFsyfS0zYoDvZjLO-uEi12TFB5EWSD6fiWiI7OLQ,22
7
+ lemonade/version.py,sha256=8YlEPKK1Cm5T4dPa2BQPpPwVVTzjPLnmqAeNcTb5nOw,22
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
10
  lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
11
11
  lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
12
12
  lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
13
- lemonade/common/inference_engines.py,sha256=lcmir_pATr71TfSBJoIZEi3G9xyxNwi2_xpPvPD8_xI,12932
13
+ lemonade/common/inference_engines.py,sha256=OJQcED9P1ZeQ8d11lDMNeAoaFoUuZlsDcwEZXLbqWRg,12579
14
14
  lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
15
15
  lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
16
16
  lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
17
- lemonade/common/system_info.py,sha256=dOtX8WLHCz1xmURZWnqhDbyNZv_AulrpX_bbI58eHFQ,27084
17
+ lemonade/common/system_info.py,sha256=pn-k3zMQCbt5cu3aHXa4cENgrubOK97gs9PYdGPsFXA,28405
18
18
  lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
19
19
  lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
20
20
  lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
@@ -33,11 +33,11 @@ lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnS
33
33
  lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
34
34
  lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
35
35
  lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
36
- lemonade/tools/llamacpp/load.py,sha256=SKacK2n8LpC4DN4yALyEpV2c8_sgOv2G7t6Nlyu7XXg,6273
37
- lemonade/tools/llamacpp/utils.py,sha256=vHA5kykkdHSsMGmbEA4RyOHr8wFIh1WenfhCvY8WxZs,22445
36
+ lemonade/tools/llamacpp/load.py,sha256=DFCvQN548Ch9H8U_rHOiYviinzw6vixb5-V7xLj7XE4,6499
37
+ lemonade/tools/llamacpp/utils.py,sha256=CTWnzbEYGPSbOizF26yCnyNrHDY19pLusU-YyND992s,29070
38
38
  lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
39
  lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
40
- lemonade/tools/oga/load.py,sha256=O82ezF7Jhgz3CJrxDWZYqLHyD_0NS1nsvfMWDaaUI4I,33728
40
+ lemonade/tools/oga/load.py,sha256=6Pf_QrHpIXDbfpTwFNRj4RmWTxI-RImhYuqRvmTVgmY,33722
41
41
  lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
42
42
  lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
43
  lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
@@ -46,27 +46,27 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
46
46
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
47
47
  lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
48
48
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- lemonade/tools/server/llamacpp.py,sha256=OP0j74QcowEu3zFEcrKIsBbGDOFemBXS5F5DC6oQHaI,18853
50
- lemonade/tools/server/serve.py,sha256=0-NprfsU-YrX8Qsf1atEi6wPJWemrPjHKEBHV69SwCQ,57046
49
+ lemonade/tools/server/llamacpp.py,sha256=KZO4npzefvbaPvlZbpCYsdW0tMSfmmupT8gaK9y65I8,17962
50
+ lemonade/tools/server/serve.py,sha256=PAAGowj2Z5AQIW3G1l52taNyf_0U4kRFR3G735M4DsU,55513
51
51
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
52
- lemonade/tools/server/tray.py,sha256=yoGCM8j_2KzPqo-AlYiauWd8QR56yp6jW6HZ9921Ydg,17525
52
+ lemonade/tools/server/tray.py,sha256=qlQKBkQwG9W2v9GTyycvFc12_jly6vPU1uEkrIFBGTs,17624
53
53
  lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
54
54
  lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
55
- lemonade/tools/server/static/styles.css,sha256=8wQ5Cg4rbEh03kC8t7ALE7dB20GiD0Pfu5BAxh9hECU,26429
56
- lemonade/tools/server/static/webapp.html,sha256=KZm1ZFIhQzLT2Y2wy3hFsQxcOxFzv-blaeLzc1ODhb8,36396
55
+ lemonade/tools/server/static/styles.css,sha256=M_JrH_vML65MWun-C8XCvLOFw35qZURSa77Fk4fVngQ,30029
56
+ lemonade/tools/server/static/webapp.html,sha256=oU6FZHGQCq-SoT6VkWObQvYzzNS0ser5Fmqx2j_5jCI,54380
57
57
  lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
58
58
  lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
59
- lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
59
+ lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
60
60
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
61
- lemonade_install/install.py,sha256=TBX-VwEHcPo4WX0K_12pKKINnIK3o4SUo3L5XjkqEtw,27669
62
- lemonade_sdk-8.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
63
- lemonade_sdk-8.1.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
64
- lemonade_server/cli.py,sha256=6QJ5fxNLuVUbuHauA5JHXf0H5dqJ5E4GNTo4YoMOJtg,16049
61
+ lemonade_install/install.py,sha256=Zl_JtEIhbqZZTvxcqtq895IomEN-JNxp9xOZEtahMHQ,28289
62
+ lemonade_sdk-8.1.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
63
+ lemonade_sdk-8.1.1.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
64
+ lemonade_server/cli.py,sha256=CFfhrRgZNJCd0rDRBF3TeS3dMJgwlKGtvT5_kbsWaXk,17316
65
65
  lemonade_server/model_manager.py,sha256=O3fIX52AqU0z10WzPmNEA3lQ_KjOqNq_G-SxjwIgEio,10781
66
66
  lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
67
- lemonade_server/server_models.json,sha256=gitKHj_VHANxjtcXeE5zFpukVO0HyEfKhu3ZaZsj2xo,8867
68
- lemonade_sdk-8.1.0.dist-info/METADATA,sha256=c3JxCUYw5ujhGSb3FX3mG6UmgG5BLqik8a5j4oe8n7o,15712
69
- lemonade_sdk-8.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
70
- lemonade_sdk-8.1.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
71
- lemonade_sdk-8.1.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
72
- lemonade_sdk-8.1.0.dist-info/RECORD,,
67
+ lemonade_server/server_models.json,sha256=iag_dG9S1tkHZUhkJmGAfiUJkgEazdQSv7stC1fVAsQ,9741
68
+ lemonade_sdk-8.1.1.dist-info/METADATA,sha256=XT9cwNUAkhwQ6kad6l7t2nj7m8S0t-9GvaFLOMxLCyE,17065
69
+ lemonade_sdk-8.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
70
+ lemonade_sdk-8.1.1.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
71
+ lemonade_sdk-8.1.1.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
72
+ lemonade_sdk-8.1.1.dist-info/RECORD,,
lemonade_server/cli.py CHANGED
@@ -39,11 +39,19 @@ class ModelNotAvailableError(Exception):
39
39
  """
40
40
 
41
41
 
42
+ class ModelLoadError(Exception):
43
+ """
44
+ The model failed to load on the server
45
+ """
46
+
47
+
42
48
  def serve(
43
49
  port: int = None,
44
50
  log_level: str = None,
45
51
  tray: bool = False,
46
52
  use_thread: bool = False,
53
+ llamacpp_backend: str = None,
54
+ ctx_size: int = None,
47
55
  ):
48
56
  """
49
57
  Execute the serve command
@@ -51,26 +59,33 @@ def serve(
51
59
 
52
60
  # Otherwise, start the server
53
61
  print("Starting Lemonade Server...")
54
- from lemonade.tools.server.serve import Server, DEFAULT_PORT, DEFAULT_LOG_LEVEL
62
+ from lemonade.tools.server.serve import (
63
+ Server,
64
+ DEFAULT_PORT,
65
+ DEFAULT_LOG_LEVEL,
66
+ DEFAULT_LLAMACPP_BACKEND,
67
+ DEFAULT_CTX_SIZE,
68
+ )
55
69
 
56
70
  port = port if port is not None else DEFAULT_PORT
57
71
  log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
72
+ llamacpp_backend = (
73
+ llamacpp_backend if llamacpp_backend is not None else DEFAULT_LLAMACPP_BACKEND
74
+ )
58
75
 
59
- # Hidden environment variable to enable input truncation (experimental feature)
60
- truncate_inputs = os.environ.get("LEMONADE_TRUNCATE_INPUTS", None)
76
+ # Use ctx_size if provided, otherwise use default
77
+ ctx_size = ctx_size if ctx_size is not None else DEFAULT_CTX_SIZE
61
78
 
62
79
  # Start the server
63
- serve_kwargs = {
64
- "log_level": log_level,
65
- "truncate_inputs": truncate_inputs,
66
- "tray": tray,
67
- }
68
- server = Server()
80
+ server = Server(
81
+ port=port,
82
+ log_level=log_level,
83
+ ctx_size=ctx_size,
84
+ tray=tray,
85
+ llamacpp_backend=llamacpp_backend,
86
+ )
69
87
  if not use_thread:
70
- server.run(
71
- port=port,
72
- **serve_kwargs,
73
- )
88
+ server.run()
74
89
  else:
75
90
  from threading import Thread
76
91
  import time
@@ -78,8 +93,6 @@ def serve(
78
93
  # Start a background thread to run the server
79
94
  server_thread = Thread(
80
95
  target=server.run,
81
- args=(port,),
82
- kwargs=serve_kwargs,
83
96
  daemon=True,
84
97
  )
85
98
  server_thread.start()
@@ -243,7 +256,13 @@ def delete(model_names: List[str]):
243
256
  ModelManager().delete_model(model_name)
244
257
 
245
258
 
246
- def run(model_name: str):
259
+ def run(
260
+ model_name: str,
261
+ port: int = None,
262
+ log_level: str = None,
263
+ llamacpp_backend: str = None,
264
+ ctx_size: int = None,
265
+ ):
247
266
  """
248
267
  Start the server if not running and open the webapp with the specified model
249
268
  """
@@ -254,7 +273,16 @@ def run(model_name: str):
254
273
  _, port = get_server_info()
255
274
  server_previously_running = port is not None
256
275
  if not server_previously_running:
257
- port, server_thread = serve(use_thread=True, tray=True, log_level="info")
276
+ port, server_thread = serve(
277
+ port=port,
278
+ log_level=log_level,
279
+ tray=True,
280
+ use_thread=True,
281
+ llamacpp_backend=llamacpp_backend,
282
+ ctx_size=ctx_size,
283
+ )
284
+ else:
285
+ port = running_port
258
286
 
259
287
  # Pull model
260
288
  pull([model_name])
@@ -412,6 +440,29 @@ def list_models():
412
440
  print(tabulate(table_data, headers=headers, tablefmt="simple"))
413
441
 
414
442
 
443
+ def _add_server_arguments(parser):
444
+ """Add common server arguments to a parser"""
445
+ parser.add_argument("--port", type=int, help="Port number to serve on")
446
+ parser.add_argument(
447
+ "--log-level",
448
+ type=str,
449
+ help="Log level for the server",
450
+ choices=["critical", "error", "warning", "info", "debug", "trace"],
451
+ default="info",
452
+ )
453
+ parser.add_argument(
454
+ "--llamacpp",
455
+ type=str,
456
+ help=f"LlamaCpp backend to use",
457
+ choices=["vulkan", "rocm"],
458
+ )
459
+ parser.add_argument(
460
+ "--ctx-size",
461
+ type=int,
462
+ help="Context size for the model (default: 4096 for llamacpp, truncates prompts for other recipes)",
463
+ )
464
+
465
+
415
466
  def main():
416
467
  parser = argparse.ArgumentParser(
417
468
  description="Serve LLMs on CPU, GPU, and NPU.",
@@ -430,14 +481,7 @@ def main():
430
481
 
431
482
  # Serve command
432
483
  serve_parser = subparsers.add_parser("serve", help="Start server")
433
- serve_parser.add_argument("--port", type=int, help="Port number to serve on")
434
- serve_parser.add_argument(
435
- "--log-level",
436
- type=str,
437
- help="Log level for the server",
438
- choices=["critical", "error", "warning", "info", "debug", "trace"],
439
- default="info",
440
- )
484
+ _add_server_arguments(serve_parser)
441
485
  if os.name == "nt":
442
486
  serve_parser.add_argument(
443
487
  "--no-tray",
@@ -513,6 +557,7 @@ def main():
513
557
  "model",
514
558
  help="Lemonade Server model name to run",
515
559
  )
560
+ _add_server_arguments(run_parser)
516
561
 
517
562
  args = parser.parse_args()
518
563
 
@@ -535,6 +580,8 @@ def main():
535
580
  port=args.port,
536
581
  log_level=args.log_level,
537
582
  tray=not args.no_tray,
583
+ llamacpp_backend=args.llamacpp,
584
+ ctx_size=args.ctx_size,
538
585
  )
539
586
  elif args.command == "status":
540
587
  status()
@@ -553,7 +600,13 @@ def main():
553
600
  elif args.command == "stop":
554
601
  stop()
555
602
  elif args.command == "run":
556
- run(args.model)
603
+ run(
604
+ args.model,
605
+ port=args.port,
606
+ log_level=args.log_level,
607
+ llamacpp_backend=args.llamacpp,
608
+ ctx_size=args.ctx_size,
609
+ )
557
610
  elif args.command == "help" or not args.command:
558
611
  parser.print_help()
559
612
 
@@ -190,7 +190,13 @@
190
190
  "checkpoint": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Qwen3-30B-A3B-Instruct-2507-Q4_0.gguf",
191
191
  "recipe": "llamacpp",
192
192
  "suggested": true,
193
- "labels": ["coding"]
193
+ "labels": ["hot"]
194
+ },
195
+ "Qwen3-Coder-30B-A3B-Instruct-GGUF": {
196
+ "checkpoint": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
197
+ "recipe": "llamacpp",
198
+ "suggested": true,
199
+ "labels": ["coding","hot"]
194
200
  },
195
201
  "Gemma-3-4b-it-GGUF": {
196
202
  "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
@@ -213,6 +219,13 @@
213
219
  "suggested": true,
214
220
  "labels": ["vision"]
215
221
  },
222
+ "Cogito-v2-llama-109B-MoE-GGUF": {
223
+ "checkpoint": "unsloth/cogito-v2-preview-llama-109B-MoE-GGUF:Q4_K_M",
224
+ "mmproj": "mmproj-F16.gguf",
225
+ "recipe": "llamacpp",
226
+ "suggested": true,
227
+ "labels": ["vision","hot"]
228
+ },
216
229
  "nomic-embed-text-v1-GGUF": {
217
230
  "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
218
231
  "recipe": "llamacpp",
@@ -248,5 +261,17 @@
248
261
  "recipe": "llamacpp",
249
262
  "suggested": true,
250
263
  "labels": ["reasoning", "coding"]
264
+ },
265
+ "gpt-oss-120b-GGUF": {
266
+ "checkpoint": "unsloth/gpt-oss-120b-GGUF:Q4_K_M",
267
+ "recipe": "llamacpp",
268
+ "suggested": true,
269
+ "labels": ["hot", "reasoning"]
270
+ },
271
+ "gpt-oss-20b-GGUF": {
272
+ "checkpoint": "unsloth/gpt-oss-20b-GGUF:Q4_K_M",
273
+ "recipe": "llamacpp",
274
+ "suggested": true,
275
+ "labels": ["hot", "reasoning"]
251
276
  }
252
277
  }