PyPI - lemonade-sdk - Versions diffs - 7.0.3__py3-none-any.whl → 8.0.0__py3-none-any.whl - Mend

lemonade-sdk 7.0.3__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (55) hide show

lemonade/api.py +3 -3
lemonade/cli.py +11 -17
lemonade/common/build.py +0 -47
lemonade/common/network.py +50 -0
lemonade/common/status.py +2 -21
lemonade/common/system_info.py +19 -4
lemonade/profilers/memory_tracker.py +3 -1
lemonade/tools/accuracy.py +3 -4
lemonade/tools/adapter.py +1 -2
lemonade/tools/{huggingface_bench.py → huggingface/bench.py} +2 -87
lemonade/tools/huggingface/load.py +235 -0
lemonade/tools/{huggingface_load.py → huggingface/utils.py} +87 -255
lemonade/tools/humaneval.py +9 -3
lemonade/tools/{llamacpp_bench.py → llamacpp/bench.py} +1 -1
lemonade/tools/{llamacpp.py → llamacpp/load.py} +18 -2
lemonade/tools/mmlu.py +7 -15
lemonade/tools/{ort_genai/oga.py → oga/load.py} +31 -422
lemonade/tools/oga/utils.py +423 -0
lemonade/tools/perplexity.py +4 -3
lemonade/tools/prompt.py +2 -1
lemonade/tools/quark/quark_load.py +2 -1
lemonade/tools/quark/quark_quantize.py +5 -5
lemonade/tools/report/table.py +3 -3
lemonade/tools/server/llamacpp.py +159 -34
lemonade/tools/server/serve.py +169 -147
lemonade/tools/server/static/favicon.ico +0 -0
lemonade/tools/server/static/styles.css +568 -0
lemonade/tools/server/static/webapp.html +439 -0
lemonade/tools/server/tray.py +458 -0
lemonade/tools/server/{port_utils.py → utils/port.py} +22 -3
lemonade/tools/server/utils/system_tray.py +395 -0
lemonade/tools/server/{instructions.py → webapp.py} +4 -10
lemonade/version.py +1 -1
lemonade_install/install.py +46 -28
{lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/METADATA +84 -22
lemonade_sdk-8.0.0.dist-info/RECORD +70 -0
lemonade_server/cli.py +182 -27
lemonade_server/model_manager.py +192 -20
lemonade_server/pydantic_models.py +9 -4
lemonade_server/server_models.json +5 -3
lemonade/common/analyze_model.py +0 -26
lemonade/common/labels.py +0 -61
lemonade/common/onnx_helpers.py +0 -176
lemonade/common/plugins.py +0 -10
lemonade/common/tensor_helpers.py +0 -83
lemonade/tools/server/static/instructions.html +0 -262
lemonade_sdk-7.0.3.dist-info/RECORD +0 -69
/lemonade/tools/{ort_genai → oga}/__init__.py +0 -0
/lemonade/tools/{ort_genai/oga_bench.py → oga/bench.py} +0 -0
/lemonade/tools/server/{thread_utils.py → utils/thread.py} +0 -0
{lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/WHEEL +0 -0
{lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/entry_points.txt +0 -0
{lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/licenses/LICENSE +0 -0
{lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/licenses/NOTICE.md +0 -0
{lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/top_level.txt +0 -0

{lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 7.0.3
+Version: 8.0.0
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.12
@@ -9,36 +9,42 @@ License-File: LICENSE
 License-File: NOTICE.md
 Requires-Dist: invoke>=2.0.0
 Requires-Dist: onnx<1.18.0,>=1.11.0
-Requires-Dist: torch>=1.12.1
 Requires-Dist: pyyaml>=5.4
 Requires-Dist: typeguard>=2.3.13
 Requires-Dist: packaging>=20.9
 Requires-Dist: numpy<2.0.0
-Requires-Dist: pandas>=1.5.3
 Requires-Dist: fasteners
 Requires-Dist: GitPython>=3.1.40
 Requires-Dist: psutil>=6.1.1
 Requires-Dist: wmi
+Requires-Dist: py-cpuinfo
 Requires-Dist: pytz
 Requires-Dist: zstandard
-Requires-Dist: matplotlib
+Requires-Dist: fastapi
+Requires-Dist: uvicorn[standard]
+Requires-Dist: openai>=1.81.0
+Requires-Dist: transformers<=4.51.3
+Requires-Dist: jinja2
 Requires-Dist: tabulate
 Requires-Dist: huggingface-hub==0.30.2
+Provides-Extra: oga-hybrid-minimal
+Requires-Dist: onnx==1.16.1; extra == "oga-hybrid-minimal"
+Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
+Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
+Provides-Extra: oga-cpu-minimal
+Requires-Dist: onnxruntime-genai==0.6.0; extra == "oga-cpu-minimal"
+Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
 Provides-Extra: llm
 Requires-Dist: torch>=2.6.0; extra == "llm"
-Requires-Dist: transformers<=4.51.3; extra == "llm"
 Requires-Dist: accelerate; extra == "llm"
-Requires-Dist: py-cpuinfo; extra == "llm"
 Requires-Dist: sentencepiece; extra == "llm"
 Requires-Dist: datasets; extra == "llm"
+Requires-Dist: pandas>=1.5.3; extra == "llm"
+Requires-Dist: matplotlib; extra == "llm"
 Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
-Requires-Dist: fastapi; extra == "llm"
-Requires-Dist: uvicorn[standard]; extra == "llm"
-Requires-Dist: openai>=1.81.0; extra == "llm"
 Requires-Dist: lm-eval[api]; extra == "llm"
 Provides-Extra: llm-oga-cpu
-Requires-Dist: onnxruntime-genai==0.6.0; extra == "llm-oga-cpu"
-Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "llm-oga-cpu"
+Requires-Dist: lemonade-sdk[oga-cpu-minimal]; extra == "llm-oga-cpu"
 Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
 Provides-Extra: llm-oga-igpu
 Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
@@ -57,9 +63,7 @@ Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
 Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
 Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-npu"
 Provides-Extra: llm-oga-hybrid
-Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
-Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
-Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
+Requires-Dist: lemonade-sdk[oga-hybrid-minimal]; extra == "llm-oga-hybrid"
 Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
 Provides-Extra: llm-oga-unified
 Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
@@ -78,24 +82,82 @@ Dynamic: summary
 ## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
-The [Lemonade SDK](./docs/README.md) is designed to make it easy to serve, benchmark, and deploy large language models (LLMs) on a variety of hardware platforms, including CPU, GPU, and NPU.
+The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
 <div align="center">
   <img src="https://download.amd.com/images/lemonade_640x480_1.gif" alt="Lemonade Demo" title="Lemonade in Action">
 </div>
+### Features
 The [Lemonade SDK](./docs/README.md) is comprised of the following:
-- 🌐 **Lemonade Server**: A server interface that uses the standard Open AI API, allowing applications to integrate with local LLMs.
-- 🐍 **Lemonade Python API**: Offers High-Level API for easy integration of Lemonade LLMs into Python applications and Low-Level API for custom experiments.
-- 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs, frameworks (PyTorch, ONNX, GGUF), and measurement tools to run experiments. The available tools are:
-  - Prompting an LLM.
-  - Measuring the accuracy of an LLM using a variety of tests.
-  - Benchmarking an LLM to get the time-to-first-token and tokens per second.
-  - Profiling the memory usage of an LLM.
+- 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
+- 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
+- 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
+  - Prompting with templates.
+  - Measuring accuracy with a variety of tests.
+  - Benchmarking to get the time-to-first-token and tokens per second.
+  - Profiling the memory utilization.
 ### [Click here to get started with Lemonade.](./docs/README.md)
+### Supported Configurations
+Maximum LLM performance requires the right hardware accelerator with the right inference engine for your scenario. Lemonade supports the following configurations, while also making it easy to switch between them at runtime.
+<table border="1" cellpadding="6" cellspacing="0">
+  <thead>
+    <tr>
+      <th rowspan="2">Hardware</th>
+      <th colspan="3" align="center">🛠️ Engine Support</th>
+      <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
+    </tr>
+    <tr>
+      <th align="center">OGA</th>
+      <th align="center">llamacpp</th>
+      <th align="center">HF</th>
+      <th align="center">Windows</th>
+      <th align="center">Linux</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>🧠 CPU</td>
+      <td align="center">All platforms</td>
+      <td align="center">All platforms</td>
+      <td align="center">All platforms</td>
+      <td align="center">✅</td>
+      <td align="center">✅</td>
+    </tr>
+    <tr>
+      <td>🎮 GPU</td>
+      <td align="center">—</td>
+      <td align="center">Vulkan: All platforms<br><small>Focus: Radeon™ 7000/9000</small></td>
+      <td align="center">—</td>
+      <td align="center">✅</td>
+      <td align="center">✅</td>
+    </tr>
+    <tr>
+      <td>🤖 NPU</td>
+      <td align="center">AMD Ryzen™ AI 300 series</td>
+      <td align="center">—</td>
+      <td align="center">—</td>
+      <td align="center">✅</td>
+      <td align="center">—</td>
+    </tr>
+  </tbody>
+</table>
+#### Inference Engines Overview
+| Engine | Description |
+| :--- | :--- |
+| **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
+| **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
+| **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
 ## Contributing
 We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).

lemonade_sdk-8.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,70 @@
+lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
+lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
+lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
+lemonade/cli.py,sha256=XzptHh6LTl5OdGRnxiLykQ8QBl2rQmhWH5w0KPJVyY4,4359
+lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
+lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
+lemonade/version.py,sha256=SWqJTEDnx2fOon29wQowBCNjEkhyhMbbqVsSu4EpdWI,22
+lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
+lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
+lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
+lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
+lemonade/common/network.py,sha256=EkWTxjJ-UM469nEJdzEl-5UNO7dPfVfFzU2SVMMPaD0,1425
+lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
+lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
+lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD40,12234
+lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
+lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
+lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
+lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
+lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
+lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
+lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
+lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
+lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9532
+lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
+lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
+lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
+lemonade/tools/prompt.py,sha256=AT3p5rCGHEs9ozeGxwWl07iKF-mgLxFOkYLjU2btFHs,8638
+lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
+lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
+lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
+lemonade/tools/huggingface/utils.py,sha256=xybIWOEXHaMuw-nAEu3aITdvZSHcGKgZ9kFS5mIWcEg,13873
+lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPdv3Q,5946
+lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
+lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
+lemonade/tools/oga/load.py,sha256=7Sdf6PFPrqbadPabyJb_uPRUIP09qj21ZYdXz47MqsE,28570
+lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
+lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
+lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
+lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
+lemonade/tools/report/table.py,sha256=di8IZkolt_kaZfWri6GQkhPE1zCELqcrBcG1x1fzWqg,24843
+lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lemonade/tools/server/llamacpp.py,sha256=U2eE9zfwE5sWUnS8A9oSf0Ak4v8dbjnX3fBb76g6uiE,14969
+lemonade/tools/server/serve.py,sha256=2Z3mbK-iVXAGA6jBDgJSwuWMbBRbmN_E0lMN2h-u6Wo,52230
+lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
+lemonade/tools/server/tray.py,sha256=SakwhZKPgo7VtWP4q10SaCcZdxKG95dnNsXdTu9Eei0,16030
+lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
+lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
+lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
+lemonade/tools/server/static/webapp.html,sha256=im7YQkwvbuqrbO-sLhStVqtA6B7HKAn2azZka1KoeJQ,21260
+lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
+lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
+lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
+lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
+lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
+lemonade_sdk-8.0.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lemonade_sdk-8.0.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
+lemonade_server/cli.py,sha256=fm1eORLKElHfzqO5VVicDmn9EbmqIffi1bynqacJeyw,11744
+lemonade_server/model_manager.py,sha256=HqbahDMRv1x8jyQj4pa1rXanlPmcCykt8tlI6WfaxjE,13023
+lemonade_server/pydantic_models.py,sha256=2ALw47C1VWGe2nKWjlEAzP1ggKYsky4xlahUFxQJCMs,2298
+lemonade_server/server_models.json,sha256=wTK_H9XDHLxqMWQJqbBsJwm50PhOR4gURyVj9Jm35PQ,6992
+lemonade_sdk-8.0.0.dist-info/METADATA,sha256=fJV_bzC7VCQjqpHTDkb8G58fvBlbsuqOa_zEJNZW5JU,7940
+lemonade_sdk-8.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lemonade_sdk-8.0.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+lemonade_sdk-8.0.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+lemonade_sdk-8.0.0.dist-info/RECORD,,

lemonade_server/cli.py CHANGED Viewed

@@ -1,9 +1,19 @@
 import argparse
 import sys
 import os
-from typing import Tuple
+from typing import Tuple, Optional
 import psutil
 from typing import List
+import subprocess
+# Error codes for different CLI scenarios
+class ExitCodes:
+    SUCCESS = 0
+    GENERAL_ERROR = 1
+    SERVER_ALREADY_RUNNING = 2
+    TIMEOUT_STOPPING_SERVER = 3
+    ERROR_STOPPING_SERVER = 4
 class PullError(Exception):
@@ -12,9 +22,16 @@ class PullError(Exception):
     """
+class DeleteError(Exception):
+    """
+    The delete command has failed to delete an LLM
+    """
 def serve(
     port: int,
     log_level: str = None,
+    tray: bool = False,
 ):
     """
     Execute the serve command
@@ -29,7 +46,7 @@ def serve(
                 "Please stop the existing server before starting a new instance."
             ),
         )
-        sys.exit(1)
+        sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
     # Otherwise, start the server
     print("Starting Lemonade Server...")
@@ -46,6 +63,7 @@ def serve(
         port=port,
         log_level=log_level,
         truncate_inputs=truncate_inputs,
+        tray=tray,
     )
@@ -63,21 +81,49 @@ def stop():
     # Stop the server
     try:
         process = psutil.Process(running_pid)
+        # Get all child processes (including llama-server)
+        children = process.children(recursive=True)
+        # Terminate the main process first
         process.terminate()
+        # Then terminate all children
+        for child in children:
+            try:
+                child.terminate()
+            except psutil.NoSuchProcess:
+                pass  # Child already terminated
+        # Wait for main process
         process.wait(timeout=10)
+        # Kill any children that didn't terminate gracefully
+        for child in children:
+            try:
+                if child.is_running():
+                    child.kill()
+            except psutil.NoSuchProcess:
+                pass  # Child already terminated
     except psutil.NoSuchProcess:
         # Process already terminated
         pass
     except psutil.TimeoutExpired:
         print("Timed out waiting for Lemonade Server to stop.")
-        sys.exit(1)
+        sys.exit(ExitCodes.TIMEOUT_STOPPING_SERVER)
     except Exception as e:  # pylint: disable=broad-exception-caught
         print(f"Error stopping Lemonade Server: {e}")
-        sys.exit(1)
+        sys.exit(ExitCodes.ERROR_STOPPING_SERVER)
     print("Lemonade Server stopped successfully.")
-def pull(model_names: List[str]):
+def pull(
+    model_names: List[str],
+    checkpoint: Optional[str] = None,
+    recipe: Optional[str] = None,
+    reasoning: bool = False,
+    mmproj: str = "",
+):
     """
     Install an LLM based on its Lemonade Server model name
@@ -95,10 +141,20 @@ def pull(model_names: List[str]):
         base_url = f"http://localhost:{port}/api/v1"
         for model_name in model_names:
+            payload = {"model_name": model_name}
+            if checkpoint and recipe:
+                # Add the parameters for registering a new model
+                payload["checkpoint"] = checkpoint
+                payload["recipe"] = recipe
+                if reasoning:
+                    payload["reasoning"] = reasoning
+                if mmproj:
+                    payload["mmproj"] = mmproj
             # Install the model
-            pull_response = requests.post(
-                f"{base_url}/pull", json={"model_name": model_name}
-            )
+            pull_response = requests.post(f"{base_url}/pull", json=payload)
             if pull_response.status_code != 200:
                 raise PullError(
@@ -110,7 +166,48 @@ def pull(model_names: List[str]):
     else:
         from lemonade_server.model_manager import ModelManager
-        ModelManager().download_models(model_names)
+        ModelManager().download_models(
+            model_names,
+            checkpoint=checkpoint,
+            recipe=recipe,
+            reasoning=reasoning,
+            mmproj=mmproj,
+        )
+def delete(model_names: List[str]):
+    """
+    Delete an LLM based on its Lemonade Server model name
+    If Lemonade Server is running, use the delete endpoint to delete the model
+    so that the Lemonade Server instance is aware of the deletion.
+    Otherwise, use ModelManager to delete the model.
+    """
+    server_running, port = status(verbose=False)
+    if server_running:
+        import requests
+        base_url = f"http://localhost:{port}/api/v1"
+        for model_name in model_names:
+            # Delete the model
+            delete_response = requests.post(
+                f"{base_url}/delete", json={"model_name": model_name}
+            )
+            if delete_response.status_code != 200:
+                raise DeleteError(
+                    f"Failed to delete {model_name}. Check the "
+                    "Lemonade Server log for more information."
+                )
+    else:
+        from lemonade_server.model_manager import ModelManager
+        for model_name in model_names:
+            ModelManager().delete_model(model_name)
 def version():
@@ -147,18 +244,18 @@ def is_lemonade_server(pid):
     """
     try:
         process = psutil.Process(pid)
         while True:
-            if process.name() in [  # Windows
+            process_name = process.name()
+            if process_name in [  # Windows
                 "lemonade-server-dev.exe",
                 "lemonade-server.exe",
-                "lemonade.exe",
-            ] or process.name() in [  # Linux
+            ] or process_name in [  # Linux
                 "lemonade-server-dev",
                 "lemonade-server",
-                "lemonade",
             ]:
                 return True
-            elif "llama-server" in process.name():
+            elif "llama-server" in process_name:
                 return False
             if not process.parent():
                 return False
@@ -174,16 +271,23 @@ def get_server_info() -> Tuple[int | None, int | None]:
     1. Lemonade Server's PID
     2. The port that Lemonade Server is running on
     """
-    # Go over all python processes that have a port open
-    for process in psutil.process_iter(["pid", "name"]):
-        try:
-            connections = process.net_connections()
-            for conn in connections:
-                if conn.status == "LISTEN":
-                    if is_lemonade_server(process.info["pid"]):
-                        return process.info["pid"], conn.laddr.port
-        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
-            continue
+    # Get all network connections and filter for localhost IPv4 listening ports
+    try:
+        connections = psutil.net_connections(kind="tcp4")
+        for conn in connections:
+            if (
+                conn.status == "LISTEN"
+                and conn.laddr
+                and conn.laddr.ip in ["127.0.0.1"]
+                and conn.pid is not None
+            ):
+                if is_lemonade_server(conn.pid):
+                    return conn.pid, conn.laddr.port
+    except Exception:
+        pass
     return None, None
@@ -214,6 +318,12 @@ def main():
         choices=["critical", "error", "warning", "info", "debug", "trace"],
         default="info",
     )
+    if os.name == "nt":
+        serve_parser.add_argument(
+            "--no-tray",
+            action="store_true",
+            help="Do not show a tray icon when the server is running",
+        )
     # Status command
     status_parser = subparsers.add_parser("status", help="Check if server is running")
@@ -235,20 +345,65 @@ def main():
         help="Lemonade Server model name",
         nargs="+",
     )
+    pull_parser.add_argument(
+        "--checkpoint",
+        help="For registering a new model: Hugging Face checkpoint to source the model from",
+    )
+    pull_parser.add_argument(
+        "--recipe",
+        help="For registering a new model: lemonade.api recipe to use with the model",
+    )
+    pull_parser.add_argument(
+        "--reasoning",
+        help="For registering a new model: whether the model is a reasoning model or not",
+        type=bool,
+        default=False,
+    )
+    pull_parser.add_argument(
+        "--mmproj",
+        help="For registering a new multimodal model: full file name of the .mmproj file in the checkpoint",
+    )
+    # Delete command
+    delete_parser = subparsers.add_parser(
+        "delete",
+        help="Delete an LLM",
+        epilog=(
+            "More information: "
+            "https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/server_models.md"
+        ),
+    )
+    delete_parser.add_argument(
+        "model",
+        help="Lemonade Server model name",
+        nargs="+",
+    )
     args = parser.parse_args()
+    if os.name != "nt":
+        args.no_tray = True
     if args.version:
         version()
     elif args.command == "serve":
         serve(
-            args.port,
-            args.log_level,
+            port=args.port,
+            log_level=args.log_level,
+            tray=not args.no_tray,
         )
     elif args.command == "status":
         status()
     elif args.command == "pull":
-        pull(args.model)
+        pull(
+            args.model,
+            checkpoint=args.checkpoint,
+            recipe=args.recipe,
+            reasoning=args.reasoning,
+            mmproj=args.mmproj,
+        )
+    elif args.command == "delete":
+        delete(args.model)
     elif args.command == "stop":
         stop()
     elif args.command == "help" or not args.command:

lemonade-sdk 7.0.3__py3-none-any.whl → 8.0.0__py3-none-any.whl

Potentially problematic release.

lemonade-sdk 7.0.3__py3-none-any.whl → 8.0.0__py3-none-any.whl