PyPI - lemonade-sdk - Versions diffs - 8.0.3__py3-none-any.whl → 8.0.5__py3-none-any.whl - Mend

lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (27) hide show

lemonade/api.py +50 -0
lemonade/common/inference_engines.py +415 -0
lemonade/common/system_info.py +493 -47
lemonade/tools/humaneval.py +1 -1
lemonade/tools/management_tools.py +53 -7
lemonade/tools/mmlu.py +1 -1
lemonade/tools/oga/load.py +1 -1
lemonade/tools/perplexity.py +2 -2
lemonade/tools/quark/quark_load.py +1 -1
lemonade/tools/quark/quark_quantize.py +2 -2
lemonade/tools/server/llamacpp.py +130 -9
lemonade/tools/server/serve.py +102 -0
lemonade/tools/server/static/styles.css +458 -55
lemonade/tools/server/static/webapp.html +322 -35
lemonade/version.py +1 -1
lemonade_sdk-8.0.5.dist-info/METADATA +295 -0
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/RECORD +26 -25
lemonade_server/cli.py +168 -22
lemonade_server/model_manager.py +12 -2
lemonade_server/pydantic_models.py +25 -1
lemonade_server/server_models.json +46 -44
lemonade_sdk-8.0.3.dist-info/METADATA +0 -183
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/WHEEL +0 -0
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/entry_points.txt +0 -0
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/licenses/LICENSE +0 -0
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/licenses/NOTICE.md +0 -0
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/top_level.txt +0 -0

lemonade_sdk-8.0.5.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,295 @@
+Metadata-Version: 2.4
+Name: lemonade-sdk
+Version: 8.0.5
+Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
+Author-email: lemonade@amd.com
+Requires-Python: >=3.10, <3.13
+Description-Content-Type: text/markdown
+License-File: LICENSE
+License-File: NOTICE.md
+Requires-Dist: invoke>=2.0.0
+Requires-Dist: onnx<1.18.0,>=1.11.0
+Requires-Dist: pyyaml>=5.4
+Requires-Dist: typeguard>=2.3.13
+Requires-Dist: packaging>=20.9
+Requires-Dist: numpy<2.0.0
+Requires-Dist: fasteners
+Requires-Dist: GitPython>=3.1.40
+Requires-Dist: psutil>=6.1.1
+Requires-Dist: wmi
+Requires-Dist: py-cpuinfo
+Requires-Dist: pytz
+Requires-Dist: zstandard
+Requires-Dist: fastapi
+Requires-Dist: uvicorn[standard]
+Requires-Dist: openai>=1.81.0
+Requires-Dist: transformers<=4.51.3
+Requires-Dist: jinja2
+Requires-Dist: tabulate
+Requires-Dist: sentencepiece
+Requires-Dist: huggingface-hub==0.33.0
+Provides-Extra: oga-hybrid
+Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
+Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
+Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
+Provides-Extra: oga-cpu
+Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
+Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
+Provides-Extra: dev
+Requires-Dist: torch>=2.6.0; extra == "dev"
+Requires-Dist: accelerate; extra == "dev"
+Requires-Dist: datasets; extra == "dev"
+Requires-Dist: pandas>=1.5.3; extra == "dev"
+Requires-Dist: matplotlib; extra == "dev"
+Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
+Requires-Dist: lm-eval[api]; extra == "dev"
+Provides-Extra: oga-hybrid-minimal
+Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
+Provides-Extra: oga-cpu-minimal
+Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
+Provides-Extra: llm
+Requires-Dist: lemonade-sdk[dev]; extra == "llm"
+Provides-Extra: llm-oga-cpu
+Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
+Provides-Extra: llm-oga-igpu
+Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
+Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
+Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
+Provides-Extra: llm-oga-cuda
+Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
+Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
+Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
+Provides-Extra: llm-oga-npu
+Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
+Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
+Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
+Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
+Provides-Extra: llm-oga-hybrid
+Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
+Provides-Extra: llm-oga-unified
+Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
+Dynamic: author-email
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+## 🍋 Lemonade: Local LLM Serving with GPU and NPU acceleration
+<p align="center">
+  <a href="https://discord.gg/5xXzkMu8Zk">
+    <img src="https://img.shields.io/badge/Discord-7289DA?logo=discord&logoColor=white" alt="Discord" />
+  </a>
+  <a href="https://github.com/lemonade-sdk/lemonade/tree/main/test" title="Check out our tests">
+    <img src="https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg" alt="Lemonade tests" />
+  </a>
+  <a href="docs/README.md#installation" title="Check out our instructions">
+    <img src="https://img.shields.io/badge/Windows-11-0078D6?logo=windows&logoColor=white" alt="Windows 11" />
+  </a>
+  <a href="https://lemonade-server.ai/#linux" title="Ubuntu 24.04 & 25.04 Supported">
+    <img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
+  </a>
+  <a href="docs/README.md#installation" title="Check out our instructions">
+    <img src="https://img.shields.io/badge/Python-3.10%20%7C%203.12-blue?logo=python&logoColor=white" alt="Made with Python" />
+  </a>
+  <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
+    <img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
+  </a>
+  <a href="https://github.com/lemonade-sdk/lemonade/releases/latest" title="Download the latest release">
+    <img src="https://img.shields.io/github/v/release/lemonade-sdk/lemonade?include_prereleases" alt="Latest Release" />
+  </a>
+  <a href="https://tooomm.github.io/github-release-stats/?username=lemonade-sdk&repository=lemonade">
+    <img src="https://img.shields.io/github/downloads/lemonade-sdk/lemonade/total.svg" alt="GitHub downloads" />
+  </a>
+  <a href="https://github.com/lemonade-sdk/lemonade/issues">
+    <img src="https://img.shields.io/github/issues/lemonade-sdk/lemonade" alt="GitHub issues" />
+  </a>
+  <a href="https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE">
+    <img src="https://img.shields.io/badge/License-Apache-yellow.svg" alt="License: Apache" />
+  </a>
+  <a href="https://github.com/psf/black">
+    <img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style: black" />
+  </a>
+  <a href="https://star-history.com/#lemonade-sdk/lemonade">
+    <img src="https://img.shields.io/badge/Star%20History-View-brightgreen" alt="Star History Chart" />
+  </a>
+</p>
+<p align="center">
+  <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/banner.png?raw=true" alt="Lemonade Banner" />
+</p>
+<h3 align="center">
+  <a href="https://lemonade-server.ai">Download</a> |
+  <a href="https://lemonade-server.ai/docs/">Documentation</a> |
+  <a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
+</h3>
+Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
+## Getting Started
+<div align="center">
+| Step 1: Download & Install | Step 2: Launch and Pull Models | Step 3: Start chatting! |
+|:---------------------------:|:-------------------------------:|:------------------------:|
+| <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/install.gif?raw=true" alt="Download & Install" width="245" /> | <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/launch_and_pull.gif?raw=true" alt="Launch and Pull Models" width="245" /> | <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/chat.gif?raw=true" alt="Start chatting!" width="245" /> |
+|Install using a [GUI](https://github.com/lemonade-sdk/lemonade/releases/latest/download/Lemonade_Server_Installer.exe) (Windows only), [pip](https://lemonade-server.ai/install_options.html), or [from source](https://lemonade-server.ai/install_options.html). |Use the [Model Manager](#model-library) to install models|A built-in chat interface is available!|
+</div>
+### Use it with your favorite OpenAI-compatible app!
+<p align="center">
+  <a href="https://lemonade-server.ai/docs/server/apps/open-webui/" title="Open WebUI" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/openwebui.jpg" alt="Open WebUI" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/continue/" title="Continue" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/continue_dev.png" alt="Continue" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/amd/gaia" title="Gaia" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/gaia.ico" alt="Gaia" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/anythingLLM/" title="AnythingLLM" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/anything_llm.png" alt="AnythingLLM" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/ai-dev-gallery/" title="AI Dev Gallery" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_dev_gallery.webp" alt="AI Dev Gallery" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/lm-eval/" title="LM-Eval" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/codeGPT/" title="CodeGPT" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/codegpt.jpg" alt="CodeGPT" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" title="AI Toolkit" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI Toolkit" width="60" /></a>
+</p>
+> [!TIP]
+> Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email lemonade@amd.com.
+## Using the CLI
+To run and chat with Gemma 3:
+```
+lemonade-server run Gemma-3-4b-it-GGUF
+```
+To install models ahead of time, use the `pull` command:
+```
+lemonade-server pull Gemma-3-4b-it-GGUF
+```
+To check all models available, use the `list` command:
+```
+lemonade-server list
+```
+> Note: If you installed from source, use the `lemonade-server-dev` command instead.
+## Model Library
+Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configurations](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
+You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
+<p align="center">
+  <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/model_manager.png?raw=true" alt="Model Manager" width="650" />
+</p>
+## Supported Configurations
+Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about it [here](./docs/README.md#software-and-hardware-overview).
+<table>
+  <thead>
+    <tr>
+      <th rowspan="2">Hardware</th>
+      <th colspan="3" align="center">🛠️ Engine Support</th>
+      <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
+    </tr>
+    <tr>
+      <th align="center">OGA</th>
+      <th align="center">llamacpp</th>
+      <th align="center">HF</th>
+      <th align="center">Windows</th>
+      <th align="center">Linux</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><strong>🧠 CPU</strong></td>
+      <td align="center">All platforms</td>
+      <td align="center">All platforms</td>
+      <td align="center">All platforms</td>
+      <td align="center">✅</td>
+      <td align="center">✅</td>
+    </tr>
+    <tr>
+      <td><strong>🎮 GPU</strong></td>
+      <td align="center">—</td>
+      <td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
+      <td align="center">—</td>
+      <td align="center">✅</td>
+      <td align="center">✅</td>
+    </tr>
+    <tr>
+      <td><strong>🤖 NPU</strong></td>
+      <td align="center">AMD Ryzen™ AI 300 series</td>
+      <td align="center">—</td>
+      <td align="center">—</td>
+      <td align="center">✅</td>
+      <td align="center">—</td>
+    </tr>
+  </tbody>
+</table>
+## Integrate Lemonade Server with Your Application
+You can use any OpenAI-compatible client library by configuring it to use `http://localhost:8000/api/v1` as the base URL. A table of official and popular OpenAI clients for different languages is shown below.
+Feel free to pick and choose your preferred language.
+| Python | C++ | Java | C# | Node.js | Go | Ruby | Rust | PHP |
+|--------|-----|------|----|---------|----|-------|------|-----|
+| [openai-python](https://github.com/openai/openai-python) | [openai-cpp](https://github.com/olrea/openai-cpp) | [openai-java](https://github.com/openai/openai-java) | [openai-dotnet](https://github.com/openai/openai-dotnet) | [openai-node](https://github.com/openai/openai-node) | [go-openai](https://github.com/sashabaranov/go-openai) | [ruby-openai](https://github.com/alexrudall/ruby-openai) | [async-openai](https://github.com/64bit/async-openai) | [openai-php](https://github.com/openai-php/client) |
+### Python Client Example
+```python
+from openai import OpenAI
+# Initialize the client to use Lemonade Server
+client = OpenAI(
+    base_url="http://localhost:8000/api/v1",
+    api_key="lemonade"  # required but unused
+)
+# Create a chat completion
+completion = client.chat.completions.create(
+    model="Llama-3.2-1B-Instruct-Hybrid",  # or any other available model
+    messages=[
+        {"role": "user", "content": "What is the capital of France?"}
+    ]
+)
+# Print the response
+print(completion.choices[0].message.content)
+```
+For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
+## Beyond an LLM Server
+The [Lemonade SDK](./docs/README.md) also includes the following components:
+- 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
+- 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
+## Contributing
+We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
+New contributors can find beginner-friendly issues tagged with "Good First Issue" to get started.
+<a href="https://github.com/lemonade-sdk/lemonade/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22">
+  <img src="https://img.shields.io/badge/🍋Lemonade-Good%20First%20Issue-yellowgreen?colorA=38b000&colorB=cccccc" alt="Good First Issue" />
+</a>
+## Maintainers
+This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), email [lemonade@amd.com](mailto:lemonade@amd.com), or join our [Discord](https://discord.gg/5xXzkMu8Zk).
+## License
+This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
+<!--This file was originally licensed under Apache 2.0. It has been modified.
+Modifications Copyright (c) 2025 AMD-->

{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.5.dist-info}/RECORD RENAMED Viewed

@@ -1,19 +1,20 @@
 lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
-lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
+lemonade/api.py,sha256=kGz8N_9TuN3peFG8fES0odN0bWR9itLNomlR-FC2z8k,5515
 lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
 lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
 lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
 lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
-lemonade/version.py,sha256=GImAlzwPDxsACkYFf5rTrX8QMH23tcqdm6vgjfFYD10,22
+lemonade/version.py,sha256=obOXkQD52zgzH-mM2spS6LQ-gEWkuaiGpNTM_ISH0D8,22
 lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
 lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
 lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
 lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
+lemonade/common/inference_engines.py,sha256=lcmir_pATr71TfSBJoIZEi3G9xyxNwi2_xpPvPD8_xI,12932
 lemonade/common/network.py,sha256=EkWTxjJ-UM469nEJdzEl-5UNO7dPfVfFzU2SVMMPaD0,1425
 lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
 lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
-lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD40,12234
+lemonade/common/system_info.py,sha256=dOtX8WLHCz1xmURZWnqhDbyNZv_AulrpX_bbI58eHFQ,27084
 lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
 lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
 lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
@@ -22,10 +23,10 @@ lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
 lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
 lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
 lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
-lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9532
-lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
-lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
-lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
+lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
+lemonade/tools/management_tools.py,sha256=U8GaJnjdXyQ9sw8UxBQMc7glpaLciaVphASaQS4kJsA,10202
+lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
+lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
 lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
 lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
 lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
@@ -35,36 +36,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
 lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
 lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
-lemonade/tools/oga/load.py,sha256=xSP0DWoGd5zBRozSafj1MMyIQyHJuIRj_vNlCTx8mfs,28309
+lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
 lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
 lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
-lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
+lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
+lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
 lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
 lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
 lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/server/llamacpp.py,sha256=vjFNelm_VyKBBgWmltsAwLI7ncQ9AwVFQD7krZnF42w,16199
-lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
+lemonade/tools/server/llamacpp.py,sha256=e1MYKSJBu-jlOE5GQSBsC9CUPAeqw5wXXxoxBKA5zb8,20038
+lemonade/tools/server/serve.py,sha256=Pp_w4iuRMkpJLF-XrTsBIBrSNBQIOl8PRZC_Cj4URnU,57334
 lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
 lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
 lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
 lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
-lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
-lemonade/tools/server/static/webapp.html,sha256=kPzORaogVRdFQewXyNI_JaH2ZZCTaq5zfMSyzuoFTuA,22414
+lemonade/tools/server/static/styles.css,sha256=jXFPIHPrhRz_CJyRJrYusAECSDTO00sKUu7ajrQgFuA,24655
+lemonade/tools/server/static/webapp.html,sha256=tmwASvULb3d2_NfHEH9rKbEEJl3D7ygXjaCLVYkyWbg,35969
 lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
 lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
 lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
 lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
 lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
-lemonade_sdk-8.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lemonade_sdk-8.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
-lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
-lemonade_server/model_manager.py,sha256=Yvlsl0wipKfryKULH5ASQ9INhLQXPq9dTGQVBXf2_h0,16167
-lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
-lemonade_server/server_models.json,sha256=O5zk94gH_zRq6GSwbqvi2SNwx51eY9uqgAl_kxTi0iM,7271
-lemonade_sdk-8.0.3.dist-info/METADATA,sha256=WesWziLri9jQjZILRENliiJbggTVF8LmXKVIERInVbE,8285
-lemonade_sdk-8.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lemonade_sdk-8.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
-lemonade_sdk-8.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
-lemonade_sdk-8.0.3.dist-info/RECORD,,
+lemonade_sdk-8.0.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lemonade_sdk-8.0.5.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
+lemonade_server/cli.py,sha256=2Un5uLK04fIxlfcTiZ0T_EWbbaq2tYymkUHNFeuvB7g,16041
+lemonade_server/model_manager.py,sha256=0HqLR38uOu_hxRWVYQ_P6YmwaR-jkDuaAqGYo60X8C0,16702
+lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
+lemonade_server/server_models.json,sha256=Y-j9KAvHmfv77welC0rfRao4inLBce6AVySb-oy_uNE,7519
+lemonade_sdk-8.0.5.dist-info/METADATA,sha256=e2w0jPyEnyk-SeLAbYZgeGldq-2CQHm9Hly_mQgZ8uo,15224
+lemonade_sdk-8.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lemonade_sdk-8.0.5.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+lemonade_sdk-8.0.5.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+lemonade_sdk-8.0.5.dist-info/RECORD,,

lemonade_server/cli.py CHANGED Viewed

@@ -27,43 +27,75 @@ class DeleteError(Exception):
     """
+class ServerTimeoutError(Exception):
+    """
+    The server failed to start within the timeout period
+    """
+class ModelNotAvailableError(Exception):
+    """
+    The specified model is not available on the server
+    """
 def serve(
-    port: int,
+    port: int = None,
     log_level: str = None,
     tray: bool = False,
+    use_thread: bool = False,
 ):
     """
     Execute the serve command
     """
-    # Check if Lemonade Server is already running
-    _, running_port = get_server_info()
-    if running_port is not None:
-        print(
-            (
-                f"Lemonade Server is already running on port {running_port}\n"
-                "Please stop the existing server before starting a new instance."
-            ),
-        )
-        sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
     # Otherwise, start the server
     print("Starting Lemonade Server...")
     from lemonade.tools.server.serve import Server, DEFAULT_PORT, DEFAULT_LOG_LEVEL
-    server = Server()
     port = port if port is not None else DEFAULT_PORT
     log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
     # Hidden environment variable to enable input truncation (experimental feature)
     truncate_inputs = "LEMONADE_TRUNCATE_INPUTS" in os.environ
-    server.run(
-        port=port,
-        log_level=log_level,
-        truncate_inputs=truncate_inputs,
-        tray=tray,
-    )
+    # Start the server
+    serve_kwargs = {
+        "log_level": log_level,
+        "truncate_inputs": truncate_inputs,
+        "tray": tray,
+    }
+    server = Server()
+    if not use_thread:
+        server.run(
+            port=port,
+            **serve_kwargs,
+        )
+    else:
+        from threading import Thread
+        import time
+        # Start a background thread to run the server
+        server_thread = Thread(
+            target=server.run,
+            args=(port,),
+            kwargs=serve_kwargs,
+            daemon=True,
+        )
+        server_thread.start()
+        # Wait for the server to be ready
+        max_wait_time = 30
+        wait_interval = 0.5
+        waited = 0
+        while waited < max_wait_time:
+            time.sleep(wait_interval)
+            _, running_port = get_server_info()
+            if running_port is not None:
+                break
+            waited += wait_interval
+        return port, server_thread
 def stop():
@@ -161,9 +193,8 @@ def pull(
             if pull_response.status_code != 200:
                 raise PullError(
                     f"Failed to install {model_name}. Check the "
-                    "Lemonade Server log for more information. A list of supported models "
-                    "is provided at "
-                    "https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/server_models.md"
+                    "Lemonade Server log for more information. You can list "
+                    "supported models with `lemonade-server list`"
                 )
     else:
         from lemonade_server.model_manager import ModelManager
@@ -212,6 +243,53 @@ def delete(model_names: List[str]):
             ModelManager().delete_model(model_name)
+def run(model_name: str):
+    """
+    Start the server if not running and open the webapp with the specified model
+    """
+    import webbrowser
+    import time
+    # Start the server if not running
+    _, port = get_server_info()
+    server_previously_running = port is not None
+    if not server_previously_running:
+        port, server_thread = serve(use_thread=True, tray=True, log_level="info")
+    # Pull model
+    pull([model_name])
+    # Load model
+    load(model_name, port)
+    # Open the webapp with the specified model
+    url = f"http://localhost:{port}/?model={model_name}#llm-chat"
+    print(f"You can now chat with {model_name} at {url}")
+    webbrowser.open(url)
+    # Keep the server running if we started it
+    if not server_previously_running:
+        while server_thread.is_alive():
+            time.sleep(0.5)
+def load(model_name: str, port: int):
+    """
+    Load a model using the endpoint
+    """
+    import requests
+    base_url = f"http://localhost:{port}/api/v1"
+    # Load the model
+    load_response = requests.post(f"{base_url}/load", json={"model_name": model_name})
+    if load_response.status_code != 200:
+        raise ModelLoadError(
+            f"Failed to load {model_name}. Check the "
+            "Lemonade Server log for more information."
+        )
 def version():
     """
     Print the version number
@@ -294,6 +372,46 @@ def get_server_info() -> Tuple[int | None, int | None]:
     return None, None
+def list_models():
+    """
+    List recommended models and their download status
+    """
+    from tabulate import tabulate
+    from lemonade_server.model_manager import ModelManager
+    model_manager = ModelManager()
+    # Get all supported models and downloaded models
+    supported_models = model_manager.supported_models
+    downloaded_models = model_manager.downloaded_models
+    # Filter to only show recommended models
+    recommended_models = {
+        model_name: model_info
+        for model_name, model_info in supported_models.items()
+        if model_info.get("suggested", False)
+    }
+    # Create table data
+    table_data = []
+    for model_name, model_info in recommended_models.items():
+        downloaded_status = "Yes" if model_name in downloaded_models else "No"
+        # Get model labels/type
+        labels = model_info.get("labels", [])
+        model_type = ", ".join(labels) if labels else "-"
+        table_data.append([model_name, downloaded_status, model_type])
+    # Sort by model name for consistent display
+    # Show downloaded models first
+    table_data.sort(key=lambda x: (x[1] == "No", x[0].lower()))
+    # Display table
+    headers = ["Model Name", "Downloaded", "Details"]
+    print(tabulate(table_data, headers=headers, tablefmt="simple"))
 def main():
     parser = argparse.ArgumentParser(
         description="Serve LLMs on CPU, GPU, and NPU.",
@@ -333,6 +451,11 @@ def main():
     # Stop command
     stop_parser = subparsers.add_parser("stop", help="Stop the server")
+    # List command
+    list_parser = subparsers.add_parser(
+        "list", help="List recommended models and their download status"
+    )
     # Pull command
     pull_parser = subparsers.add_parser(
         "pull",
@@ -381,6 +504,16 @@ def main():
         nargs="+",
     )
+    # Run command
+    run_parser = subparsers.add_parser(
+        "run",
+        help="Chat with specified model (starts server if needed)",
+    )
+    run_parser.add_argument(
+        "model",
+        help="Lemonade Server model name to run",
+    )
     args = parser.parse_args()
     if os.name != "nt":
@@ -389,6 +522,15 @@ def main():
     if args.version:
         version()
     elif args.command == "serve":
+        _, running_port = get_server_info()
+        if running_port is not None:
+            print(
+                (
+                    f"Lemonade Server is already running on port {running_port}\n"
+                    "Please stop the existing server before starting a new instance."
+                ),
+            )
+            sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
         serve(
             port=args.port,
             log_level=args.log_level,
@@ -396,6 +538,8 @@ def main():
         )
     elif args.command == "status":
         status()
+    elif args.command == "list":
+        list_models()
     elif args.command == "pull":
         pull(
             args.model,
@@ -408,6 +552,8 @@ def main():
         delete(args.model)
     elif args.command == "stop":
         stop()
+    elif args.command == "run":
+        run(args.model)
     elif args.command == "help" or not args.command:
         parser.print_help()

lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.5__py3-none-any.whl

Potentially problematic release.

lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.5__py3-none-any.whl