lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in the supported public registries to which they were released. It is provided for informational purposes only.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (53)
  1. lemonade/cache.py +6 -1
  2. lemonade/cli.py +47 -5
  3. lemonade/common/inference_engines.py +13 -4
  4. lemonade/common/status.py +4 -4
  5. lemonade/common/system_info.py +544 -1
  6. lemonade/profilers/agt_power.py +437 -0
  7. lemonade/profilers/hwinfo_power.py +429 -0
  8. lemonade/tools/accuracy.py +143 -48
  9. lemonade/tools/adapter.py +6 -1
  10. lemonade/tools/bench.py +26 -8
  11. lemonade/tools/flm/__init__.py +1 -0
  12. lemonade/tools/flm/utils.py +303 -0
  13. lemonade/tools/huggingface/bench.py +6 -1
  14. lemonade/tools/llamacpp/bench.py +146 -27
  15. lemonade/tools/llamacpp/load.py +30 -2
  16. lemonade/tools/llamacpp/utils.py +393 -33
  17. lemonade/tools/oga/bench.py +5 -26
  18. lemonade/tools/oga/load.py +60 -121
  19. lemonade/tools/oga/migration.py +403 -0
  20. lemonade/tools/report/table.py +76 -8
  21. lemonade/tools/server/flm.py +133 -0
  22. lemonade/tools/server/llamacpp.py +220 -553
  23. lemonade/tools/server/serve.py +684 -168
  24. lemonade/tools/server/static/js/chat.js +666 -342
  25. lemonade/tools/server/static/js/model-settings.js +24 -3
  26. lemonade/tools/server/static/js/models.js +597 -73
  27. lemonade/tools/server/static/js/shared.js +79 -14
  28. lemonade/tools/server/static/logs.html +191 -0
  29. lemonade/tools/server/static/styles.css +491 -66
  30. lemonade/tools/server/static/webapp.html +83 -31
  31. lemonade/tools/server/tray.py +158 -38
  32. lemonade/tools/server/utils/macos_tray.py +226 -0
  33. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  34. lemonade/tools/server/webapp.py +4 -1
  35. lemonade/tools/server/wrapped_server.py +559 -0
  36. lemonade/version.py +1 -1
  37. lemonade_install/install.py +54 -611
  38. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
  39. lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
  40. lemonade_server/cli.py +145 -37
  41. lemonade_server/model_manager.py +521 -37
  42. lemonade_server/pydantic_models.py +28 -1
  43. lemonade_server/server_models.json +246 -92
  44. lemonade_server/settings.py +39 -39
  45. lemonade/tools/quark/__init__.py +0 -0
  46. lemonade/tools/quark/quark_load.py +0 -173
  47. lemonade/tools/quark/quark_quantize.py +0 -439
  48. lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
  49. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
  50. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
  51. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
  52. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
  53. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.1.4
3
+ Version: 8.2.2
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.14
@@ -16,24 +16,26 @@ Requires-Dist: numpy
16
16
  Requires-Dist: fasteners
17
17
  Requires-Dist: GitPython>=3.1.40
18
18
  Requires-Dist: psutil>=6.1.1
19
- Requires-Dist: wmi
19
+ Requires-Dist: wmi; platform_system == "Windows"
20
20
  Requires-Dist: py-cpuinfo
21
21
  Requires-Dist: pytz
22
22
  Requires-Dist: zstandard
23
23
  Requires-Dist: fastapi
24
24
  Requires-Dist: uvicorn[standard]
25
- Requires-Dist: openai<1.97.1,>=1.81.0
25
+ Requires-Dist: openai<3.0.0,>=2.0.0
26
26
  Requires-Dist: transformers<=4.53.2
27
27
  Requires-Dist: jinja2
28
28
  Requires-Dist: tabulate
29
29
  Requires-Dist: sentencepiece
30
30
  Requires-Dist: huggingface-hub[hf_xet]==0.33.0
31
31
  Requires-Dist: python-dotenv
32
+ Requires-Dist: python-multipart
33
+ Requires-Dist: rumps>=0.4.0; sys_platform == "darwin"
32
34
  Provides-Extra: oga-ryzenai
33
- Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2.1; extra == "oga-ryzenai"
35
+ Requires-Dist: onnxruntime-genai-directml-ryzenai==0.9.2.1; extra == "oga-ryzenai"
34
36
  Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
35
37
  Provides-Extra: oga-cpu
36
- Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
38
+ Requires-Dist: onnxruntime-genai==0.9.2; extra == "oga-cpu"
37
39
  Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
38
40
  Provides-Extra: dev
39
41
  Requires-Dist: torch>=2.6.0; extra == "dev"
@@ -45,42 +47,6 @@ Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
45
47
  Requires-Dist: lm-eval[api]; extra == "dev"
46
48
  Provides-Extra: model-generate
47
49
  Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "model-generate"
48
- Provides-Extra: oga-hybrid
49
- Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
50
- Provides-Extra: oga-unified
51
- Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
52
- Provides-Extra: oga-hybrid-minimal
53
- Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
54
- Provides-Extra: oga-cpu-minimal
55
- Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
56
- Provides-Extra: oga-npu-minimal
57
- Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
58
- Provides-Extra: llm
59
- Requires-Dist: lemonade-sdk[dev]; extra == "llm"
60
- Provides-Extra: llm-oga-cpu
61
- Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
62
- Provides-Extra: llm-oga-npu
63
- Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
64
- Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
65
- Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
66
- Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
67
- Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
68
- Provides-Extra: llm-oga-hybrid
69
- Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
70
- Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
71
- Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
72
- Provides-Extra: llm-oga-unified
73
- Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
74
- Provides-Extra: llm-oga-igpu
75
- Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
76
- Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
77
- Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
78
- Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
79
- Provides-Extra: llm-oga-cuda
80
- Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
81
- Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
82
- Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
83
- Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
84
50
  Dynamic: author-email
85
51
  Dynamic: description
86
52
  Dynamic: description-content-type
@@ -94,41 +60,31 @@ Dynamic: summary
94
60
 
95
61
  <p align="center">
96
62
  <a href="https://discord.gg/5xXzkMu8Zk">
97
- <img src="https://img.shields.io/badge/Discord-7289DA?logo=discord&logoColor=white" alt="Discord" />
98
- </a>
63
+ <img src="https://img.shields.io/badge/Discord-7289DA?logo=discord&logoColor=white" alt="Discord" /></a>
99
64
  <a href="https://github.com/lemonade-sdk/lemonade/tree/main/test" title="Check out our tests">
100
- <img src="https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg" alt="Lemonade tests" />
101
- </a>
65
+ <img src="https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg" alt="Lemonade tests" /></a>
102
66
  <a href="docs/README.md#installation" title="Check out our instructions">
103
- <img src="https://img.shields.io/badge/Windows-11-0078D6?logo=windows&logoColor=white" alt="Windows 11" />
104
- </a>
67
+ <img src="https://img.shields.io/badge/Windows-11-0078D6?logo=windows&logoColor=white" alt="Windows 11" /></a>
105
68
  <a href="https://lemonade-server.ai/#linux" title="Ubuntu 24.04 & 25.04 Supported">
106
- <img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
107
- </a>
69
+ <img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" /></a>
70
+ <a href="https://lemonade-server.ai/" title="macOS 14+ with Apple Silicon">
71
+ <img src="https://img.shields.io/badge/macOS-14%2B-000000?logo=apple&logoColor=white" alt="macOS 14+" /></a>
108
72
  <a href="docs/README.md#installation" title="Check out our instructions">
109
- <img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" />
110
- </a>
73
+ <img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" /></a>
111
74
  <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
112
- <img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
113
- </a>
75
+ <img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" /></a>
114
76
  <a href="https://github.com/lemonade-sdk/lemonade/releases/latest" title="Download the latest release">
115
- <img src="https://img.shields.io/github/v/release/lemonade-sdk/lemonade?include_prereleases" alt="Latest Release" />
116
- </a>
77
+ <img src="https://img.shields.io/github/v/release/lemonade-sdk/lemonade?include_prereleases" alt="Latest Release" /></a>
117
78
  <a href="https://tooomm.github.io/github-release-stats/?username=lemonade-sdk&repository=lemonade">
118
- <img src="https://img.shields.io/github/downloads/lemonade-sdk/lemonade/total.svg" alt="GitHub downloads" />
119
- </a>
79
+ <img src="https://img.shields.io/github/downloads/lemonade-sdk/lemonade/total.svg" alt="GitHub downloads" /></a>
120
80
  <a href="https://github.com/lemonade-sdk/lemonade/issues">
121
- <img src="https://img.shields.io/github/issues/lemonade-sdk/lemonade" alt="GitHub issues" />
122
- </a>
81
+ <img src="https://img.shields.io/github/issues/lemonade-sdk/lemonade" alt="GitHub issues" /></a>
123
82
  <a href="https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE">
124
- <img src="https://img.shields.io/badge/License-Apache-yellow.svg" alt="License: Apache" />
125
- </a>
83
+ <img src="https://img.shields.io/badge/License-Apache-yellow.svg" alt="License: Apache" /></a>
126
84
  <a href="https://github.com/psf/black">
127
- <img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style: black" />
128
- </a>
85
+ <img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style: black" /></a>
129
86
  <a href="https://star-history.com/#lemonade-sdk/lemonade">
130
- <img src="https://img.shields.io/badge/Star%20History-View-brightgreen" alt="Star History Chart" />
131
- </a>
87
+ <img src="https://img.shields.io/badge/Star%20History-View-brightgreen" alt="Star History Chart" /></a>
132
88
  </p>
133
89
  <p align="center">
134
90
  <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/banner.png?raw=true" alt="Lemonade Banner" />
@@ -156,7 +112,7 @@ Startups such as [Styrk AI](https://styrk.ai/styrk-ai-and-amd-guardrails-for-you
156
112
  ### Use it with your favorite OpenAI-compatible app!
157
113
 
158
114
  <p align="center">
159
- <a href="https://lemonade-server.ai/docs/server/apps/open-webui/" title="Open WebUI" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/openwebui.jpg" alt="Open WebUI" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/continue/" title="Continue" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/continue_dev.png" alt="Continue" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/amd/gaia" title="Gaia" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/gaia.ico" alt="Gaia" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/anythingLLM/" title="AnythingLLM" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/anything_llm.png" alt="AnythingLLM" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/ai-dev-gallery/" title="AI Dev Gallery" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_dev_gallery.webp" alt="AI Dev Gallery" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/lm-eval/" title="LM-Eval" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/codeGPT/" title="CodeGPT" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/codegpt.jpg" alt="CodeGPT" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" title="AI Toolkit" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI 
Toolkit" width="60" /></a>
115
+ <a href="https://lemonade-server.ai/docs/server/apps/open-webui/" title="Open WebUI" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/openwebui.jpg" alt="Open WebUI" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/continue/" title="Continue" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/continue_dev.png" alt="Continue" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/amd/gaia" title="Gaia" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/gaia.ico" alt="Gaia" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/anythingLLM/" title="AnythingLLM" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/anything_llm.png" alt="AnythingLLM" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/ai-dev-gallery/" title="AI Dev Gallery" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_dev_gallery.webp" alt="AI Dev Gallery" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/lm-eval/" title="LM-Eval" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/lemonade-sdk/lemonade-arcade" title="Lemonade Arcade" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/lemonade-arcade/refs/heads/main/docs/assets/favicon.ico" alt="Lemonade Arcade" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" title="AI Toolkit" target="_blank"><img 
src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI Toolkit" width="60" /></a>
160
116
  </p>
161
117
 
162
118
  > [!TIP]
@@ -200,11 +156,11 @@ You can also import custom GGUF and ONNX models from Hugging Face by using our [
200
156
 
201
157
  Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about it [here](./docs/README.md#software-and-hardware-overview).
202
158
 
203
- | Hardware | Engine: OGA | Engine: llamacpp | Engine: HF | Windows | Linux |
204
- |----------|-------------|------------------|------------|---------|-------|
205
- | **🧠 CPU** | All platforms | All platforms | All platforms | ✅ | ✅ |
206
- | **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms* | — | ✅ | ✅ |
207
- | **🤖 NPU** | AMD Ryzen™ AI 300 series | — | | ✅ | — |
159
+ | Hardware | Engine: OGA | Engine: llamacpp | Engine: FLM | Windows | Linux | macOS |
160
+ |----------|-------------|------------------|------------|---------|-------|-------|
161
+ | **🧠 CPU** | All platforms | All platforms | - | ✅ | ✅ | ✅ |
162
+ | **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms*<br>Metal: Apple Silicon | — | ✅ | ✅ | ✅ |
163
+ | **🤖 NPU** | AMD Ryzen™ AI 300 series | — | Ryzen™ AI 300 series | ✅ | — | — |
208
164
 
209
165
  <details>
210
166
  <summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
@@ -227,7 +183,7 @@ Lemonade supports the following configurations, while also making it easy to swi
227
183
  </tr>
228
184
  <tr>
229
185
  <td><b>gfx120X</b> (RDNA4)</td>
230
- <td>Windows only</td>
186
+ <td>Windows, Ubuntu</td>
231
187
  <td>Radeon AI PRO R9700, RX 9070 XT/GRE/9070, RX 9060 XT</td>
232
188
  </tr>
233
189
  <tr>
@@ -309,6 +265,7 @@ This project is:
309
265
  - [OnnxRuntime GenAI](https://github.com/microsoft/onnxruntime-genai)
310
266
  - [Hugging Face Hub](https://github.com/huggingface/huggingface_hub)
311
267
  - [OpenAI API](https://github.com/openai/openai-python)
268
+ - [IRON/MLIR-AIE](https://github.com/Xilinx/mlir-aie)
312
269
  - and more...
313
270
  - Accelerated by mentorship from the OCV Catalyst program.
314
271
  - Licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE).
@@ -0,0 +1,83 @@
1
+ lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
2
+ lemonade/api.py,sha256=Oc4yBA3LZg8FrTsbuDq1p9-XE74pqNnIEUhXyKa7qg8,5786
3
+ lemonade/cache.py,sha256=fUjtHYkRdHTULjNMrDNOFDGW_QMVUg54ZE1NukBP2oM,3314
4
+ lemonade/cli.py,sha256=qU5bW7RQAUKNSpvrhVyzn68NMxyi-336Ke_JU4bsv1Q,5708
5
+ lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
+ lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
+ lemonade/version.py,sha256=qHoUgbIt6pjWzUYt9wXlO8qcrd9dEl09FyPsRTkJshI,22
8
+ lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
+ lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
11
+ lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
12
+ lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
13
+ lemonade/common/inference_engines.py,sha256=3bUGQe9wtfTiwt8kvI_ry077uyc9lid2G1fJX95kN1A,12969
14
+ lemonade/common/network.py,sha256=qXpUjDYQEYM_gH3JwTtU-pu_yCKcaa1IeohJRPy91-A,2903
15
+ lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
16
+ lemonade/common/status.py,sha256=dxAahChPGVmfT91DJW949Xjgm9r5E-Y9KOLPEw7BMh8,16562
17
+ lemonade/common/system_info.py,sha256=PWpEtOz6CqvnUYL8_3lX61_GNbrbp0O2x4ZIwRh6RJg,48780
18
+ lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
19
+ lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
20
+ lemonade/profilers/agt_power.py,sha256=t_37VEg8LPapjSKSjJln-jFznZtTIf5UpzlAXcVGOrc,16771
21
+ lemonade/profilers/hwinfo_power.py,sha256=UQr-EHq7B4T-IvzmErCRK0-QxcFnho4ftCaWy5p8Qvo,15819
22
+ lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
23
+ lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
24
+ lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
25
+ lemonade/tools/accuracy.py,sha256=X2OLzx3VDhpOZO9aHKH3vrjeeKI1lTYXZFeZF9Qpslg,15188
26
+ lemonade/tools/adapter.py,sha256=fFRSnLiQk20WYQmYFD0JK1t-fDLKEWhD6u9qqB_BECk,3496
27
+ lemonade/tools/bench.py,sha256=jyL85yqBPzyT2pr9OWzihvyoN0v3JQg_BHtIHy9d2zg,10881
28
+ lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
29
+ lemonade/tools/management_tools.py,sha256=HQBcr7LYuMqVRYQtvnkNpfutBTA7lblszyoAjjVGu1Y,10201
30
+ lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
31
+ lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
32
+ lemonade/tools/prompt.py,sha256=PyLksp1k8jsZsU7XBRK61k1DUHhbdLa20h-AP8Noh3w,9011
33
+ lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
34
+ lemonade/tools/flm/__init__.py,sha256=NQ4CEzJZGS_VvxPMlfrK4Dcx48bQSoUR4iG8e7yZjas,46
35
+ lemonade/tools/flm/utils.py,sha256=5KS0IND_8E3lgKNdIFeJqMcdiCsPpYCjgL1FzH8YvGk,9933
36
+ lemonade/tools/huggingface/bench.py,sha256=OMkIm_6o4znu8xZo4MzvHULGu0WPQl529kpAngJut-g,6907
37
+ lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
38
+ lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
39
+ lemonade/tools/llamacpp/bench.py,sha256=JfsKboJCDdkZ5ZJ9Xl-ty5cCBj6pAlUrXZiEQ7Tt60c,9689
40
+ lemonade/tools/llamacpp/load.py,sha256=YvDHqXe4tKGuT5aG_wP4xH3Px0RmG92u73kVOTHSEgc,7671
41
+ lemonade/tools/llamacpp/utils.py,sha256=kHL2xaCnQ7hLY15FoQJLYGf29KddxoEyczXmN1TWfQY,46182
42
+ lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
+ lemonade/tools/oga/bench.py,sha256=V-qc8mbFHvOv13QTrgFOXSmUAhzzGTK8S7UpVCl-KIw,4329
44
+ lemonade/tools/oga/load.py,sha256=yxeRPUUrDOgrjv9d_R8MGLInunvI7HId_Q2cN8K_8h4,31113
45
+ lemonade/tools/oga/migration.py,sha256=zo34chEbtkWk4GaDwgW326o09eF2wAsoOiTzqQ6DMLU,13505
46
+ lemonade/tools/oga/utils.py,sha256=F8UVLKlfYcLa2SUqlehar8-jaX2Aw4u58DjHNNvLdOA,17675
47
+ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
+ lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
49
+ lemonade/tools/report/table.py,sha256=7hsJNr9x9yxvpTTVp64FpTAfxyUDvXHcDvGk5L84j1g,30187
50
+ lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
+ lemonade/tools/server/flm.py,sha256=NVpjFzcmX6YzW6wCWyMEvQEe_QpRK-Jlcaajxy3kWuo,4025
52
+ lemonade/tools/server/llamacpp.py,sha256=0scaDxJXne-PfUGh-r25UyqDSIOJdFkD1Js1WPFPzv0,11676
53
+ lemonade/tools/server/serve.py,sha256=WYsKZTtPFp6JAp5K-N734wTsD3sltiTYrwV6oCs7ZWk,83487
54
+ lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
55
+ lemonade/tools/server/tray.py,sha256=EFnSc2Ra4owiHVz6ykoMhxi2fYqZAK1g21AynAYBiyk,24426
56
+ lemonade/tools/server/webapp.py,sha256=GGSVIzN19C2ZaadOEPBg_D7Lt0PuF339NuWwjMPfZu8,1225
57
+ lemonade/tools/server/wrapped_server.py,sha256=uh7ifrRX1Hx0IuRwZRCGPyQOukitE7kKQipCCz0bSGA,19844
58
+ lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
59
+ lemonade/tools/server/static/logs.html,sha256=b9BtKWQwXJgr8xc2lWLDTx1zwYgM0Nsv5ZHRT_EdBM8,6157
60
+ lemonade/tools/server/static/styles.css,sha256=XOi3-Aw_W0WOBEGBnyYZcniwjeHx7dhRBU2Lt4ePN00,52815
61
+ lemonade/tools/server/static/webapp.html,sha256=QgUcUo7coLNDpy2HuF7IHu1t6wySSTNwcZQ2-Q8WqlU,21506
62
+ lemonade/tools/server/static/js/chat.js,sha256=xil7PrnGohiNGtlqBcZ3upBr9hw5Y0P6KYqjbHpUYh0,39812
63
+ lemonade/tools/server/static/js/model-settings.js,sha256=MZZkflb7cRaRqSagyknR_Pgii6pbDSlVKiAbgYqrGsQ,6737
64
+ lemonade/tools/server/static/js/models.js,sha256=OirBQ7dEflazOaPwr4XtCjqcuaW_SUZY6vMkAAhctFg,53178
65
+ lemonade/tools/server/static/js/shared.js,sha256=QPekwJQ9BLD2c0D6-CkoB0VjsehVy05TF8vTQmWGf-c,19312
66
+ lemonade/tools/server/utils/macos_tray.py,sha256=xwHW44ZN5hDVlJcwIpHHfqn4VRXWxXHuDACaT-ZqdO8,7095
67
+ lemonade/tools/server/utils/port.py,sha256=J7-g-Aqygb50jNoHLhhRfBZVM-uhGlcB5-oYBAehvgw,2263
68
+ lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
69
+ lemonade/tools/server/utils/windows_tray.py,sha256=2z5aTmUPlkT-QfkcfwHsyA6dv6nSNBT0gXUErarhac8,13170
70
+ lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
71
+ lemonade_install/install.py,sha256=3NtQ4JayFM9VW7IOHommaV4s9RgdightyfhhxQ190O4,7753
72
+ lemonade_sdk-8.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
73
+ lemonade_sdk-8.2.2.dist-info/licenses/NOTICE.md,sha256=RSca9LE5e6pvdWA_LXAUCcACIHPmINKqkRX-AVRqBGo,3499
74
+ lemonade_server/cli.py,sha256=BItlVpuykHwTnCZJs-7v1qWRPBCnEx-aeP43a7S9uFk,23918
75
+ lemonade_server/model_manager.py,sha256=dboE-7wF1Y9iLGUcBkaA-lpF8XJKSIN_y0mUYzgCm2w,32657
76
+ lemonade_server/pydantic_models.py,sha256=QpsQrBjplv0IlwwUEt259o3eGv5YUuhq_sSiX2xBAyM,4074
77
+ lemonade_server/server_models.json,sha256=cZAxlDkxQ5neb57jQ8j2fvIeuAfyk4Ga1Yele58OeTY,15062
78
+ lemonade_server/settings.py,sha256=JOlZmirUXO9rA6BCODVFwyXrrHtYoH_LiKYm49lGm_c,1260
79
+ lemonade_sdk-8.2.2.dist-info/METADATA,sha256=e_usINpNwof9VkWrKlpWNqyLH3UD0j6D6zX_Bimynww,15417
80
+ lemonade_sdk-8.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
81
+ lemonade_sdk-8.2.2.dist-info/entry_points.txt,sha256=7sRvpNhi1E7amnM7RZo57e8yFF9iA5uuRaIeJ1Xre6w,193
82
+ lemonade_sdk-8.2.2.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
83
+ lemonade_sdk-8.2.2.dist-info/RECORD,,
lemonade_server/cli.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import argparse
2
2
  import sys
3
3
  import os
4
+ import platform
4
5
  from typing import Tuple, Optional
5
6
  import psutil
6
7
  from typing import List
@@ -104,12 +105,34 @@ def serve(
104
105
  max_wait_time = 30
105
106
  wait_interval = 0.5
106
107
  waited = 0
107
- while waited < max_wait_time:
108
- time.sleep(wait_interval)
109
- _, running_port = get_server_info()
110
- if running_port is not None:
111
- break
112
- waited += wait_interval
108
+
109
+ if platform.system() == "Darwin":
110
+ # On macOS, use direct HTTP health check instead of process scanning for better
111
+ # performance
112
+ import requests
113
+
114
+ while waited < max_wait_time:
115
+ time.sleep(wait_interval)
116
+ try:
117
+ response = requests.get(
118
+ f"http://{host}:{port}/api/v1/health", timeout=1
119
+ )
120
+ if response.status_code == 200:
121
+ break
122
+ except (
123
+ requests.exceptions.ConnectionError,
124
+ requests.exceptions.Timeout,
125
+ ):
126
+ pass # Server not ready yet
127
+ waited += wait_interval
128
+ else:
129
+ # On other platforms, use the existing approach
130
+ while waited < max_wait_time:
131
+ time.sleep(wait_interval)
132
+ _, running_port = get_server_info()
133
+ if running_port is not None:
134
+ break
135
+ waited += wait_interval
113
136
 
114
137
  return port, server_thread
115
138
 
@@ -144,8 +167,12 @@ def stop():
144
167
  except psutil.NoSuchProcess:
145
168
  pass # Child already terminated
146
169
 
147
- # Wait for main process
148
- process.wait(timeout=10)
170
+ # Wait for main process to terminate gracefully
171
+ # kill if it doesn't terminate gracefully
172
+ try:
173
+ process.wait(timeout=5)
174
+ except psutil.TimeoutExpired:
175
+ process.kill()
149
176
 
150
177
  # Kill llama-server child process if it didn't terminate gracefully
151
178
  for child in children:
@@ -172,6 +199,7 @@ def pull(
172
199
  checkpoint: Optional[str] = None,
173
200
  recipe: Optional[str] = None,
174
201
  reasoning: bool = False,
202
+ vision: bool = False,
175
203
  mmproj: str = "",
176
204
  ):
177
205
  """
@@ -193,15 +221,16 @@ def pull(
193
221
  for model_name in model_names:
194
222
  payload = {"model_name": model_name}
195
223
 
196
- if checkpoint and recipe:
197
- # Add the parameters for registering a new model
198
- payload["checkpoint"] = checkpoint
199
- payload["recipe"] = recipe
200
-
201
- if reasoning:
202
- payload["reasoning"] = reasoning
203
- if mmproj:
204
- payload["mmproj"] = mmproj
224
+ # Add the parameters to the payload
225
+ for key, value in [
226
+ ("checkpoint", checkpoint),
227
+ ("recipe", recipe),
228
+ ("reasoning", reasoning),
229
+ ("vision", vision),
230
+ ("mmproj", mmproj),
231
+ ]:
232
+ if value:
233
+ payload[key] = value
205
234
 
206
235
  # Install the model
207
236
  pull_response = requests.post(f"{base_url}/pull", json=payload)
@@ -220,7 +249,11 @@ def pull(
220
249
  checkpoint=checkpoint,
221
250
  recipe=recipe,
222
251
  reasoning=reasoning,
252
+ vision=vision,
223
253
  mmproj=mmproj,
254
+ # The pull command will download an upgraded model if available, even
255
+ # if we already have a local copy of the model
256
+ do_not_upgrade=False,
224
257
  )
225
258
 
226
259
 
@@ -273,6 +306,11 @@ def run(
273
306
  """
274
307
  import webbrowser
275
308
  import time
309
+ import os
310
+
311
+ # Disable tray on macOS for run command due to threading issues
312
+ if platform.system() == "Darwin":
313
+ tray = False
276
314
 
277
315
  # Start the server if not running
278
316
  _, running_port = get_server_info()
@@ -288,6 +326,16 @@ def run(
288
326
  ctx_size=ctx_size,
289
327
  )
290
328
  else:
329
+ # macOS: Check for port conflicts when server is already running
330
+ if platform.system() == "Darwin":
331
+ requested_port = port if port is not None else DEFAULT_PORT
332
+ if running_port != requested_port:
333
+ print(
334
+ f"Lemonade Server is already running on port {running_port}\n"
335
+ f"You requested port {requested_port}. Please stop the existing server first "
336
+ )
337
+ sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
338
+
291
339
  port = running_port
292
340
 
293
341
  # Pull model
@@ -299,7 +347,10 @@ def run(
299
347
  # Open the webapp with the specified model
300
348
  url = f"http://{host}:{port}/?model={model_name}#llm-chat"
301
349
  print(f"You can now chat with {model_name} at {url}")
302
- webbrowser.open(url)
350
+
351
+ # Only open browser if not disabled via environment variable
352
+ if not os.environ.get("LEMONADE_DISABLE_BROWSER"):
353
+ webbrowser.open(url)
303
354
 
304
355
  # Keep the server running if we started it
305
356
  if not server_previously_running:
@@ -356,6 +407,23 @@ def is_lemonade_server(pid):
356
407
  """
357
408
  Check whether or not a given PID corresponds to a Lemonade server
358
409
  """
410
+ # macOS only: Self-exclusion to prevent blocking server startup
411
+ if platform.system() == "Darwin":
412
+ current_pid = os.getpid()
413
+ if pid == current_pid:
414
+ return False
415
+
416
+ # Exclude children of current process to avoid detecting status commands
417
+ try:
418
+ current_process = psutil.Process(current_pid)
419
+ child_pids = [
420
+ child.pid for child in current_process.children(recursive=True)
421
+ ]
422
+ if pid in child_pids:
423
+ return False
424
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
425
+ pass
426
+
359
427
  try:
360
428
  process = psutil.Process(pid)
361
429
 
@@ -364,11 +432,29 @@ def is_lemonade_server(pid):
364
432
  if process_name in [ # Windows
365
433
  "lemonade-server-dev.exe",
366
434
  "lemonade-server.exe",
435
+ "lsdev.exe",
367
436
  ] or process_name in [ # Linux
368
437
  "lemonade-server-dev",
369
438
  "lemonade-server",
439
+ "lsdev",
370
440
  ]:
371
441
  return True
442
+ # macOS only: Python scripts appear as "python3.x", check command line
443
+ elif process_name.startswith("python") and platform.system() == "Darwin":
444
+ try:
445
+ cmdline = process.cmdline()
446
+ if len(cmdline) >= 2:
447
+ script_path = cmdline[1]
448
+ # Check for various lemonade server command patterns (macOS only)
449
+ lemonade_patterns = [
450
+ "lemonade-server-dev",
451
+ "lemonade-server",
452
+ "lsdev", # Short alias for lemonade-server-dev
453
+ ]
454
+ if any(pattern in script_path for pattern in lemonade_patterns):
455
+ return True
456
+ except (psutil.AccessDenied, psutil.NoSuchProcess):
457
+ pass
372
458
  elif "llama-server" in process_name:
373
459
  return False
374
460
  if not process.parent():
@@ -386,23 +472,43 @@ def get_server_info() -> Tuple[int | None, int | None]:
386
472
  2. The port that Lemonade Server is running on
387
473
  """
388
474
 
389
- # Get all network connections and filter for localhost IPv4 listening ports
475
+ # Try the global approach first (works on Windows/Linux without permissions)
390
476
  try:
391
477
  connections = psutil.net_connections(kind="tcp4")
392
-
393
478
  for conn in connections:
394
- if (
395
- conn.status == "LISTEN"
396
- and conn.laddr
397
- and conn.laddr.ip in ["localhost"]
398
- and conn.pid is not None
399
- ):
479
+ if conn.status == "LISTEN" and conn.laddr and conn.pid is not None:
400
480
  if is_lemonade_server(conn.pid):
401
481
  return conn.pid, conn.laddr.port
402
-
403
- except Exception:
482
+ except (psutil.AccessDenied, PermissionError):
483
+ # Global approach needs elevated permissions on macOS, fall back to per-process approach
484
+ pass
485
+ except Exception: # pylint: disable=broad-exception-caught
404
486
  pass
405
487
 
488
+ # Per-process approach (macOS only - needs this due to permission requirements)
489
+ if platform.system() == "Darwin":
490
+ try:
491
+ for proc in psutil.process_iter(["pid", "name"]):
492
+ try:
493
+ pid = proc.info["pid"]
494
+ if is_lemonade_server(pid):
495
+ # Found a lemonade server, check its listening ports
496
+ connections = proc.net_connections(kind="inet")
497
+ for conn in connections:
498
+ if conn.status == "LISTEN" and conn.laddr:
499
+ return pid, conn.laddr.port
500
+ # If no listening connections found, this process is not actually serving
501
+ # Continue looking for other processes
502
+ except (
503
+ psutil.NoSuchProcess,
504
+ psutil.AccessDenied,
505
+ psutil.ZombieProcess,
506
+ ):
507
+ # Some processes may be inaccessible, continue to next
508
+ continue
509
+ except Exception: # pylint: disable=broad-exception-caught
510
+ pass
511
+
406
512
  return None, None
407
513
 
408
514
 
@@ -417,12 +523,13 @@ def list_models():
417
523
 
418
524
  # Get all supported models and downloaded models
419
525
  supported_models = model_manager.supported_models
526
+ filtered_models = model_manager.filter_models_by_backend(supported_models)
420
527
  downloaded_models = model_manager.downloaded_models
421
528
 
422
529
  # Filter to only show recommended models
423
530
  recommended_models = {
424
531
  model_name: model_info
425
- for model_name, model_info in supported_models.items()
532
+ for model_name, model_info in filtered_models.items()
426
533
  if model_info.get("suggested", False)
427
534
  }
428
535
 
@@ -499,7 +606,7 @@ def _add_server_arguments(parser):
499
606
  "--llamacpp",
500
607
  type=str,
501
608
  help="LlamaCpp backend to use",
502
- choices=["vulkan", "rocm"],
609
+ choices=["vulkan", "rocm", "metal"],
503
610
  default=DEFAULT_LLAMACPP_BACKEND,
504
611
  )
505
612
  parser.add_argument(
@@ -512,6 +619,13 @@ def _add_server_arguments(parser):
512
619
  default=DEFAULT_CTX_SIZE,
513
620
  )
514
621
 
622
+ if os.name == "nt" or platform.system() == "Darwin":
623
+ parser.add_argument(
624
+ "--no-tray",
625
+ action="store_true",
626
+ help="Do not show a tray icon when the server is running",
627
+ )
628
+
515
629
 
516
630
  def main():
517
631
  parser = argparse.ArgumentParser(
@@ -532,12 +646,6 @@ def main():
532
646
  # Serve command
533
647
  serve_parser = subparsers.add_parser("serve", help="Start server")
534
648
  _add_server_arguments(serve_parser)
535
- if os.name == "nt":
536
- serve_parser.add_argument(
537
- "--no-tray",
538
- action="store_true",
539
- help="Do not show a tray icon when the server is running",
540
- )
541
649
 
542
650
  # Status command
543
651
  status_parser = subparsers.add_parser("status", help="Check if server is running")
@@ -611,7 +719,7 @@ def main():
611
719
 
612
720
  args = parser.parse_args()
613
721
 
614
- if os.name != "nt":
722
+ if os.name != "nt" and platform.system() != "Darwin":
615
723
  args.no_tray = True
616
724
 
617
725
  if args.version: