lemonade-sdk 7.0.4__py3-none-any.whl → 8.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (56) hide show
  1. lemonade/api.py +3 -3
  2. lemonade/cli.py +11 -17
  3. lemonade/common/build.py +0 -47
  4. lemonade/common/network.py +50 -0
  5. lemonade/common/status.py +2 -21
  6. lemonade/common/system_info.py +19 -4
  7. lemonade/profilers/memory_tracker.py +3 -1
  8. lemonade/tools/accuracy.py +3 -4
  9. lemonade/tools/adapter.py +1 -2
  10. lemonade/tools/{huggingface_bench.py → huggingface/bench.py} +2 -87
  11. lemonade/tools/huggingface/load.py +235 -0
  12. lemonade/tools/{huggingface_load.py → huggingface/utils.py} +87 -255
  13. lemonade/tools/humaneval.py +9 -3
  14. lemonade/tools/{llamacpp_bench.py → llamacpp/bench.py} +1 -1
  15. lemonade/tools/{llamacpp.py → llamacpp/load.py} +18 -2
  16. lemonade/tools/mmlu.py +7 -15
  17. lemonade/tools/{ort_genai/oga.py → oga/load.py} +31 -422
  18. lemonade/tools/oga/utils.py +423 -0
  19. lemonade/tools/perplexity.py +4 -3
  20. lemonade/tools/prompt.py +2 -1
  21. lemonade/tools/quark/quark_load.py +2 -1
  22. lemonade/tools/quark/quark_quantize.py +5 -5
  23. lemonade/tools/report/table.py +3 -3
  24. lemonade/tools/server/llamacpp.py +188 -45
  25. lemonade/tools/server/serve.py +184 -146
  26. lemonade/tools/server/static/favicon.ico +0 -0
  27. lemonade/tools/server/static/styles.css +568 -0
  28. lemonade/tools/server/static/webapp.html +439 -0
  29. lemonade/tools/server/tray.py +458 -0
  30. lemonade/tools/server/{port_utils.py → utils/port.py} +22 -3
  31. lemonade/tools/server/utils/system_tray.py +395 -0
  32. lemonade/tools/server/{instructions.py → webapp.py} +4 -10
  33. lemonade/version.py +1 -1
  34. lemonade_install/install.py +46 -28
  35. lemonade_sdk-8.0.1.dist-info/METADATA +179 -0
  36. lemonade_sdk-8.0.1.dist-info/RECORD +70 -0
  37. lemonade_server/cli.py +182 -27
  38. lemonade_server/model_manager.py +192 -20
  39. lemonade_server/pydantic_models.py +9 -4
  40. lemonade_server/server_models.json +5 -3
  41. lemonade/common/analyze_model.py +0 -26
  42. lemonade/common/labels.py +0 -61
  43. lemonade/common/onnx_helpers.py +0 -176
  44. lemonade/common/plugins.py +0 -10
  45. lemonade/common/tensor_helpers.py +0 -83
  46. lemonade/tools/server/static/instructions.html +0 -262
  47. lemonade_sdk-7.0.4.dist-info/METADATA +0 -113
  48. lemonade_sdk-7.0.4.dist-info/RECORD +0 -69
  49. /lemonade/tools/{ort_genai → oga}/__init__.py +0 -0
  50. /lemonade/tools/{ort_genai/oga_bench.py → oga/bench.py} +0 -0
  51. /lemonade/tools/server/{thread_utils.py → utils/thread.py} +0 -0
  52. {lemonade_sdk-7.0.4.dist-info → lemonade_sdk-8.0.1.dist-info}/WHEEL +0 -0
  53. {lemonade_sdk-7.0.4.dist-info → lemonade_sdk-8.0.1.dist-info}/entry_points.txt +0 -0
  54. {lemonade_sdk-7.0.4.dist-info → lemonade_sdk-8.0.1.dist-info}/licenses/LICENSE +0 -0
  55. {lemonade_sdk-7.0.4.dist-info → lemonade_sdk-8.0.1.dist-info}/licenses/NOTICE.md +0 -0
  56. {lemonade_sdk-7.0.4.dist-info → lemonade_sdk-8.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,179 @@
1
+ Metadata-Version: 2.4
2
+ Name: lemonade-sdk
3
+ Version: 8.0.1
4
+ Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
+ Author-email: lemonade@amd.com
6
+ Requires-Python: >=3.10, <3.12
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ License-File: NOTICE.md
10
+ Requires-Dist: invoke>=2.0.0
11
+ Requires-Dist: onnx<1.18.0,>=1.11.0
12
+ Requires-Dist: pyyaml>=5.4
13
+ Requires-Dist: typeguard>=2.3.13
14
+ Requires-Dist: packaging>=20.9
15
+ Requires-Dist: numpy<2.0.0
16
+ Requires-Dist: fasteners
17
+ Requires-Dist: GitPython>=3.1.40
18
+ Requires-Dist: psutil>=6.1.1
19
+ Requires-Dist: wmi
20
+ Requires-Dist: py-cpuinfo
21
+ Requires-Dist: pytz
22
+ Requires-Dist: zstandard
23
+ Requires-Dist: fastapi
24
+ Requires-Dist: uvicorn[standard]
25
+ Requires-Dist: openai>=1.81.0
26
+ Requires-Dist: transformers<=4.51.3
27
+ Requires-Dist: jinja2
28
+ Requires-Dist: tabulate
29
+ Requires-Dist: huggingface-hub==0.30.2
30
+ Provides-Extra: oga-hybrid-minimal
31
+ Requires-Dist: onnx==1.16.1; extra == "oga-hybrid-minimal"
32
+ Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
33
+ Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
34
+ Provides-Extra: oga-cpu-minimal
35
+ Requires-Dist: onnxruntime-genai==0.6.0; extra == "oga-cpu-minimal"
36
+ Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
37
+ Provides-Extra: llm
38
+ Requires-Dist: torch>=2.6.0; extra == "llm"
39
+ Requires-Dist: accelerate; extra == "llm"
40
+ Requires-Dist: sentencepiece; extra == "llm"
41
+ Requires-Dist: datasets; extra == "llm"
42
+ Requires-Dist: pandas>=1.5.3; extra == "llm"
43
+ Requires-Dist: matplotlib; extra == "llm"
44
+ Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
45
+ Requires-Dist: lm-eval[api]; extra == "llm"
46
+ Provides-Extra: llm-oga-cpu
47
+ Requires-Dist: lemonade-sdk[oga-cpu-minimal]; extra == "llm-oga-cpu"
48
+ Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
49
+ Provides-Extra: llm-oga-igpu
50
+ Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
51
+ Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
52
+ Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
53
+ Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-igpu"
54
+ Provides-Extra: llm-oga-cuda
55
+ Requires-Dist: onnxruntime-genai-cuda==0.6.0; extra == "llm-oga-cuda"
56
+ Requires-Dist: onnxruntime-gpu<1.22.0,>=1.19.1; extra == "llm-oga-cuda"
57
+ Requires-Dist: transformers<4.45.0; extra == "llm-oga-cuda"
58
+ Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cuda"
59
+ Provides-Extra: llm-oga-npu
60
+ Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
61
+ Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
62
+ Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
63
+ Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
64
+ Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-npu"
65
+ Provides-Extra: llm-oga-hybrid
66
+ Requires-Dist: lemonade-sdk[oga-hybrid-minimal]; extra == "llm-oga-hybrid"
67
+ Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
68
+ Provides-Extra: llm-oga-unified
69
+ Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
70
+ Dynamic: author-email
71
+ Dynamic: description
72
+ Dynamic: description-content-type
73
+ Dynamic: license-file
74
+ Dynamic: provides-extra
75
+ Dynamic: requires-dist
76
+ Dynamic: requires-python
77
+ Dynamic: summary
78
+
79
+ [![Lemonade tests](https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg)](https://github.com/lemonade-sdk/lemonade/tree/main/test "Check out our tests")
80
+ [![OS - Windows | Linux](https://img.shields.io/badge/OS-windows%20%7C%20linux-blue)](docs/README.md#installation "Check out our instructions")
81
+ [![Made with Python](https://img.shields.io/badge/Python-3.10,3.11-blue?logo=python&logoColor=white)](docs/README.md#installation "Check out our instructions")
82
+
83
+ ## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
84
+
85
+ The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
86
+
87
+ <div align="center">
88
+ <img src="https://download.amd.com/images/lemonade_640x480_1.gif" alt="Lemonade Demo" title="Lemonade in Action">
89
+ </div>
90
+
91
+ ### Features
92
+
93
+ The [Lemonade SDK](./docs/README.md) comprises the following:
94
+
95
+ - 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install it and enable NPU and GPU acceleration for your applications in minutes.
96
+ - 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
97
+ - 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
98
+ - Prompting with templates.
99
+ - Measuring accuracy with a variety of tests.
100
+ - Benchmarking to get the time-to-first-token and tokens per second.
101
+ - Profiling the memory utilization.
102
+
103
+ ### [Click here to get started with Lemonade.](./docs/README.md)
104
+
105
+ ### Supported Configurations
106
+
107
+ Maximum LLM performance requires the right hardware accelerator with the right inference engine for your scenario. Lemonade supports the following configurations, while also making it easy to switch between them at runtime.
108
+
109
+ <table border="1" cellpadding="6" cellspacing="0">
110
+ <thead>
111
+ <tr>
112
+ <th rowspan="2">Hardware</th>
113
+ <th colspan="3" align="center">🛠️ Engine Support</th>
114
+ <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
115
+ </tr>
116
+ <tr>
117
+ <th align="center">OGA</th>
118
+ <th align="center">llamacpp</th>
119
+ <th align="center">HF</th>
120
+ <th align="center">Windows</th>
121
+ <th align="center">Linux</th>
122
+ </tr>
123
+ </thead>
124
+ <tbody>
125
+ <tr>
126
+ <td>🧠 CPU</td>
127
+ <td align="center">All platforms</td>
128
+ <td align="center">All platforms</td>
129
+ <td align="center">All platforms</td>
130
+ <td align="center">✅</td>
131
+ <td align="center">✅</td>
132
+ </tr>
133
+ <tr>
134
+ <td>🎮 GPU</td>
135
+ <td align="center">—</td>
136
+ <td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
137
+ <td align="center">—</td>
138
+ <td align="center">✅</td>
139
+ <td align="center">✅</td>
140
+ </tr>
141
+ <tr>
142
+ <td>🤖 NPU</td>
143
+ <td align="center">AMD Ryzen™ AI 300 series</td>
144
+ <td align="center">—</td>
145
+ <td align="center">—</td>
146
+ <td align="center">✅</td>
147
+ <td align="center">—</td>
148
+ </tr>
149
+ </tbody>
150
+ </table>
151
+
152
+
153
+
154
+ #### Inference Engines Overview
155
+ | Engine | Description |
156
+ | :--- | :--- |
157
+ | **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
158
+ | **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
159
+ | **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
160
+
161
+ ## Integrate Lemonade Server with Your Application
162
+
163
+ Lemonade Server can be integrated with applications written in many languages, including Python, C++, Java, C#, Node.js, Go, Ruby, Rust, and PHP. For the full list and integration details, see [docs/server/README.md](./docs/server/README.md).
164
+
165
+ ## Contributing
166
+
167
+ We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
168
+
169
+ ## Maintainers
170
+
171
+ This project is sponsored by AMD. It is maintained in equal measure by @danielholanda, @jeremyfowers, @ramkrishna, and @vgodsoe. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues) or by emailing [lemonade@amd.com](mailto:lemonade@amd.com).
172
+
173
+ ## License
174
+
175
+ This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
176
+
177
+ <!--This file was originally licensed under Apache 2.0. It has been modified.
178
+ Modifications Copyright (c) 2025 AMD-->
179
+
@@ -0,0 +1,70 @@
1
+ lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
2
+ lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
3
+ lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
4
+ lemonade/cli.py,sha256=XzptHh6LTl5OdGRnxiLykQ8QBl2rQmhWH5w0KPJVyY4,4359
5
+ lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
+ lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
+ lemonade/version.py,sha256=qR-61NMOca8p2Rty8s6xwXQSXLDufw2os6i4zdyqfak,22
8
+ lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
+ lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
11
+ lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
12
+ lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
13
+ lemonade/common/network.py,sha256=EkWTxjJ-UM469nEJdzEl-5UNO7dPfVfFzU2SVMMPaD0,1425
14
+ lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
15
+ lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
16
+ lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD40,12234
17
+ lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
18
+ lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
19
+ lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
20
+ lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
21
+ lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
22
+ lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
23
+ lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
24
+ lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
25
+ lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9532
26
+ lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
27
+ lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
28
+ lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
29
+ lemonade/tools/prompt.py,sha256=AT3p5rCGHEs9ozeGxwWl07iKF-mgLxFOkYLjU2btFHs,8638
30
+ lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
31
+ lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
32
+ lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
33
+ lemonade/tools/huggingface/utils.py,sha256=xybIWOEXHaMuw-nAEu3aITdvZSHcGKgZ9kFS5mIWcEg,13873
34
+ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPdv3Q,5946
35
+ lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
36
+ lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
+ lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
38
+ lemonade/tools/oga/load.py,sha256=7Sdf6PFPrqbadPabyJb_uPRUIP09qj21ZYdXz47MqsE,28570
39
+ lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
40
+ lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
42
+ lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
43
+ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
+ lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
45
+ lemonade/tools/report/table.py,sha256=di8IZkolt_kaZfWri6GQkhPE1zCELqcrBcG1x1fzWqg,24843
46
+ lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
+ lemonade/tools/server/llamacpp.py,sha256=aDVjjkU2Z2PN25Uuy-lk6ByKPR8kg5r2X-YsVSs4vi8,15624
48
+ lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
49
+ lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
50
+ lemonade/tools/server/tray.py,sha256=SakwhZKPgo7VtWP4q10SaCcZdxKG95dnNsXdTu9Eei0,16030
51
+ lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
52
+ lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
53
+ lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
54
+ lemonade/tools/server/static/webapp.html,sha256=im7YQkwvbuqrbO-sLhStVqtA6B7HKAn2azZka1KoeJQ,21260
55
+ lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
56
+ lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
57
+ lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
58
+ lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
59
+ lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
60
+ lemonade_sdk-8.0.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
+ lemonade_sdk-8.0.1.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
62
+ lemonade_server/cli.py,sha256=fm1eORLKElHfzqO5VVicDmn9EbmqIffi1bynqacJeyw,11744
63
+ lemonade_server/model_manager.py,sha256=HqbahDMRv1x8jyQj4pa1rXanlPmcCykt8tlI6WfaxjE,13023
64
+ lemonade_server/pydantic_models.py,sha256=2ALw47C1VWGe2nKWjlEAzP1ggKYsky4xlahUFxQJCMs,2298
65
+ lemonade_server/server_models.json,sha256=wTK_H9XDHLxqMWQJqbBsJwm50PhOR4gURyVj9Jm35PQ,6992
66
+ lemonade_sdk-8.0.1.dist-info/METADATA,sha256=s5q-KKS3Drrxxm1-wGLUP9c0HymN2RgC7PjMqr0biog,8225
67
+ lemonade_sdk-8.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ lemonade_sdk-8.0.1.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
+ lemonade_sdk-8.0.1.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
+ lemonade_sdk-8.0.1.dist-info/RECORD,,
lemonade_server/cli.py CHANGED
@@ -1,9 +1,19 @@
1
1
  import argparse
2
2
  import sys
3
3
  import os
4
- from typing import Tuple
4
+ from typing import Tuple, Optional
5
5
  import psutil
6
6
  from typing import List
7
+ import subprocess
8
+
9
+
10
+ # Error codes for different CLI scenarios
11
+ class ExitCodes:
12
+ SUCCESS = 0
13
+ GENERAL_ERROR = 1
14
+ SERVER_ALREADY_RUNNING = 2
15
+ TIMEOUT_STOPPING_SERVER = 3
16
+ ERROR_STOPPING_SERVER = 4
7
17
 
8
18
 
9
19
  class PullError(Exception):
@@ -12,9 +22,16 @@ class PullError(Exception):
12
22
  """
13
23
 
14
24
 
25
+ class DeleteError(Exception):
26
+ """
27
+ The delete command has failed to delete an LLM
28
+ """
29
+
30
+
15
31
  def serve(
16
32
  port: int,
17
33
  log_level: str = None,
34
+ tray: bool = False,
18
35
  ):
19
36
  """
20
37
  Execute the serve command
@@ -29,7 +46,7 @@ def serve(
29
46
  "Please stop the existing server before starting a new instance."
30
47
  ),
31
48
  )
32
- sys.exit(1)
49
+ sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
33
50
 
34
51
  # Otherwise, start the server
35
52
  print("Starting Lemonade Server...")
@@ -46,6 +63,7 @@ def serve(
46
63
  port=port,
47
64
  log_level=log_level,
48
65
  truncate_inputs=truncate_inputs,
66
+ tray=tray,
49
67
  )
50
68
 
51
69
 
@@ -63,21 +81,49 @@ def stop():
63
81
  # Stop the server
64
82
  try:
65
83
  process = psutil.Process(running_pid)
84
+
85
+ # Get all child processes (including llama-server)
86
+ children = process.children(recursive=True)
87
+
88
+ # Terminate the main process first
66
89
  process.terminate()
90
+
91
+ # Then terminate all children
92
+ for child in children:
93
+ try:
94
+ child.terminate()
95
+ except psutil.NoSuchProcess:
96
+ pass # Child already terminated
97
+
98
+ # Wait for main process
67
99
  process.wait(timeout=10)
100
+
101
+ # Kill any children that didn't terminate gracefully
102
+ for child in children:
103
+ try:
104
+ if child.is_running():
105
+ child.kill()
106
+ except psutil.NoSuchProcess:
107
+ pass # Child already terminated
68
108
  except psutil.NoSuchProcess:
69
109
  # Process already terminated
70
110
  pass
71
111
  except psutil.TimeoutExpired:
72
112
  print("Timed out waiting for Lemonade Server to stop.")
73
- sys.exit(1)
113
+ sys.exit(ExitCodes.TIMEOUT_STOPPING_SERVER)
74
114
  except Exception as e: # pylint: disable=broad-exception-caught
75
115
  print(f"Error stopping Lemonade Server: {e}")
76
- sys.exit(1)
116
+ sys.exit(ExitCodes.ERROR_STOPPING_SERVER)
77
117
  print("Lemonade Server stopped successfully.")
78
118
 
79
119
 
80
- def pull(model_names: List[str]):
120
+ def pull(
121
+ model_names: List[str],
122
+ checkpoint: Optional[str] = None,
123
+ recipe: Optional[str] = None,
124
+ reasoning: bool = False,
125
+ mmproj: str = "",
126
+ ):
81
127
  """
82
128
  Install an LLM based on its Lemonade Server model name
83
129
 
@@ -95,10 +141,20 @@ def pull(model_names: List[str]):
95
141
  base_url = f"http://localhost:{port}/api/v1"
96
142
 
97
143
  for model_name in model_names:
144
+ payload = {"model_name": model_name}
145
+
146
+ if checkpoint and recipe:
147
+ # Add the parameters for registering a new model
148
+ payload["checkpoint"] = checkpoint
149
+ payload["recipe"] = recipe
150
+
151
+ if reasoning:
152
+ payload["reasoning"] = reasoning
153
+ if mmproj:
154
+ payload["mmproj"] = mmproj
155
+
98
156
  # Install the model
99
- pull_response = requests.post(
100
- f"{base_url}/pull", json={"model_name": model_name}
101
- )
157
+ pull_response = requests.post(f"{base_url}/pull", json=payload)
102
158
 
103
159
  if pull_response.status_code != 200:
104
160
  raise PullError(
@@ -110,7 +166,48 @@ def pull(model_names: List[str]):
110
166
  else:
111
167
  from lemonade_server.model_manager import ModelManager
112
168
 
113
- ModelManager().download_models(model_names)
169
+ ModelManager().download_models(
170
+ model_names,
171
+ checkpoint=checkpoint,
172
+ recipe=recipe,
173
+ reasoning=reasoning,
174
+ mmproj=mmproj,
175
+ )
176
+
177
+
178
+ def delete(model_names: List[str]):
179
+ """
180
+ Delete an LLM based on its Lemonade Server model name
181
+
182
+ If Lemonade Server is running, use the delete endpoint to delete the model
183
+ so that the Lemonade Server instance is aware of the deletion.
184
+
185
+ Otherwise, use ModelManager to delete the model.
186
+ """
187
+
188
+ server_running, port = status(verbose=False)
189
+
190
+ if server_running:
191
+ import requests
192
+
193
+ base_url = f"http://localhost:{port}/api/v1"
194
+
195
+ for model_name in model_names:
196
+ # Delete the model
197
+ delete_response = requests.post(
198
+ f"{base_url}/delete", json={"model_name": model_name}
199
+ )
200
+
201
+ if delete_response.status_code != 200:
202
+ raise DeleteError(
203
+ f"Failed to delete {model_name}. Check the "
204
+ "Lemonade Server log for more information."
205
+ )
206
+ else:
207
+ from lemonade_server.model_manager import ModelManager
208
+
209
+ for model_name in model_names:
210
+ ModelManager().delete_model(model_name)
114
211
 
115
212
 
116
213
  def version():
@@ -147,18 +244,18 @@ def is_lemonade_server(pid):
147
244
  """
148
245
  try:
149
246
  process = psutil.Process(pid)
247
+
150
248
  while True:
151
- if process.name() in [ # Windows
249
+ process_name = process.name()
250
+ if process_name in [ # Windows
152
251
  "lemonade-server-dev.exe",
153
252
  "lemonade-server.exe",
154
- "lemonade.exe",
155
- ] or process.name() in [ # Linux
253
+ ] or process_name in [ # Linux
156
254
  "lemonade-server-dev",
157
255
  "lemonade-server",
158
- "lemonade",
159
256
  ]:
160
257
  return True
161
- elif "llama-server" in process.name():
258
+ elif "llama-server" in process_name:
162
259
  return False
163
260
  if not process.parent():
164
261
  return False
@@ -174,16 +271,23 @@ def get_server_info() -> Tuple[int | None, int | None]:
174
271
  1. Lemonade Server's PID
175
272
  2. The port that Lemonade Server is running on
176
273
  """
177
- # Go over all python processes that have a port open
178
- for process in psutil.process_iter(["pid", "name"]):
179
- try:
180
- connections = process.net_connections()
181
- for conn in connections:
182
- if conn.status == "LISTEN":
183
- if is_lemonade_server(process.info["pid"]):
184
- return process.info["pid"], conn.laddr.port
185
- except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
186
- continue
274
+
275
+ # Get all network connections and filter for localhost IPv4 listening ports
276
+ try:
277
+ connections = psutil.net_connections(kind="tcp4")
278
+
279
+ for conn in connections:
280
+ if (
281
+ conn.status == "LISTEN"
282
+ and conn.laddr
283
+ and conn.laddr.ip in ["127.0.0.1"]
284
+ and conn.pid is not None
285
+ ):
286
+ if is_lemonade_server(conn.pid):
287
+ return conn.pid, conn.laddr.port
288
+
289
+ except Exception:
290
+ pass
187
291
 
188
292
  return None, None
189
293
 
@@ -214,6 +318,12 @@ def main():
214
318
  choices=["critical", "error", "warning", "info", "debug", "trace"],
215
319
  default="info",
216
320
  )
321
+ if os.name == "nt":
322
+ serve_parser.add_argument(
323
+ "--no-tray",
324
+ action="store_true",
325
+ help="Do not show a tray icon when the server is running",
326
+ )
217
327
 
218
328
  # Status command
219
329
  status_parser = subparsers.add_parser("status", help="Check if server is running")
@@ -235,20 +345,65 @@ def main():
235
345
  help="Lemonade Server model name",
236
346
  nargs="+",
237
347
  )
348
+ pull_parser.add_argument(
349
+ "--checkpoint",
350
+ help="For registering a new model: Hugging Face checkpoint to source the model from",
351
+ )
352
+ pull_parser.add_argument(
353
+ "--recipe",
354
+ help="For registering a new model: lemonade.api recipe to use with the model",
355
+ )
356
+ pull_parser.add_argument(
357
+ "--reasoning",
358
+ help="For registering a new model: whether the model is a reasoning model or not",
359
+ type=bool,
360
+ default=False,
361
+ )
362
+ pull_parser.add_argument(
363
+ "--mmproj",
364
+ help="For registering a new multimodal model: full file name of the .mmproj file in the checkpoint",
365
+ )
366
+
367
+ # Delete command
368
+ delete_parser = subparsers.add_parser(
369
+ "delete",
370
+ help="Delete an LLM",
371
+ epilog=(
372
+ "More information: "
373
+ "https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/server_models.md"
374
+ ),
375
+ )
376
+ delete_parser.add_argument(
377
+ "model",
378
+ help="Lemonade Server model name",
379
+ nargs="+",
380
+ )
238
381
 
239
382
  args = parser.parse_args()
240
383
 
384
+ if os.name != "nt":
385
+ args.no_tray = True
386
+
241
387
  if args.version:
242
388
  version()
243
389
  elif args.command == "serve":
244
390
  serve(
245
- args.port,
246
- args.log_level,
391
+ port=args.port,
392
+ log_level=args.log_level,
393
+ tray=not args.no_tray,
247
394
  )
248
395
  elif args.command == "status":
249
396
  status()
250
397
  elif args.command == "pull":
251
- pull(args.model)
398
+ pull(
399
+ args.model,
400
+ checkpoint=args.checkpoint,
401
+ recipe=args.recipe,
402
+ reasoning=args.reasoning,
403
+ mmproj=args.mmproj,
404
+ )
405
+ elif args.command == "delete":
406
+ delete(args.model)
252
407
  elif args.command == "stop":
253
408
  stop()
254
409
  elif args.command == "help" or not args.command: