lemonade-sdk 7.0.3__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (55)
  1. lemonade/api.py +3 -3
  2. lemonade/cli.py +11 -17
  3. lemonade/common/build.py +0 -47
  4. lemonade/common/network.py +50 -0
  5. lemonade/common/status.py +2 -21
  6. lemonade/common/system_info.py +19 -4
  7. lemonade/profilers/memory_tracker.py +3 -1
  8. lemonade/tools/accuracy.py +3 -4
  9. lemonade/tools/adapter.py +1 -2
  10. lemonade/tools/{huggingface_bench.py → huggingface/bench.py} +2 -87
  11. lemonade/tools/huggingface/load.py +235 -0
  12. lemonade/tools/{huggingface_load.py → huggingface/utils.py} +87 -255
  13. lemonade/tools/humaneval.py +9 -3
  14. lemonade/tools/{llamacpp_bench.py → llamacpp/bench.py} +1 -1
  15. lemonade/tools/{llamacpp.py → llamacpp/load.py} +18 -2
  16. lemonade/tools/mmlu.py +7 -15
  17. lemonade/tools/{ort_genai/oga.py → oga/load.py} +31 -422
  18. lemonade/tools/oga/utils.py +423 -0
  19. lemonade/tools/perplexity.py +4 -3
  20. lemonade/tools/prompt.py +2 -1
  21. lemonade/tools/quark/quark_load.py +2 -1
  22. lemonade/tools/quark/quark_quantize.py +5 -5
  23. lemonade/tools/report/table.py +3 -3
  24. lemonade/tools/server/llamacpp.py +159 -34
  25. lemonade/tools/server/serve.py +169 -147
  26. lemonade/tools/server/static/favicon.ico +0 -0
  27. lemonade/tools/server/static/styles.css +568 -0
  28. lemonade/tools/server/static/webapp.html +439 -0
  29. lemonade/tools/server/tray.py +458 -0
  30. lemonade/tools/server/{port_utils.py → utils/port.py} +22 -3
  31. lemonade/tools/server/utils/system_tray.py +395 -0
  32. lemonade/tools/server/{instructions.py → webapp.py} +4 -10
  33. lemonade/version.py +1 -1
  34. lemonade_install/install.py +46 -28
  35. {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/METADATA +84 -22
  36. lemonade_sdk-8.0.0.dist-info/RECORD +70 -0
  37. lemonade_server/cli.py +182 -27
  38. lemonade_server/model_manager.py +192 -20
  39. lemonade_server/pydantic_models.py +9 -4
  40. lemonade_server/server_models.json +5 -3
  41. lemonade/common/analyze_model.py +0 -26
  42. lemonade/common/labels.py +0 -61
  43. lemonade/common/onnx_helpers.py +0 -176
  44. lemonade/common/plugins.py +0 -10
  45. lemonade/common/tensor_helpers.py +0 -83
  46. lemonade/tools/server/static/instructions.html +0 -262
  47. lemonade_sdk-7.0.3.dist-info/RECORD +0 -69
  48. /lemonade/tools/{ort_genai → oga}/__init__.py +0 -0
  49. /lemonade/tools/{ort_genai/oga_bench.py → oga/bench.py} +0 -0
  50. /lemonade/tools/server/{thread_utils.py → utils/thread.py} +0 -0
  51. {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/WHEEL +0 -0
  52. {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/entry_points.txt +0 -0
  53. {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/licenses/LICENSE +0 -0
  54. {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/licenses/NOTICE.md +0 -0
  55. {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 7.0.3
3
+ Version: 8.0.0
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.12
@@ -9,36 +9,42 @@ License-File: LICENSE
9
9
  License-File: NOTICE.md
10
10
  Requires-Dist: invoke>=2.0.0
11
11
  Requires-Dist: onnx<1.18.0,>=1.11.0
12
- Requires-Dist: torch>=1.12.1
13
12
  Requires-Dist: pyyaml>=5.4
14
13
  Requires-Dist: typeguard>=2.3.13
15
14
  Requires-Dist: packaging>=20.9
16
15
  Requires-Dist: numpy<2.0.0
17
- Requires-Dist: pandas>=1.5.3
18
16
  Requires-Dist: fasteners
19
17
  Requires-Dist: GitPython>=3.1.40
20
18
  Requires-Dist: psutil>=6.1.1
21
19
  Requires-Dist: wmi
20
+ Requires-Dist: py-cpuinfo
22
21
  Requires-Dist: pytz
23
22
  Requires-Dist: zstandard
24
- Requires-Dist: matplotlib
23
+ Requires-Dist: fastapi
24
+ Requires-Dist: uvicorn[standard]
25
+ Requires-Dist: openai>=1.81.0
26
+ Requires-Dist: transformers<=4.51.3
27
+ Requires-Dist: jinja2
25
28
  Requires-Dist: tabulate
26
29
  Requires-Dist: huggingface-hub==0.30.2
30
+ Provides-Extra: oga-hybrid-minimal
31
+ Requires-Dist: onnx==1.16.1; extra == "oga-hybrid-minimal"
32
+ Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
33
+ Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
34
+ Provides-Extra: oga-cpu-minimal
35
+ Requires-Dist: onnxruntime-genai==0.6.0; extra == "oga-cpu-minimal"
36
+ Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
27
37
  Provides-Extra: llm
28
38
  Requires-Dist: torch>=2.6.0; extra == "llm"
29
- Requires-Dist: transformers<=4.51.3; extra == "llm"
30
39
  Requires-Dist: accelerate; extra == "llm"
31
- Requires-Dist: py-cpuinfo; extra == "llm"
32
40
  Requires-Dist: sentencepiece; extra == "llm"
33
41
  Requires-Dist: datasets; extra == "llm"
42
+ Requires-Dist: pandas>=1.5.3; extra == "llm"
43
+ Requires-Dist: matplotlib; extra == "llm"
34
44
  Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
35
- Requires-Dist: fastapi; extra == "llm"
36
- Requires-Dist: uvicorn[standard]; extra == "llm"
37
- Requires-Dist: openai>=1.81.0; extra == "llm"
38
45
  Requires-Dist: lm-eval[api]; extra == "llm"
39
46
  Provides-Extra: llm-oga-cpu
40
- Requires-Dist: onnxruntime-genai==0.6.0; extra == "llm-oga-cpu"
41
- Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "llm-oga-cpu"
47
+ Requires-Dist: lemonade-sdk[oga-cpu-minimal]; extra == "llm-oga-cpu"
42
48
  Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
43
49
  Provides-Extra: llm-oga-igpu
44
50
  Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
@@ -57,9 +63,7 @@ Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
57
63
  Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
58
64
  Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-npu"
59
65
  Provides-Extra: llm-oga-hybrid
60
- Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
61
- Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
62
- Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
66
+ Requires-Dist: lemonade-sdk[oga-hybrid-minimal]; extra == "llm-oga-hybrid"
63
67
  Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
64
68
  Provides-Extra: llm-oga-unified
65
69
  Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
@@ -78,24 +82,82 @@ Dynamic: summary
78
82
 
79
83
  ## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
80
84
 
81
- The [Lemonade SDK](./docs/README.md) is designed to make it easy to serve, benchmark, and deploy large language models (LLMs) on a variety of hardware platforms, including CPU, GPU, and NPU.
85
+ The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
82
86
 
83
87
  <div align="center">
84
88
  <img src="https://download.amd.com/images/lemonade_640x480_1.gif" alt="Lemonade Demo" title="Lemonade in Action">
85
89
  </div>
86
90
 
91
+ ### Features
92
+
87
93
  The [Lemonade SDK](./docs/README.md) is comprised of the following:
88
94
 
89
- - 🌐 **Lemonade Server**: A server interface that uses the standard Open AI API, allowing applications to integrate with local LLMs.
90
- - 🐍 **Lemonade Python API**: Offers High-Level API for easy integration of Lemonade LLMs into Python applications and Low-Level API for custom experiments.
91
- - 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs, frameworks (PyTorch, ONNX, GGUF), and measurement tools to run experiments. The available tools are:
92
- - Prompting an LLM.
93
- - Measuring the accuracy of an LLM using a variety of tests.
94
- - Benchmarking an LLM to get the time-to-first-token and tokens per second.
95
- - Profiling the memory usage of an LLM.
95
+ - 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
96
+ - 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
97
+ - 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
98
+ - Prompting with templates.
99
+ - Measuring accuracy with a variety of tests.
100
+ - Benchmarking to get the time-to-first-token and tokens per second.
101
+ - Profiling the memory utilization.
96
102
 
97
103
  ### [Click here to get started with Lemonade.](./docs/README.md)
98
104
 
105
+ ### Supported Configurations
106
+
107
+ Maximum LLM performance requires the right hardware accelerator with the right inference engine for your scenario. Lemonade supports the following configurations, while also making it easy to switch between them at runtime.
108
+
109
+ <table border="1" cellpadding="6" cellspacing="0">
110
+ <thead>
111
+ <tr>
112
+ <th rowspan="2">Hardware</th>
113
+ <th colspan="3" align="center">🛠️ Engine Support</th>
114
+ <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
115
+ </tr>
116
+ <tr>
117
+ <th align="center">OGA</th>
118
+ <th align="center">llamacpp</th>
119
+ <th align="center">HF</th>
120
+ <th align="center">Windows</th>
121
+ <th align="center">Linux</th>
122
+ </tr>
123
+ </thead>
124
+ <tbody>
125
+ <tr>
126
+ <td>🧠 CPU</td>
127
+ <td align="center">All platforms</td>
128
+ <td align="center">All platforms</td>
129
+ <td align="center">All platforms</td>
130
+ <td align="center">✅</td>
131
+ <td align="center">✅</td>
132
+ </tr>
133
+ <tr>
134
+ <td>🎮 GPU</td>
135
+ <td align="center">—</td>
136
+ <td align="center">Vulkan: All platforms<br><small>Focus: Radeon™ 7000/9000</small></td>
137
+ <td align="center">—</td>
138
+ <td align="center">✅</td>
139
+ <td align="center">✅</td>
140
+ </tr>
141
+ <tr>
142
+ <td>🤖 NPU</td>
143
+ <td align="center">AMD Ryzen™ AI 300 series</td>
144
+ <td align="center">—</td>
145
+ <td align="center">—</td>
146
+ <td align="center">✅</td>
147
+ <td align="center">—</td>
148
+ </tr>
149
+ </tbody>
150
+ </table>
151
+
152
+
153
+
154
+ #### Inference Engines Overview
155
+ | Engine | Description |
156
+ | :--- | :--- |
157
+ | **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
158
+ | **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
159
+ | **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
160
+
99
161
  ## Contributing
100
162
 
101
163
  We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
@@ -0,0 +1,70 @@
1
+ lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
2
+ lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
3
+ lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
4
+ lemonade/cli.py,sha256=XzptHh6LTl5OdGRnxiLykQ8QBl2rQmhWH5w0KPJVyY4,4359
5
+ lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
+ lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
+ lemonade/version.py,sha256=SWqJTEDnx2fOon29wQowBCNjEkhyhMbbqVsSu4EpdWI,22
8
+ lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
+ lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
11
+ lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
12
+ lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
13
+ lemonade/common/network.py,sha256=EkWTxjJ-UM469nEJdzEl-5UNO7dPfVfFzU2SVMMPaD0,1425
14
+ lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
15
+ lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
16
+ lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD40,12234
17
+ lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
18
+ lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
19
+ lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
20
+ lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
21
+ lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
22
+ lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
23
+ lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
24
+ lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
25
+ lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9532
26
+ lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
27
+ lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
28
+ lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
29
+ lemonade/tools/prompt.py,sha256=AT3p5rCGHEs9ozeGxwWl07iKF-mgLxFOkYLjU2btFHs,8638
30
+ lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
31
+ lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
32
+ lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
33
+ lemonade/tools/huggingface/utils.py,sha256=xybIWOEXHaMuw-nAEu3aITdvZSHcGKgZ9kFS5mIWcEg,13873
34
+ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPdv3Q,5946
35
+ lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
36
+ lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
+ lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
38
+ lemonade/tools/oga/load.py,sha256=7Sdf6PFPrqbadPabyJb_uPRUIP09qj21ZYdXz47MqsE,28570
39
+ lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
40
+ lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
42
+ lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
43
+ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
+ lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
45
+ lemonade/tools/report/table.py,sha256=di8IZkolt_kaZfWri6GQkhPE1zCELqcrBcG1x1fzWqg,24843
46
+ lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
+ lemonade/tools/server/llamacpp.py,sha256=U2eE9zfwE5sWUnS8A9oSf0Ak4v8dbjnX3fBb76g6uiE,14969
48
+ lemonade/tools/server/serve.py,sha256=2Z3mbK-iVXAGA6jBDgJSwuWMbBRbmN_E0lMN2h-u6Wo,52230
49
+ lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
50
+ lemonade/tools/server/tray.py,sha256=SakwhZKPgo7VtWP4q10SaCcZdxKG95dnNsXdTu9Eei0,16030
51
+ lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
52
+ lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
53
+ lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
54
+ lemonade/tools/server/static/webapp.html,sha256=im7YQkwvbuqrbO-sLhStVqtA6B7HKAn2azZka1KoeJQ,21260
55
+ lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
56
+ lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
57
+ lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
58
+ lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
59
+ lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
60
+ lemonade_sdk-8.0.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
+ lemonade_sdk-8.0.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
62
+ lemonade_server/cli.py,sha256=fm1eORLKElHfzqO5VVicDmn9EbmqIffi1bynqacJeyw,11744
63
+ lemonade_server/model_manager.py,sha256=HqbahDMRv1x8jyQj4pa1rXanlPmcCykt8tlI6WfaxjE,13023
64
+ lemonade_server/pydantic_models.py,sha256=2ALw47C1VWGe2nKWjlEAzP1ggKYsky4xlahUFxQJCMs,2298
65
+ lemonade_server/server_models.json,sha256=wTK_H9XDHLxqMWQJqbBsJwm50PhOR4gURyVj9Jm35PQ,6992
66
+ lemonade_sdk-8.0.0.dist-info/METADATA,sha256=fJV_bzC7VCQjqpHTDkb8G58fvBlbsuqOa_zEJNZW5JU,7940
67
+ lemonade_sdk-8.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ lemonade_sdk-8.0.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
+ lemonade_sdk-8.0.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
+ lemonade_sdk-8.0.0.dist-info/RECORD,,
lemonade_server/cli.py CHANGED
@@ -1,9 +1,19 @@
1
1
  import argparse
2
2
  import sys
3
3
  import os
4
- from typing import Tuple
4
+ from typing import Tuple, Optional
5
5
  import psutil
6
6
  from typing import List
7
+ import subprocess
8
+
9
+
10
+ # Error codes for different CLI scenarios
11
class ExitCodes:
    """
    Process exit codes for the different CLI scenarios

    Each failure scenario gets its own code so that a calling script can
    tell them apart from the process exit status alone.
    """

    # Conventional "no error" status (not referenced in the visible code)
    SUCCESS = 0
    # Generic failure (not referenced in the visible code)
    GENERAL_ERROR = 1
    # serve: a Lemonade Server instance is already running
    SERVER_ALREADY_RUNNING = 2
    # stop: timed out while waiting for the server process to exit
    TIMEOUT_STOPPING_SERVER = 3
    # stop: an unexpected error occurred while stopping the server
    ERROR_STOPPING_SERVER = 4
7
17
 
8
18
 
9
19
  class PullError(Exception):
@@ -12,9 +22,16 @@ class PullError(Exception):
12
22
  """
13
23
 
14
24
 
25
class DeleteError(Exception):
    """
    The delete command has failed to delete an LLM

    Raised when a running Lemonade Server answers a delete request
    with a status code other than 200.
    """
29
+
30
+
15
31
  def serve(
16
32
  port: int,
17
33
  log_level: str = None,
34
+ tray: bool = False,
18
35
  ):
19
36
  """
20
37
  Execute the serve command
@@ -29,7 +46,7 @@ def serve(
29
46
  "Please stop the existing server before starting a new instance."
30
47
  ),
31
48
  )
32
- sys.exit(1)
49
+ sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
33
50
 
34
51
  # Otherwise, start the server
35
52
  print("Starting Lemonade Server...")
@@ -46,6 +63,7 @@ def serve(
46
63
  port=port,
47
64
  log_level=log_level,
48
65
  truncate_inputs=truncate_inputs,
66
+ tray=tray,
49
67
  )
50
68
 
51
69
 
@@ -63,21 +81,49 @@ def stop():
63
81
  # Stop the server
64
82
  try:
65
83
  process = psutil.Process(running_pid)
84
+
85
+ # Get all child processes (including llama-server)
86
+ children = process.children(recursive=True)
87
+
88
+ # Terminate the main process first
66
89
  process.terminate()
90
+
91
+ # Then terminate all children
92
+ for child in children:
93
+ try:
94
+ child.terminate()
95
+ except psutil.NoSuchProcess:
96
+ pass # Child already terminated
97
+
98
+ # Wait for main process
67
99
  process.wait(timeout=10)
100
+
101
+ # Kill any children that didn't terminate gracefully
102
+ for child in children:
103
+ try:
104
+ if child.is_running():
105
+ child.kill()
106
+ except psutil.NoSuchProcess:
107
+ pass # Child already terminated
68
108
  except psutil.NoSuchProcess:
69
109
  # Process already terminated
70
110
  pass
71
111
  except psutil.TimeoutExpired:
72
112
  print("Timed out waiting for Lemonade Server to stop.")
73
- sys.exit(1)
113
+ sys.exit(ExitCodes.TIMEOUT_STOPPING_SERVER)
74
114
  except Exception as e: # pylint: disable=broad-exception-caught
75
115
  print(f"Error stopping Lemonade Server: {e}")
76
- sys.exit(1)
116
+ sys.exit(ExitCodes.ERROR_STOPPING_SERVER)
77
117
  print("Lemonade Server stopped successfully.")
78
118
 
79
119
 
80
- def pull(model_names: List[str]):
120
+ def pull(
121
+ model_names: List[str],
122
+ checkpoint: Optional[str] = None,
123
+ recipe: Optional[str] = None,
124
+ reasoning: bool = False,
125
+ mmproj: str = "",
126
+ ):
81
127
  """
82
128
  Install an LLM based on its Lemonade Server model name
83
129
 
@@ -95,10 +141,20 @@ def pull(model_names: List[str]):
95
141
  base_url = f"http://localhost:{port}/api/v1"
96
142
 
97
143
  for model_name in model_names:
144
+ payload = {"model_name": model_name}
145
+
146
+ if checkpoint and recipe:
147
+ # Add the parameters for registering a new model
148
+ payload["checkpoint"] = checkpoint
149
+ payload["recipe"] = recipe
150
+
151
+ if reasoning:
152
+ payload["reasoning"] = reasoning
153
+ if mmproj:
154
+ payload["mmproj"] = mmproj
155
+
98
156
  # Install the model
99
- pull_response = requests.post(
100
- f"{base_url}/pull", json={"model_name": model_name}
101
- )
157
+ pull_response = requests.post(f"{base_url}/pull", json=payload)
102
158
 
103
159
  if pull_response.status_code != 200:
104
160
  raise PullError(
@@ -110,7 +166,48 @@ def pull(model_names: List[str]):
110
166
  else:
111
167
  from lemonade_server.model_manager import ModelManager
112
168
 
113
- ModelManager().download_models(model_names)
169
+ ModelManager().download_models(
170
+ model_names,
171
+ checkpoint=checkpoint,
172
+ recipe=recipe,
173
+ reasoning=reasoning,
174
+ mmproj=mmproj,
175
+ )
176
+
177
+
178
def delete(model_names: List[str]):
    """
    Delete one or more LLMs based on their Lemonade Server model names

    If Lemonade Server is running, use the delete endpoint to delete each
    model so that the Lemonade Server instance is aware of the deletion.

    Otherwise, use ModelManager to delete the models.

    Args:
        model_names: Lemonade Server model names to delete, processed in order.

    Raises:
        DeleteError: the running server answered a delete request with a
            status code other than 200. Models after the failing one are
            not processed.
    """

    server_running, port = status(verbose=False)

    if server_running:
        # Imported lazily so the dependency is only loaded on this path
        import requests

        base_url = f"http://localhost:{port}/api/v1"

        for model_name in model_names:
            # Delete the model
            # NOTE(review): no timeout is passed, so this call waits for as
            # long as the server takes to answer
            delete_response = requests.post(
                f"{base_url}/delete", json={"model_name": model_name}
            )

            if delete_response.status_code != 200:
                raise DeleteError(
                    f"Failed to delete {model_name}. Check the "
                    "Lemonade Server log for more information."
                )
    else:
        from lemonade_server.model_manager import ModelManager

        # Build the manager once and reuse it for every model in the batch
        model_manager = ModelManager()
        for model_name in model_names:
            model_manager.delete_model(model_name)
114
211
 
115
212
 
116
213
  def version():
@@ -147,18 +244,18 @@ def is_lemonade_server(pid):
147
244
  """
148
245
  try:
149
246
  process = psutil.Process(pid)
247
+
150
248
  while True:
151
- if process.name() in [ # Windows
249
+ process_name = process.name()
250
+ if process_name in [ # Windows
152
251
  "lemonade-server-dev.exe",
153
252
  "lemonade-server.exe",
154
- "lemonade.exe",
155
- ] or process.name() in [ # Linux
253
+ ] or process_name in [ # Linux
156
254
  "lemonade-server-dev",
157
255
  "lemonade-server",
158
- "lemonade",
159
256
  ]:
160
257
  return True
161
- elif "llama-server" in process.name():
258
+ elif "llama-server" in process_name:
162
259
  return False
163
260
  if not process.parent():
164
261
  return False
@@ -174,16 +271,23 @@ def get_server_info() -> Tuple[int | None, int | None]:
174
271
  1. Lemonade Server's PID
175
272
  2. The port that Lemonade Server is running on
176
273
  """
177
- # Go over all python processes that have a port open
178
- for process in psutil.process_iter(["pid", "name"]):
179
- try:
180
- connections = process.net_connections()
181
- for conn in connections:
182
- if conn.status == "LISTEN":
183
- if is_lemonade_server(process.info["pid"]):
184
- return process.info["pid"], conn.laddr.port
185
- except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
186
- continue
274
+
275
+ # Get all network connections and filter for localhost IPv4 listening ports
276
+ try:
277
+ connections = psutil.net_connections(kind="tcp4")
278
+
279
+ for conn in connections:
280
+ if (
281
+ conn.status == "LISTEN"
282
+ and conn.laddr
283
+ and conn.laddr.ip in ["127.0.0.1"]
284
+ and conn.pid is not None
285
+ ):
286
+ if is_lemonade_server(conn.pid):
287
+ return conn.pid, conn.laddr.port
288
+
289
+ except Exception:
290
+ pass
187
291
 
188
292
  return None, None
189
293
 
@@ -214,6 +318,12 @@ def main():
214
318
  choices=["critical", "error", "warning", "info", "debug", "trace"],
215
319
  default="info",
216
320
  )
321
+ if os.name == "nt":
322
+ serve_parser.add_argument(
323
+ "--no-tray",
324
+ action="store_true",
325
+ help="Do not show a tray icon when the server is running",
326
+ )
217
327
 
218
328
  # Status command
219
329
  status_parser = subparsers.add_parser("status", help="Check if server is running")
@@ -235,20 +345,65 @@ def main():
235
345
  help="Lemonade Server model name",
236
346
  nargs="+",
237
347
  )
348
+ pull_parser.add_argument(
349
+ "--checkpoint",
350
+ help="For registering a new model: Hugging Face checkpoint to source the model from",
351
+ )
352
+ pull_parser.add_argument(
353
+ "--recipe",
354
+ help="For registering a new model: lemonade.api recipe to use with the model",
355
+ )
356
+ pull_parser.add_argument(
357
+ "--reasoning",
358
+ help="For registering a new model: whether the model is a reasoning model or not",
359
+ type=bool,
360
+ default=False,
361
+ )
362
+ pull_parser.add_argument(
363
+ "--mmproj",
364
+ help="For registering a new multimodal model: full file name of the .mmproj file in the checkpoint",
365
+ )
366
+
367
+ # Delete command
368
+ delete_parser = subparsers.add_parser(
369
+ "delete",
370
+ help="Delete an LLM",
371
+ epilog=(
372
+ "More information: "
373
+ "https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/server_models.md"
374
+ ),
375
+ )
376
+ delete_parser.add_argument(
377
+ "model",
378
+ help="Lemonade Server model name",
379
+ nargs="+",
380
+ )
238
381
 
239
382
  args = parser.parse_args()
240
383
 
384
+ if os.name != "nt":
385
+ args.no_tray = True
386
+
241
387
  if args.version:
242
388
  version()
243
389
  elif args.command == "serve":
244
390
  serve(
245
- args.port,
246
- args.log_level,
391
+ port=args.port,
392
+ log_level=args.log_level,
393
+ tray=not args.no_tray,
247
394
  )
248
395
  elif args.command == "status":
249
396
  status()
250
397
  elif args.command == "pull":
251
- pull(args.model)
398
+ pull(
399
+ args.model,
400
+ checkpoint=args.checkpoint,
401
+ recipe=args.recipe,
402
+ reasoning=args.reasoning,
403
+ mmproj=args.mmproj,
404
+ )
405
+ elif args.command == "delete":
406
+ delete(args.model)
252
407
  elif args.command == "stop":
253
408
  stop()
254
409
  elif args.command == "help" or not args.command: