lemonade-sdk 7.0.3__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/api.py +3 -3
- lemonade/cli.py +11 -17
- lemonade/common/build.py +0 -47
- lemonade/common/network.py +50 -0
- lemonade/common/status.py +2 -21
- lemonade/common/system_info.py +19 -4
- lemonade/profilers/memory_tracker.py +3 -1
- lemonade/tools/accuracy.py +3 -4
- lemonade/tools/adapter.py +1 -2
- lemonade/tools/{huggingface_bench.py → huggingface/bench.py} +2 -87
- lemonade/tools/huggingface/load.py +235 -0
- lemonade/tools/{huggingface_load.py → huggingface/utils.py} +87 -255
- lemonade/tools/humaneval.py +9 -3
- lemonade/tools/{llamacpp_bench.py → llamacpp/bench.py} +1 -1
- lemonade/tools/{llamacpp.py → llamacpp/load.py} +18 -2
- lemonade/tools/mmlu.py +7 -15
- lemonade/tools/{ort_genai/oga.py → oga/load.py} +31 -422
- lemonade/tools/oga/utils.py +423 -0
- lemonade/tools/perplexity.py +4 -3
- lemonade/tools/prompt.py +2 -1
- lemonade/tools/quark/quark_load.py +2 -1
- lemonade/tools/quark/quark_quantize.py +5 -5
- lemonade/tools/report/table.py +3 -3
- lemonade/tools/server/llamacpp.py +159 -34
- lemonade/tools/server/serve.py +169 -147
- lemonade/tools/server/static/favicon.ico +0 -0
- lemonade/tools/server/static/styles.css +568 -0
- lemonade/tools/server/static/webapp.html +439 -0
- lemonade/tools/server/tray.py +458 -0
- lemonade/tools/server/{port_utils.py → utils/port.py} +22 -3
- lemonade/tools/server/utils/system_tray.py +395 -0
- lemonade/tools/server/{instructions.py → webapp.py} +4 -10
- lemonade/version.py +1 -1
- lemonade_install/install.py +46 -28
- {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/METADATA +84 -22
- lemonade_sdk-8.0.0.dist-info/RECORD +70 -0
- lemonade_server/cli.py +182 -27
- lemonade_server/model_manager.py +192 -20
- lemonade_server/pydantic_models.py +9 -4
- lemonade_server/server_models.json +5 -3
- lemonade/common/analyze_model.py +0 -26
- lemonade/common/labels.py +0 -61
- lemonade/common/onnx_helpers.py +0 -176
- lemonade/common/plugins.py +0 -10
- lemonade/common/tensor_helpers.py +0 -83
- lemonade/tools/server/static/instructions.html +0 -262
- lemonade_sdk-7.0.3.dist-info/RECORD +0 -69
- /lemonade/tools/{ort_genai → oga}/__init__.py +0 -0
- /lemonade/tools/{ort_genai/oga_bench.py → oga/bench.py} +0 -0
- /lemonade/tools/server/{thread_utils.py → utils/thread.py} +0 -0
- {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/WHEEL +0 -0
- {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-7.0.3.dist-info → lemonade_sdk-8.0.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 7.0.3
|
|
3
|
+
Version: 8.0.0
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.12
|
|
@@ -9,36 +9,42 @@ License-File: LICENSE
|
|
|
9
9
|
License-File: NOTICE.md
|
|
10
10
|
Requires-Dist: invoke>=2.0.0
|
|
11
11
|
Requires-Dist: onnx<1.18.0,>=1.11.0
|
|
12
|
-
Requires-Dist: torch>=1.12.1
|
|
13
12
|
Requires-Dist: pyyaml>=5.4
|
|
14
13
|
Requires-Dist: typeguard>=2.3.13
|
|
15
14
|
Requires-Dist: packaging>=20.9
|
|
16
15
|
Requires-Dist: numpy<2.0.0
|
|
17
|
-
Requires-Dist: pandas>=1.5.3
|
|
18
16
|
Requires-Dist: fasteners
|
|
19
17
|
Requires-Dist: GitPython>=3.1.40
|
|
20
18
|
Requires-Dist: psutil>=6.1.1
|
|
21
19
|
Requires-Dist: wmi
|
|
20
|
+
Requires-Dist: py-cpuinfo
|
|
22
21
|
Requires-Dist: pytz
|
|
23
22
|
Requires-Dist: zstandard
|
|
24
|
-
Requires-Dist:
|
|
23
|
+
Requires-Dist: fastapi
|
|
24
|
+
Requires-Dist: uvicorn[standard]
|
|
25
|
+
Requires-Dist: openai>=1.81.0
|
|
26
|
+
Requires-Dist: transformers<=4.51.3
|
|
27
|
+
Requires-Dist: jinja2
|
|
25
28
|
Requires-Dist: tabulate
|
|
26
29
|
Requires-Dist: huggingface-hub==0.30.2
|
|
30
|
+
Provides-Extra: oga-hybrid-minimal
|
|
31
|
+
Requires-Dist: onnx==1.16.1; extra == "oga-hybrid-minimal"
|
|
32
|
+
Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
|
|
33
|
+
Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
|
|
34
|
+
Provides-Extra: oga-cpu-minimal
|
|
35
|
+
Requires-Dist: onnxruntime-genai==0.6.0; extra == "oga-cpu-minimal"
|
|
36
|
+
Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
|
|
27
37
|
Provides-Extra: llm
|
|
28
38
|
Requires-Dist: torch>=2.6.0; extra == "llm"
|
|
29
|
-
Requires-Dist: transformers<=4.51.3; extra == "llm"
|
|
30
39
|
Requires-Dist: accelerate; extra == "llm"
|
|
31
|
-
Requires-Dist: py-cpuinfo; extra == "llm"
|
|
32
40
|
Requires-Dist: sentencepiece; extra == "llm"
|
|
33
41
|
Requires-Dist: datasets; extra == "llm"
|
|
42
|
+
Requires-Dist: pandas>=1.5.3; extra == "llm"
|
|
43
|
+
Requires-Dist: matplotlib; extra == "llm"
|
|
34
44
|
Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
|
|
35
|
-
Requires-Dist: fastapi; extra == "llm"
|
|
36
|
-
Requires-Dist: uvicorn[standard]; extra == "llm"
|
|
37
|
-
Requires-Dist: openai>=1.81.0; extra == "llm"
|
|
38
45
|
Requires-Dist: lm-eval[api]; extra == "llm"
|
|
39
46
|
Provides-Extra: llm-oga-cpu
|
|
40
|
-
Requires-Dist:
|
|
41
|
-
Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "llm-oga-cpu"
|
|
47
|
+
Requires-Dist: lemonade-sdk[oga-cpu-minimal]; extra == "llm-oga-cpu"
|
|
42
48
|
Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
|
|
43
49
|
Provides-Extra: llm-oga-igpu
|
|
44
50
|
Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
|
|
@@ -57,9 +63,7 @@ Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
|
57
63
|
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
58
64
|
Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-npu"
|
|
59
65
|
Provides-Extra: llm-oga-hybrid
|
|
60
|
-
Requires-Dist:
|
|
61
|
-
Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
|
|
62
|
-
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
|
|
66
|
+
Requires-Dist: lemonade-sdk[oga-hybrid-minimal]; extra == "llm-oga-hybrid"
|
|
63
67
|
Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
|
|
64
68
|
Provides-Extra: llm-oga-unified
|
|
65
69
|
Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
@@ -78,24 +82,82 @@ Dynamic: summary
|
|
|
78
82
|
|
|
79
83
|
## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
|
|
80
84
|
|
|
81
|
-
The [Lemonade SDK](./docs/README.md)
|
|
85
|
+
The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
|
|
82
86
|
|
|
83
87
|
<div align="center">
|
|
84
88
|
<img src="https://download.amd.com/images/lemonade_640x480_1.gif" alt="Lemonade Demo" title="Lemonade in Action">
|
|
85
89
|
</div>
|
|
86
90
|
|
|
91
|
+
### Features
|
|
92
|
+
|
|
87
93
|
The [Lemonade SDK](./docs/README.md) is comprised of the following:
|
|
88
94
|
|
|
89
|
-
- 🌐 **Lemonade Server**: A server
|
|
90
|
-
- 🐍 **Lemonade
|
|
91
|
-
- 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs
|
|
92
|
-
- Prompting
|
|
93
|
-
- Measuring
|
|
94
|
-
- Benchmarking
|
|
95
|
-
- Profiling the memory
|
|
95
|
+
- 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
|
|
96
|
+
- 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
|
|
97
|
+
- 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
|
|
98
|
+
- Prompting with templates.
|
|
99
|
+
- Measuring accuracy with a variety of tests.
|
|
100
|
+
- Benchmarking to get the time-to-first-token and tokens per second.
|
|
101
|
+
- Profiling the memory utilization.
|
|
96
102
|
|
|
97
103
|
### [Click here to get started with Lemonade.](./docs/README.md)
|
|
98
104
|
|
|
105
|
+
### Supported Configurations
|
|
106
|
+
|
|
107
|
+
Maximum LLM performance requires the right hardware accelerator with the right inference engine for your scenario. Lemonade supports the following configurations, while also making it easy to switch between them at runtime.
|
|
108
|
+
|
|
109
|
+
<table border="1" cellpadding="6" cellspacing="0">
|
|
110
|
+
<thead>
|
|
111
|
+
<tr>
|
|
112
|
+
<th rowspan="2">Hardware</th>
|
|
113
|
+
<th colspan="3" align="center">🛠️ Engine Support</th>
|
|
114
|
+
<th colspan="2" align="center">🖥️ OS (x86/x64)</th>
|
|
115
|
+
</tr>
|
|
116
|
+
<tr>
|
|
117
|
+
<th align="center">OGA</th>
|
|
118
|
+
<th align="center">llamacpp</th>
|
|
119
|
+
<th align="center">HF</th>
|
|
120
|
+
<th align="center">Windows</th>
|
|
121
|
+
<th align="center">Linux</th>
|
|
122
|
+
</tr>
|
|
123
|
+
</thead>
|
|
124
|
+
<tbody>
|
|
125
|
+
<tr>
|
|
126
|
+
<td>🧠 CPU</td>
|
|
127
|
+
<td align="center">All platforms</td>
|
|
128
|
+
<td align="center">All platforms</td>
|
|
129
|
+
<td align="center">All platforms</td>
|
|
130
|
+
<td align="center">✅</td>
|
|
131
|
+
<td align="center">✅</td>
|
|
132
|
+
</tr>
|
|
133
|
+
<tr>
|
|
134
|
+
<td>🎮 GPU</td>
|
|
135
|
+
<td align="center">—</td>
|
|
136
|
+
<td align="center">Vulkan: All platforms<br><small>Focus: Radeon™ 7000/9000</small></td>
|
|
137
|
+
<td align="center">—</td>
|
|
138
|
+
<td align="center">✅</td>
|
|
139
|
+
<td align="center">✅</td>
|
|
140
|
+
</tr>
|
|
141
|
+
<tr>
|
|
142
|
+
<td>🤖 NPU</td>
|
|
143
|
+
<td align="center">AMD Ryzen™ AI 300 series</td>
|
|
144
|
+
<td align="center">—</td>
|
|
145
|
+
<td align="center">—</td>
|
|
146
|
+
<td align="center">✅</td>
|
|
147
|
+
<td align="center">—</td>
|
|
148
|
+
</tr>
|
|
149
|
+
</tbody>
|
|
150
|
+
</table>
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
#### Inference Engines Overview
|
|
155
|
+
| Engine | Description |
|
|
156
|
+
| :--- | :--- |
|
|
157
|
+
| **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
|
|
158
|
+
| **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
|
|
159
|
+
| **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
|
|
160
|
+
|
|
99
161
|
## Contributing
|
|
100
162
|
|
|
101
163
|
We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
|
|
2
|
+
lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
|
|
3
|
+
lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
|
|
4
|
+
lemonade/cli.py,sha256=XzptHh6LTl5OdGRnxiLykQ8QBl2rQmhWH5w0KPJVyY4,4359
|
|
5
|
+
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
|
+
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
+
lemonade/version.py,sha256=SWqJTEDnx2fOon29wQowBCNjEkhyhMbbqVsSu4EpdWI,22
|
|
8
|
+
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
|
+
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
11
|
+
lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
|
|
12
|
+
lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
|
|
13
|
+
lemonade/common/network.py,sha256=EkWTxjJ-UM469nEJdzEl-5UNO7dPfVfFzU2SVMMPaD0,1425
|
|
14
|
+
lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
|
|
15
|
+
lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
|
|
16
|
+
lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD40,12234
|
|
17
|
+
lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
|
|
18
|
+
lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
|
|
19
|
+
lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
|
|
20
|
+
lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
|
|
21
|
+
lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
|
|
22
|
+
lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
|
|
23
|
+
lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
|
|
24
|
+
lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
|
|
25
|
+
lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9532
|
|
26
|
+
lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
|
|
27
|
+
lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
|
|
28
|
+
lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
|
|
29
|
+
lemonade/tools/prompt.py,sha256=AT3p5rCGHEs9ozeGxwWl07iKF-mgLxFOkYLjU2btFHs,8638
|
|
30
|
+
lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
|
|
31
|
+
lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
|
|
32
|
+
lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
|
|
33
|
+
lemonade/tools/huggingface/utils.py,sha256=xybIWOEXHaMuw-nAEu3aITdvZSHcGKgZ9kFS5mIWcEg,13873
|
|
34
|
+
lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPdv3Q,5946
|
|
35
|
+
lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
|
|
36
|
+
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
+
lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
|
|
38
|
+
lemonade/tools/oga/load.py,sha256=7Sdf6PFPrqbadPabyJb_uPRUIP09qj21ZYdXz47MqsE,28570
|
|
39
|
+
lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
|
|
40
|
+
lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
|
+
lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
|
|
42
|
+
lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
|
|
43
|
+
lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
+
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
45
|
+
lemonade/tools/report/table.py,sha256=di8IZkolt_kaZfWri6GQkhPE1zCELqcrBcG1x1fzWqg,24843
|
|
46
|
+
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
+
lemonade/tools/server/llamacpp.py,sha256=U2eE9zfwE5sWUnS8A9oSf0Ak4v8dbjnX3fBb76g6uiE,14969
|
|
48
|
+
lemonade/tools/server/serve.py,sha256=2Z3mbK-iVXAGA6jBDgJSwuWMbBRbmN_E0lMN2h-u6Wo,52230
|
|
49
|
+
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
50
|
+
lemonade/tools/server/tray.py,sha256=SakwhZKPgo7VtWP4q10SaCcZdxKG95dnNsXdTu9Eei0,16030
|
|
51
|
+
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
52
|
+
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
53
|
+
lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
|
|
54
|
+
lemonade/tools/server/static/webapp.html,sha256=im7YQkwvbuqrbO-sLhStVqtA6B7HKAn2azZka1KoeJQ,21260
|
|
55
|
+
lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
|
|
56
|
+
lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
|
|
57
|
+
lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
|
|
58
|
+
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
59
|
+
lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
|
|
60
|
+
lemonade_sdk-8.0.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
61
|
+
lemonade_sdk-8.0.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
62
|
+
lemonade_server/cli.py,sha256=fm1eORLKElHfzqO5VVicDmn9EbmqIffi1bynqacJeyw,11744
|
|
63
|
+
lemonade_server/model_manager.py,sha256=HqbahDMRv1x8jyQj4pa1rXanlPmcCykt8tlI6WfaxjE,13023
|
|
64
|
+
lemonade_server/pydantic_models.py,sha256=2ALw47C1VWGe2nKWjlEAzP1ggKYsky4xlahUFxQJCMs,2298
|
|
65
|
+
lemonade_server/server_models.json,sha256=wTK_H9XDHLxqMWQJqbBsJwm50PhOR4gURyVj9Jm35PQ,6992
|
|
66
|
+
lemonade_sdk-8.0.0.dist-info/METADATA,sha256=fJV_bzC7VCQjqpHTDkb8G58fvBlbsuqOa_zEJNZW5JU,7940
|
|
67
|
+
lemonade_sdk-8.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
68
|
+
lemonade_sdk-8.0.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
|
|
69
|
+
lemonade_sdk-8.0.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
70
|
+
lemonade_sdk-8.0.0.dist-info/RECORD,,
|
lemonade_server/cli.py
CHANGED
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import sys
|
|
3
3
|
import os
|
|
4
|
-
from typing import Tuple
|
|
4
|
+
from typing import Tuple, Optional
|
|
5
5
|
import psutil
|
|
6
6
|
from typing import List
|
|
7
|
+
import subprocess
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Error codes for different CLI scenarios
|
|
11
|
+
class ExitCodes:
|
|
12
|
+
SUCCESS = 0
|
|
13
|
+
GENERAL_ERROR = 1
|
|
14
|
+
SERVER_ALREADY_RUNNING = 2
|
|
15
|
+
TIMEOUT_STOPPING_SERVER = 3
|
|
16
|
+
ERROR_STOPPING_SERVER = 4
|
|
7
17
|
|
|
8
18
|
|
|
9
19
|
class PullError(Exception):
|
|
@@ -12,9 +22,16 @@ class PullError(Exception):
|
|
|
12
22
|
"""
|
|
13
23
|
|
|
14
24
|
|
|
25
|
+
class DeleteError(Exception):
|
|
26
|
+
"""
|
|
27
|
+
The delete command has failed to delete an LLM
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
|
|
15
31
|
def serve(
|
|
16
32
|
port: int,
|
|
17
33
|
log_level: str = None,
|
|
34
|
+
tray: bool = False,
|
|
18
35
|
):
|
|
19
36
|
"""
|
|
20
37
|
Execute the serve command
|
|
@@ -29,7 +46,7 @@ def serve(
|
|
|
29
46
|
"Please stop the existing server before starting a new instance."
|
|
30
47
|
),
|
|
31
48
|
)
|
|
32
|
-
sys.exit(
|
|
49
|
+
sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
|
|
33
50
|
|
|
34
51
|
# Otherwise, start the server
|
|
35
52
|
print("Starting Lemonade Server...")
|
|
@@ -46,6 +63,7 @@ def serve(
|
|
|
46
63
|
port=port,
|
|
47
64
|
log_level=log_level,
|
|
48
65
|
truncate_inputs=truncate_inputs,
|
|
66
|
+
tray=tray,
|
|
49
67
|
)
|
|
50
68
|
|
|
51
69
|
|
|
@@ -63,21 +81,49 @@ def stop():
|
|
|
63
81
|
# Stop the server
|
|
64
82
|
try:
|
|
65
83
|
process = psutil.Process(running_pid)
|
|
84
|
+
|
|
85
|
+
# Get all child processes (including llama-server)
|
|
86
|
+
children = process.children(recursive=True)
|
|
87
|
+
|
|
88
|
+
# Terminate the main process first
|
|
66
89
|
process.terminate()
|
|
90
|
+
|
|
91
|
+
# Then terminate all children
|
|
92
|
+
for child in children:
|
|
93
|
+
try:
|
|
94
|
+
child.terminate()
|
|
95
|
+
except psutil.NoSuchProcess:
|
|
96
|
+
pass # Child already terminated
|
|
97
|
+
|
|
98
|
+
# Wait for main process
|
|
67
99
|
process.wait(timeout=10)
|
|
100
|
+
|
|
101
|
+
# Kill any children that didn't terminate gracefully
|
|
102
|
+
for child in children:
|
|
103
|
+
try:
|
|
104
|
+
if child.is_running():
|
|
105
|
+
child.kill()
|
|
106
|
+
except psutil.NoSuchProcess:
|
|
107
|
+
pass # Child already terminated
|
|
68
108
|
except psutil.NoSuchProcess:
|
|
69
109
|
# Process already terminated
|
|
70
110
|
pass
|
|
71
111
|
except psutil.TimeoutExpired:
|
|
72
112
|
print("Timed out waiting for Lemonade Server to stop.")
|
|
73
|
-
sys.exit(
|
|
113
|
+
sys.exit(ExitCodes.TIMEOUT_STOPPING_SERVER)
|
|
74
114
|
except Exception as e: # pylint: disable=broad-exception-caught
|
|
75
115
|
print(f"Error stopping Lemonade Server: {e}")
|
|
76
|
-
sys.exit(
|
|
116
|
+
sys.exit(ExitCodes.ERROR_STOPPING_SERVER)
|
|
77
117
|
print("Lemonade Server stopped successfully.")
|
|
78
118
|
|
|
79
119
|
|
|
80
|
-
def pull(model_names: List[str]):
|
|
120
|
+
def pull(
|
|
121
|
+
model_names: List[str],
|
|
122
|
+
checkpoint: Optional[str] = None,
|
|
123
|
+
recipe: Optional[str] = None,
|
|
124
|
+
reasoning: bool = False,
|
|
125
|
+
mmproj: str = "",
|
|
126
|
+
):
|
|
81
127
|
"""
|
|
82
128
|
Install an LLM based on its Lemonade Server model name
|
|
83
129
|
|
|
@@ -95,10 +141,20 @@ def pull(model_names: List[str]):
|
|
|
95
141
|
base_url = f"http://localhost:{port}/api/v1"
|
|
96
142
|
|
|
97
143
|
for model_name in model_names:
|
|
144
|
+
payload = {"model_name": model_name}
|
|
145
|
+
|
|
146
|
+
if checkpoint and recipe:
|
|
147
|
+
# Add the parameters for registering a new model
|
|
148
|
+
payload["checkpoint"] = checkpoint
|
|
149
|
+
payload["recipe"] = recipe
|
|
150
|
+
|
|
151
|
+
if reasoning:
|
|
152
|
+
payload["reasoning"] = reasoning
|
|
153
|
+
if mmproj:
|
|
154
|
+
payload["mmproj"] = mmproj
|
|
155
|
+
|
|
98
156
|
# Install the model
|
|
99
|
-
pull_response = requests.post(
|
|
100
|
-
f"{base_url}/pull", json={"model_name": model_name}
|
|
101
|
-
)
|
|
157
|
+
pull_response = requests.post(f"{base_url}/pull", json=payload)
|
|
102
158
|
|
|
103
159
|
if pull_response.status_code != 200:
|
|
104
160
|
raise PullError(
|
|
@@ -110,7 +166,48 @@ def pull(model_names: List[str]):
|
|
|
110
166
|
else:
|
|
111
167
|
from lemonade_server.model_manager import ModelManager
|
|
112
168
|
|
|
113
|
-
ModelManager().download_models(
|
|
169
|
+
ModelManager().download_models(
|
|
170
|
+
model_names,
|
|
171
|
+
checkpoint=checkpoint,
|
|
172
|
+
recipe=recipe,
|
|
173
|
+
reasoning=reasoning,
|
|
174
|
+
mmproj=mmproj,
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def delete(model_names: List[str]):
|
|
179
|
+
"""
|
|
180
|
+
Delete an LLM based on its Lemonade Server model name
|
|
181
|
+
|
|
182
|
+
If Lemonade Server is running, use the delete endpoint to delete the model
|
|
183
|
+
so that the Lemonade Server instance is aware of the deletion.
|
|
184
|
+
|
|
185
|
+
Otherwise, use ModelManager to delete the model.
|
|
186
|
+
"""
|
|
187
|
+
|
|
188
|
+
server_running, port = status(verbose=False)
|
|
189
|
+
|
|
190
|
+
if server_running:
|
|
191
|
+
import requests
|
|
192
|
+
|
|
193
|
+
base_url = f"http://localhost:{port}/api/v1"
|
|
194
|
+
|
|
195
|
+
for model_name in model_names:
|
|
196
|
+
# Delete the model
|
|
197
|
+
delete_response = requests.post(
|
|
198
|
+
f"{base_url}/delete", json={"model_name": model_name}
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
if delete_response.status_code != 200:
|
|
202
|
+
raise DeleteError(
|
|
203
|
+
f"Failed to delete {model_name}. Check the "
|
|
204
|
+
"Lemonade Server log for more information."
|
|
205
|
+
)
|
|
206
|
+
else:
|
|
207
|
+
from lemonade_server.model_manager import ModelManager
|
|
208
|
+
|
|
209
|
+
for model_name in model_names:
|
|
210
|
+
ModelManager().delete_model(model_name)
|
|
114
211
|
|
|
115
212
|
|
|
116
213
|
def version():
|
|
@@ -147,18 +244,18 @@ def is_lemonade_server(pid):
|
|
|
147
244
|
"""
|
|
148
245
|
try:
|
|
149
246
|
process = psutil.Process(pid)
|
|
247
|
+
|
|
150
248
|
while True:
|
|
151
|
-
|
|
249
|
+
process_name = process.name()
|
|
250
|
+
if process_name in [ # Windows
|
|
152
251
|
"lemonade-server-dev.exe",
|
|
153
252
|
"lemonade-server.exe",
|
|
154
|
-
|
|
155
|
-
] or process.name() in [ # Linux
|
|
253
|
+
] or process_name in [ # Linux
|
|
156
254
|
"lemonade-server-dev",
|
|
157
255
|
"lemonade-server",
|
|
158
|
-
"lemonade",
|
|
159
256
|
]:
|
|
160
257
|
return True
|
|
161
|
-
elif "llama-server" in
|
|
258
|
+
elif "llama-server" in process_name:
|
|
162
259
|
return False
|
|
163
260
|
if not process.parent():
|
|
164
261
|
return False
|
|
@@ -174,16 +271,23 @@ def get_server_info() -> Tuple[int | None, int | None]:
|
|
|
174
271
|
1. Lemonade Server's PID
|
|
175
272
|
2. The port that Lemonade Server is running on
|
|
176
273
|
"""
|
|
177
|
-
|
|
178
|
-
for
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
274
|
+
|
|
275
|
+
# Get all network connections and filter for localhost IPv4 listening ports
|
|
276
|
+
try:
|
|
277
|
+
connections = psutil.net_connections(kind="tcp4")
|
|
278
|
+
|
|
279
|
+
for conn in connections:
|
|
280
|
+
if (
|
|
281
|
+
conn.status == "LISTEN"
|
|
282
|
+
and conn.laddr
|
|
283
|
+
and conn.laddr.ip in ["127.0.0.1"]
|
|
284
|
+
and conn.pid is not None
|
|
285
|
+
):
|
|
286
|
+
if is_lemonade_server(conn.pid):
|
|
287
|
+
return conn.pid, conn.laddr.port
|
|
288
|
+
|
|
289
|
+
except Exception:
|
|
290
|
+
pass
|
|
187
291
|
|
|
188
292
|
return None, None
|
|
189
293
|
|
|
@@ -214,6 +318,12 @@ def main():
|
|
|
214
318
|
choices=["critical", "error", "warning", "info", "debug", "trace"],
|
|
215
319
|
default="info",
|
|
216
320
|
)
|
|
321
|
+
if os.name == "nt":
|
|
322
|
+
serve_parser.add_argument(
|
|
323
|
+
"--no-tray",
|
|
324
|
+
action="store_true",
|
|
325
|
+
help="Do not show a tray icon when the server is running",
|
|
326
|
+
)
|
|
217
327
|
|
|
218
328
|
# Status command
|
|
219
329
|
status_parser = subparsers.add_parser("status", help="Check if server is running")
|
|
@@ -235,20 +345,65 @@ def main():
|
|
|
235
345
|
help="Lemonade Server model name",
|
|
236
346
|
nargs="+",
|
|
237
347
|
)
|
|
348
|
+
pull_parser.add_argument(
|
|
349
|
+
"--checkpoint",
|
|
350
|
+
help="For registering a new model: Hugging Face checkpoint to source the model from",
|
|
351
|
+
)
|
|
352
|
+
pull_parser.add_argument(
|
|
353
|
+
"--recipe",
|
|
354
|
+
help="For registering a new model: lemonade.api recipe to use with the model",
|
|
355
|
+
)
|
|
356
|
+
pull_parser.add_argument(
|
|
357
|
+
"--reasoning",
|
|
358
|
+
help="For registering a new model: whether the model is a reasoning model or not",
|
|
359
|
+
type=bool,
|
|
360
|
+
default=False,
|
|
361
|
+
)
|
|
362
|
+
pull_parser.add_argument(
|
|
363
|
+
"--mmproj",
|
|
364
|
+
help="For registering a new multimodal model: full file name of the .mmproj file in the checkpoint",
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
# Delete command
|
|
368
|
+
delete_parser = subparsers.add_parser(
|
|
369
|
+
"delete",
|
|
370
|
+
help="Delete an LLM",
|
|
371
|
+
epilog=(
|
|
372
|
+
"More information: "
|
|
373
|
+
"https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/server_models.md"
|
|
374
|
+
),
|
|
375
|
+
)
|
|
376
|
+
delete_parser.add_argument(
|
|
377
|
+
"model",
|
|
378
|
+
help="Lemonade Server model name",
|
|
379
|
+
nargs="+",
|
|
380
|
+
)
|
|
238
381
|
|
|
239
382
|
args = parser.parse_args()
|
|
240
383
|
|
|
384
|
+
if os.name != "nt":
|
|
385
|
+
args.no_tray = True
|
|
386
|
+
|
|
241
387
|
if args.version:
|
|
242
388
|
version()
|
|
243
389
|
elif args.command == "serve":
|
|
244
390
|
serve(
|
|
245
|
-
args.port,
|
|
246
|
-
args.log_level,
|
|
391
|
+
port=args.port,
|
|
392
|
+
log_level=args.log_level,
|
|
393
|
+
tray=not args.no_tray,
|
|
247
394
|
)
|
|
248
395
|
elif args.command == "status":
|
|
249
396
|
status()
|
|
250
397
|
elif args.command == "pull":
|
|
251
|
-
pull(
|
|
398
|
+
pull(
|
|
399
|
+
args.model,
|
|
400
|
+
checkpoint=args.checkpoint,
|
|
401
|
+
recipe=args.recipe,
|
|
402
|
+
reasoning=args.reasoning,
|
|
403
|
+
mmproj=args.mmproj,
|
|
404
|
+
)
|
|
405
|
+
elif args.command == "delete":
|
|
406
|
+
delete(args.model)
|
|
252
407
|
elif args.command == "stop":
|
|
253
408
|
stop()
|
|
254
409
|
elif args.command == "help" or not args.command:
|