lemonade-sdk 8.1.1__py3-none-any.whl → 8.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/common/inference_engines.py +1 -1
- lemonade/tools/llamacpp/utils.py +114 -14
- lemonade/tools/management_tools.py +1 -1
- lemonade/tools/oga/utils.py +54 -33
- lemonade/tools/server/llamacpp.py +96 -4
- lemonade/tools/server/serve.py +80 -10
- lemonade/tools/server/static/js/chat.js +735 -0
- lemonade/tools/server/static/js/model-settings.js +162 -0
- lemonade/tools/server/static/js/models.js +865 -0
- lemonade/tools/server/static/js/shared.js +491 -0
- lemonade/tools/server/static/styles.css +652 -26
- lemonade/tools/server/static/webapp.html +145 -1091
- lemonade/tools/server/tray.py +1 -1
- lemonade/tools/server/utils/port.py +5 -4
- lemonade/version.py +1 -1
- {lemonade_sdk-8.1.1.dist-info → lemonade_sdk-8.1.3.dist-info}/METADATA +7 -6
- {lemonade_sdk-8.1.1.dist-info → lemonade_sdk-8.1.3.dist-info}/RECORD +26 -22
- {lemonade_sdk-8.1.1.dist-info → lemonade_sdk-8.1.3.dist-info}/entry_points.txt +1 -0
- lemonade_server/cli.py +66 -17
- lemonade_server/model_manager.py +1 -1
- lemonade_server/pydantic_models.py +15 -3
- lemonade_server/server_models.json +54 -3
- {lemonade_sdk-8.1.1.dist-info → lemonade_sdk-8.1.3.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.1.dist-info → lemonade_sdk-8.1.3.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.1.dist-info → lemonade_sdk-8.1.3.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.1.dist-info → lemonade_sdk-8.1.3.dist-info}/top_level.txt +0 -0
lemonade/tools/server/tray.py
CHANGED
|
@@ -427,7 +427,7 @@ class LemonadeTray(SystemTray):
|
|
|
427
427
|
Start the uvicorn server.
|
|
428
428
|
"""
|
|
429
429
|
self.server = self.server_factory()
|
|
430
|
-
self.server.uvicorn_server = self.server.run_in_thread()
|
|
430
|
+
self.server.uvicorn_server = self.server.run_in_thread(self.server.host)
|
|
431
431
|
self.server.uvicorn_server.run()
|
|
432
432
|
|
|
433
433
|
def run(self):
|
|
lemonade/tools/server/utils/port.py
CHANGED
|
@@ -5,6 +5,7 @@ import importlib
|
|
|
5
5
|
import asyncio
|
|
6
6
|
from contextlib import asynccontextmanager
|
|
7
7
|
from fastapi import FastAPI
|
|
8
|
+
from lemonade.version import __version__
|
|
8
9
|
|
|
9
10
|
_lazy_imports = {
|
|
10
11
|
"TextIteratorStreamer": ("transformers", "TextIteratorStreamer"),
|
|
@@ -42,8 +43,8 @@ async def lifespan(app: FastAPI):
|
|
|
42
43
|
logging.info(
|
|
43
44
|
"\n"
|
|
44
45
|
"\n"
|
|
45
|
-
"🍋 Lemonade Server Ready!\n"
|
|
46
|
-
f"🍋 Open http://
|
|
46
|
+
f"🍋 Lemonade Server v{__version__} Ready!\n"
|
|
47
|
+
f"🍋 Open http://{app.host_}:{app.port} in your browser for:\n"
|
|
47
48
|
"🍋 💬 chat\n"
|
|
48
49
|
"🍋 💻 model management\n"
|
|
49
50
|
"🍋 📄 docs\n"
|
|
@@ -52,8 +53,8 @@ async def lifespan(app: FastAPI):
|
|
|
52
53
|
logging.info(
|
|
53
54
|
"\n"
|
|
54
55
|
"\n"
|
|
55
|
-
"[Lemonade] Lemonade Server Ready!\n"
|
|
56
|
-
f"[Lemonade] Open http://
|
|
56
|
+
f"[Lemonade] Lemonade Server v{__version__} Ready!\n"
|
|
57
|
+
f"[Lemonade] Open http://{app.host_}:{app.port} in your browser for:\n"
|
|
57
58
|
"[Lemonade] chat\n"
|
|
58
59
|
"[Lemonade] model management\n"
|
|
59
60
|
"[Lemonade] docs\n"
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.1.1"
|
|
1
|
+
__version__ = "8.1.3"
|
|
{lemonade_sdk-8.1.1.dist-info → lemonade_sdk-8.1.3.dist-info}/METADATA
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.1.1
|
|
3
|
+
Version: 8.1.3
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
|
-
Requires-Python: >=3.10, <3.
|
|
6
|
+
Requires-Python: >=3.10, <3.14
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
License-File: LICENSE
|
|
9
9
|
License-File: NOTICE.md
|
|
10
10
|
Requires-Dist: invoke>=2.0.0
|
|
11
|
-
Requires-Dist: onnx
|
|
11
|
+
Requires-Dist: onnx==1.18.0
|
|
12
12
|
Requires-Dist: pyyaml>=5.4
|
|
13
13
|
Requires-Dist: typeguard>=2.3.13
|
|
14
14
|
Requires-Dist: packaging>=20.9
|
|
15
|
-
Requires-Dist: numpy
|
|
15
|
+
Requires-Dist: numpy
|
|
16
16
|
Requires-Dist: fasteners
|
|
17
17
|
Requires-Dist: GitPython>=3.1.40
|
|
18
18
|
Requires-Dist: psutil>=6.1.1
|
|
@@ -41,9 +41,10 @@ Requires-Dist: accelerate; extra == "dev"
|
|
|
41
41
|
Requires-Dist: datasets; extra == "dev"
|
|
42
42
|
Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
43
43
|
Requires-Dist: matplotlib; extra == "dev"
|
|
44
|
-
Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
|
|
45
44
|
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
46
45
|
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
46
|
+
Provides-Extra: model-generate
|
|
47
|
+
Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "model-generate"
|
|
47
48
|
Provides-Extra: oga-hybrid
|
|
48
49
|
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
|
|
49
50
|
Provides-Extra: oga-unified
|
|
@@ -105,7 +106,7 @@ Dynamic: summary
|
|
|
105
106
|
<img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
|
|
106
107
|
</a>
|
|
107
108
|
<a href="docs/README.md#installation" title="Check out our instructions">
|
|
108
|
-
<img src="https://img.shields.io/badge/Python-3.10
|
|
109
|
+
<img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" />
|
|
109
110
|
</a>
|
|
110
111
|
<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
|
|
111
112
|
<img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
|
|
{lemonade_sdk-8.1.1.dist-info → lemonade_sdk-8.1.3.dist-info}/RECORD
CHANGED
|
@@ -4,13 +4,13 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
|
|
|
4
4
|
lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=gnc1sclqzDLnQB9vbqA0LgSMz4H-bYCuu--_P-HWhAc,22
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
11
11
|
lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
|
|
12
12
|
lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
|
|
13
|
-
lemonade/common/inference_engines.py,sha256=
|
|
13
|
+
lemonade/common/inference_engines.py,sha256=pJxn0zOf3gEmjGAIWXNdCibfzarzc7LRbZjoQyygkcU,12591
|
|
14
14
|
lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
|
|
15
15
|
lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
|
|
16
16
|
lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
|
|
@@ -24,7 +24,7 @@ lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11
|
|
|
24
24
|
lemonade/tools/adapter.py,sha256=Ex63Y1SPCOSV4M_QtzEn3YVd39d3yew0lpmEFgp8aH4,3169
|
|
25
25
|
lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
|
|
26
26
|
lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
|
|
27
|
-
lemonade/tools/management_tools.py,sha256=
|
|
27
|
+
lemonade/tools/management_tools.py,sha256=HQBcr7LYuMqVRYQtvnkNpfutBTA7lblszyoAjjVGu1Y,10201
|
|
28
28
|
lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
|
|
29
29
|
lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
|
|
30
30
|
lemonade/tools/prompt.py,sha256=PyLksp1k8jsZsU7XBRK61k1DUHhbdLa20h-AP8Noh3w,9011
|
|
@@ -34,11 +34,11 @@ lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1
|
|
|
34
34
|
lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
|
|
35
35
|
lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
|
|
36
36
|
lemonade/tools/llamacpp/load.py,sha256=DFCvQN548Ch9H8U_rHOiYviinzw6vixb5-V7xLj7XE4,6499
|
|
37
|
-
lemonade/tools/llamacpp/utils.py,sha256=
|
|
37
|
+
lemonade/tools/llamacpp/utils.py,sha256=Auid9FepxwLIgDahaDNIxwz8kP_ap8Opd3eSF6t637g,32336
|
|
38
38
|
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
39
|
lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
|
|
40
40
|
lemonade/tools/oga/load.py,sha256=6Pf_QrHpIXDbfpTwFNRj4RmWTxI-RImhYuqRvmTVgmY,33722
|
|
41
|
-
lemonade/tools/oga/utils.py,sha256=
|
|
41
|
+
lemonade/tools/oga/utils.py,sha256=F8UVLKlfYcLa2SUqlehar8-jaX2Aw4u58DjHNNvLdOA,17675
|
|
42
42
|
lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
43
|
lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
|
|
44
44
|
lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
|
|
@@ -46,27 +46,31 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
46
46
|
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
47
47
|
lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
|
|
48
48
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
-
lemonade/tools/server/llamacpp.py,sha256=
|
|
50
|
-
lemonade/tools/server/serve.py,sha256=
|
|
49
|
+
lemonade/tools/server/llamacpp.py,sha256=jVkaPx1ZbHYiJll3wnDR0fh-e0yfg7UB0BXlLWPx4dE,20998
|
|
50
|
+
lemonade/tools/server/serve.py,sha256=3wnB19YThQLHkjbzy7PCWppQY_j5xKB24GcqM8IybxI,58857
|
|
51
51
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
52
|
-
lemonade/tools/server/tray.py,sha256=
|
|
52
|
+
lemonade/tools/server/tray.py,sha256=YJ4-vJlM6tJ0ojY_wVM6COuNscETFkQPt-BaNqYa9YQ,17640
|
|
53
53
|
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
54
54
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
55
|
-
lemonade/tools/server/static/styles.css,sha256=
|
|
56
|
-
lemonade/tools/server/static/webapp.html,sha256=
|
|
57
|
-
lemonade/tools/server/
|
|
55
|
+
lemonade/tools/server/static/styles.css,sha256=X_mqf3XCOo_pZEqkDiVzMGCy8ARseEBq5DdGnAdfVk0,43383
|
|
56
|
+
lemonade/tools/server/static/webapp.html,sha256=FX2MZUsljfgxxuF12KBdgvNkso_z-sHewWc0SEGGcGM,18138
|
|
57
|
+
lemonade/tools/server/static/js/chat.js,sha256=BTvREuEt0NrN8qhAuda5tTAoUN6tbsoukevA-zyTrwQ,27193
|
|
58
|
+
lemonade/tools/server/static/js/model-settings.js,sha256=JXHeG7xVrRU181Hj7CZflERAi1Z6t-qwYFR4aH5nf5I,5820
|
|
59
|
+
lemonade/tools/server/static/js/models.js,sha256=bbX7c8B59ioim86T3x9PFESvF8y3cHPYUO6nhc4SCDs,32500
|
|
60
|
+
lemonade/tools/server/static/js/shared.js,sha256=4iqDNWiKEB7eYS4fdnTy-RwO_ksROrLYLmT2YSomG1M,17065
|
|
61
|
+
lemonade/tools/server/utils/port.py,sha256=J7-g-Aqygb50jNoHLhhRfBZVM-uhGlcB5-oYBAehvgw,2263
|
|
58
62
|
lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
|
|
59
63
|
lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
|
|
60
64
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
61
65
|
lemonade_install/install.py,sha256=Zl_JtEIhbqZZTvxcqtq895IomEN-JNxp9xOZEtahMHQ,28289
|
|
62
|
-
lemonade_sdk-8.1.
|
|
63
|
-
lemonade_sdk-8.1.
|
|
64
|
-
lemonade_server/cli.py,sha256
|
|
65
|
-
lemonade_server/model_manager.py,sha256=
|
|
66
|
-
lemonade_server/pydantic_models.py,sha256=
|
|
67
|
-
lemonade_server/server_models.json,sha256=
|
|
68
|
-
lemonade_sdk-8.1.
|
|
69
|
-
lemonade_sdk-8.1.
|
|
70
|
-
lemonade_sdk-8.1.
|
|
71
|
-
lemonade_sdk-8.1.
|
|
72
|
-
lemonade_sdk-8.1.
|
|
66
|
+
lemonade_sdk-8.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
67
|
+
lemonade_sdk-8.1.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
68
|
+
lemonade_server/cli.py,sha256=-haIK4Q9cYFwna5-m6vgxW9qMaGW-_lDFB49zXxDk2A,18755
|
|
69
|
+
lemonade_server/model_manager.py,sha256=cFaHJVOsabwekAPryXAPdo6qrXYBD_yht7XPg2QImqc,10791
|
|
70
|
+
lemonade_server/pydantic_models.py,sha256=oTFnDVCax2Gerz7RBJOJF0FVQjKoUPJZbBo-EgogQyk,3161
|
|
71
|
+
lemonade_server/server_models.json,sha256=DAdG4ebIt5Dy5MM3kmXn1pO0XbNMph1gdpzbacBDVuc,11664
|
|
72
|
+
lemonade_sdk-8.1.3.dist-info/METADATA,sha256=3As4CPILSkJVZMKsyqHZX6o9P8aBsixEJuQTtOas25w,17086
|
|
73
|
+
lemonade_sdk-8.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
74
|
+
lemonade_sdk-8.1.3.dist-info/entry_points.txt,sha256=7sRvpNhi1E7amnM7RZo57e8yFF9iA5uuRaIeJ1Xre6w,193
|
|
75
|
+
lemonade_sdk-8.1.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
76
|
+
lemonade_sdk-8.1.3.dist-info/RECORD,,
|
lemonade_server/cli.py
CHANGED
|
@@ -4,6 +4,13 @@ import os
|
|
|
4
4
|
from typing import Tuple, Optional
|
|
5
5
|
import psutil
|
|
6
6
|
from typing import List
|
|
7
|
+
from lemonade_server.pydantic_models import (
|
|
8
|
+
DEFAULT_PORT,
|
|
9
|
+
DEFAULT_HOST,
|
|
10
|
+
DEFAULT_LOG_LEVEL,
|
|
11
|
+
DEFAULT_LLAMACPP_BACKEND,
|
|
12
|
+
DEFAULT_CTX_SIZE,
|
|
13
|
+
)
|
|
7
14
|
|
|
8
15
|
|
|
9
16
|
# Error codes for different CLI scenarios
|
|
@@ -47,6 +54,7 @@ class ModelLoadError(Exception):
|
|
|
47
54
|
|
|
48
55
|
def serve(
|
|
49
56
|
port: int = None,
|
|
57
|
+
host: str = None,
|
|
50
58
|
log_level: str = None,
|
|
51
59
|
tray: bool = False,
|
|
52
60
|
use_thread: bool = False,
|
|
@@ -59,26 +67,20 @@ def serve(
|
|
|
59
67
|
|
|
60
68
|
# Otherwise, start the server
|
|
61
69
|
print("Starting Lemonade Server...")
|
|
62
|
-
from lemonade.tools.server.serve import
|
|
63
|
-
Server,
|
|
64
|
-
DEFAULT_PORT,
|
|
65
|
-
DEFAULT_LOG_LEVEL,
|
|
66
|
-
DEFAULT_LLAMACPP_BACKEND,
|
|
67
|
-
DEFAULT_CTX_SIZE,
|
|
68
|
-
)
|
|
70
|
+
from lemonade.tools.server.serve import Server
|
|
69
71
|
|
|
70
72
|
port = port if port is not None else DEFAULT_PORT
|
|
73
|
+
host = host if host is not None else DEFAULT_HOST
|
|
71
74
|
log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
|
|
72
75
|
llamacpp_backend = (
|
|
73
76
|
llamacpp_backend if llamacpp_backend is not None else DEFAULT_LLAMACPP_BACKEND
|
|
74
77
|
)
|
|
75
|
-
|
|
76
|
-
# Use ctx_size if provided, otherwise use default
|
|
77
78
|
ctx_size = ctx_size if ctx_size is not None else DEFAULT_CTX_SIZE
|
|
78
79
|
|
|
79
80
|
# Start the server
|
|
80
81
|
server = Server(
|
|
81
82
|
port=port,
|
|
83
|
+
host=host,
|
|
82
84
|
log_level=log_level,
|
|
83
85
|
ctx_size=ctx_size,
|
|
84
86
|
tray=tray,
|
|
@@ -259,7 +261,9 @@ def delete(model_names: List[str]):
|
|
|
259
261
|
def run(
|
|
260
262
|
model_name: str,
|
|
261
263
|
port: int = None,
|
|
264
|
+
host: str = "localhost",
|
|
262
265
|
log_level: str = None,
|
|
266
|
+
tray: bool = False,
|
|
263
267
|
llamacpp_backend: str = None,
|
|
264
268
|
ctx_size: int = None,
|
|
265
269
|
):
|
|
@@ -270,13 +274,14 @@ def run(
|
|
|
270
274
|
import time
|
|
271
275
|
|
|
272
276
|
# Start the server if not running
|
|
273
|
-
_,
|
|
274
|
-
server_previously_running =
|
|
277
|
+
_, running_port = get_server_info()
|
|
278
|
+
server_previously_running = running_port is not None
|
|
275
279
|
if not server_previously_running:
|
|
276
280
|
port, server_thread = serve(
|
|
277
281
|
port=port,
|
|
282
|
+
host=host,
|
|
278
283
|
log_level=log_level,
|
|
279
|
-
tray=
|
|
284
|
+
tray=tray,
|
|
280
285
|
use_thread=True,
|
|
281
286
|
llamacpp_backend=llamacpp_backend,
|
|
282
287
|
ctx_size=ctx_size,
|
|
@@ -291,7 +296,7 @@ def run(
|
|
|
291
296
|
load(model_name, port)
|
|
292
297
|
|
|
293
298
|
# Open the webapp with the specified model
|
|
294
|
-
url = f"http://
|
|
299
|
+
url = f"http://{host}:{port}/?model={model_name}#llm-chat"
|
|
295
300
|
print(f"You can now chat with {model_name} at {url}")
|
|
296
301
|
webbrowser.open(url)
|
|
297
302
|
|
|
@@ -440,26 +445,67 @@ def list_models():
|
|
|
440
445
|
print(tabulate(table_data, headers=headers, tablefmt="simple"))
|
|
441
446
|
|
|
442
447
|
|
|
448
|
+
def developer_entrypoint():
|
|
449
|
+
"""
|
|
450
|
+
Developer entry point that starts the server with debug logging
|
|
451
|
+
Equivalent to running: lemonade-server-dev serve --log-level debug [additional args]
|
|
452
|
+
|
|
453
|
+
This function automatically prepends "serve --log-level debug" to any arguments
|
|
454
|
+
passed to the lsdev command.
|
|
455
|
+
"""
|
|
456
|
+
# Save original sys.argv
|
|
457
|
+
original_argv = sys.argv.copy()
|
|
458
|
+
|
|
459
|
+
try:
|
|
460
|
+
# Take any additional arguments passed to lsdev and append them
|
|
461
|
+
# after "serve --log-level debug"
|
|
462
|
+
additional_args = sys.argv[1:] if len(sys.argv) > 1 else []
|
|
463
|
+
|
|
464
|
+
# Set sys.argv to simulate "serve --log-level debug" + additional args
|
|
465
|
+
sys.argv = [sys.argv[0], "serve", "--log-level", "debug"] + additional_args
|
|
466
|
+
main()
|
|
467
|
+
finally:
|
|
468
|
+
# Restore original sys.argv
|
|
469
|
+
sys.argv = original_argv
|
|
470
|
+
|
|
471
|
+
|
|
443
472
|
def _add_server_arguments(parser):
|
|
444
473
|
"""Add common server arguments to a parser"""
|
|
445
|
-
|
|
474
|
+
|
|
475
|
+
parser.add_argument(
|
|
476
|
+
"--port",
|
|
477
|
+
type=int,
|
|
478
|
+
help="Port number to serve on",
|
|
479
|
+
default=DEFAULT_PORT,
|
|
480
|
+
)
|
|
481
|
+
parser.add_argument(
|
|
482
|
+
"--host",
|
|
483
|
+
type=str,
|
|
484
|
+
help="Address to bind for connections",
|
|
485
|
+
default=DEFAULT_HOST,
|
|
486
|
+
)
|
|
446
487
|
parser.add_argument(
|
|
447
488
|
"--log-level",
|
|
448
489
|
type=str,
|
|
449
490
|
help="Log level for the server",
|
|
450
491
|
choices=["critical", "error", "warning", "info", "debug", "trace"],
|
|
451
|
-
default=
|
|
492
|
+
default=DEFAULT_LOG_LEVEL,
|
|
452
493
|
)
|
|
453
494
|
parser.add_argument(
|
|
454
495
|
"--llamacpp",
|
|
455
496
|
type=str,
|
|
456
|
-
help=
|
|
497
|
+
help="LlamaCpp backend to use",
|
|
457
498
|
choices=["vulkan", "rocm"],
|
|
499
|
+
default=DEFAULT_LLAMACPP_BACKEND,
|
|
458
500
|
)
|
|
459
501
|
parser.add_argument(
|
|
460
502
|
"--ctx-size",
|
|
461
503
|
type=int,
|
|
462
|
-
help=
|
|
504
|
+
help=(
|
|
505
|
+
f"Context size for the model (default: {DEFAULT_CTX_SIZE} for llamacpp, "
|
|
506
|
+
"truncates prompts for other recipes)"
|
|
507
|
+
),
|
|
508
|
+
default=DEFAULT_CTX_SIZE,
|
|
463
509
|
)
|
|
464
510
|
|
|
465
511
|
|
|
@@ -578,6 +624,7 @@ def main():
|
|
|
578
624
|
sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
|
|
579
625
|
serve(
|
|
580
626
|
port=args.port,
|
|
627
|
+
host=args.host,
|
|
581
628
|
log_level=args.log_level,
|
|
582
629
|
tray=not args.no_tray,
|
|
583
630
|
llamacpp_backend=args.llamacpp,
|
|
@@ -603,7 +650,9 @@ def main():
|
|
|
603
650
|
run(
|
|
604
651
|
args.model,
|
|
605
652
|
port=args.port,
|
|
653
|
+
host=args.host,
|
|
606
654
|
log_level=args.log_level,
|
|
655
|
+
tray=not args.no_tray,
|
|
607
656
|
llamacpp_backend=args.llamacpp,
|
|
608
657
|
ctx_size=args.ctx_size,
|
|
609
658
|
)
|
lemonade_server/model_manager.py
CHANGED
|
(hunk not captured in this extract)
|
lemonade_server/pydantic_models.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
+
import os
|
|
1
2
|
from typing import Optional, Union, List
|
|
2
3
|
|
|
3
4
|
from pydantic import BaseModel
|
|
4
5
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
DEFAULT_PORT = int(os.getenv("LEMONADE_PORT", "8000"))
|
|
7
|
+
DEFAULT_HOST = os.getenv("LEMONADE_HOST", "localhost")
|
|
8
|
+
DEFAULT_LOG_LEVEL = os.getenv("LEMONADE_LOG_LEVEL", "info")
|
|
9
|
+
DEFAULT_LLAMACPP_BACKEND = os.getenv("LEMONADE_LLAMACPP", "vulkan")
|
|
10
|
+
DEFAULT_CTX_SIZE = int(os.getenv("LEMONADE_CTX_SIZE", "4096"))
|
|
8
11
|
|
|
9
12
|
|
|
10
13
|
class LoadConfig(BaseModel):
|
|
@@ -39,6 +42,9 @@ class CompletionRequest(BaseModel):
|
|
|
39
42
|
logprobs: int | None = False
|
|
40
43
|
stop: list[str] | str | None = None
|
|
41
44
|
temperature: float | None = None
|
|
45
|
+
repeat_penalty: float | None = None
|
|
46
|
+
top_k: int | None = None
|
|
47
|
+
top_p: float | None = None
|
|
42
48
|
max_tokens: int | None = None
|
|
43
49
|
|
|
44
50
|
|
|
@@ -56,6 +62,9 @@ class ChatCompletionRequest(BaseModel):
|
|
|
56
62
|
logprobs: int | None = False
|
|
57
63
|
stop: list[str] | str | None = None
|
|
58
64
|
temperature: float | None = None
|
|
65
|
+
repeat_penalty: float | None = None
|
|
66
|
+
top_k: int | None = None
|
|
67
|
+
top_p: float | None = None
|
|
59
68
|
tools: list[dict] | None = None
|
|
60
69
|
max_tokens: int | None = None
|
|
61
70
|
max_completion_tokens: int | None = None
|
|
@@ -95,6 +104,9 @@ class ResponsesRequest(BaseModel):
|
|
|
95
104
|
model: str
|
|
96
105
|
max_output_tokens: int | None = None
|
|
97
106
|
temperature: float | None = None
|
|
107
|
+
repeat_penalty: float | None = None
|
|
108
|
+
top_k: int | None = None
|
|
109
|
+
top_p: float | None = None
|
|
98
110
|
stream: bool = False
|
|
99
111
|
|
|
100
112
|
|
|
lemonade_server/server_models.json
CHANGED
|
@@ -114,6 +114,51 @@
|
|
|
114
114
|
"recipe": "oga-npu",
|
|
115
115
|
"suggested": true
|
|
116
116
|
},
|
|
117
|
+
"DeepSeek-R1-Distill-Llama-8B-NPU": {
|
|
118
|
+
"checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
119
|
+
"recipe": "oga-npu",
|
|
120
|
+
"suggested": true
|
|
121
|
+
},
|
|
122
|
+
"DeepSeek-R1-Distill-Qwen-7B-NPU": {
|
|
123
|
+
"checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
124
|
+
"recipe": "oga-npu",
|
|
125
|
+
"suggested": false
|
|
126
|
+
},
|
|
127
|
+
"DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
|
|
128
|
+
"checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
129
|
+
"recipe": "oga-npu",
|
|
130
|
+
"suggested": false
|
|
131
|
+
},
|
|
132
|
+
"Llama-3.2-3B-Instruct-NPU": {
|
|
133
|
+
"checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
134
|
+
"recipe": "oga-npu",
|
|
135
|
+
"suggested": false
|
|
136
|
+
},
|
|
137
|
+
"Llama-3.2-1B-Instruct-NPU": {
|
|
138
|
+
"checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
139
|
+
"recipe": "oga-npu",
|
|
140
|
+
"suggested": false
|
|
141
|
+
},
|
|
142
|
+
"Mistral-7B-v0.3-Instruct-NPU": {
|
|
143
|
+
"checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
144
|
+
"recipe": "oga-npu",
|
|
145
|
+
"suggested": true
|
|
146
|
+
},
|
|
147
|
+
"Phi-3.5-Mini-Instruct-NPU": {
|
|
148
|
+
"checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
149
|
+
"recipe": "oga-npu",
|
|
150
|
+
"suggested": true
|
|
151
|
+
},
|
|
152
|
+
"ChatGLM-3-6b-Instruct-NPU": {
|
|
153
|
+
"checkpoint": "amd/chatglm3-6b-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
154
|
+
"recipe": "oga-npu",
|
|
155
|
+
"suggested": false
|
|
156
|
+
},
|
|
157
|
+
"AMD-OLMo-1B-Instruct-NPU": {
|
|
158
|
+
"checkpoint": "amd/AMD-OLMo-1B-SFT-DPO-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
159
|
+
"recipe": "oga-npu",
|
|
160
|
+
"suggested": false
|
|
161
|
+
},
|
|
117
162
|
"Llama-3.2-1B-Instruct-DirectML": {
|
|
118
163
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
|
|
119
164
|
"recipe": "oga-igpu",
|
|
@@ -223,8 +268,8 @@
|
|
|
223
268
|
"checkpoint": "unsloth/cogito-v2-preview-llama-109B-MoE-GGUF:Q4_K_M",
|
|
224
269
|
"mmproj": "mmproj-F16.gguf",
|
|
225
270
|
"recipe": "llamacpp",
|
|
226
|
-
"suggested":
|
|
227
|
-
"labels": ["vision"
|
|
271
|
+
"suggested": false,
|
|
272
|
+
"labels": ["vision"]
|
|
228
273
|
},
|
|
229
274
|
"nomic-embed-text-v1-GGUF": {
|
|
230
275
|
"checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
|
|
@@ -260,7 +305,7 @@
|
|
|
260
305
|
"checkpoint": "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M",
|
|
261
306
|
"recipe": "llamacpp",
|
|
262
307
|
"suggested": true,
|
|
263
|
-
"labels": ["
|
|
308
|
+
"labels": ["coding"]
|
|
264
309
|
},
|
|
265
310
|
"gpt-oss-120b-GGUF": {
|
|
266
311
|
"checkpoint": "unsloth/gpt-oss-120b-GGUF:Q4_K_M",
|
|
@@ -273,5 +318,11 @@
|
|
|
273
318
|
"recipe": "llamacpp",
|
|
274
319
|
"suggested": true,
|
|
275
320
|
"labels": ["hot", "reasoning"]
|
|
321
|
+
},
|
|
322
|
+
"GLM-4.5-Air-UD-Q4K-XL-GGUF": {
|
|
323
|
+
"checkpoint": "unsloth/GLM-4.5-Air-GGUF:UD-Q4_K_XL",
|
|
324
|
+
"recipe": "llamacpp",
|
|
325
|
+
"suggested": true,
|
|
326
|
+
"labels": ["reasoning","hot"]
|
|
276
327
|
}
|
|
277
328
|
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|