lemonade-sdk 8.1.1__py3-none-any.whl → 8.1.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of lemonade-sdk might be problematic. See the registry's advisory page for more details.

@@ -427,7 +427,7 @@ class LemonadeTray(SystemTray):
427
427
  Start the uvicorn server.
428
428
  """
429
429
  self.server = self.server_factory()
430
- self.server.uvicorn_server = self.server.run_in_thread()
430
+ self.server.uvicorn_server = self.server.run_in_thread(self.server.host)
431
431
  self.server.uvicorn_server.run()
432
432
 
433
433
  def run(self):
@@ -5,6 +5,7 @@ import importlib
5
5
  import asyncio
6
6
  from contextlib import asynccontextmanager
7
7
  from fastapi import FastAPI
8
+ from lemonade.version import __version__
8
9
 
9
10
  _lazy_imports = {
10
11
  "TextIteratorStreamer": ("transformers", "TextIteratorStreamer"),
@@ -42,8 +43,8 @@ async def lifespan(app: FastAPI):
42
43
  logging.info(
43
44
  "\n"
44
45
  "\n"
45
- "🍋 Lemonade Server Ready!\n"
46
- f"🍋 Open http://localhost:{app.port} in your browser for:\n"
46
+ f"🍋 Lemonade Server v{__version__} Ready!\n"
47
+ f"🍋 Open http://{app.host_}:{app.port} in your browser for:\n"
47
48
  "🍋 💬 chat\n"
48
49
  "🍋 💻 model management\n"
49
50
  "🍋 📄 docs\n"
@@ -52,8 +53,8 @@ async def lifespan(app: FastAPI):
52
53
  logging.info(
53
54
  "\n"
54
55
  "\n"
55
- "[Lemonade] Lemonade Server Ready!\n"
56
- f"[Lemonade] Open http://localhost:{app.port} in your browser for:\n"
56
+ f"[Lemonade] Lemonade Server v{__version__} Ready!\n"
57
+ f"[Lemonade] Open http://{app.host_}:{app.port} in your browser for:\n"
57
58
  "[Lemonade] chat\n"
58
59
  "[Lemonade] model management\n"
59
60
  "[Lemonade] docs\n"
lemonade/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "8.1.1"
1
+ __version__ = "8.1.3"
@@ -1,18 +1,18 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.1.1
3
+ Version: 8.1.3
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
- Requires-Python: >=3.10, <3.13
6
+ Requires-Python: >=3.10, <3.14
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  License-File: NOTICE.md
10
10
  Requires-Dist: invoke>=2.0.0
11
- Requires-Dist: onnx<1.18.0,>=1.11.0
11
+ Requires-Dist: onnx==1.18.0
12
12
  Requires-Dist: pyyaml>=5.4
13
13
  Requires-Dist: typeguard>=2.3.13
14
14
  Requires-Dist: packaging>=20.9
15
- Requires-Dist: numpy<2.0.0
15
+ Requires-Dist: numpy
16
16
  Requires-Dist: fasteners
17
17
  Requires-Dist: GitPython>=3.1.40
18
18
  Requires-Dist: psutil>=6.1.1
@@ -41,9 +41,10 @@ Requires-Dist: accelerate; extra == "dev"
41
41
  Requires-Dist: datasets; extra == "dev"
42
42
  Requires-Dist: pandas>=1.5.3; extra == "dev"
43
43
  Requires-Dist: matplotlib; extra == "dev"
44
- Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "dev"
45
44
  Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
46
45
  Requires-Dist: lm-eval[api]; extra == "dev"
46
+ Provides-Extra: model-generate
47
+ Requires-Dist: model-generate==1.5.0; (platform_system == "Windows" and python_version == "3.10") and extra == "model-generate"
47
48
  Provides-Extra: oga-hybrid
48
49
  Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
49
50
  Provides-Extra: oga-unified
@@ -105,7 +106,7 @@ Dynamic: summary
105
106
  <img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
106
107
  </a>
107
108
  <a href="docs/README.md#installation" title="Check out our instructions">
108
- <img src="https://img.shields.io/badge/Python-3.10%20%7C%203.12-blue?logo=python&logoColor=white" alt="Made with Python" />
109
+ <img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" />
109
110
  </a>
110
111
  <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
111
112
  <img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
@@ -4,13 +4,13 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
4
4
  lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=8YlEPKK1Cm5T4dPa2BQPpPwVVTzjPLnmqAeNcTb5nOw,22
7
+ lemonade/version.py,sha256=gnc1sclqzDLnQB9vbqA0LgSMz4H-bYCuu--_P-HWhAc,22
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
10
  lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
11
11
  lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
12
12
  lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
13
- lemonade/common/inference_engines.py,sha256=OJQcED9P1ZeQ8d11lDMNeAoaFoUuZlsDcwEZXLbqWRg,12579
13
+ lemonade/common/inference_engines.py,sha256=pJxn0zOf3gEmjGAIWXNdCibfzarzc7LRbZjoQyygkcU,12591
14
14
  lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
15
15
  lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
16
16
  lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
@@ -24,7 +24,7 @@ lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11
24
24
  lemonade/tools/adapter.py,sha256=Ex63Y1SPCOSV4M_QtzEn3YVd39d3yew0lpmEFgp8aH4,3169
25
25
  lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
26
26
  lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
27
- lemonade/tools/management_tools.py,sha256=U8GaJnjdXyQ9sw8UxBQMc7glpaLciaVphASaQS4kJsA,10202
27
+ lemonade/tools/management_tools.py,sha256=HQBcr7LYuMqVRYQtvnkNpfutBTA7lblszyoAjjVGu1Y,10201
28
28
  lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
29
29
  lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
30
30
  lemonade/tools/prompt.py,sha256=PyLksp1k8jsZsU7XBRK61k1DUHhbdLa20h-AP8Noh3w,9011
@@ -34,11 +34,11 @@ lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1
34
34
  lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
35
35
  lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
36
36
  lemonade/tools/llamacpp/load.py,sha256=DFCvQN548Ch9H8U_rHOiYviinzw6vixb5-V7xLj7XE4,6499
37
- lemonade/tools/llamacpp/utils.py,sha256=CTWnzbEYGPSbOizF26yCnyNrHDY19pLusU-YyND992s,29070
37
+ lemonade/tools/llamacpp/utils.py,sha256=Auid9FepxwLIgDahaDNIxwz8kP_ap8Opd3eSF6t637g,32336
38
38
  lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
39
  lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
40
40
  lemonade/tools/oga/load.py,sha256=6Pf_QrHpIXDbfpTwFNRj4RmWTxI-RImhYuqRvmTVgmY,33722
41
- lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
41
+ lemonade/tools/oga/utils.py,sha256=F8UVLKlfYcLa2SUqlehar8-jaX2Aw4u58DjHNNvLdOA,17675
42
42
  lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
43
  lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
44
44
  lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
@@ -46,27 +46,31 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
46
46
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
47
47
  lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
48
48
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- lemonade/tools/server/llamacpp.py,sha256=KZO4npzefvbaPvlZbpCYsdW0tMSfmmupT8gaK9y65I8,17962
50
- lemonade/tools/server/serve.py,sha256=PAAGowj2Z5AQIW3G1l52taNyf_0U4kRFR3G735M4DsU,55513
49
+ lemonade/tools/server/llamacpp.py,sha256=jVkaPx1ZbHYiJll3wnDR0fh-e0yfg7UB0BXlLWPx4dE,20998
50
+ lemonade/tools/server/serve.py,sha256=3wnB19YThQLHkjbzy7PCWppQY_j5xKB24GcqM8IybxI,58857
51
51
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
52
- lemonade/tools/server/tray.py,sha256=qlQKBkQwG9W2v9GTyycvFc12_jly6vPU1uEkrIFBGTs,17624
52
+ lemonade/tools/server/tray.py,sha256=YJ4-vJlM6tJ0ojY_wVM6COuNscETFkQPt-BaNqYa9YQ,17640
53
53
  lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
54
54
  lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
55
- lemonade/tools/server/static/styles.css,sha256=M_JrH_vML65MWun-C8XCvLOFw35qZURSa77Fk4fVngQ,30029
56
- lemonade/tools/server/static/webapp.html,sha256=oU6FZHGQCq-SoT6VkWObQvYzzNS0ser5Fmqx2j_5jCI,54380
57
- lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
55
+ lemonade/tools/server/static/styles.css,sha256=X_mqf3XCOo_pZEqkDiVzMGCy8ARseEBq5DdGnAdfVk0,43383
56
+ lemonade/tools/server/static/webapp.html,sha256=FX2MZUsljfgxxuF12KBdgvNkso_z-sHewWc0SEGGcGM,18138
57
+ lemonade/tools/server/static/js/chat.js,sha256=BTvREuEt0NrN8qhAuda5tTAoUN6tbsoukevA-zyTrwQ,27193
58
+ lemonade/tools/server/static/js/model-settings.js,sha256=JXHeG7xVrRU181Hj7CZflERAi1Z6t-qwYFR4aH5nf5I,5820
59
+ lemonade/tools/server/static/js/models.js,sha256=bbX7c8B59ioim86T3x9PFESvF8y3cHPYUO6nhc4SCDs,32500
60
+ lemonade/tools/server/static/js/shared.js,sha256=4iqDNWiKEB7eYS4fdnTy-RwO_ksROrLYLmT2YSomG1M,17065
61
+ lemonade/tools/server/utils/port.py,sha256=J7-g-Aqygb50jNoHLhhRfBZVM-uhGlcB5-oYBAehvgw,2263
58
62
  lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
59
63
  lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
60
64
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
61
65
  lemonade_install/install.py,sha256=Zl_JtEIhbqZZTvxcqtq895IomEN-JNxp9xOZEtahMHQ,28289
62
- lemonade_sdk-8.1.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
63
- lemonade_sdk-8.1.1.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
64
- lemonade_server/cli.py,sha256=CFfhrRgZNJCd0rDRBF3TeS3dMJgwlKGtvT5_kbsWaXk,17316
65
- lemonade_server/model_manager.py,sha256=O3fIX52AqU0z10WzPmNEA3lQ_KjOqNq_G-SxjwIgEio,10781
66
- lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
67
- lemonade_server/server_models.json,sha256=iag_dG9S1tkHZUhkJmGAfiUJkgEazdQSv7stC1fVAsQ,9741
68
- lemonade_sdk-8.1.1.dist-info/METADATA,sha256=XT9cwNUAkhwQ6kad6l7t2nj7m8S0t-9GvaFLOMxLCyE,17065
69
- lemonade_sdk-8.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
70
- lemonade_sdk-8.1.1.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
71
- lemonade_sdk-8.1.1.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
72
- lemonade_sdk-8.1.1.dist-info/RECORD,,
66
+ lemonade_sdk-8.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
67
+ lemonade_sdk-8.1.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
68
+ lemonade_server/cli.py,sha256=-haIK4Q9cYFwna5-m6vgxW9qMaGW-_lDFB49zXxDk2A,18755
69
+ lemonade_server/model_manager.py,sha256=cFaHJVOsabwekAPryXAPdo6qrXYBD_yht7XPg2QImqc,10791
70
+ lemonade_server/pydantic_models.py,sha256=oTFnDVCax2Gerz7RBJOJF0FVQjKoUPJZbBo-EgogQyk,3161
71
+ lemonade_server/server_models.json,sha256=DAdG4ebIt5Dy5MM3kmXn1pO0XbNMph1gdpzbacBDVuc,11664
72
+ lemonade_sdk-8.1.3.dist-info/METADATA,sha256=3As4CPILSkJVZMKsyqHZX6o9P8aBsixEJuQTtOas25w,17086
73
+ lemonade_sdk-8.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
74
+ lemonade_sdk-8.1.3.dist-info/entry_points.txt,sha256=7sRvpNhi1E7amnM7RZo57e8yFF9iA5uuRaIeJ1Xre6w,193
75
+ lemonade_sdk-8.1.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
76
+ lemonade_sdk-8.1.3.dist-info/RECORD,,
@@ -2,3 +2,4 @@
2
2
  lemonade = lemonade:lemonadecli
3
3
  lemonade-install = lemonade_install:installcli
4
4
  lemonade-server-dev = lemonade_server.cli:main
5
+ lsdev = lemonade_server.cli:developer_entrypoint
lemonade_server/cli.py CHANGED
@@ -4,6 +4,13 @@ import os
4
4
  from typing import Tuple, Optional
5
5
  import psutil
6
6
  from typing import List
7
+ from lemonade_server.pydantic_models import (
8
+ DEFAULT_PORT,
9
+ DEFAULT_HOST,
10
+ DEFAULT_LOG_LEVEL,
11
+ DEFAULT_LLAMACPP_BACKEND,
12
+ DEFAULT_CTX_SIZE,
13
+ )
7
14
 
8
15
 
9
16
  # Error codes for different CLI scenarios
@@ -47,6 +54,7 @@ class ModelLoadError(Exception):
47
54
 
48
55
  def serve(
49
56
  port: int = None,
57
+ host: str = None,
50
58
  log_level: str = None,
51
59
  tray: bool = False,
52
60
  use_thread: bool = False,
@@ -59,26 +67,20 @@ def serve(
59
67
 
60
68
  # Otherwise, start the server
61
69
  print("Starting Lemonade Server...")
62
- from lemonade.tools.server.serve import (
63
- Server,
64
- DEFAULT_PORT,
65
- DEFAULT_LOG_LEVEL,
66
- DEFAULT_LLAMACPP_BACKEND,
67
- DEFAULT_CTX_SIZE,
68
- )
70
+ from lemonade.tools.server.serve import Server
69
71
 
70
72
  port = port if port is not None else DEFAULT_PORT
73
+ host = host if host is not None else DEFAULT_HOST
71
74
  log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
72
75
  llamacpp_backend = (
73
76
  llamacpp_backend if llamacpp_backend is not None else DEFAULT_LLAMACPP_BACKEND
74
77
  )
75
-
76
- # Use ctx_size if provided, otherwise use default
77
78
  ctx_size = ctx_size if ctx_size is not None else DEFAULT_CTX_SIZE
78
79
 
79
80
  # Start the server
80
81
  server = Server(
81
82
  port=port,
83
+ host=host,
82
84
  log_level=log_level,
83
85
  ctx_size=ctx_size,
84
86
  tray=tray,
@@ -259,7 +261,9 @@ def delete(model_names: List[str]):
259
261
  def run(
260
262
  model_name: str,
261
263
  port: int = None,
264
+ host: str = "localhost",
262
265
  log_level: str = None,
266
+ tray: bool = False,
263
267
  llamacpp_backend: str = None,
264
268
  ctx_size: int = None,
265
269
  ):
@@ -270,13 +274,14 @@ def run(
270
274
  import time
271
275
 
272
276
  # Start the server if not running
273
- _, port = get_server_info()
274
- server_previously_running = port is not None
277
+ _, running_port = get_server_info()
278
+ server_previously_running = running_port is not None
275
279
  if not server_previously_running:
276
280
  port, server_thread = serve(
277
281
  port=port,
282
+ host=host,
278
283
  log_level=log_level,
279
- tray=True,
284
+ tray=tray,
280
285
  use_thread=True,
281
286
  llamacpp_backend=llamacpp_backend,
282
287
  ctx_size=ctx_size,
@@ -291,7 +296,7 @@ def run(
291
296
  load(model_name, port)
292
297
 
293
298
  # Open the webapp with the specified model
294
- url = f"http://localhost:{port}/?model={model_name}#llm-chat"
299
+ url = f"http://{host}:{port}/?model={model_name}#llm-chat"
295
300
  print(f"You can now chat with {model_name} at {url}")
296
301
  webbrowser.open(url)
297
302
 
@@ -440,26 +445,67 @@ def list_models():
440
445
  print(tabulate(table_data, headers=headers, tablefmt="simple"))
441
446
 
442
447
 
448
+ def developer_entrypoint():
449
+ """
450
+ Developer entry point that starts the server with debug logging
451
+ Equivalent to running: lemonade-server-dev serve --log-level debug [additional args]
452
+
453
+ This function automatically prepends "serve --log-level debug" to any arguments
454
+ passed to the lsdev command.
455
+ """
456
+ # Save original sys.argv
457
+ original_argv = sys.argv.copy()
458
+
459
+ try:
460
+ # Take any additional arguments passed to lsdev and append them
461
+ # after "serve --log-level debug"
462
+ additional_args = sys.argv[1:] if len(sys.argv) > 1 else []
463
+
464
+ # Set sys.argv to simulate "serve --log-level debug" + additional args
465
+ sys.argv = [sys.argv[0], "serve", "--log-level", "debug"] + additional_args
466
+ main()
467
+ finally:
468
+ # Restore original sys.argv
469
+ sys.argv = original_argv
470
+
471
+
443
472
  def _add_server_arguments(parser):
444
473
  """Add common server arguments to a parser"""
445
- parser.add_argument("--port", type=int, help="Port number to serve on")
474
+
475
+ parser.add_argument(
476
+ "--port",
477
+ type=int,
478
+ help="Port number to serve on",
479
+ default=DEFAULT_PORT,
480
+ )
481
+ parser.add_argument(
482
+ "--host",
483
+ type=str,
484
+ help="Address to bind for connections",
485
+ default=DEFAULT_HOST,
486
+ )
446
487
  parser.add_argument(
447
488
  "--log-level",
448
489
  type=str,
449
490
  help="Log level for the server",
450
491
  choices=["critical", "error", "warning", "info", "debug", "trace"],
451
- default="info",
492
+ default=DEFAULT_LOG_LEVEL,
452
493
  )
453
494
  parser.add_argument(
454
495
  "--llamacpp",
455
496
  type=str,
456
- help=f"LlamaCpp backend to use",
497
+ help="LlamaCpp backend to use",
457
498
  choices=["vulkan", "rocm"],
499
+ default=DEFAULT_LLAMACPP_BACKEND,
458
500
  )
459
501
  parser.add_argument(
460
502
  "--ctx-size",
461
503
  type=int,
462
- help="Context size for the model (default: 4096 for llamacpp, truncates prompts for other recipes)",
504
+ help=(
505
+ f"Context size for the model (default: {DEFAULT_CTX_SIZE} for llamacpp, "
506
+ "truncates prompts for other recipes)"
507
+ ),
508
+ default=DEFAULT_CTX_SIZE,
463
509
  )
464
510
 
465
511
 
@@ -578,6 +624,7 @@ def main():
578
624
  sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
579
625
  serve(
580
626
  port=args.port,
627
+ host=args.host,
581
628
  log_level=args.log_level,
582
629
  tray=not args.no_tray,
583
630
  llamacpp_backend=args.llamacpp,
@@ -603,7 +650,9 @@ def main():
603
650
  run(
604
651
  args.model,
605
652
  port=args.port,
653
+ host=args.host,
606
654
  log_level=args.log_level,
655
+ tray=not args.no_tray,
607
656
  llamacpp_backend=args.llamacpp,
608
657
  ctx_size=args.ctx_size,
609
658
  )
@@ -43,7 +43,7 @@ class ModelManager:
43
43
  if "reasoning" in model_info:
44
44
  model_info["labels"] = (
45
45
  ["reasoning"]
46
- if not model_info["labels"]
46
+ if not model_info.get("labels", None)
47
47
  else model_info["labels"] + ["reasoning"]
48
48
  )
49
49
  del model_info["reasoning"]
@@ -1,10 +1,13 @@
1
+ import os
1
2
  from typing import Optional, Union, List
2
3
 
3
4
  from pydantic import BaseModel
4
5
 
5
- # Set to a high number to allow for interesting experiences in real apps
6
- # Tests should use the max_new_tokens argument to set a lower value
7
- DEFAULT_MAX_NEW_TOKENS = 1500
6
+ DEFAULT_PORT = int(os.getenv("LEMONADE_PORT", "8000"))
7
+ DEFAULT_HOST = os.getenv("LEMONADE_HOST", "localhost")
8
+ DEFAULT_LOG_LEVEL = os.getenv("LEMONADE_LOG_LEVEL", "info")
9
+ DEFAULT_LLAMACPP_BACKEND = os.getenv("LEMONADE_LLAMACPP", "vulkan")
10
+ DEFAULT_CTX_SIZE = int(os.getenv("LEMONADE_CTX_SIZE", "4096"))
8
11
 
9
12
 
10
13
  class LoadConfig(BaseModel):
@@ -39,6 +42,9 @@ class CompletionRequest(BaseModel):
39
42
  logprobs: int | None = False
40
43
  stop: list[str] | str | None = None
41
44
  temperature: float | None = None
45
+ repeat_penalty: float | None = None
46
+ top_k: int | None = None
47
+ top_p: float | None = None
42
48
  max_tokens: int | None = None
43
49
 
44
50
 
@@ -56,6 +62,9 @@ class ChatCompletionRequest(BaseModel):
56
62
  logprobs: int | None = False
57
63
  stop: list[str] | str | None = None
58
64
  temperature: float | None = None
65
+ repeat_penalty: float | None = None
66
+ top_k: int | None = None
67
+ top_p: float | None = None
59
68
  tools: list[dict] | None = None
60
69
  max_tokens: int | None = None
61
70
  max_completion_tokens: int | None = None
@@ -95,6 +104,9 @@ class ResponsesRequest(BaseModel):
95
104
  model: str
96
105
  max_output_tokens: int | None = None
97
106
  temperature: float | None = None
107
+ repeat_penalty: float | None = None
108
+ top_k: int | None = None
109
+ top_p: float | None = None
98
110
  stream: bool = False
99
111
 
100
112
 
@@ -114,6 +114,51 @@
114
114
  "recipe": "oga-npu",
115
115
  "suggested": true
116
116
  },
117
+ "DeepSeek-R1-Distill-Llama-8B-NPU": {
118
+ "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
119
+ "recipe": "oga-npu",
120
+ "suggested": true
121
+ },
122
+ "DeepSeek-R1-Distill-Qwen-7B-NPU": {
123
+ "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
124
+ "recipe": "oga-npu",
125
+ "suggested": false
126
+ },
127
+ "DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
128
+ "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
129
+ "recipe": "oga-npu",
130
+ "suggested": false
131
+ },
132
+ "Llama-3.2-3B-Instruct-NPU": {
133
+ "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
134
+ "recipe": "oga-npu",
135
+ "suggested": false
136
+ },
137
+ "Llama-3.2-1B-Instruct-NPU": {
138
+ "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
139
+ "recipe": "oga-npu",
140
+ "suggested": false
141
+ },
142
+ "Mistral-7B-v0.3-Instruct-NPU": {
143
+ "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
144
+ "recipe": "oga-npu",
145
+ "suggested": true
146
+ },
147
+ "Phi-3.5-Mini-Instruct-NPU": {
148
+ "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
149
+ "recipe": "oga-npu",
150
+ "suggested": true
151
+ },
152
+ "ChatGLM-3-6b-Instruct-NPU": {
153
+ "checkpoint": "amd/chatglm3-6b-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
154
+ "recipe": "oga-npu",
155
+ "suggested": false
156
+ },
157
+ "AMD-OLMo-1B-Instruct-NPU": {
158
+ "checkpoint": "amd/AMD-OLMo-1B-SFT-DPO-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
159
+ "recipe": "oga-npu",
160
+ "suggested": false
161
+ },
117
162
  "Llama-3.2-1B-Instruct-DirectML": {
118
163
  "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
119
164
  "recipe": "oga-igpu",
@@ -223,8 +268,8 @@
223
268
  "checkpoint": "unsloth/cogito-v2-preview-llama-109B-MoE-GGUF:Q4_K_M",
224
269
  "mmproj": "mmproj-F16.gguf",
225
270
  "recipe": "llamacpp",
226
- "suggested": true,
227
- "labels": ["vision","hot"]
271
+ "suggested": false,
272
+ "labels": ["vision"]
228
273
  },
229
274
  "nomic-embed-text-v1-GGUF": {
230
275
  "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
@@ -260,7 +305,7 @@
260
305
  "checkpoint": "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M",
261
306
  "recipe": "llamacpp",
262
307
  "suggested": true,
263
- "labels": ["reasoning", "coding"]
308
+ "labels": ["coding"]
264
309
  },
265
310
  "gpt-oss-120b-GGUF": {
266
311
  "checkpoint": "unsloth/gpt-oss-120b-GGUF:Q4_K_M",
@@ -273,5 +318,11 @@
273
318
  "recipe": "llamacpp",
274
319
  "suggested": true,
275
320
  "labels": ["hot", "reasoning"]
321
+ },
322
+ "GLM-4.5-Air-UD-Q4K-XL-GGUF": {
323
+ "checkpoint": "unsloth/GLM-4.5-Air-GGUF:UD-Q4_K_XL",
324
+ "recipe": "llamacpp",
325
+ "suggested": true,
326
+ "labels": ["reasoning","hot"]
276
327
  }
277
328
  }