lemonade-sdk 8.0.4__py3-none-any.whl → 8.0.5__py3-none-any.whl

This diff shows the contents of publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

This version of lemonade-sdk has been flagged as potentially problematic.

@@ -1,12 +1,17 @@
  import argparse
  import abc
+ import json
  from typing import List
  import lemonade.common.filesystem as fs
  import lemonade.common.exceptions as exp
  import lemonade.common.printing as printing
  from lemonade.tools.tool import ToolParser
  from lemonade.version import __version__ as lemonade_version
- from lemonade.common.system_info import get_system_info_dict
+ from lemonade.common.system_info import (
+     get_system_info_dict,
+     get_device_info_dict,
+     get_system_info,
+ )
  from lemonade.common.build import output_dir
  import lemonade.cache as lemonade_cache
 
@@ -245,28 +250,69 @@ class SystemInfo(ManagementTool):
      @staticmethod
      def parser(add_help: bool = True) -> argparse.ArgumentParser:
          parser = __class__.helpful_parser(
-             short_description="Print system information",
+             short_description="Print system and device information",
              add_help=add_help,
          )
 
+         parser.add_argument(
+             "--format", choices=["table", "json"], default="table", help="Output format"
+         )
+
+         parser.add_argument(
+             "--verbose",
+             action="store_true",
+             help="Show detailed system information",
+         )
+
          return parser
 
      @staticmethod
      def pretty_print(my_dict: dict, level=0):
          for k, v in my_dict.items():
+             if k == "available" and v is True:
+                 continue
+
              if isinstance(v, dict):
-                 print(" " * level + f"{k}:")
-                 SystemInfo.pretty_print(v, level + 1)
+                 # Special handling for device availability
+                 if v.get("available") is False:
+                     error_msg = v.get("error", "Not available")
+                     print(" " * level + f"{k}: {error_msg}")
+                 else:
+                     print(" " * level + f"{k}:")
+                     SystemInfo.pretty_print(v, level + 1)
              elif isinstance(v, list):
                  print(" " * level + f"{k}:")
                  for item in v:
-                     print(" " * (level + 1) + f"{item}")
+                     if isinstance(item, dict):
+                         SystemInfo.pretty_print(item, level + 1)
+                         print()
+                     else:
+                         print(" " * (level + 1) + f"{item}")
              else:
                  print(" " * level + f"{k}: {v}")
 
-     def run(self, _):
+     def run(self, _, format="table", verbose=False):
+         # Get basic system info
          system_info_dict = get_system_info_dict()
-         self.pretty_print(system_info_dict)
+
+         # Always include devices
+         system_info_dict["Devices"] = get_device_info_dict()
+
+         # Filter out verbose-only information if not in verbose mode
+         if not verbose:
+             essential_keys = ["OS Version", "Processor", "Physical Memory", "Devices"]
+             system_info_dict = {
+                 k: v for k, v in system_info_dict.items() if k in essential_keys
+             }
+         else:
+             # In verbose mode, add Python packages at the end
+             system_info = get_system_info()
+             system_info_dict["Python Packages"] = system_info.get_python_packages()
+
+         if format == "json":
+             print(json.dumps(system_info_dict, indent=2))
+         else:
+             self.pretty_print(system_info_dict)
 
 
  # This file was originally licensed under Apache 2.0. It has been modified.
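For context on the hunk above: `run()` now builds a single dictionary (base system info plus a `Devices` section) and either pretty-prints it or serializes it with `json.dumps` when `--format json` is passed. A minimal sketch of the same flow, calling the helpers imported at the top of the diff directly; it is not part of the package itself and assumes a machine where lemonade-sdk is installed:

```python
import json

from lemonade.common.system_info import (
    get_device_info_dict,
    get_system_info_dict,
)

# Mirror the non-verbose path of SystemInfo.run(): base info plus devices,
# trimmed to the essential keys, then emitted as JSON.
info = get_system_info_dict()
info["Devices"] = get_device_info_dict()

essential_keys = ["OS Version", "Processor", "Physical Memory", "Devices"]
info = {k: v for k, v in info.items() if k in essential_keys}

print(json.dumps(info, indent=2))
```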
@@ -228,6 +228,7 @@ class Server(ManagementTool):
          self.app.get(f"{prefix}/health")(self.health)
          self.app.get(f"{prefix}/halt")(self.halt_generation)
          self.app.get(f"{prefix}/stats")(self.send_stats)
+         self.app.get(f"{prefix}/system-info")(self.get_system_info)
          self.app.post(f"{prefix}/completions")(self.completions)
          self.app.post(f"{prefix}/responses")(self.responses)
 
@@ -1276,6 +1277,34 @@ class Server(ManagementTool):
              ),
          }
 
+     async def get_system_info(self, request: Request):
+         """
+         Return system and device enumeration information.
+         Supports optional 'verbose' query parameter.
+         """
+         from lemonade.common.system_info import (
+             get_system_info_dict,
+             get_device_info_dict,
+             get_system_info as get_system_info_obj,
+         )
+
+         # Get verbose parameter from query string (default to False)
+         verbose = request.query_params.get("verbose", "false").lower() in ["true", "1"]
+
+         info = get_system_info_dict()
+         info["devices"] = get_device_info_dict()
+
+         # Filter out verbose-only information if not in verbose mode
+         if not verbose:
+             essential_keys = ["OS Version", "Processor", "Physical Memory", "devices"]
+             info = {k: v for k, v in info.items() if k in essential_keys}
+         else:
+             # In verbose mode, add Python packages at the end
+             system_info_obj = get_system_info_obj()
+             info["Python Packages"] = system_info_obj.get_python_packages()
+
+         return info
+
      def model_load_failure(self, model_reference: str, message: Optional[str] = None):
          """
          Clean up after a model load failure, then log it and raise
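Because the handler returns a plain dictionary, FastAPI serializes it as JSON. A rough client-side sketch using only the standard library; the host, port, and `api/v1` prefix are assumptions based on the default base URL shown in the README further down:

```python
import json
import urllib.request

# Assumed default Lemonade Server address; adjust to your deployment.
BASE_URL = "http://localhost:8000/api/v1"

# verbose=true mirrors the query parameter parsed by the handler above
# and adds the "Python Packages" list to the response.
with urllib.request.urlopen(f"{BASE_URL}/system-info?verbose=true") as resp:
    info = json.loads(resp.read().decode("utf-8"))

print(info.get("OS Version"))
print(info.get("devices"))
```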
@@ -90,7 +90,10 @@ body {
      border-radius: 8px;
      border: 1px solid #e0e0e0;
      max-width: 1000px;
-     width: 100%;
+     min-width: 320px;
+     width: calc(100% - 2rem); /* Responsive width with margin */
+     margin-left: 1rem;
+     margin-right: 1rem;
  }
 
  .tabs {
@@ -130,8 +133,11 @@ body {
  .chat-container {
      display: flex;
      flex-direction: column;
-     height: 600px;
+     height: calc(100vh - 650px); /* Subtract space for navbar, title, wall of logos, etc */
+     min-height: 300px;
+     max-height: 1200px;
      max-width: 800px;
+     width: 100%;
      margin: 0 auto;
      border: 1px solid #e0e0e0;
      border-radius: 8px;
@@ -638,6 +644,7 @@ body {
      border-radius: 6px;
      overflow: hidden;
      transition: box-shadow 0.2s ease;
+     min-width: 0;
  }
 
  .register-model-name-group:focus-within {
@@ -686,6 +693,8 @@ body {
      transition: border-color 0.2s ease, box-shadow 0.2s ease;
      outline: none;
      box-shadow: 0 2px 8px rgba(0,0,0,0.06);
+     box-sizing: border-box;
+     min-width: 0;
  }
 
  #register-model-name:focus {
@@ -709,6 +718,7 @@ body {
      transition: border-color 0.2s ease, box-shadow 0.2s ease;
      outline: none;
      box-shadow: 0 2px 8px rgba(0,0,0,0.06);
+     box-sizing: border-box;
  }
 
  #register-recipe:focus {
@@ -748,6 +758,8 @@ body {
      transition: border-color 0.2s ease, box-shadow 0.2s ease;
      outline: none;
      box-shadow: 0 2px 8px rgba(0,0,0,0.06);
+     box-sizing: border-box;
+     min-width: 0;
  }
 
  #register-mmproj:focus, #register-checkpoint:focus {
@@ -930,6 +942,18 @@ body {
      transform: translateX(0) translateY(-2px) !important;
  }
 
+ @media (max-width: 800px) {
+     .model-mgmt-container {
+         flex-direction: column;
+         gap: 1.5em;
+     }
+
+     .model-mgmt-pane {
+         flex: none;
+         width: 100%;
+     }
+ }
+
  @media (max-width: 600px) {
      .title {
          font-size: 2rem;
@@ -942,61 +966,20 @@ body {
          margin-top: 1rem;
      }
      .model-mgmt-container {
-         flex-direction: column;
          gap: 1em;
      }
- }
-
- @media (max-width: 800px) {
-     .model-mgmt-register-form {
-         padding: 1.2em 1em 1em 1em;
-         margin: 0 1em 1.5em 1em;
-     }
 
-     .register-form-row {
-         flex-direction: column;
-         align-items: stretch;
-         gap: 0.6em;
-     }
-
-     .register-label {
-         min-width: 0;
-         font-size: 0.9em;
-     }
-
-     .register-label.reasoning-inline {
-         margin-left: 0;
-         margin-top: 0.5em;
-         justify-content: flex-start;
-     }
-
-     #register-model-name, #register-mmproj, #register-checkpoint {
-         min-width: 0;
-         width: 100%;
-     }
-
-     #register-recipe {
-         min-width: 0;
-         width: 100%;
+     .tab-container {
+         margin-left: 0.5rem;
+         margin-right: 0.5rem;
+         width: calc(100% - 1rem);
      }
 
-     .register-doc-link {
-         margin-left: 0;
-         margin-top: 0.5em;
-         align-self: flex-start;
-     }
-
-     .register-form-row:last-child {
-         flex-direction: row;
-         justify-content: space-between;
-         align-items: center;
-         flex-wrap: wrap;
-         gap: 1em;
-     }
-
-     #register-submit {
-         padding: 0.8em 1.5em;
-         font-size: 0.9em;
+     /* Ensure all input fields are properly constrained on very narrow screens */
+     #register-model-name, #register-mmproj, #register-checkpoint, #register-recipe {
+         max-width: 100%;
+         overflow: hidden;
+         text-overflow: ellipsis;
      }
  }
 
@@ -1141,7 +1124,7 @@ body {
      display: none;
  }
 
- @media (max-width: 600px) {
+ @media (max-width: 800px) {
      .app-logos-grid {
          gap: 0.5rem;
          padding: 0 0.5rem;
@@ -327,7 +327,11 @@
          let filteredModels = [];
          let defaultIndex = 0;
 
-         data.data.forEach(function(model) {
+         // Check if model is specified in URL parameters
+         const urlModel = new URLSearchParams(window.location.search).get('model');
+         let urlModelIndex = -1;
+
+         data.data.forEach(function(model, index) {
              const modelId = model.id || model.name || model;
              const modelInfo = allModels[modelId] || {};
              const labels = modelInfo.labels || [];
@@ -341,9 +345,17 @@
              const opt = document.createElement('option');
              opt.value = modelId;
              opt.textContent = modelId;
+
+             // Check if this model matches the URL parameter
+             if (urlModel && modelId === urlModel) {
+                 urlModelIndex = filteredModels.length - 1;
+             }
+
+             // Default fallback for backwards compatibility
              if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {
                  defaultIndex = filteredModels.length - 1;
              }
+
              select.appendChild(opt);
          });
 
@@ -352,7 +364,16 @@
              return;
          }
 
-         select.selectedIndex = defaultIndex;
+         // Select the URL-specified model if found, otherwise use default
+         if (urlModelIndex !== -1) {
+             select.selectedIndex = urlModelIndex;
+             console.log(`Selected model from URL parameter: ${urlModel}`);
+         } else {
+             select.selectedIndex = defaultIndex;
+             if (urlModel) {
+                 console.warn(`Model '${urlModel}' specified in URL not found in available models`);
+             }
+         }
      } catch (e) {
          const select = document.getElementById('model-select');
          select.innerHTML = `<option>Error loading models: ${e.message}</option>`;
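The practical effect of the script change above is that the page hosting the model dropdown can be opened with a `model` query parameter to preselect an entry. A small illustrative sketch from Python; the localhost address is an assumption based on the Model Manager link in the README below, and the model name is simply the default used in the script:

```python
import webbrowser
from urllib.parse import urlencode

# Assumed address of the built-in web UI; adjust to your setup.
base_url = "http://localhost:8000/"

# Preselect a model in the dropdown via the new ?model= query parameter.
params = urlencode({"model": "Llama-3.2-1B-Instruct-Hybrid"})
webbrowser.open(f"{base_url}?{params}")
```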
lemonade/version.py CHANGED
@@ -1 +1 @@
- __version__ = "8.0.4"
+ __version__ = "8.0.5"
@@ -0,0 +1,295 @@
+ Metadata-Version: 2.4
+ Name: lemonade-sdk
+ Version: 8.0.5
+ Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
+ Author-email: lemonade@amd.com
+ Requires-Python: >=3.10, <3.13
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ License-File: NOTICE.md
+ Requires-Dist: invoke>=2.0.0
+ Requires-Dist: onnx<1.18.0,>=1.11.0
+ Requires-Dist: pyyaml>=5.4
+ Requires-Dist: typeguard>=2.3.13
+ Requires-Dist: packaging>=20.9
+ Requires-Dist: numpy<2.0.0
+ Requires-Dist: fasteners
+ Requires-Dist: GitPython>=3.1.40
+ Requires-Dist: psutil>=6.1.1
+ Requires-Dist: wmi
+ Requires-Dist: py-cpuinfo
+ Requires-Dist: pytz
+ Requires-Dist: zstandard
+ Requires-Dist: fastapi
+ Requires-Dist: uvicorn[standard]
+ Requires-Dist: openai>=1.81.0
+ Requires-Dist: transformers<=4.51.3
+ Requires-Dist: jinja2
+ Requires-Dist: tabulate
+ Requires-Dist: sentencepiece
+ Requires-Dist: huggingface-hub==0.33.0
+ Provides-Extra: oga-hybrid
+ Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
+ Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
+ Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
+ Provides-Extra: oga-cpu
+ Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
+ Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
+ Provides-Extra: dev
+ Requires-Dist: torch>=2.6.0; extra == "dev"
+ Requires-Dist: accelerate; extra == "dev"
+ Requires-Dist: datasets; extra == "dev"
+ Requires-Dist: pandas>=1.5.3; extra == "dev"
+ Requires-Dist: matplotlib; extra == "dev"
+ Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
+ Requires-Dist: lm-eval[api]; extra == "dev"
+ Provides-Extra: oga-hybrid-minimal
+ Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
+ Provides-Extra: oga-cpu-minimal
+ Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
+ Provides-Extra: llm
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm"
+ Provides-Extra: llm-oga-cpu
+ Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
+ Provides-Extra: llm-oga-igpu
+ Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
+ Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
+ Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
+ Provides-Extra: llm-oga-cuda
+ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
+ Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
+ Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
+ Provides-Extra: llm-oga-npu
+ Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
+ Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
+ Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
+ Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
+ Provides-Extra: llm-oga-hybrid
+ Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
+ Provides-Extra: llm-oga-unified
+ Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
+ Dynamic: author-email
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: license-file
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
+
+ ## 🍋 Lemonade: Local LLM Serving with GPU and NPU acceleration
+
+ <p align="center">
+ <a href="https://discord.gg/5xXzkMu8Zk">
+ <img src="https://img.shields.io/badge/Discord-7289DA?logo=discord&logoColor=white" alt="Discord" />
+ </a>
+ <a href="https://github.com/lemonade-sdk/lemonade/tree/main/test" title="Check out our tests">
+ <img src="https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg" alt="Lemonade tests" />
+ </a>
+ <a href="docs/README.md#installation" title="Check out our instructions">
+ <img src="https://img.shields.io/badge/Windows-11-0078D6?logo=windows&logoColor=white" alt="Windows 11" />
+ </a>
+ <a href="https://lemonade-server.ai/#linux" title="Ubuntu 24.04 & 25.04 Supported">
+ <img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" />
+ </a>
+ <a href="docs/README.md#installation" title="Check out our instructions">
+ <img src="https://img.shields.io/badge/Python-3.10%20%7C%203.12-blue?logo=python&logoColor=white" alt="Made with Python" />
+ </a>
+ <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
+ <img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome" />
+ </a>
+ <a href="https://github.com/lemonade-sdk/lemonade/releases/latest" title="Download the latest release">
+ <img src="https://img.shields.io/github/v/release/lemonade-sdk/lemonade?include_prereleases" alt="Latest Release" />
+ </a>
+ <a href="https://tooomm.github.io/github-release-stats/?username=lemonade-sdk&repository=lemonade">
+ <img src="https://img.shields.io/github/downloads/lemonade-sdk/lemonade/total.svg" alt="GitHub downloads" />
+ </a>
+ <a href="https://github.com/lemonade-sdk/lemonade/issues">
+ <img src="https://img.shields.io/github/issues/lemonade-sdk/lemonade" alt="GitHub issues" />
+ </a>
+ <a href="https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE">
+ <img src="https://img.shields.io/badge/License-Apache-yellow.svg" alt="License: Apache" />
+ </a>
+ <a href="https://github.com/psf/black">
+ <img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style: black" />
+ </a>
+ <a href="https://star-history.com/#lemonade-sdk/lemonade">
+ <img src="https://img.shields.io/badge/Star%20History-View-brightgreen" alt="Star History Chart" />
+ </a>
+ </p>
+ <p align="center">
+ <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/banner.png?raw=true" alt="Lemonade Banner" />
+ </p>
+ <h3 align="center">
+ <a href="https://lemonade-server.ai">Download</a> |
+ <a href="https://lemonade-server.ai/docs/">Documentation</a> |
+ <a href="https://discord.gg/5xXzkMu8Zk">Discord</a>
+ </h3>
+
+ Lemonade makes it easy to run Large Language Models (LLMs) on your PC. Our focus is on using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
+
+ ## Getting Started
+
+ <div align="center">
+
+ | Step 1: Download & Install | Step 2: Launch and Pull Models | Step 3: Start chatting! |
+ |:---------------------------:|:-------------------------------:|:------------------------:|
+ | <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/install.gif?raw=true" alt="Download & Install" width="245" /> | <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/launch_and_pull.gif?raw=true" alt="Launch and Pull Models" width="245" /> | <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/chat.gif?raw=true" alt="Start chatting!" width="245" /> |
+ |Install using a [GUI](https://github.com/lemonade-sdk/lemonade/releases/latest/download/Lemonade_Server_Installer.exe) (Windows only), [pip](https://lemonade-server.ai/install_options.html), or [from source](https://lemonade-server.ai/install_options.html). |Use the [Model Manager](#model-library) to install models.|A built-in chat interface is available!|
+ </div>
+
+ ### Use it with your favorite OpenAI-compatible app!
+
+ <p align="center">
+ <a href="https://lemonade-server.ai/docs/server/apps/open-webui/" title="Open WebUI" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/openwebui.jpg" alt="Open WebUI" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/continue/" title="Continue" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/continue_dev.png" alt="Continue" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/amd/gaia" title="Gaia" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/gaia.ico" alt="Gaia" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/anythingLLM/" title="AnythingLLM" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/anything_llm.png" alt="AnythingLLM" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/ai-dev-gallery/" title="AI Dev Gallery" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_dev_gallery.webp" alt="AI Dev Gallery" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/lm-eval/" title="LM-Eval" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" width="60" /></a>&nbsp;&nbsp;<a href="https://lemonade-server.ai/docs/server/apps/codeGPT/" title="CodeGPT" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/codegpt.jpg" alt="CodeGPT" width="60" /></a>&nbsp;&nbsp;<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" title="AI Toolkit" target="_blank"><img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI Toolkit" width="60" /></a>
+ </p>
+
+ > [!TIP]
+ > Want your app featured here? Let's do it! Shoot us a message on [Discord](https://discord.gg/5xXzkMu8Zk), [create an issue](https://github.com/lemonade-sdk/lemonade/issues), or email lemonade@amd.com.
+
+ ## Using the CLI
+
+ To run and chat with Gemma 3:
+
+ ```
+ lemonade-server run Gemma-3-4b-it-GGUF
+ ```
+
+ To install models ahead of time, use the `pull` command:
+
+ ```
+ lemonade-server pull Gemma-3-4b-it-GGUF
+ ```
+
+ To see all available models, use the `list` command:
+
+ ```
+ lemonade-server list
+ ```
+
+ > Note: If you installed from source, use the `lemonade-server-dev` command instead.
+
+ ## Model Library
+
+ Lemonade supports both GGUF and ONNX models, as detailed in the [Supported Configurations](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
+
+ You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires the server to be running).
+ <p align="center">
+ <img src="https://github.com/lemonade-sdk/assets/blob/main/docs/model_manager.png?raw=true" alt="Model Manager" width="650" />
+ </p>
+
+ ## Supported Configurations
+
+ Lemonade supports the following configurations and makes it easy to switch between them at runtime. Find more information [here](./docs/README.md#software-and-hardware-overview).
+
+ <table>
+ <thead>
+ <tr>
+ <th rowspan="2">Hardware</th>
+ <th colspan="3" align="center">🛠️ Engine Support</th>
+ <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
+ </tr>
+ <tr>
+ <th align="center">OGA</th>
+ <th align="center">llamacpp</th>
+ <th align="center">HF</th>
+ <th align="center">Windows</th>
+ <th align="center">Linux</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><strong>🧠 CPU</strong></td>
+ <td align="center">All platforms</td>
+ <td align="center">All platforms</td>
+ <td align="center">All platforms</td>
+ <td align="center">✅</td>
+ <td align="center">✅</td>
+ </tr>
+ <tr>
+ <td><strong>🎮 GPU</strong></td>
+ <td align="center">—</td>
+ <td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
+ <td align="center">—</td>
+ <td align="center">✅</td>
+ <td align="center">✅</td>
+ </tr>
+ <tr>
+ <td><strong>🤖 NPU</strong></td>
+ <td align="center">AMD Ryzen™ AI 300 series</td>
+ <td align="center">—</td>
+ <td align="center">—</td>
+ <td align="center">✅</td>
+ <td align="center">—</td>
+ </tr>
+ </tbody>
+ </table>
+
+
+ ## Integrate Lemonade Server with Your Application
+
+ You can use any OpenAI-compatible client library by configuring it to use `http://localhost:8000/api/v1` as the base URL. A table of official and popular OpenAI clients for different languages is shown below.
+
+ Feel free to pick your preferred language.
+
+
+ | Python | C++ | Java | C# | Node.js | Go | Ruby | Rust | PHP |
+ |--------|-----|------|----|---------|----|-------|------|-----|
+ | [openai-python](https://github.com/openai/openai-python) | [openai-cpp](https://github.com/olrea/openai-cpp) | [openai-java](https://github.com/openai/openai-java) | [openai-dotnet](https://github.com/openai/openai-dotnet) | [openai-node](https://github.com/openai/openai-node) | [go-openai](https://github.com/sashabaranov/go-openai) | [ruby-openai](https://github.com/alexrudall/ruby-openai) | [async-openai](https://github.com/64bit/async-openai) | [openai-php](https://github.com/openai-php/client) |
+
+
+ ### Python Client Example
+ ```python
+ from openai import OpenAI
+
+ # Initialize the client to use Lemonade Server
+ client = OpenAI(
+     base_url="http://localhost:8000/api/v1",
+     api_key="lemonade"  # required but unused
+ )
+
+ # Create a chat completion
+ completion = client.chat.completions.create(
+     model="Llama-3.2-1B-Instruct-Hybrid",  # or any other available model
+     messages=[
+         {"role": "user", "content": "What is the capital of France?"}
+     ]
+ )
+
+ # Print the response
+ print(completion.choices[0].message.content)
+ ```
+
+ For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
+
+ ## Beyond an LLM Server
+
+ The [Lemonade SDK](./docs/README.md) also includes the following components:
+
+ - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
+ - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix and match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
+
+ ## Contributing
+
+ We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
+
+ New contributors can find beginner-friendly issues tagged with "Good First Issue" to get started.
+
+ <a href="https://github.com/lemonade-sdk/lemonade/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22">
+ <img src="https://img.shields.io/badge/🍋Lemonade-Good%20First%20Issue-yellowgreen?colorA=38b000&colorB=cccccc" alt="Good First Issue" />
+ </a>
+
+ ## Maintainers
+
+ This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
+
+ ## License
+
+ This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
+
+ <!--This file was originally licensed under Apache 2.0. It has been modified.
+ Modifications Copyright (c) 2025 AMD-->
+