lemonade-sdk 8.0.4__py3-none-any.whl → 8.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



lemonade_server/cli.py CHANGED
@@ -27,43 +27,75 @@ class DeleteError(Exception):
     """
 
 
+class ServerTimeoutError(Exception):
+    """
+    The server failed to start within the timeout period
+    """
+
+
+class ModelNotAvailableError(Exception):
+    """
+    The specified model is not available on the server
+    """
+
+
 def serve(
-    port: int,
+    port: int = None,
     log_level: str = None,
     tray: bool = False,
+    use_thread: bool = False,
 ):
     """
     Execute the serve command
     """
 
-    # Check if Lemonade Server is already running
-    _, running_port = get_server_info()
-    if running_port is not None:
-        print(
-            (
-                f"Lemonade Server is already running on port {running_port}\n"
-                "Please stop the existing server before starting a new instance."
-            ),
-        )
-        sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
-
     # Otherwise, start the server
     print("Starting Lemonade Server...")
     from lemonade.tools.server.serve import Server, DEFAULT_PORT, DEFAULT_LOG_LEVEL
 
-    server = Server()
     port = port if port is not None else DEFAULT_PORT
     log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
 
     # Hidden environment variable to enable input truncation (experimental feature)
     truncate_inputs = "LEMONADE_TRUNCATE_INPUTS" in os.environ
 
-    server.run(
-        port=port,
-        log_level=log_level,
-        truncate_inputs=truncate_inputs,
-        tray=tray,
-    )
+    # Start the server
+    serve_kwargs = {
+        "log_level": log_level,
+        "truncate_inputs": truncate_inputs,
+        "tray": tray,
+    }
+    server = Server()
+    if not use_thread:
+        server.run(
+            port=port,
+            **serve_kwargs,
+        )
+    else:
+        from threading import Thread
+        import time
+
+        # Start a background thread to run the server
+        server_thread = Thread(
+            target=server.run,
+            args=(port,),
+            kwargs=serve_kwargs,
+            daemon=True,
+        )
+        server_thread.start()
+
+        # Wait for the server to be ready
+        max_wait_time = 30
+        wait_interval = 0.5
+        waited = 0
+        while waited < max_wait_time:
+            time.sleep(wait_interval)
+            _, running_port = get_server_info()
+            if running_port is not None:
+                break
+            waited += wait_interval
+
+    return port, server_thread
 
 
 def stop():
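The notable addition in this hunk is the `use_thread` path: `serve()` now starts the server on a daemon thread, polls `get_server_info()` for up to 30 seconds, and returns `(port, server_thread)` to the caller. A minimal sketch of driving it from Python (the caller below is hypothetical; the real in-package consumer is the new `run()` command further down). Note that the polling loop exits quietly on timeout, so a caller should re-check readiness itself:

    from lemonade_server.cli import serve, get_server_info

    # Start Lemonade Server in the background and wait for it to report ready
    port, server_thread = serve(use_thread=True, log_level="info")

    _, running_port = get_server_info()
    if running_port is None:
        # serve() returns after ~30 s even if the server never came up;
        # the new ServerTimeoutError is declared but not raised in this hunk
        raise RuntimeError("Lemonade Server did not start in time")
    print(f"Server ready on port {running_port}")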
@@ -161,9 +193,8 @@ def pull(
     if pull_response.status_code != 200:
         raise PullError(
             f"Failed to install {model_name}. Check the "
-            "Lemonade Server log for more information. A list of supported models "
-            "is provided at "
-            "https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/server_models.md"
+            "Lemonade Server log for more information. You can list "
+            "supported models with `lemonade-server list`"
         )
     else:
         from lemonade_server.model_manager import ModelManager
@@ -212,6 +243,53 @@ def delete(model_names: List[str]):
         ModelManager().delete_model(model_name)
 
 
+def run(model_name: str):
+    """
+    Start the server if not running and open the webapp with the specified model
+    """
+    import webbrowser
+    import time
+
+    # Start the server if not running
+    _, port = get_server_info()
+    server_previously_running = port is not None
+    if not server_previously_running:
+        port, server_thread = serve(use_thread=True, tray=True, log_level="info")
+
+    # Pull model
+    pull([model_name])
+
+    # Load model
+    load(model_name, port)
+
+    # Open the webapp with the specified model
+    url = f"http://localhost:{port}/?model={model_name}#llm-chat"
+    print(f"You can now chat with {model_name} at {url}")
+    webbrowser.open(url)
+
+    # Keep the server running if we started it
+    if not server_previously_running:
+        while server_thread.is_alive():
+            time.sleep(0.5)
+
+
+def load(model_name: str, port: int):
+    """
+    Load a model using the endpoint
+    """
+    import requests
+
+    base_url = f"http://localhost:{port}/api/v1"
+
+    # Load the model
+    load_response = requests.post(f"{base_url}/load", json={"model_name": model_name})
+    if load_response.status_code != 200:
+        raise ModelLoadError(
+            f"Failed to load {model_name}. Check the "
+            "Lemonade Server log for more information."
+        )
+
+
 def version():
     """
     Print the version number
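The new `load()` helper is a thin wrapper over the server's REST API: a POST to `/api/v1/load` with the model name as JSON. The endpoint can be exercised directly with any HTTP client; a sketch, assuming a server already listening locally (the port value and model name are placeholders):

    import requests

    port = 8000  # placeholder: use the port reported by get_server_info()
    resp = requests.post(
        f"http://localhost:{port}/api/v1/load",
        json={"model_name": "Qwen2.5-Coder-32B-Instruct-GGUF"},
    )
    if resp.status_code != 200:
        raise RuntimeError("Load failed; check the Lemonade Server log")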
@@ -294,6 +372,46 @@ def get_server_info() -> Tuple[int | None, int | None]:
     return None, None
 
 
+def list_models():
+    """
+    List recommended models and their download status
+    """
+    from tabulate import tabulate
+    from lemonade_server.model_manager import ModelManager
+
+    model_manager = ModelManager()
+
+    # Get all supported models and downloaded models
+    supported_models = model_manager.supported_models
+    downloaded_models = model_manager.downloaded_models
+
+    # Filter to only show recommended models
+    recommended_models = {
+        model_name: model_info
+        for model_name, model_info in supported_models.items()
+        if model_info.get("suggested", False)
+    }
+
+    # Create table data
+    table_data = []
+    for model_name, model_info in recommended_models.items():
+        downloaded_status = "Yes" if model_name in downloaded_models else "No"
+
+        # Get model labels/type
+        labels = model_info.get("labels", [])
+        model_type = ", ".join(labels) if labels else "-"
+
+        table_data.append([model_name, downloaded_status, model_type])
+
+    # Sort by model name for consistent display
+    # Show downloaded models first
+    table_data.sort(key=lambda x: (x[1] == "No", x[0].lower()))
+
+    # Display table
+    headers = ["Model Name", "Downloaded", "Details"]
+    print(tabulate(table_data, headers=headers, tablefmt="simple"))
+
+
 def main():
     parser = argparse.ArgumentParser(
         description="Serve LLMs on CPU, GPU, and NPU.",
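Since `list_models()` renders with `tablefmt="simple"` and sorts downloaded models first, `lemonade-server list` prints a plain three-column table along these lines (the rows and download states here are illustrative; the model names come from the registry additions further down):

    Model Name                       Downloaded    Details
    -------------------------------  ------------  ---------
    Devstral-Small-2507-GGUF         Yes           -
    Qwen2.5-Coder-32B-Instruct-GGUF  No            reasoning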
@@ -333,6 +451,11 @@ def main():
     # Stop command
     stop_parser = subparsers.add_parser("stop", help="Stop the server")
 
+    # List command
+    list_parser = subparsers.add_parser(
+        "list", help="List recommended models and their download status"
+    )
+
     # Pull command
     pull_parser = subparsers.add_parser(
         "pull",
@@ -381,6 +504,16 @@ def main():
         nargs="+",
     )
 
+    # Run command
+    run_parser = subparsers.add_parser(
+        "run",
+        help="Chat with specified model (starts server if needed)",
+    )
+    run_parser.add_argument(
+        "model",
+        help="Lemonade Server model name to run",
+    )
+
     args = parser.parse_args()
 
     if os.name != "nt":
@@ -389,6 +522,15 @@ def main():
     if args.version:
         version()
     elif args.command == "serve":
+        _, running_port = get_server_info()
+        if running_port is not None:
+            print(
+                (
+                    f"Lemonade Server is already running on port {running_port}\n"
+                    "Please stop the existing server before starting a new instance."
+                ),
+            )
+            sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
         serve(
             port=args.port,
             log_level=args.log_level,
@@ -396,6 +538,8 @@ def main():
         )
     elif args.command == "status":
         status()
+    elif args.command == "list":
+        list_models()
     elif args.command == "pull":
         pull(
             args.model,
@@ -408,6 +552,8 @@ def main():
         delete(args.model)
     elif args.command == "stop":
         stop()
+    elif args.command == "run":
+        run(args.model)
     elif args.command == "help" or not args.command:
         parser.print_help()
 
lemonade_server/model_manager.py CHANGED
@@ -6,31 +6,13 @@ import huggingface_hub
 from importlib.metadata import distributions
 from lemonade_server.pydantic_models import PullConfig
 from lemonade.cache import DEFAULT_CACHE_DIR
+from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
 
 USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
 
 
 class ModelManager:
 
-    @staticmethod
-    def parse_checkpoint(checkpoint: str) -> tuple[str, str | None]:
-        """
-        Parse a checkpoint string that may contain a variant separated by a colon.
-
-        For GGUF models, the format is "repository:variant" (e.g., "unsloth/Qwen3-0.6B-GGUF:Q4_0").
-        For other models, there is no variant.
-
-        Args:
-            checkpoint: The checkpoint string, potentially with variant
-
-        Returns:
-            tuple: (base_checkpoint, variant) where variant is None if no colon is present
-        """
-        if ":" in checkpoint:
-            base_checkpoint, variant = checkpoint.split(":", 1)
-            return base_checkpoint, variant
-        return checkpoint, None
-
     @property
     def supported_models(self) -> dict:
         """
@@ -98,7 +80,7 @@ class ModelManager:
         downloaded_models = {}
         downloaded_checkpoints = self.downloaded_hf_checkpoints
         for model in self.supported_models:
-            base_checkpoint = self.parse_checkpoint(
+            base_checkpoint = parse_checkpoint(
                 self.supported_models[model]["checkpoint"]
             )[0]
             if base_checkpoint in downloaded_checkpoints:
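`parse_checkpoint` keeps the semantics spelled out in the docstring removed above; it is simply imported from `lemonade.tools.llamacpp.utils` now instead of living on `ModelManager`. A quick sketch of the contract (the second checkpoint string is illustrative):

    from lemonade.tools.llamacpp.utils import parse_checkpoint

    # GGUF checkpoints may carry a ":variant" suffix; other models have none
    assert parse_checkpoint("unsloth/Qwen3-0.6B-GGUF:Q4_0") == (
        "unsloth/Qwen3-0.6B-GGUF",
        "Q4_0",
    )
    assert parse_checkpoint("org/plain-model") == ("org/plain-model", None)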
@@ -113,132 +95,6 @@ class ModelManager:
         """
         return self.filter_models_by_backend(self.downloaded_models)
 
-    def identify_gguf_models(
-        self, checkpoint: str, variant: str, mmproj: str
-    ) -> tuple[dict, list[str]]:
-        """
-        Identifies the GGUF model files in the repository that match the variant.
-        """
-
-        hint = """
-        The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
-
-        The VARIANT format can be one of several types:
-        1. Full filename: exact file to download
-        2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
-        3. Quantization variant: find a single file ending with the variant name (case insensitive)
-        4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
-
-        Examples:
-          - "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
-          - "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
-          - "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
-          - "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
-        """
-
-        repo_files = huggingface_hub.list_repo_files(checkpoint)
-        sharded_files = []
-
-        # (case 1) If variant ends in .gguf, use it directly
-        if variant and variant.endswith(".gguf"):
-            variant_name = variant
-            if variant_name not in repo_files:
-                raise ValueError(
-                    f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
-                )
-        # (case 2) If no variant is provided, get the first .gguf file in the repository
-        elif variant is None:
-            all_variants = [
-                f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
-            ]
-            if len(all_variants) == 0:
-                raise ValueError(
-                    f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
-                )
-            variant_name = all_variants[0]
-        else:
-            # (case 3) Find a single file ending with the variant name (case insensitive)
-            end_with_variant = [
-                f
-                for f in repo_files
-                if f.lower().endswith(f"{variant}.gguf".lower())
-                and "mmproj" not in f.lower()
-            ]
-            if len(end_with_variant) == 1:
-                variant_name = end_with_variant[0]
-            elif len(end_with_variant) > 1:
-                raise ValueError(
-                    f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
-                )
-            # (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
-            else:
-                sharded_files = [
-                    f
-                    for f in repo_files
-                    if f.endswith(".gguf")
-                    and f.lower().startswith(f"{variant}/".lower())
-                ]
-
-                if not sharded_files:
-                    raise ValueError(
-                        f"No .gguf files found for variant {variant}. {hint}"
-                    )
-
-                # Sort to ensure consistent ordering
-                sharded_files.sort()
-
-                # Use first file as primary (this is how llamacpp handles it)
-                variant_name = sharded_files[0]
-
-        core_files = {"variant": variant_name}
-
-        # If there is a mmproj file, add it to the patterns
-        if mmproj:
-            if mmproj not in repo_files:
-                raise ValueError(
-                    f"The provided mmproj file {mmproj} was not found in {checkpoint}."
-                )
-            core_files["mmproj"] = mmproj
-
-        return core_files, sharded_files
-
-    def download_gguf(self, model_config: PullConfig) -> dict:
-        """
-        Downloads the GGUF file for the given model configuration.
-
-        For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
-        will be downloaded but only the first file will be returned for loading.
-        """
-
-        # This code handles all cases by constructing the appropriate filename or pattern
-        checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
-
-        # Identify the GGUF model files in the repository that match the variant
-        core_files, sharded_files = self.identify_gguf_models(
-            checkpoint, variant, model_config.mmproj
-        )
-
-        # Download the files
-        snapshot_folder = huggingface_hub.snapshot_download(
-            repo_id=checkpoint,
-            allow_patterns=list(core_files.values()) + sharded_files,
-        )
-
-        # Ensure we downloaded all expected files
-        for file in list(core_files.values()) + sharded_files:
-            expected_path = os.path.join(snapshot_folder, file)
-            if not os.path.exists(expected_path):
-                raise ValueError(
-                    f"Hugging Face snapshot download for {model_config.checkpoint} "
-                    f"expected file {file} not found at {expected_path}"
-                )
-
-        # Return a dict of the full path of the core GGUF files
-        return {
-            file_name: os.path.join(snapshot_folder, file_path)
-            for file_name, file_path in core_files.items()
-        }
-
     def download_models(
         self,
         models: list[str],
@@ -317,7 +173,7 @@ class ModelManager:
             print(f"Downloading {model} ({checkpoint_to_download})")
 
             if "gguf" in checkpoint_to_download.lower():
-                self.download_gguf(gguf_model_config)
+                download_gguf(gguf_model_config.checkpoint, gguf_model_config.mmproj)
             else:
                 huggingface_hub.snapshot_download(repo_id=checkpoint_to_download)
 
@@ -373,7 +229,7 @@ class ModelManager:
             print(f"Deleting {model_name} ({checkpoint})")
 
             # Handle GGUF models that have the format "checkpoint:variant"
-            base_checkpoint = self.parse_checkpoint(checkpoint)[0]
+            base_checkpoint = parse_checkpoint(checkpoint)[0]
 
             try:
                 # Get the local path using snapshot_download with local_files_only=True
lemonade_server/server_models.json CHANGED
@@ -213,5 +213,16 @@
         "recipe": "llamacpp",
         "suggested": false,
         "labels": ["reranking"]
+    },
+    "Devstral-Small-2507-GGUF":{
+        "checkpoint": "mistralai/Devstral-Small-2507_gguf:Q4_K_M",
+        "recipe": "llamacpp",
+        "suggested": true
+    },
+    "Qwen2.5-Coder-32B-Instruct-GGUF": {
+        "checkpoint": "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M",
+        "recipe": "llamacpp",
+        "suggested": true,
+        "labels": ["reasoning"]
     }
 }
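Both new entries are marked `suggested`, so they surface in `lemonade-server list` and can be pulled by name. A sketch using the package's own manager (assumes lemonade-sdk 8.0.6 is installed and that `download_models`'s remaining parameters default):

    from lemonade_server.model_manager import ModelManager

    # Resolves the checkpoint "mistralai/Devstral-Small-2507_gguf:Q4_K_M"
    # and downloads the Q4_K_M GGUF variant via download_gguf
    ModelManager().download_models(["Devstral-Small-2507-GGUF"])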
lemonade_sdk-8.0.4.dist-info/METADATA DELETED
@@ -1,176 +0,0 @@
-Metadata-Version: 2.4
-Name: lemonade-sdk
-Version: 8.0.4
-Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
-Author-email: lemonade@amd.com
-Requires-Python: >=3.10, <3.12
-Description-Content-Type: text/markdown
-License-File: LICENSE
-License-File: NOTICE.md
-Requires-Dist: invoke>=2.0.0
-Requires-Dist: onnx<1.18.0,>=1.11.0
-Requires-Dist: pyyaml>=5.4
-Requires-Dist: typeguard>=2.3.13
-Requires-Dist: packaging>=20.9
-Requires-Dist: numpy<2.0.0
-Requires-Dist: fasteners
-Requires-Dist: GitPython>=3.1.40
-Requires-Dist: psutil>=6.1.1
-Requires-Dist: wmi
-Requires-Dist: py-cpuinfo
-Requires-Dist: pytz
-Requires-Dist: zstandard
-Requires-Dist: fastapi
-Requires-Dist: uvicorn[standard]
-Requires-Dist: openai>=1.81.0
-Requires-Dist: transformers<=4.51.3
-Requires-Dist: jinja2
-Requires-Dist: tabulate
-Requires-Dist: sentencepiece
-Requires-Dist: huggingface-hub==0.33.0
-Provides-Extra: oga-hybrid
-Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
-Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
-Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
-Provides-Extra: oga-cpu
-Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
-Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
-Provides-Extra: dev
-Requires-Dist: torch>=2.6.0; extra == "dev"
-Requires-Dist: accelerate; extra == "dev"
-Requires-Dist: datasets; extra == "dev"
-Requires-Dist: pandas>=1.5.3; extra == "dev"
-Requires-Dist: matplotlib; extra == "dev"
-Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
-Requires-Dist: lm-eval[api]; extra == "dev"
-Provides-Extra: oga-hybrid-minimal
-Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
-Provides-Extra: oga-cpu-minimal
-Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
-Provides-Extra: llm
-Requires-Dist: lemonade-sdk[dev]; extra == "llm"
-Provides-Extra: llm-oga-cpu
-Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
-Provides-Extra: llm-oga-igpu
-Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
-Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
-Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
-Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
-Provides-Extra: llm-oga-cuda
-Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
-Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
-Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
-Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
-Provides-Extra: llm-oga-npu
-Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
-Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
-Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
-Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
-Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
-Provides-Extra: llm-oga-hybrid
-Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
-Provides-Extra: llm-oga-unified
-Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
-Dynamic: author-email
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: license-file
-Dynamic: provides-extra
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
-
-[![Lemonade tests](https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg)](https://github.com/lemonade-sdk/lemonade/tree/main/test "Check out our tests")
-[![OS - Windows | Linux](https://img.shields.io/badge/OS-windows%20%7C%20linux-blue)](docs/README.md#installation "Check out our instructions")
-[![Made with Python](https://img.shields.io/badge/Python-3.10-blue?logo=python&logoColor=white)](docs/README.md#installation "Check out our instructions")
-
-## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
-
-The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
-
-<div align="center">
-  <img src="https://download.amd.com/images/lemonade_640x480_1.gif" alt="Lemonade Demo" title="Lemonade in Action">
-</div>
-
-### Features
-
-The [Lemonade SDK](./docs/README.md) is comprised of the following:
-
-- 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
-- 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
-- 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
-  - Prompting with templates.
-  - Measuring accuracy with a variety of tests.
-  - Benchmarking to get the time-to-first-token and tokens per second.
-  - Profiling the memory utilization.
-
-### [Click here to get started with Lemonade.](./docs/README.md)
-
-### Supported Configurations
-
-Maximum LLM performance requires the right hardware accelerator with the right inference engine for your scenario. Lemonade supports the following configurations, while also making it easy to switch between them at runtime.
-
-<table border="1" cellpadding="6" cellspacing="0">
-  <thead>
-    <tr>
-      <th rowspan="2">Hardware</th>
-      <th colspan="3" align="center">🛠️ Engine Support</th>
-      <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
-    </tr>
-    <tr>
-      <th align="center">OGA</th>
-      <th align="center">llamacpp</th>
-      <th align="center">HF</th>
-      <th align="center">Windows</th>
-      <th align="center">Linux</th>
-    </tr>
-  </thead>
-  <tbody>
-    <tr>
-      <td>🧠 CPU</td>
-      <td align="center">All platforms</td>
-      <td align="center">All platforms</td>
-      <td align="center">All platforms</td>
-      <td align="center">✅</td>
-      <td align="center">✅</td>
-    </tr>
-    <tr>
-      <td>🎮 GPU</td>
-      <td align="center">—</td>
-      <td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
-      <td align="center">—</td>
-      <td align="center">✅</td>
-      <td align="center">✅</td>
-    </tr>
-    <tr>
-      <td>🤖 NPU</td>
-      <td align="center">AMD Ryzen™ AI 300 series</td>
-      <td align="center">—</td>
-      <td align="center">—</td>
-      <td align="center">✅</td>
-      <td align="center">—</td>
-    </tr>
-  </tbody>
-</table>
-
-To learn more about the supported hardware and software, visit the documentation [here](./docs/README.md#software-and-hardware-overview).
-
-## Integrate Lemonade Server with Your Application
-
-Lemonade Server enables languages including Python, C++, Java, C#, Node.js, Go, Ruby, Rust, and PHP. For the full list and integration details, see [docs/server/README.md](./docs/server/README.md).
-
-## Contributing
-
-We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
-
-## Maintainers
-
-This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues) or email [lemonade@amd.com](mailto:lemonade@amd.com).
-
-## License
-
-This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
-
-<!--This file was originally licensed under Apache 2.0. It has been modified.
-Modifications Copyright (c) 2025 AMD-->