lemonade-sdk 8.0.4__py3-none-any.whl → 8.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic.
- lemonade/api.py +50 -0
- lemonade/common/inference_engines.py +415 -0
- lemonade/common/system_info.py +493 -47
- lemonade/tools/management_tools.py +53 -7
- lemonade/tools/server/serve.py +29 -0
- lemonade/tools/server/static/styles.css +36 -53
- lemonade/tools/server/static/webapp.html +23 -2
- lemonade/version.py +1 -1
- lemonade_sdk-8.0.5.dist-info/METADATA +295 -0
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/RECORD +16 -15
- lemonade_server/cli.py +168 -22
- lemonade_sdk-8.0.4.dist-info/METADATA +0 -176
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.4.dist-info → lemonade_sdk-8.0.5.dist-info}/top_level.txt +0 -0
lemonade/api.py
CHANGED
@@ -5,6 +5,11 @@ from lemonade.state import State
 import lemonade.common.printing as printing
 import lemonade.cache as cache
 from lemonade.tools.adapter import ModelAdapter, TokenizerAdapter
+from lemonade.common.system_info import (
+    get_system_info_dict,
+    get_device_info_dict,
+    get_system_info as get_system_info_obj,
+)


 class NotSupported(Exception):
@@ -121,5 +126,50 @@ def from_pretrained(
     _raise_not_supported(recipe, checkpoint)


+def get_system_info(verbose: bool = False) -> Dict:
+    """
+    Get comprehensive system information including hardware details and device information.
+
+    Returns:
+        dict: Complete system information including:
+            - Basic system info (OS, processor, memory, BIOS, etc.).
+            - Device information (CPU, AMD iGPU, AMD dGPU, NPU).
+            - Inference engine availability per device.
+            - Python package versions (verbose mode only).
+    """
+
+    # Get basic system info
+    info = get_system_info_dict()
+
+    # Add device information
+    info["Devices"] = get_device_info_dict()
+
+    # Filter out verbose-only information if not in verbose mode
+    if not verbose:
+        essential_keys = ["OS Version", "Processor", "Physical Memory", "Devices"]
+        info = {k: v for k, v in info.items() if k in essential_keys}
+    else:
+        # In verbose mode, add Python packages at the end
+        system_info_obj = get_system_info_obj()
+        info["Python Packages"] = system_info_obj.get_python_packages()
+
+    return info
+
+
+def get_device_info() -> Dict:
+    """
+    Get device information including CPU, AMD iGPU, AMD dGPU, and NPU details.
+
+    Returns:
+        dict: Device information including:
+            - cpu: CPU details with inference engine availability.
+            - amd_igpu: AMD integrated GPU information.
+            - amd_dgpu: List of AMD discrete GPU information.
+            - npu: NPU information.
+    """
+
+    return get_device_info_dict()
+
+
 # This file was originally licensed under Apache 2.0. It has been modified.
 # Modifications Copyright (c) 2025 AMD
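The two public helpers that 8.0.5 adds to lemonade.api can be exercised roughly as shown below. This is a minimal usage sketch, not part of the diff, and the exact keys and device entries in the returned dictionaries depend on the host machine and on which optional packages are installed.

# Sketch only: exercising the new lemonade.api helpers from 8.0.5.
from lemonade.api import get_system_info, get_device_info

# Non-verbose mode keeps only the essential keys ("OS Version", "Processor",
# "Physical Memory") plus the "Devices" section.
info = get_system_info()
print(info["OS Version"], info["Processor"])

# Verbose mode additionally reports Python package versions.
verbose_info = get_system_info(verbose=True)
print("Python Packages" in verbose_info)

# Device-only view: cpu / amd_igpu / amd_dgpu / npu entries, each annotated
# with inference engine availability.
for device, details in get_device_info().items():
    print(device, details)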
lemonade/common/inference_engines.py
ADDED
@@ -0,0 +1,415 @@
+import os
+import sys
+import importlib.util
+import importlib.metadata
+import platform
+import subprocess
+from abc import ABC, abstractmethod
+from typing import Dict, Optional
+import transformers
+
+
+class InferenceEngineDetector:
+    """
+    Main class for detecting inference engine availability.
+    """
+
+    def __init__(self):
+        self.oga_detector = OGADetector()
+        self.llamacpp_detector = LlamaCppDetector()
+        self.transformers_detector = TransformersDetector()
+
+    def detect_engines_for_device(self, device_type: str) -> Dict[str, Dict]:
+        """
+        Detect all available inference engines for a specific device type.
+
+        Args:
+            device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
+
+        Returns:
+            dict: Engine availability information
+        """
+        engines = {}
+
+        # Detect OGA availability
+        oga_info = self.oga_detector.detect_for_device(device_type)
+        if oga_info:
+            engines["oga"] = oga_info
+
+        # Detect llama.cpp availability
+        llamacpp_info = self.llamacpp_detector.detect_for_device(device_type)
+        if llamacpp_info:
+            engines["llamacpp"] = llamacpp_info
+
+        # Detect Transformers availability
+        transformers_info = self.transformers_detector.detect_for_device(device_type)
+        if transformers_info:
+            engines["transformers"] = transformers_info
+
+        return engines
+
+
+class BaseEngineDetector(ABC):
+    """
+    Base class for engine-specific detectors.
+    """
+
+    @abstractmethod
+    def detect_for_device(self, device_type: str) -> Optional[Dict]:
+        """
+        Detect engine availability for specific device type.
+        """
+
+    @abstractmethod
+    def is_installed(self) -> bool:
+        """
+        Check if the engine package/binary is installed.
+        """
+
+
+class OGADetector(BaseEngineDetector):
+    """
+    Detector for ONNX Runtime GenAI (OGA).
+    """
+
+    def detect_for_device(self, device_type: str) -> Optional[Dict]:
+        """
+        Detect OGA availability for specific device.
+        """
+        # Check package installation based on device type
+        if device_type == "npu":
+            if not self.is_npu_package_installed():
+                return {
+                    "available": False,
+                    "error": "NPU packages not installed (need "
+                    "onnxruntime-genai-directml-ryzenai or onnxruntime-vitisai)",
+                }
+        else:
+            # For other devices, check general OGA installation
+            if not self.is_installed():
+                return None
+
+        try:
+            import onnxruntime as ort
+
+            # Map device types to ORT providers
+            device_provider_map = {
+                "cpu": "cpu",
+                "amd_igpu": "dml",
+                "amd_dgpu": "dml",
+                "npu": "vitisai",
+            }
+
+            if device_type not in device_provider_map:
+                return None
+
+            backend = device_provider_map[device_type]
+
+            # Map backends to ORT provider names
+            provider_map = {
+                "cpu": "CPUExecutionProvider",
+                "dml": "DmlExecutionProvider",
+                "vitisai": "VitisAIExecutionProvider",
+            }
+
+            required_provider = provider_map[backend]
+            available_providers = ort.get_available_providers()
+
+            if required_provider in available_providers:
+                result = {
+                    "available": True,
+                    "version": self._get_oga_version(device_type),
+                    "backend": backend,
+                }
+
+                # Add dependency versions in details
+                result["details"] = {
+                    "dependency_versions": {"onnxruntime": ort.__version__}
+                }
+
+                return result
+            else:
+                if device_type == "npu":
+                    error_msg = (
+                        "VitisAI provider not available - "
+                        "check AMD NPU driver installation"
+                    )
+                else:
+                    error_msg = f"{backend.upper()} provider not available"
+
+                return {
+                    "available": False,
+                    "error": error_msg,
+                }
+
+        except (ImportError, AttributeError) as e:
+            return {"available": False, "error": f"OGA detection failed: {str(e)}"}
+
+    def is_installed(self) -> bool:
+        """
+        Check if OGA is installed.
+        """
+        return importlib.util.find_spec("onnxruntime_genai") is not None
+
+    def is_npu_package_installed(self) -> bool:
+        """
+        Check if NPU-specific OGA packages are installed.
+        """
+        try:
+
+            installed_packages = [
+                dist.metadata["name"].lower()
+                for dist in importlib.metadata.distributions()
+            ]
+
+            # Check for NPU-specific packages
+            npu_packages = ["onnxruntime-genai-directml-ryzenai", "onnxruntime-vitisai"]
+
+            for package in npu_packages:
+                if package.lower() in installed_packages:
+                    return True
+            return False
+        except (ImportError, AttributeError):
+            return False
+
+    def _get_oga_version(self, device_type: str) -> str:
+        """
+        Get OGA version.
+        """
+        try:
+            # For NPU, try NPU-specific packages first
+            if device_type == "npu":
+                try:
+                    import onnxruntime_genai_directml_ryzenai as og
+
+                    return og.__version__
+                except ImportError:
+                    pass
+
+                try:
+                    import onnxruntime_vitisai as og
+
+                    return og.__version__
+                except ImportError:
+                    pass
+
+            # Fall back to general onnxruntime_genai
+            import onnxruntime_genai as og
+
+            return og.__version__
+        except (ImportError, AttributeError):
+            return "unknown"
+
+
+class LlamaCppDetector(BaseEngineDetector):
+    """
+    Detector for llama.cpp.
+    """
+
+    def detect_for_device(self, device_type: str) -> Optional[Dict]:
+        """
+        Detect llama.cpp availability for specific device.
+        """
+        try:
+            # Map device types to llama.cpp backends
+            device_backend_map = {
+                "cpu": "cpu",
+                "amd_igpu": "vulkan",
+                "amd_dgpu": "vulkan",
+            }
+
+            if device_type not in device_backend_map:
+                return None
+
+            backend = device_backend_map[device_type]
+            is_installed = self.is_installed()
+
+            # Check requirements based on backend
+            if backend == "vulkan":
+                vulkan_available = self._check_vulkan_support()
+                if not vulkan_available:
+                    return {"available": False, "error": "Vulkan not available"}
+
+                # Vulkan is available
+                if is_installed:
+                    result = {
+                        "available": True,
+                        "version": self._get_llamacpp_version(),
+                        "backend": backend,
+                    }
+                    return result
+                else:
+                    return {
+                        "available": False,
+                        "error": "llama.cpp binaries not installed",
+                    }
+            else:
+                # CPU backend
+                if is_installed:
+                    result = {
+                        "available": True,
+                        "version": self._get_llamacpp_version(),
+                        "backend": backend,
+                    }
+                    return result
+                else:
+                    return {
+                        "available": False,
+                        "error": "llama.cpp binaries not installed",
+                    }
+
+        except (ImportError, OSError, subprocess.SubprocessError) as e:
+            return {
+                "available": False,
+                "error": f"llama.cpp detection failed: {str(e)}",
+            }
+
+    def is_installed(self) -> bool:
+        """
+        Check if llama.cpp binaries are available.
+        """
+
+        # Check lemonade-managed binary locations
+        try:
+
+            # Check lemonade server directory
+            server_base_dir = os.path.join(
+                os.path.dirname(sys.executable), "llama_server"
+            )
+
+            if platform.system().lower() == "windows":
+                server_exe_path = os.path.join(server_base_dir, "llama-server.exe")
+            else:
+                # Check both build/bin and root directory locations
+                build_bin_path = os.path.join(
+                    server_base_dir, "build", "bin", "llama-server"
+                )
+                root_path = os.path.join(server_base_dir, "llama-server")
+                server_exe_path = (
+                    build_bin_path if os.path.exists(build_bin_path) else root_path
+                )
+
+            if os.path.exists(server_exe_path):
+                return True
+
+        except (ImportError, OSError):
+            pass
+
+        return False
+
+    def _check_vulkan_support(self) -> bool:
+        """
+        Check if Vulkan is available for GPU acceleration.
+        """
+        try:
+            # Run vulkaninfo to check Vulkan availability
+            result = subprocess.run(
+                ["vulkaninfo", "--summary"],
+                capture_output=True,
+                text=True,
+                timeout=10,
+                check=False,
+            )
+            return result.returncode == 0
+        except (
+            subprocess.TimeoutExpired,
+            FileNotFoundError,
+            subprocess.SubprocessError,
+        ):
+            try:
+                # Check for Vulkan DLL on Windows
+                vulkan_dll_paths = [
+                    "C:\\Windows\\System32\\vulkan-1.dll",
+                    "C:\\Windows\\SysWOW64\\vulkan-1.dll",
+                ]
+                # Check for Vulkan libraries on Linux
+                vulkan_lib_paths = [
+                    "/usr/lib/x86_64-linux-gnu/libvulkan.so.1",
+                    "/usr/lib/libvulkan.so.1",
+                    "/lib/x86_64-linux-gnu/libvulkan.so.1",
+                ]
+                return any(os.path.exists(path) for path in vulkan_dll_paths) or any(
+                    os.path.exists(path) for path in vulkan_lib_paths
+                )
+            except OSError:
+                return False
+
+    def _get_llamacpp_version(self) -> str:
+        """
+        Get llama.cpp version from lemonade's managed installation.
+        """
+        try:
+            server_base_dir = os.path.join(
+                os.path.dirname(sys.executable), "llama_server"
+            )
+            version_file = os.path.join(server_base_dir, "version.txt")
+
+            if os.path.exists(version_file):
+                with open(version_file, "r", encoding="utf-8") as f:
+                    version = f.read().strip()
+                    return version
+        except (ImportError, OSError):
+            pass
+
+        return "unknown"
+
+
+class TransformersDetector(BaseEngineDetector):
+    """
+    Detector for Transformers/PyTorch.
+    """
+
+    def detect_for_device(self, device_type: str) -> Optional[Dict]:
+        """
+        Detect Transformers availability for specific device.
+        """
+        if not self.is_installed():
+            return None
+
+        try:
+            import torch
+
+            if device_type == "cpu":
+                result = {
+                    "available": True,
+                    "version": transformers.__version__,
+                    "backend": "cpu",
+                }
+
+                # Add dependency versions in details
+                result["details"] = {
+                    "dependency_versions": {"torch": torch.__version__}
+                }
+
+                return result
+            else:
+                return None
+
+        except (ImportError, AttributeError) as e:
+            return {
+                "available": False,
+                "error": f"Transformers detection failed: {str(e)}",
+            }
+
+    def is_installed(self) -> bool:
+        """
+        Check if Transformers and PyTorch are installed.
+        """
+        return (
+            importlib.util.find_spec("transformers") is not None
+            and importlib.util.find_spec("torch") is not None
+        )
+
+
+def detect_inference_engines(device_type: str) -> Dict[str, Dict]:
+    """
+    Helper function to detect inference engines for a device type.
+
+    Args:
+        device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
+
+    Returns:
+        dict: Engine availability information.
+    """
+    detector = InferenceEngineDetector()
+    return detector.detect_engines_for_device(device_type)