lemonade-sdk 8.0.6__py3-none-any.whl → 8.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lemonade-sdk has been flagged as possibly problematic.
- lemonade/common/inference_engines.py +62 -77
- lemonade/common/network.py +18 -1
- lemonade/common/system_info.py +61 -44
- lemonade/tools/llamacpp/bench.py +3 -1
- lemonade/tools/llamacpp/load.py +13 -4
- lemonade/tools/llamacpp/utils.py +229 -61
- lemonade/tools/oga/load.py +239 -112
- lemonade/tools/oga/utils.py +19 -7
- lemonade/tools/server/llamacpp.py +30 -53
- lemonade/tools/server/serve.py +64 -123
- lemonade/tools/server/static/styles.css +208 -6
- lemonade/tools/server/static/webapp.html +510 -71
- lemonade/tools/server/tray.py +4 -2
- lemonade/tools/server/utils/thread.py +2 -4
- lemonade/version.py +1 -1
- lemonade_install/install.py +90 -86
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/METADATA +74 -24
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/RECORD +27 -27
- lemonade_server/cli.py +79 -26
- lemonade_server/model_manager.py +4 -3
- lemonade_server/pydantic_models.py +1 -4
- lemonade_server/server_models.json +60 -11
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.1.dist-info}/top_level.txt +0 -0
lemonade/common/inference_engines.py CHANGED

@@ -2,7 +2,6 @@ import os
 import sys
 import importlib.util
 import importlib.metadata
-import platform
 import subprocess
 from abc import ABC, abstractmethod
 from typing import Dict, Optional
@@ -19,7 +18,9 @@ class InferenceEngineDetector:
         self.llamacpp_detector = LlamaCppDetector()
         self.transformers_detector = TransformersDetector()
 
-    def detect_engines_for_device(self, device_type: str) -> Dict[str, Dict]:
+    def detect_engines_for_device(
+        self, device_type: str, device_name: str
+    ) -> Dict[str, Dict]:
         """
         Detect all available inference engines for a specific device type.
 
@@ -36,10 +37,19 @@ class InferenceEngineDetector:
         if oga_info:
             engines["oga"] = oga_info
 
-        # Detect llama.cpp availability
-        llamacpp_info = self.llamacpp_detector.detect_for_device(device_type)
+        # Detect llama.cpp vulkan availability
+        llamacpp_info = self.llamacpp_detector.detect_for_device(
+            device_type, device_name, "vulkan"
+        )
+        if llamacpp_info:
+            engines["llamacpp-vulkan"] = llamacpp_info
+
+        # Detect llama.cpp rocm availability
+        llamacpp_info = self.llamacpp_detector.detect_for_device(
+            device_type, device_name, "rocm"
+        )
         if llamacpp_info:
-            engines["llamacpp"] = llamacpp_info
+            engines["llamacpp-rocm"] = llamacpp_info
 
         # Detect Transformers availability
         transformers_info = self.transformers_detector.detect_for_device(device_type)
@@ -206,57 +216,40 @@ class LlamaCppDetector(BaseEngineDetector):
     Detector for llama.cpp.
     """
 
-    def detect_for_device(self, device_type: str) -> Optional[Dict]:
+    def detect_for_device(
+        self, device_type: str, device_name: str, backend: str
+    ) -> Optional[Dict]:
         """
         Detect llama.cpp availability for specific device.
         """
         try:
-            # Map device types to llama.cpp backends
-            device_backend_map = {
-                "cpu": "cpu",
-                "amd_igpu": "vulkan",
-                "amd_dgpu": "vulkan",
-            }
 
-            if device_type not in device_backend_map:
+            if device_type not in ["cpu", "amd_igpu", "amd_dgpu"]:
                 return None
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            }
-
-
-
-
-
-
-
-            # CPU backend
-            if is_installed:
-                result = {
-                    "available": True,
-                    "version": self._get_llamacpp_version(),
-                    "backend": backend,
-                }
-                return result
-            else:
-                return {
-                    "available": False,
-                    "error": "llama.cpp binaries not installed",
-                }
+            # Check if the device is supported by the backend
+            if device_type == "cpu":
+                device_supported = True
+            elif device_type == "amd_igpu" or device_type == "amd_dgpu":
+                if backend == "vulkan":
+                    device_supported = self._check_vulkan_support()
+                elif backend == "rocm":
+                    device_supported = self._check_rocm_support(device_name.lower())
+            if not device_supported:
+                return {"available": False, "error": f"{backend} not available"}
+
+            is_installed = self.is_installed(backend)
+            if not is_installed:
+                return {
+                    "available": False,
+                    "error": f"{backend} binaries not installed",
+                }
+
+            return {
+                "available": True,
+                "version": self._get_llamacpp_version(backend),
+                "backend": backend,
+            }
 
         except (ImportError, OSError, subprocess.SubprocessError) as e:
             return {
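
Note on the hunk above: detect_for_device is now invoked once per backend ("vulkan", then "rocm"), so availability reduces to a small device/backend matrix. A standalone sketch of that matrix, with the Vulkan/ROCm probes stubbed out as booleans (the function name is illustrative, not part of the package):

    def is_device_supported(device_type: str, backend: str,
                            vulkan_ok: bool, rocm_ok: bool) -> bool:
        # CPU inference needs no GPU backend support
        if device_type == "cpu":
            return True
        # AMD GPUs are served by either the Vulkan or the ROCm build
        if device_type in ("amd_igpu", "amd_dgpu"):
            if backend == "vulkan":
                return vulkan_ok
            if backend == "rocm":
                return rocm_ok
        return False

One difference worth noting: the sketch returns False for an unrecognized backend, whereas the code in the hunk would hit `device_supported` while unbound (an UnboundLocalError) if detect_for_device were ever called with a backend other than "vulkan" or "rocm"; the two call sites shown earlier pass only those two values.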
@@ -264,35 +257,17 @@ class LlamaCppDetector(BaseEngineDetector):
                 "error": f"llama.cpp detection failed: {str(e)}",
             }
 
-    def is_installed(self) -> bool:
+    def is_installed(self, backend: str) -> bool:
         """
-        Check if llama.cpp binaries are available.
+        Check if llama.cpp binaries are available for any backend.
         """
+        from lemonade.tools.llamacpp.utils import get_llama_server_exe_path
 
-        # Check lemonade-managed binary locations
         try:
-
-            # Check lemonade server directory
-            server_base_dir = os.path.join(
-                os.path.dirname(sys.executable), "llama_server"
-            )
-
-            if platform.system().lower() == "windows":
-                server_exe_path = os.path.join(server_base_dir, "llama-server.exe")
-            else:
-                # Check both build/bin and root directory locations
-                build_bin_path = os.path.join(
-                    server_base_dir, "build", "bin", "llama-server"
-                )
-                root_path = os.path.join(server_base_dir, "llama-server")
-                server_exe_path = (
-                    build_bin_path if os.path.exists(build_bin_path) else root_path
-                )
-
+            server_exe_path = get_llama_server_exe_path(backend)
             if os.path.exists(server_exe_path):
                 return True
-
-        except (ImportError, OSError):
+        except (ImportError, OSError, ValueError):
             pass
 
         return False
@@ -334,13 +309,22 @@ class LlamaCppDetector(BaseEngineDetector):
         except OSError:
             return False
 
-    def _get_llamacpp_version(self) -> str:
+    def _check_rocm_support(self, device_name: str) -> bool:
+        """
+        Check if ROCM is available for GPU acceleration.
+        """
+        from lemonade.tools.llamacpp.utils import identify_rocm_arch_from_name
+
+        return identify_rocm_arch_from_name(device_name) is not None
+
+    def _get_llamacpp_version(self, backend: str) -> str:
         """
-        Get llama.cpp version from lemonade's managed installation.
+        Get llama.cpp version from lemonade's managed installation for specific backend.
         """
         try:
+            # Use backend-specific path - same logic as get_llama_folder_path in utils.py
             server_base_dir = os.path.join(
-                os.path.dirname(sys.executable), "llama_server"
+                os.path.dirname(sys.executable), backend, "llama_server"
             )
             version_file = os.path.join(server_base_dir, "version.txt")
 
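
The version lookup above mirrors the per-backend directory layout the installer now uses: each backend keeps its own llama_server tree next to the Python executable (the diff's own comment says the same logic lives in get_llama_folder_path in utils.py). A small sketch of probing both trees; the loop itself is illustrative:

    import os
    import sys

    # Layout implied by _get_llamacpp_version:
    #   <python_dir>/vulkan/llama_server/version.txt
    #   <python_dir>/rocm/llama_server/version.txt
    for backend in ("vulkan", "rocm"):
        version_file = os.path.join(
            os.path.dirname(sys.executable), backend, "llama_server", "version.txt"
        )
        print(backend, "installed:", os.path.exists(version_file))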
@@ -401,15 +385,16 @@ class TransformersDetector(BaseEngineDetector):
         )
 
 
-def detect_inference_engines(device_type: str) -> Dict[str, Dict]:
+def detect_inference_engines(device_type: str, device_name: str) -> Dict[str, Dict]:
     """
     Helper function to detect inference engines for a device type.
 
     Args:
         device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
+        device_name: device name
 
     Returns:
         dict: Engine availability information.
     """
     detector = InferenceEngineDetector()
-    return detector.detect_engines_for_device(device_type)
+    return detector.detect_engines_for_device(device_type, device_name)
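
Taken together, this file splits the single "llamacpp" engine entry into backend-specific "llamacpp-vulkan" and "llamacpp-rocm" entries and threads the device name through detection, so ROCm support can be judged from the GPU's marketing name. A hypothetical call showing the new shape (the device name and field values are illustrative, not captured output):

    from lemonade.common.inference_engines import detect_inference_engines

    engines = detect_inference_engines("amd_dgpu", "AMD Radeon RX 7900 XTX")
    # Plausible result shape:
    # {
    #     "oga": {"available": True, ...},
    #     "llamacpp-vulkan": {"available": True, "version": "...", "backend": "vulkan"},
    #     "llamacpp-rocm": {"available": False, "error": "rocm binaries not installed"},
    #     "transformers": {"available": True, ...},
    # }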
lemonade/common/network.py CHANGED

@@ -1,7 +1,7 @@
 import os
 from typing import Optional
 import socket
-from huggingface_hub import model_info
+from huggingface_hub import model_info, snapshot_download
 
 
 def is_offline():
@@ -48,3 +48,20 @@ def get_base_model(checkpoint: str) -> Optional[str]:
     except Exception:  # pylint: disable=broad-except
         pass
     return None
+
+
+def custom_snapshot_download(repo_id, **kwargs):
+    """
+    Custom snapshot download with retry logic for Windows symlink privilege errors.
+    """
+    for attempt in range(2):
+        try:
+            return snapshot_download(repo_id=repo_id, **kwargs)
+        except OSError as e:
+            if (
+                hasattr(e, "winerror")
+                and e.winerror == 1314  # pylint: disable=no-member
+                and attempt < 1
+            ):
+                continue
+            raise
lemonade/common/system_info.py CHANGED

@@ -47,11 +47,10 @@ class SystemInfo(ABC):
         Returns:
             dict: Device information.
         """
-
         device_dict = {
             "cpu": self.get_cpu_device(),
-            "amd_igpu": self.get_amd_igpu_device(),
-            "amd_dgpu": self.get_amd_dgpu_devices(),
+            "amd_igpu": self.get_amd_igpu_device(include_inference_engines=True),
+            "amd_dgpu": self.get_amd_dgpu_devices(include_inference_engines=True),
             "npu": self.get_npu_device(),
         }
         return device_dict
@@ -66,7 +65,7 @@ class SystemInfo(ABC):
         """
 
     @abstractmethod
-    def get_amd_igpu_device(self) -> dict:
+    def get_amd_igpu_device(self, include_inference_engines: bool = False) -> dict:
         """
         Retrieves AMD integrated GPU device information.
 
@@ -75,7 +74,7 @@ class SystemInfo(ABC):
         """
 
     @abstractmethod
-    def get_amd_dgpu_devices(self) -> list:
+    def get_amd_dgpu_devices(self, include_inference_engines: bool = False) -> list:
         """
         Retrieves AMD discrete GPU device information.
 
@@ -143,8 +142,9 @@ class WindowsSystemInfo(SystemInfo):
             processors = self.connection.Win32_Processor()
             if processors:
                 processor = processors[0]
+                cpu_name = processor.Name.strip()
                 cpu_info = {
-                    "name": processor.Name.strip(),
+                    "name": cpu_name,
                     "cores": processor.NumberOfCores,
                     "threads": processor.NumberOfLogicalProcessors,
                     "max_clock_speed_mhz": processor.MaxClockSpeed,
@@ -152,7 +152,9 @@ class WindowsSystemInfo(SystemInfo):
                 }
 
                 # Add inference engine detection
-                cpu_info["inference_engines"] = self._detect_inference_engines("cpu")
+                cpu_info["inference_engines"] = self._detect_inference_engines(
+                    "cpu", cpu_name
+                )
                 return cpu_info
 
         except Exception as e:  # pylint: disable=broad-except
@@ -160,7 +162,7 @@ class WindowsSystemInfo(SystemInfo):
 
         return {"available": False, "error": "No CPU information found"}
 
-    def _detect_amd_gpus(self, gpu_type: str):
+    def _detect_amd_gpus(self, gpu_type: str, include_inference_engines: bool = False):
         """
         Shared AMD GPU detection logic for both integrated and discrete GPUs.
         Uses keyword-based classification for simplicity and reliability.
@@ -194,23 +196,25 @@ class WindowsSystemInfo(SystemInfo):
                         gpu_type == "discrete" and not is_integrated
                     ):
 
-                        driver_version = self.get_driver_version(
-                            "AMD-OpenCL User Mode Driver"
-                        )
-
                         device_type = "amd_igpu" if is_integrated else "amd_dgpu"
                         gpu_info = {
                             "name": controller.Name,
-                            "driver_version": (
-                                driver_version if driver_version else "Unknown"
-                            ),
                             "available": True,
                         }
 
-
-
-
+                        driver_version = self.get_driver_version(
+                            "AMD-OpenCL User Mode Driver"
+                        )
+                        gpu_info["driver_version"] = (
+                            driver_version if driver_version else "Unknown"
                         )
+
+                        if include_inference_engines:
+                            gpu_info["inference_engines"] = (
+                                self._detect_inference_engines(
+                                    device_type, controller.Name
+                                )
+                            )
                         gpu_devices.append(gpu_info)
 
         except Exception as e:  # pylint: disable=broad-except
@@ -219,32 +223,36 @@ class WindowsSystemInfo(SystemInfo):
 
         return gpu_devices
 
-    def get_amd_igpu_device(self) -> dict:
+    def get_amd_igpu_device(self, include_inference_engines: bool = False) -> dict:
         """
         Retrieves AMD integrated GPU device information using keyword-based classification.
 
         Returns:
             dict: AMD iGPU device information.
         """
-        igpu_devices = self._detect_amd_gpus("integrated")
+        igpu_devices = self._detect_amd_gpus(
+            "integrated", include_inference_engines=include_inference_engines
+        )
         return (
             igpu_devices[0]
             if igpu_devices
            else {"available": False, "error": "No AMD integrated GPU found"}
        )
 
-    def get_amd_dgpu_devices(self):
+    def get_amd_dgpu_devices(self, include_inference_engines: bool = False):
         """
         Retrieves AMD discrete GPU device information using keyword-based classification.
 
         Returns:
             list: List of AMD dGPU device information.
         """
-        dgpu_devices = self._detect_amd_gpus("discrete")
+        dgpu_devices = self._detect_amd_gpus(
+            "discrete", include_inference_engines=include_inference_engines
+        )
         return (
             dgpu_devices
             if dgpu_devices
-            else {"available": False, "error": "No AMD discrete GPU found"}
+            else [{"available": False, "error": "No AMD discrete GPU found"}]
         )
 
     def get_npu_device(self) -> dict:
@@ -267,7 +275,9 @@ class WindowsSystemInfo(SystemInfo):
                 }
 
                 # Add inference engine detection
-                npu_info["inference_engines"] = self._detect_inference_engines("npu")
+                npu_info["inference_engines"] = self._detect_inference_engines(
+                    "npu", "AMD NPU"
+                )
                 return npu_info
         except Exception as e:  # pylint: disable=broad-except
             return {"available": False, "error": f"NPU detection failed: {e}"}
@@ -438,12 +448,13 @@ class WindowsSystemInfo(SystemInfo):
         info_dict["Windows Power Setting"] = self.get_windows_power_setting()
         return info_dict
 
-    def _detect_inference_engines(self, device_type: str) -> dict:
+    def _detect_inference_engines(self, device_type: str, device_name: str) -> dict:
         """
         Detect available inference engines for a specific device type.
 
         Args:
             device_type: Device type ("cpu", "amd_igpu", "amd_dgpu", "npu")
+            device_name: Device name
 
         Returns:
             dict: Available inference engines and their information.
@@ -451,7 +462,7 @@ class WindowsSystemInfo(SystemInfo):
         try:
             from .inference_engines import detect_inference_engines
 
-            return detect_inference_engines(device_type)
+            return detect_inference_engines(device_type, device_name)
         except Exception as e:  # pylint: disable=broad-except
             return {"error": f"Inference engine detection failed: {str(e)}"}
 
@@ -467,13 +478,13 @@ class WSLSystemInfo(SystemInfo):
         """
         return {"available": False, "error": "Device detection not supported in WSL"}
 
-    def get_amd_igpu_device(self) -> dict:
+    def get_amd_igpu_device(self, include_inference_engines: bool = False) -> dict:
         """
         Retrieves AMD integrated GPU device information in WSL environment.
         """
         return {"available": False, "error": "GPU detection not supported in WSL"}
 
-    def get_amd_dgpu_devices(self) -> list:
+    def get_amd_dgpu_devices(self, include_inference_engines: bool = False) -> list:
         """
         Retrieves AMD discrete GPU device information in WSL environment.
         """
@@ -556,6 +567,7 @@ class LinuxSystemInfo(SystemInfo):
                         cpu_data["architecture"] = line.split(":")[1].strip()
 
             if "name" in cpu_data:
+                cpu_name = cpu_data.get("name", "Unknown")
                 cpu_info = {
                     "name": cpu_data.get("name", "Unknown"),
                     "cores": cpu_data.get("cores", "Unknown"),
@@ -565,14 +577,16 @@ class LinuxSystemInfo(SystemInfo):
                 }
 
                 # Add inference engine detection
-                cpu_info["inference_engines"] = self._detect_inference_engines("cpu")
+                cpu_info["inference_engines"] = self._detect_inference_engines(
+                    "cpu", cpu_name
+                )
                 return cpu_info
         except Exception as e:  # pylint: disable=broad-except
             return {"available": False, "error": f"CPU detection failed: {e}"}
 
         return {"available": False, "error": "No CPU information found"}
 
-    def _detect_amd_gpus(self, gpu_type: str):
+    def _detect_amd_gpus(self, gpu_type: str, include_inference_engines: bool = False):
         """
         Shared AMD GPU detection logic for both integrated and discrete GPUs.
         Uses keyword-based classification for simplicity and reliability.
@@ -611,11 +625,10 @@ class LinuxSystemInfo(SystemInfo):
                         "name": device_name,
                         "available": True,
                     }
-
-
-
-
-                        )
+                    if include_inference_engines:
+                        gpu_info["inference_engines"] = (
+                            self._detect_inference_engines(device_type, device_name)
+                        )
                     gpu_devices.append(gpu_info)
 
         except Exception as e:  # pylint: disable=broad-except
@@ -624,32 +637,36 @@ class LinuxSystemInfo(SystemInfo):
 
         return gpu_devices
 
-    def get_amd_igpu_device(self) -> dict:
+    def get_amd_igpu_device(self, include_inference_engines: bool = False) -> dict:
         """
         Retrieves AMD integrated GPU device information using keyword-based classification.
 
         Returns:
             dict: AMD iGPU device information.
         """
-        igpu_devices = self._detect_amd_gpus("integrated")
+        igpu_devices = self._detect_amd_gpus(
+            "integrated", include_inference_engines=include_inference_engines
+        )
         return (
             igpu_devices[0]
             if igpu_devices
             else {"available": False, "error": "No AMD integrated GPU found"}
         )
 
-    def get_amd_dgpu_devices(self):
+    def get_amd_dgpu_devices(self, include_inference_engines: bool = False) -> list:
         """
         Retrieves AMD discrete GPU device information using keyword-based classification.
 
         Returns:
             list: List of AMD dGPU device information.
         """
-        dgpu_devices = self._detect_amd_gpus("discrete")
+        dgpu_devices = self._detect_amd_gpus(
+            "discrete", include_inference_engines=include_inference_engines
+        )
         return (
             dgpu_devices
             if dgpu_devices
-            else {"available": False, "error": "No AMD discrete GPU found"}
+            else [{"available": False, "error": "No AMD discrete GPU found"}]
         )
 
     def get_npu_device(self) -> dict:
@@ -741,7 +758,7 @@ class LinuxSystemInfo(SystemInfo):
         info_dict["Physical Memory"] = self.get_physical_memory()
         return info_dict
 
-    def _detect_inference_engines(self, device_type: str) -> dict:
+    def _detect_inference_engines(self, device_type: str, device_name: str) -> dict:
         """
         Detect available inference engines for a specific device type.
 
@@ -752,7 +769,7 @@ class LinuxSystemInfo(SystemInfo):
             dict: Available inference engines and their information.
         """
         try:
-            return detect_inference_engines(device_type)
+            return detect_inference_engines(device_type, device_name)
         except Exception as e:  # pylint: disable=broad-except
             return {"error": f"Inference engine detection failed: {str(e)}"}
 
@@ -771,7 +788,7 @@ class UnsupportedOSSystemInfo(SystemInfo):
             "error": "Device detection not supported on this operating system",
         }
 
-    def get_amd_igpu_device(self) -> dict:
+    def get_amd_igpu_device(self, include_inference_engines: bool = False) -> dict:
         """
         Retrieves AMD integrated GPU device information for unsupported OS.
         """
@@ -780,7 +797,7 @@ class UnsupportedOSSystemInfo(SystemInfo):
             "error": "Device detection not supported on this operating system",
         }
 
-    def get_amd_dgpu_devices(self) -> list:
+    def get_amd_dgpu_devices(self, include_inference_engines: bool = False) -> list:
         """
         Retrieves AMD discrete GPU device information for unsupported OS.
         """
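
Two behavioral changes in this file beyond the include_inference_engines plumbing: engine detection now receives the device name (so ROCm support can be judged per GPU), and get_amd_dgpu_devices returns a one-element list on failure instead of a bare dict, giving callers a consistent type. A hypothetical get_device_dict() result illustrating the shape (device names and values are invented for illustration):

    device_dict = {
        "cpu": {
            "name": "Example CPU", "cores": 8,
            "inference_engines": {"llamacpp-vulkan": {"available": True}},
        },
        "amd_igpu": {"name": "Example iGPU", "available": True},
        # Failure case is now list-typed, matching the success case:
        "amd_dgpu": [{"available": False, "error": "No AMD discrete GPU found"}],
        "npu": {"name": "AMD NPU", "available": True},
    }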
lemonade/tools/llamacpp/bench.py CHANGED

@@ -68,7 +68,9 @@ class LlamaCppBench(Bench):
         # and error handling
         model.time_to_first_token = None
         model.tokens_per_second = None
-        raw_output, stderr = model.generate(prompt, max_new_tokens=output_tokens)
+        raw_output, stderr = model.generate(
+            prompt, max_new_tokens=output_tokens, return_raw=True
+        )
 
         if model.time_to_first_token is None or model.tokens_per_second is None:
             error_msg = (
lemonade/tools/llamacpp/load.py CHANGED

@@ -65,6 +65,13 @@ class LoadLlamaCpp(FirstTool):
             help="Set this flag to indicate the model is a reasoning model",
         )
 
+        parser.add_argument(
+            "--backend",
+            choices=["vulkan", "rocm"],
+            default="vulkan",
+            help="Backend to use for llama.cpp (default: vulkan)",
+        )
+
         return parser
 
     def run(
@@ -76,6 +83,7 @@ class LoadLlamaCpp(FirstTool):
         threads: int = 1,
         output_tokens: int = 512,
         reasoning: bool = False,
+        backend: str = "vulkan",
     ) -> State:
         """
         Load a llama.cpp model
@@ -93,8 +101,7 @@ class LoadLlamaCpp(FirstTool):
             LlamaCppAdapter,
         )
 
-
-        install_llamacpp()
+        install_llamacpp(backend)
 
         # Check if input is a local folder containing a .GGUF model
         if os.path.isdir(input):
@@ -153,7 +160,7 @@ class LoadLlamaCpp(FirstTool):
         full_model_path = snapshot_files["variant"]
         model_to_use = os.path.basename(full_model_path)
 
-        llama_cli_exe_path = get_llama_cli_exe_path()
+        llama_cli_exe_path = get_llama_cli_exe_path(backend)
         printing.log_info(f"Using llama_cli for GGUF model: {llama_cli_exe_path}")
 
         # Get the directory containing the executable for shared libraries
@@ -175,7 +182,9 @@ class LoadLlamaCpp(FirstTool):
 
         # Save initial stats
         state.save_stat(Keys.DEVICE, device)
-        state.save_stat(Keys.LLAMA_CLI_VERSION_INFO, get_llama_installed_version())
+        state.save_stat(
+            Keys.LLAMA_CLI_VERSION_INFO, get_llama_installed_version(backend)
+        )
 
         status.add_to_state(state=state, name=input, model=model_to_use)
         return state