lemonade-sdk 8.1.9__py3-none-any.whl → 8.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/common/inference_engines.py +13 -4
- lemonade/common/system_info.py +570 -1
- lemonade/tools/llamacpp/utils.py +4 -3
- lemonade/tools/server/serve.py +32 -0
- lemonade/tools/server/tray.py +64 -0
- lemonade/version.py +1 -1
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.10.dist-info}/METADATA +1 -1
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.10.dist-info}/RECORD +15 -15
- lemonade_server/cli.py +2 -0
- lemonade_server/settings.py +39 -39
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.10.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.10.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.10.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.10.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.10.dist-info}/top_level.txt +0 -0
|
@@ -24,7 +24,7 @@ class InferenceEngineDetector:
|
|
|
24
24
|
Detect all available inference engines for a specific device type.
|
|
25
25
|
|
|
26
26
|
Args:
|
|
27
|
-
device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
|
|
27
|
+
device_type: "cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu", or "npu"
|
|
28
28
|
|
|
29
29
|
Returns:
|
|
30
30
|
dict: Engine availability information
|
|
@@ -223,17 +223,26 @@ class LlamaCppDetector(BaseEngineDetector):
|
|
|
223
223
|
"""
|
|
224
224
|
try:
|
|
225
225
|
|
|
226
|
-
if device_type not in ["cpu", "amd_igpu", "amd_dgpu"]:
|
|
226
|
+
if device_type not in ["cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu"]:
|
|
227
227
|
return None
|
|
228
228
|
|
|
229
229
|
# Check if the device is supported by the backend
|
|
230
230
|
if device_type == "cpu":
|
|
231
231
|
device_supported = True
|
|
232
|
-
elif device_type
|
|
232
|
+
elif device_type in ["amd_igpu", "amd_dgpu"]:
|
|
233
233
|
if backend == "vulkan":
|
|
234
234
|
device_supported = self._check_vulkan_support()
|
|
235
235
|
elif backend == "rocm":
|
|
236
236
|
device_supported = self._check_rocm_support(device_name.lower())
|
|
237
|
+
else:
|
|
238
|
+
device_supported = False
|
|
239
|
+
elif device_type == "nvidia_dgpu":
|
|
240
|
+
if backend == "vulkan":
|
|
241
|
+
device_supported = self._check_vulkan_support()
|
|
242
|
+
else:
|
|
243
|
+
device_supported = False
|
|
244
|
+
else:
|
|
245
|
+
device_supported = False
|
|
237
246
|
if not device_supported:
|
|
238
247
|
return {"available": False, "error": f"{backend} not available"}
|
|
239
248
|
|
|
@@ -390,7 +399,7 @@ def detect_inference_engines(device_type: str, device_name: str) -> Dict[str, Di
|
|
|
390
399
|
Helper function to detect inference engines for a device type.
|
|
391
400
|
|
|
392
401
|
Args:
|
|
393
|
-
device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
|
|
402
|
+
device_type: "cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu", or "npu"
|
|
394
403
|
device_name: device name
|
|
395
404
|
|
|
396
405
|
Returns:
|
lemonade/common/system_info.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
import importlib.metadata
|
|
3
|
+
import logging
|
|
3
4
|
import platform
|
|
4
5
|
import re
|
|
5
6
|
import subprocess
|
|
6
7
|
import ctypes
|
|
8
|
+
import glob
|
|
7
9
|
from .inference_engines import detect_inference_engines
|
|
8
10
|
|
|
9
11
|
# AMD GPU classification keywords - shared across all OS implementations
|
|
@@ -19,6 +21,28 @@ AMD_DISCRETE_GPU_KEYWORDS = [
|
|
|
19
21
|
"fury",
|
|
20
22
|
]
|
|
21
23
|
|
|
24
|
+
# NVIDIA GPU classification keywords - shared across all OS implementations
|
|
25
|
+
# NVIDIA GPUs are typically discrete by default, but we include keywords for clarity
|
|
26
|
+
NVIDIA_DISCRETE_GPU_KEYWORDS = [
|
|
27
|
+
"geforce",
|
|
28
|
+
"rtx",
|
|
29
|
+
"gtx",
|
|
30
|
+
"quadro",
|
|
31
|
+
"tesla",
|
|
32
|
+
"titan",
|
|
33
|
+
"a100",
|
|
34
|
+
"a40",
|
|
35
|
+
"a30",
|
|
36
|
+
"a10",
|
|
37
|
+
"a6000",
|
|
38
|
+
"a5000",
|
|
39
|
+
"a4000",
|
|
40
|
+
"a2000",
|
|
41
|
+
"t1000",
|
|
42
|
+
"t600",
|
|
43
|
+
"t400",
|
|
44
|
+
]
|
|
45
|
+
|
|
22
46
|
|
|
23
47
|
class SystemInfo(ABC):
|
|
24
48
|
"""
|
|
@@ -51,6 +75,7 @@ class SystemInfo(ABC):
|
|
|
51
75
|
"cpu": self.get_cpu_device(),
|
|
52
76
|
"amd_igpu": self.get_amd_igpu_device(include_inference_engines=True),
|
|
53
77
|
"amd_dgpu": self.get_amd_dgpu_devices(include_inference_engines=True),
|
|
78
|
+
"nvidia_dgpu": self.get_nvidia_dgpu_devices(include_inference_engines=True),
|
|
54
79
|
"npu": self.get_npu_device(),
|
|
55
80
|
}
|
|
56
81
|
return device_dict
|
|
@@ -82,6 +107,15 @@ class SystemInfo(ABC):
|
|
|
82
107
|
list: List of AMD dGPU device information.
|
|
83
108
|
"""
|
|
84
109
|
|
|
110
|
+
@abstractmethod
|
|
111
|
+
def get_nvidia_dgpu_devices(self, include_inference_engines: bool = False) -> list:
|
|
112
|
+
"""
|
|
113
|
+
Retrieves NVIDIA discrete GPU device information.
|
|
114
|
+
|
|
115
|
+
Returns:
|
|
116
|
+
list: List of NVIDIA dGPU device information.
|
|
117
|
+
"""
|
|
118
|
+
|
|
85
119
|
@abstractmethod
|
|
86
120
|
def get_npu_device(self) -> dict:
|
|
87
121
|
"""
|
|
@@ -173,34 +207,56 @@ class WindowsSystemInfo(SystemInfo):
|
|
|
173
207
|
Returns:
|
|
174
208
|
list: List of detected GPU info dictionaries
|
|
175
209
|
"""
|
|
210
|
+
logging.debug(f"Starting AMD GPU detection for type: {gpu_type}")
|
|
176
211
|
gpu_devices = []
|
|
177
212
|
try:
|
|
178
213
|
video_controllers = self.connection.Win32_VideoController()
|
|
179
|
-
|
|
214
|
+
logging.debug(f"Found {len(video_controllers)} video controllers")
|
|
215
|
+
|
|
216
|
+
for i, controller in enumerate(video_controllers):
|
|
217
|
+
logging.debug(
|
|
218
|
+
f"Controller {i}: Name='{controller.Name}', "
|
|
219
|
+
f"PNPDeviceID='{getattr(controller, 'PNPDeviceID', 'N/A')}'"
|
|
220
|
+
)
|
|
221
|
+
|
|
180
222
|
if (
|
|
181
223
|
controller.Name
|
|
182
224
|
and "AMD" in controller.Name
|
|
183
225
|
and "Radeon" in controller.Name
|
|
184
226
|
):
|
|
227
|
+
logging.debug(f"Found AMD Radeon GPU: {controller.Name}")
|
|
185
228
|
|
|
186
229
|
name_lower = controller.Name.lower()
|
|
230
|
+
logging.debug(f"GPU name (lowercase): {name_lower}")
|
|
187
231
|
|
|
188
232
|
# Keyword-based classification - simple and reliable
|
|
233
|
+
matching_keywords = [
|
|
234
|
+
kw for kw in AMD_DISCRETE_GPU_KEYWORDS if kw in name_lower
|
|
235
|
+
]
|
|
189
236
|
is_discrete_by_name = any(
|
|
190
237
|
kw in name_lower for kw in AMD_DISCRETE_GPU_KEYWORDS
|
|
191
238
|
)
|
|
192
239
|
is_integrated = not is_discrete_by_name
|
|
193
240
|
|
|
241
|
+
logging.debug(f"Matching discrete keywords: {matching_keywords}")
|
|
242
|
+
logging.debug(
|
|
243
|
+
f"Classified as discrete: {not is_integrated}, integrated: {is_integrated}"
|
|
244
|
+
)
|
|
245
|
+
|
|
194
246
|
# Filter based on requested type
|
|
195
247
|
if (gpu_type == "integrated" and is_integrated) or (
|
|
196
248
|
gpu_type == "discrete" and not is_integrated
|
|
197
249
|
):
|
|
250
|
+
logging.debug(
|
|
251
|
+
f"GPU matches requested type '{gpu_type}', processing..."
|
|
252
|
+
)
|
|
198
253
|
|
|
199
254
|
device_type = "amd_igpu" if is_integrated else "amd_dgpu"
|
|
200
255
|
gpu_info = {
|
|
201
256
|
"name": controller.Name,
|
|
202
257
|
"available": True,
|
|
203
258
|
}
|
|
259
|
+
logging.debug(f"Created GPU info for {device_type}: {gpu_info}")
|
|
204
260
|
|
|
205
261
|
driver_version = self.get_driver_version(
|
|
206
262
|
"AMD-OpenCL User Mode Driver"
|
|
@@ -208,6 +264,21 @@ class WindowsSystemInfo(SystemInfo):
|
|
|
208
264
|
gpu_info["driver_version"] = (
|
|
209
265
|
driver_version if driver_version else "Unknown"
|
|
210
266
|
)
|
|
267
|
+
logging.debug(f"Driver version: {gpu_info['driver_version']}")
|
|
268
|
+
|
|
269
|
+
# Get VRAM information for discrete GPUs
|
|
270
|
+
if not is_integrated: # Only add VRAM for discrete GPUs
|
|
271
|
+
# Try dxdiag first (most reliable for dedicated memory)
|
|
272
|
+
vram_gb = self._get_gpu_vram_dxdiag_simple(controller.Name)
|
|
273
|
+
|
|
274
|
+
# Fallback to WMI if dxdiag fails
|
|
275
|
+
if vram_gb == 0.0:
|
|
276
|
+
vram_gb = self._get_gpu_vram_wmi(controller)
|
|
277
|
+
|
|
278
|
+
if vram_gb > 0.0:
|
|
279
|
+
gpu_info["vram_gb"] = vram_gb
|
|
280
|
+
else:
|
|
281
|
+
gpu_info["vram_gb"] = "Unknown"
|
|
211
282
|
|
|
212
283
|
if include_inference_engines:
|
|
213
284
|
gpu_info["inference_engines"] = (
|
|
@@ -216,11 +287,26 @@ class WindowsSystemInfo(SystemInfo):
|
|
|
216
287
|
)
|
|
217
288
|
)
|
|
218
289
|
gpu_devices.append(gpu_info)
|
|
290
|
+
logging.debug(f"Added GPU to devices list: {gpu_info}")
|
|
291
|
+
else:
|
|
292
|
+
logging.debug(
|
|
293
|
+
f"GPU does not match requested type '{gpu_type}', skipping"
|
|
294
|
+
)
|
|
295
|
+
continue
|
|
296
|
+
else:
|
|
297
|
+
logging.debug(
|
|
298
|
+
f"Skipping non-AMD/non-Radeon controller: {controller.Name}"
|
|
299
|
+
)
|
|
219
300
|
|
|
220
301
|
except Exception as e: # pylint: disable=broad-except
|
|
221
302
|
error_msg = f"AMD {gpu_type} GPU detection failed: {e}"
|
|
303
|
+
logging.debug(f"Exception in AMD GPU detection: {e}")
|
|
222
304
|
return [{"available": False, "error": error_msg}]
|
|
223
305
|
|
|
306
|
+
logging.debug(
|
|
307
|
+
f"AMD GPU detection completed. Found {len(gpu_devices)} {gpu_type} GPUs: "
|
|
308
|
+
f"{[gpu.get('name', 'Unknown') for gpu in gpu_devices]}"
|
|
309
|
+
)
|
|
224
310
|
return gpu_devices
|
|
225
311
|
|
|
226
312
|
def get_amd_igpu_device(self, include_inference_engines: bool = False) -> dict:
|
|
@@ -255,6 +341,67 @@ class WindowsSystemInfo(SystemInfo):
|
|
|
255
341
|
else [{"available": False, "error": "No AMD discrete GPU found"}]
|
|
256
342
|
)
|
|
257
343
|
|
|
344
|
+
def get_nvidia_dgpu_devices(self, include_inference_engines: bool = False) -> list:
|
|
345
|
+
"""
|
|
346
|
+
Retrieves NVIDIA discrete GPU device information using WMI.
|
|
347
|
+
|
|
348
|
+
Returns:
|
|
349
|
+
list: List of NVIDIA dGPU device information.
|
|
350
|
+
"""
|
|
351
|
+
gpu_devices = []
|
|
352
|
+
try:
|
|
353
|
+
video_controllers = self.connection.Win32_VideoController()
|
|
354
|
+
for controller in video_controllers:
|
|
355
|
+
if controller.Name and "NVIDIA" in controller.Name.upper():
|
|
356
|
+
name_lower = controller.Name.lower()
|
|
357
|
+
|
|
358
|
+
# Most NVIDIA GPUs are discrete, but we can check keywords for confirmation
|
|
359
|
+
is_discrete = (
|
|
360
|
+
any(kw in name_lower for kw in NVIDIA_DISCRETE_GPU_KEYWORDS)
|
|
361
|
+
or "nvidia" in name_lower
|
|
362
|
+
) # Default to discrete for NVIDIA
|
|
363
|
+
|
|
364
|
+
if is_discrete:
|
|
365
|
+
gpu_info = {
|
|
366
|
+
"name": controller.Name,
|
|
367
|
+
"available": True,
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
# Try to get NVIDIA driver version using multiple methods
|
|
371
|
+
driver_version = self._get_nvidia_driver_version_windows()
|
|
372
|
+
gpu_info["driver_version"] = (
|
|
373
|
+
driver_version if driver_version else "Unknown"
|
|
374
|
+
)
|
|
375
|
+
|
|
376
|
+
# Get VRAM information
|
|
377
|
+
vram_gb = self._get_gpu_vram_wmi(controller)
|
|
378
|
+
if vram_gb == 0.0:
|
|
379
|
+
# Fallback to nvidia-smi
|
|
380
|
+
vram_gb = self._get_nvidia_vram_smi()
|
|
381
|
+
|
|
382
|
+
if vram_gb > 0.0:
|
|
383
|
+
gpu_info["vram_gb"] = vram_gb
|
|
384
|
+
else:
|
|
385
|
+
gpu_info["vram_gb"] = "Unknown"
|
|
386
|
+
|
|
387
|
+
if include_inference_engines:
|
|
388
|
+
gpu_info["inference_engines"] = (
|
|
389
|
+
self._detect_inference_engines(
|
|
390
|
+
"nvidia_dgpu", controller.Name
|
|
391
|
+
)
|
|
392
|
+
)
|
|
393
|
+
gpu_devices.append(gpu_info)
|
|
394
|
+
|
|
395
|
+
except Exception as e: # pylint: disable=broad-except
|
|
396
|
+
error_msg = f"NVIDIA discrete GPU detection failed: {e}"
|
|
397
|
+
return [{"available": False, "error": error_msg}]
|
|
398
|
+
|
|
399
|
+
return (
|
|
400
|
+
gpu_devices
|
|
401
|
+
if gpu_devices
|
|
402
|
+
else [{"available": False, "error": "No NVIDIA discrete GPU found"}]
|
|
403
|
+
)
|
|
404
|
+
|
|
258
405
|
def get_npu_device(self) -> dict:
|
|
259
406
|
"""
|
|
260
407
|
Retrieves NPU device information using existing methods.
|
|
@@ -374,6 +521,169 @@ class WindowsSystemInfo(SystemInfo):
|
|
|
374
521
|
return drivers[0].DriverVersion
|
|
375
522
|
return ""
|
|
376
523
|
|
|
524
|
+
def _get_gpu_vram_wmi(self, controller) -> float:
|
|
525
|
+
"""
|
|
526
|
+
Get GPU VRAM from WMI VideoController.
|
|
527
|
+
|
|
528
|
+
Args:
|
|
529
|
+
controller: WMI Win32_VideoController object
|
|
530
|
+
|
|
531
|
+
Returns:
|
|
532
|
+
float: VRAM in GB, or 0.0 if detection fails
|
|
533
|
+
"""
|
|
534
|
+
try:
|
|
535
|
+
if hasattr(controller, "AdapterRAM"):
|
|
536
|
+
adapter_ram = controller.AdapterRAM
|
|
537
|
+
if adapter_ram and adapter_ram > 0:
|
|
538
|
+
# AdapterRAM is in bytes, convert to GB
|
|
539
|
+
vram_bytes = int(adapter_ram)
|
|
540
|
+
vram_gb = round(vram_bytes / (1024**3), 1)
|
|
541
|
+
return vram_gb
|
|
542
|
+
except (ValueError, AttributeError):
|
|
543
|
+
pass
|
|
544
|
+
return 0.0
|
|
545
|
+
|
|
546
|
+
def _get_gpu_vram_dxdiag_simple(self, gpu_name: str) -> float:
|
|
547
|
+
"""
|
|
548
|
+
Get GPU VRAM using dxdiag, looking specifically for dedicated memory.
|
|
549
|
+
|
|
550
|
+
Args:
|
|
551
|
+
gpu_name: Name of the GPU to look for
|
|
552
|
+
|
|
553
|
+
Returns:
|
|
554
|
+
float: VRAM in GB, or 0.0 if detection fails
|
|
555
|
+
"""
|
|
556
|
+
try:
|
|
557
|
+
import tempfile
|
|
558
|
+
import os
|
|
559
|
+
|
|
560
|
+
with tempfile.NamedTemporaryFile(
|
|
561
|
+
mode="w+", suffix=".txt", delete=False
|
|
562
|
+
) as temp_file:
|
|
563
|
+
temp_path = temp_file.name
|
|
564
|
+
|
|
565
|
+
try:
|
|
566
|
+
subprocess.run(
|
|
567
|
+
["dxdiag", "/t", temp_path],
|
|
568
|
+
check=True,
|
|
569
|
+
timeout=30,
|
|
570
|
+
capture_output=True,
|
|
571
|
+
)
|
|
572
|
+
|
|
573
|
+
with open(temp_path, "r", encoding="utf-8", errors="ignore") as f:
|
|
574
|
+
dxdiag_output = f.read()
|
|
575
|
+
|
|
576
|
+
lines = dxdiag_output.split("\n")
|
|
577
|
+
found_gpu = False
|
|
578
|
+
|
|
579
|
+
for line in lines:
|
|
580
|
+
line = line.strip()
|
|
581
|
+
|
|
582
|
+
# Check if this is our GPU
|
|
583
|
+
if "Card name:" in line and gpu_name.lower() in line.lower():
|
|
584
|
+
found_gpu = True
|
|
585
|
+
continue
|
|
586
|
+
|
|
587
|
+
# Look for dedicated memory line
|
|
588
|
+
if found_gpu and "Dedicated Memory:" in line:
|
|
589
|
+
memory_match = re.search(
|
|
590
|
+
r"(\d+(?:\.\d+)?)\s*MB", line, re.IGNORECASE
|
|
591
|
+
)
|
|
592
|
+
if memory_match:
|
|
593
|
+
vram_mb = float(memory_match.group(1))
|
|
594
|
+
vram_gb = round(vram_mb / 1024, 1)
|
|
595
|
+
return vram_gb
|
|
596
|
+
|
|
597
|
+
# Reset if we hit another display device
|
|
598
|
+
if "Card name:" in line and gpu_name.lower() not in line.lower():
|
|
599
|
+
found_gpu = False
|
|
600
|
+
|
|
601
|
+
finally:
|
|
602
|
+
try:
|
|
603
|
+
os.unlink(temp_path)
|
|
604
|
+
except Exception: # pylint: disable=broad-except
|
|
605
|
+
pass
|
|
606
|
+
|
|
607
|
+
except Exception: # pylint: disable=broad-except
|
|
608
|
+
pass
|
|
609
|
+
|
|
610
|
+
return 0.0
|
|
611
|
+
|
|
612
|
+
def _get_nvidia_driver_version_windows(self) -> str:
|
|
613
|
+
"""
|
|
614
|
+
Get NVIDIA driver version on Windows using nvidia-smi and WMI fallback.
|
|
615
|
+
|
|
616
|
+
Returns:
|
|
617
|
+
str: Driver version, or empty string if detection fails
|
|
618
|
+
"""
|
|
619
|
+
# Primary: Try nvidia-smi command
|
|
620
|
+
try:
|
|
621
|
+
output = (
|
|
622
|
+
subprocess.check_output(
|
|
623
|
+
[
|
|
624
|
+
"nvidia-smi",
|
|
625
|
+
"--query-gpu=driver_version",
|
|
626
|
+
"--format=csv,noheader,nounits",
|
|
627
|
+
],
|
|
628
|
+
stderr=subprocess.DEVNULL,
|
|
629
|
+
)
|
|
630
|
+
.decode()
|
|
631
|
+
.strip()
|
|
632
|
+
)
|
|
633
|
+
if output and output != "N/A":
|
|
634
|
+
return output.split("\n")[0]
|
|
635
|
+
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
636
|
+
pass
|
|
637
|
+
|
|
638
|
+
# Fallback: Try WMI Win32_PnPSignedDriver with NVIDIA patterns
|
|
639
|
+
try:
|
|
640
|
+
nvidia_patterns = [
|
|
641
|
+
"NVIDIA GeForce",
|
|
642
|
+
"NVIDIA RTX",
|
|
643
|
+
"NVIDIA GTX",
|
|
644
|
+
"NVIDIA Quadro",
|
|
645
|
+
]
|
|
646
|
+
all_drivers = self.connection.Win32_PnPSignedDriver()
|
|
647
|
+
for driver in all_drivers:
|
|
648
|
+
if driver.DeviceName and any(
|
|
649
|
+
pattern in driver.DeviceName for pattern in nvidia_patterns
|
|
650
|
+
):
|
|
651
|
+
if driver.DriverVersion:
|
|
652
|
+
return driver.DriverVersion
|
|
653
|
+
except Exception: # pylint: disable=broad-except
|
|
654
|
+
pass
|
|
655
|
+
|
|
656
|
+
return ""
|
|
657
|
+
|
|
658
|
+
def _get_nvidia_vram_smi(self) -> float:
|
|
659
|
+
"""
|
|
660
|
+
Get NVIDIA GPU VRAM using nvidia-smi command.
|
|
661
|
+
|
|
662
|
+
Returns:
|
|
663
|
+
float: VRAM in GB, or 0.0 if detection fails
|
|
664
|
+
"""
|
|
665
|
+
try:
|
|
666
|
+
output = (
|
|
667
|
+
subprocess.check_output(
|
|
668
|
+
[
|
|
669
|
+
"nvidia-smi",
|
|
670
|
+
"--query-gpu=memory.total",
|
|
671
|
+
"--format=csv,noheader,nounits",
|
|
672
|
+
],
|
|
673
|
+
stderr=subprocess.DEVNULL,
|
|
674
|
+
)
|
|
675
|
+
.decode()
|
|
676
|
+
.strip()
|
|
677
|
+
)
|
|
678
|
+
|
|
679
|
+
# nvidia-smi returns memory in MB
|
|
680
|
+
vram_mb = int(output.split("\n")[0])
|
|
681
|
+
vram_gb = round(vram_mb / 1024, 1)
|
|
682
|
+
return vram_gb
|
|
683
|
+
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
|
684
|
+
pass
|
|
685
|
+
return 0.0
|
|
686
|
+
|
|
377
687
|
@staticmethod
|
|
378
688
|
def get_npu_power_mode() -> str:
|
|
379
689
|
"""
|
|
@@ -490,6 +800,14 @@ class WSLSystemInfo(SystemInfo):
|
|
|
490
800
|
"""
|
|
491
801
|
return []
|
|
492
802
|
|
|
803
|
+
def get_nvidia_dgpu_devices(self, include_inference_engines: bool = False) -> list:
|
|
804
|
+
"""
|
|
805
|
+
Retrieves NVIDIA discrete GPU device information in WSL environment.
|
|
806
|
+
"""
|
|
807
|
+
return [
|
|
808
|
+
{"available": False, "error": "NVIDIA GPU detection not supported in WSL"}
|
|
809
|
+
]
|
|
810
|
+
|
|
493
811
|
def get_npu_device(self) -> dict:
|
|
494
812
|
"""
|
|
495
813
|
Retrieves NPU device information in WSL environment.
|
|
@@ -625,6 +943,20 @@ class LinuxSystemInfo(SystemInfo):
|
|
|
625
943
|
"name": device_name,
|
|
626
944
|
"available": True,
|
|
627
945
|
}
|
|
946
|
+
|
|
947
|
+
# Get VRAM information for discrete GPUs
|
|
948
|
+
if not is_integrated: # Only add VRAM for discrete GPUs
|
|
949
|
+
vram_gb = self._get_amd_vram_rocm_smi_linux()
|
|
950
|
+
if vram_gb == 0.0:
|
|
951
|
+
# Fallback to sysfs - extract PCI ID from lspci line
|
|
952
|
+
pci_id = line.split()[0] if line else ""
|
|
953
|
+
vram_gb = self._get_amd_vram_sysfs(pci_id)
|
|
954
|
+
|
|
955
|
+
if vram_gb > 0.0:
|
|
956
|
+
gpu_info["vram_gb"] = vram_gb
|
|
957
|
+
else:
|
|
958
|
+
gpu_info["vram_gb"] = "Unknown"
|
|
959
|
+
|
|
628
960
|
if include_inference_engines:
|
|
629
961
|
gpu_info["inference_engines"] = (
|
|
630
962
|
self._detect_inference_engines(device_type, device_name)
|
|
@@ -669,6 +1001,66 @@ class LinuxSystemInfo(SystemInfo):
|
|
|
669
1001
|
else [{"available": False, "error": "No AMD discrete GPU found"}]
|
|
670
1002
|
)
|
|
671
1003
|
|
|
1004
|
+
def get_nvidia_dgpu_devices(self, include_inference_engines: bool = False) -> list:
|
|
1005
|
+
"""
|
|
1006
|
+
Retrieves NVIDIA discrete GPU device information using lspci.
|
|
1007
|
+
|
|
1008
|
+
Returns:
|
|
1009
|
+
list: List of NVIDIA dGPU device information.
|
|
1010
|
+
"""
|
|
1011
|
+
gpu_devices = []
|
|
1012
|
+
try:
|
|
1013
|
+
lspci_output = subprocess.check_output(
|
|
1014
|
+
"lspci | grep -i 'vga\\|3d\\|display'", shell=True
|
|
1015
|
+
).decode()
|
|
1016
|
+
|
|
1017
|
+
for line in lspci_output.split("\n"):
|
|
1018
|
+
if line.strip() and "NVIDIA" in line.upper():
|
|
1019
|
+
name_lower = line.lower()
|
|
1020
|
+
|
|
1021
|
+
# Most NVIDIA GPUs are discrete, check keywords for confirmation
|
|
1022
|
+
is_discrete = (
|
|
1023
|
+
any(kw in name_lower for kw in NVIDIA_DISCRETE_GPU_KEYWORDS)
|
|
1024
|
+
or "nvidia" in name_lower
|
|
1025
|
+
) # Default to discrete for NVIDIA
|
|
1026
|
+
|
|
1027
|
+
if is_discrete:
|
|
1028
|
+
device_name = line.split(": ")[1] if ": " in line else line
|
|
1029
|
+
|
|
1030
|
+
gpu_info = {
|
|
1031
|
+
"name": device_name,
|
|
1032
|
+
"available": True,
|
|
1033
|
+
}
|
|
1034
|
+
|
|
1035
|
+
# Try to get NVIDIA driver version using multiple methods
|
|
1036
|
+
driver_version = self._get_nvidia_driver_version_linux()
|
|
1037
|
+
gpu_info["driver_version"] = (
|
|
1038
|
+
driver_version if driver_version else "Unknown"
|
|
1039
|
+
)
|
|
1040
|
+
|
|
1041
|
+
# Get VRAM information
|
|
1042
|
+
vram_gb = self._get_nvidia_vram_smi_linux()
|
|
1043
|
+
if vram_gb > 0.0:
|
|
1044
|
+
gpu_info["vram_gb"] = vram_gb
|
|
1045
|
+
|
|
1046
|
+
if include_inference_engines:
|
|
1047
|
+
gpu_info["inference_engines"] = (
|
|
1048
|
+
self._detect_inference_engines(
|
|
1049
|
+
"nvidia_dgpu", device_name
|
|
1050
|
+
)
|
|
1051
|
+
)
|
|
1052
|
+
gpu_devices.append(gpu_info)
|
|
1053
|
+
|
|
1054
|
+
except Exception as e: # pylint: disable=broad-except
|
|
1055
|
+
error_msg = f"NVIDIA discrete GPU detection failed: {e}"
|
|
1056
|
+
return [{"available": False, "error": error_msg}]
|
|
1057
|
+
|
|
1058
|
+
return (
|
|
1059
|
+
gpu_devices
|
|
1060
|
+
if gpu_devices
|
|
1061
|
+
else [{"available": False, "error": "No NVIDIA discrete GPU found"}]
|
|
1062
|
+
)
|
|
1063
|
+
|
|
672
1064
|
def get_npu_device(self) -> dict:
|
|
673
1065
|
"""
|
|
674
1066
|
Retrieves NPU device information (limited support on Linux).
|
|
@@ -681,6 +1073,69 @@ class LinuxSystemInfo(SystemInfo):
|
|
|
681
1073
|
"error": "NPU detection not yet implemented for Linux",
|
|
682
1074
|
}
|
|
683
1075
|
|
|
1076
|
+
def _get_nvidia_driver_version_linux(self) -> str:
|
|
1077
|
+
"""
|
|
1078
|
+
Get NVIDIA driver version on Linux using nvidia-smi and proc fallback.
|
|
1079
|
+
|
|
1080
|
+
Returns:
|
|
1081
|
+
str: Driver version, or empty string if detection fails
|
|
1082
|
+
"""
|
|
1083
|
+
# Primary: Try nvidia-smi command
|
|
1084
|
+
try:
|
|
1085
|
+
output = (
|
|
1086
|
+
subprocess.check_output(
|
|
1087
|
+
"nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits",
|
|
1088
|
+
shell=True,
|
|
1089
|
+
stderr=subprocess.DEVNULL,
|
|
1090
|
+
)
|
|
1091
|
+
.decode()
|
|
1092
|
+
.strip()
|
|
1093
|
+
)
|
|
1094
|
+
if output and output != "N/A":
|
|
1095
|
+
return output.split("\n")[0]
|
|
1096
|
+
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
1097
|
+
pass
|
|
1098
|
+
|
|
1099
|
+
# Fallback: Try /proc/driver/nvidia/version
|
|
1100
|
+
try:
|
|
1101
|
+
with open("/proc/driver/nvidia/version", "r", encoding="utf-8") as f:
|
|
1102
|
+
content = f.read()
|
|
1103
|
+
# Look for version pattern like "NVRM version:
|
|
1104
|
+
# NVIDIA UNIX x86_64 Kernel Module 470.82.00"
|
|
1105
|
+
match = re.search(r"Kernel Module\s+(\d+\.\d+(?:\.\d+)?)", content)
|
|
1106
|
+
if match:
|
|
1107
|
+
return match.group(1)
|
|
1108
|
+
except (FileNotFoundError, IOError):
|
|
1109
|
+
pass
|
|
1110
|
+
|
|
1111
|
+
return ""
|
|
1112
|
+
|
|
1113
|
+
def _get_nvidia_vram_smi_linux(self) -> float:
|
|
1114
|
+
"""
|
|
1115
|
+
Get NVIDIA GPU VRAM on Linux using nvidia-smi command.
|
|
1116
|
+
|
|
1117
|
+
Returns:
|
|
1118
|
+
float: VRAM in GB, or 0.0 if detection fails
|
|
1119
|
+
"""
|
|
1120
|
+
try:
|
|
1121
|
+
output = (
|
|
1122
|
+
subprocess.check_output(
|
|
1123
|
+
"nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits",
|
|
1124
|
+
shell=True,
|
|
1125
|
+
stderr=subprocess.DEVNULL,
|
|
1126
|
+
)
|
|
1127
|
+
.decode()
|
|
1128
|
+
.strip()
|
|
1129
|
+
)
|
|
1130
|
+
|
|
1131
|
+
# nvidia-smi returns memory in MB
|
|
1132
|
+
vram_mb = int(output.split("\n")[0])
|
|
1133
|
+
vram_gb = round(vram_mb / 1024, 1)
|
|
1134
|
+
return vram_gb
|
|
1135
|
+
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
|
1136
|
+
pass
|
|
1137
|
+
return 0.0
|
|
1138
|
+
|
|
684
1139
|
@staticmethod
|
|
685
1140
|
def get_processor_name() -> str:
|
|
686
1141
|
"""
|
|
@@ -758,6 +1213,109 @@ class LinuxSystemInfo(SystemInfo):
|
|
|
758
1213
|
info_dict["Physical Memory"] = self.get_physical_memory()
|
|
759
1214
|
return info_dict
|
|
760
1215
|
|
|
1216
|
+
def _get_nvidia_vram_smi_linux(self) -> float:
|
|
1217
|
+
"""
|
|
1218
|
+
Get NVIDIA GPU VRAM using nvidia-smi command on Linux.
|
|
1219
|
+
|
|
1220
|
+
Returns:
|
|
1221
|
+
float: VRAM in GB, or 0.0 if detection fails
|
|
1222
|
+
"""
|
|
1223
|
+
try:
|
|
1224
|
+
output = (
|
|
1225
|
+
subprocess.check_output(
|
|
1226
|
+
[
|
|
1227
|
+
"nvidia-smi",
|
|
1228
|
+
"--query-gpu=memory.total",
|
|
1229
|
+
"--format=csv,noheader,nounits",
|
|
1230
|
+
],
|
|
1231
|
+
stderr=subprocess.DEVNULL,
|
|
1232
|
+
)
|
|
1233
|
+
.decode()
|
|
1234
|
+
.strip()
|
|
1235
|
+
)
|
|
1236
|
+
|
|
1237
|
+
# nvidia-smi returns memory in MB
|
|
1238
|
+
vram_mb = int(output.split("\n")[0])
|
|
1239
|
+
vram_gb = round(vram_mb / 1024, 1)
|
|
1240
|
+
return vram_gb
|
|
1241
|
+
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
|
1242
|
+
pass
|
|
1243
|
+
return 0.0
|
|
1244
|
+
|
|
1245
|
+
def _get_amd_vram_rocm_smi_linux(self) -> float:
|
|
1246
|
+
"""
|
|
1247
|
+
Get AMD GPU VRAM using rocm-smi command on Linux.
|
|
1248
|
+
|
|
1249
|
+
Returns:
|
|
1250
|
+
float: VRAM in GB, or 0.0 if detection fails
|
|
1251
|
+
"""
|
|
1252
|
+
try:
|
|
1253
|
+
output = (
|
|
1254
|
+
subprocess.check_output(
|
|
1255
|
+
["rocm-smi", "--showmeminfo", "vram", "--csv"],
|
|
1256
|
+
stderr=subprocess.DEVNULL,
|
|
1257
|
+
)
|
|
1258
|
+
.decode()
|
|
1259
|
+
.strip()
|
|
1260
|
+
)
|
|
1261
|
+
|
|
1262
|
+
# Parse CSV output to extract VRAM
|
|
1263
|
+
lines = output.split("\n")
|
|
1264
|
+
for line in lines:
|
|
1265
|
+
if "Total VRAM" in line or "vram" in line.lower():
|
|
1266
|
+
# Extract numeric value (assuming it's in MB or GB)
|
|
1267
|
+
numbers = re.findall(r"\d+", line)
|
|
1268
|
+
if numbers:
|
|
1269
|
+
vram_value = int(numbers[0])
|
|
1270
|
+
# Assume MB if value is large, GB if small
|
|
1271
|
+
if vram_value > 100: # Likely MB
|
|
1272
|
+
vram_gb = round(vram_value / 1024, 1)
|
|
1273
|
+
else: # Likely GB
|
|
1274
|
+
vram_gb = float(vram_value)
|
|
1275
|
+
return vram_gb
|
|
1276
|
+
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
|
1277
|
+
pass
|
|
1278
|
+
return 0.0
|
|
1279
|
+
|
|
1280
|
+
def _get_amd_vram_sysfs(self, pci_id: str) -> float:
|
|
1281
|
+
"""
|
|
1282
|
+
Get AMD GPU VRAM using sysfs on Linux.
|
|
1283
|
+
|
|
1284
|
+
Args:
|
|
1285
|
+
pci_id: PCI ID of the GPU (e.g., "0000:01:00.0")
|
|
1286
|
+
|
|
1287
|
+
Returns:
|
|
1288
|
+
float: VRAM in GB, or 0.0 if detection fails
|
|
1289
|
+
"""
|
|
1290
|
+
try:
|
|
1291
|
+
# Try different sysfs paths for VRAM information
|
|
1292
|
+
sysfs_paths = [
|
|
1293
|
+
f"/sys/bus/pci/devices/{pci_id}/mem_info_vram_total",
|
|
1294
|
+
"/sys/class/drm/card*/device/mem_info_vram_total",
|
|
1295
|
+
]
|
|
1296
|
+
|
|
1297
|
+
for path in sysfs_paths:
|
|
1298
|
+
try:
|
|
1299
|
+
if "*" in path:
|
|
1300
|
+
# Handle wildcard paths
|
|
1301
|
+
matching_paths = glob.glob(path)
|
|
1302
|
+
for match_path in matching_paths:
|
|
1303
|
+
with open(match_path, "r", encoding="utf-8") as f:
|
|
1304
|
+
vram_bytes = int(f.read().strip())
|
|
1305
|
+
vram_gb = round(vram_bytes / (1024**3), 1)
|
|
1306
|
+
if vram_gb > 0:
|
|
1307
|
+
return vram_gb
|
|
1308
|
+
else:
|
|
1309
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
1310
|
+
vram_bytes = int(f.read().strip())
|
|
1311
|
+
vram_gb = round(vram_bytes / (1024**3), 1)
|
|
1312
|
+
return vram_gb
|
|
1313
|
+
except (FileNotFoundError, ValueError, PermissionError):
|
|
1314
|
+
continue
|
|
1315
|
+
except Exception: # pylint: disable=broad-except
|
|
1316
|
+
pass
|
|
1317
|
+
return 0.0
|
|
1318
|
+
|
|
761
1319
|
def _detect_inference_engines(self, device_type: str, device_name: str) -> dict:
|
|
762
1320
|
"""
|
|
763
1321
|
Detect available inference engines for a specific device type.
|
|
@@ -803,6 +1361,17 @@ class UnsupportedOSSystemInfo(SystemInfo):
|
|
|
803
1361
|
"""
|
|
804
1362
|
return []
|
|
805
1363
|
|
|
1364
|
+
def get_nvidia_dgpu_devices(self, include_inference_engines: bool = False) -> list:
|
|
1365
|
+
"""
|
|
1366
|
+
Retrieves NVIDIA discrete GPU device information for unsupported OS.
|
|
1367
|
+
"""
|
|
1368
|
+
return [
|
|
1369
|
+
{
|
|
1370
|
+
"available": False,
|
|
1371
|
+
"error": "Device detection not supported on this operating system",
|
|
1372
|
+
}
|
|
1373
|
+
]
|
|
1374
|
+
|
|
806
1375
|
def get_npu_device(self) -> dict:
|
|
807
1376
|
"""
|
|
808
1377
|
Retrieves NPU device information for unsupported OS.
|
lemonade/tools/llamacpp/utils.py
CHANGED
|
@@ -510,14 +510,14 @@ def identify_gguf_models(
|
|
|
510
510
|
The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
|
|
511
511
|
|
|
512
512
|
The VARIANT format can be one of several types:
|
|
513
|
-
0. wildcard (*): download all files in the repo
|
|
513
|
+
0. wildcard (*): download all .gguf files in the repo
|
|
514
514
|
1. Full filename: exact file to download
|
|
515
515
|
2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
|
|
516
516
|
3. Quantization variant: find a single file ending with the variant name (case insensitive)
|
|
517
517
|
4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
|
|
518
518
|
|
|
519
519
|
Examples:
|
|
520
|
-
- "ggml-org/gpt-oss-120b-GGUF:*" -> downloads all files in repo
|
|
520
|
+
- "ggml-org/gpt-oss-120b-GGUF:*" -> downloads all .gguf files in repo
|
|
521
521
|
- "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
|
|
522
522
|
- "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
|
|
523
523
|
- "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
|
|
@@ -531,13 +531,14 @@ def identify_gguf_models(
|
|
|
531
531
|
|
|
532
532
|
# (case 0) Wildcard, download everything
|
|
533
533
|
if variant and variant == "*":
|
|
534
|
-
sharded_files = repo_files
|
|
534
|
+
sharded_files = [f for f in repo_files if f.endswith(".gguf")]
|
|
535
535
|
|
|
536
536
|
# Sort to ensure consistent ordering
|
|
537
537
|
sharded_files.sort()
|
|
538
538
|
|
|
539
539
|
# Use first file as primary (this is how llamacpp handles it)
|
|
540
540
|
variant_name = sharded_files[0]
|
|
541
|
+
|
|
541
542
|
# (case 1) If variant ends in .gguf, use it directly
|
|
542
543
|
elif variant and variant.endswith(".gguf"):
|
|
543
544
|
variant_name = variant
|
lemonade/tools/server/serve.py
CHANGED
|
@@ -164,6 +164,7 @@ class Server:
|
|
|
164
164
|
- /api/v1/chat/completions: chat completion responses using HTTP chunked transfer encoding.
|
|
165
165
|
- /api/v1/responses: responses API using HTTP chunked transfer encoding.
|
|
166
166
|
- /api/v1/models: list all available models.
|
|
167
|
+
- /api/v1/models/{model_id}: retrieve a specific model by ID.
|
|
167
168
|
"""
|
|
168
169
|
|
|
169
170
|
def __init__(
|
|
@@ -269,6 +270,7 @@ class Server:
|
|
|
269
270
|
self.app.post(f"{prefix}/chat/completions")(self.chat_completions)
|
|
270
271
|
self.app.post(f"{prefix}/embeddings")(self.embeddings)
|
|
271
272
|
self.app.get(f"{prefix}/models")(self.models)
|
|
273
|
+
self.app.get(f"{prefix}/models/{{model_id}}")(self.retrieve_model)
|
|
272
274
|
|
|
273
275
|
# JinaAI routes (jina.ai/reranker/)
|
|
274
276
|
self.app.post(f"{prefix}/reranking")(self.reranking)
|
|
@@ -1590,6 +1592,36 @@ class Server:
|
|
|
1590
1592
|
|
|
1591
1593
|
return {"object": "list", "data": models_list}
|
|
1592
1594
|
|
|
1595
|
+
async def retrieve_model(self, model_id: str):
|
|
1596
|
+
"""
|
|
1597
|
+
Retrieve a specific model by ID in OpenAI-compatible format.
|
|
1598
|
+
"""
|
|
1599
|
+
# Raise an error if the model does not exist
|
|
1600
|
+
if model_id not in self.local_models:
|
|
1601
|
+
# Mimic the error format of the OpenAI API
|
|
1602
|
+
raise HTTPException(
|
|
1603
|
+
status_code=404,
|
|
1604
|
+
detail={
|
|
1605
|
+
"message": f"model {model_id} not found",
|
|
1606
|
+
"type": "api_error",
|
|
1607
|
+
"param": None,
|
|
1608
|
+
"code": None,
|
|
1609
|
+
},
|
|
1610
|
+
)
|
|
1611
|
+
|
|
1612
|
+
# Return the specific model
|
|
1613
|
+
model_info = self.local_models[model_id]
|
|
1614
|
+
model = ServerModel(
|
|
1615
|
+
id=model_id,
|
|
1616
|
+
owned_by="lemonade",
|
|
1617
|
+
object="model",
|
|
1618
|
+
created=int(time.time()),
|
|
1619
|
+
checkpoint=model_info["checkpoint"],
|
|
1620
|
+
recipe=model_info["recipe"],
|
|
1621
|
+
)
|
|
1622
|
+
|
|
1623
|
+
return model
|
|
1624
|
+
|
|
1593
1625
|
def setup_middleware_timer(self):
|
|
1594
1626
|
logging.info("Middleware set up")
|
|
1595
1627
|
|
lemonade/tools/server/tray.py
CHANGED
|
@@ -7,9 +7,12 @@ import webbrowser
|
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
import logging
|
|
9
9
|
import tempfile
|
|
10
|
+
|
|
10
11
|
import requests
|
|
11
12
|
from packaging.version import parse as parse_version
|
|
12
13
|
|
|
14
|
+
from lemonade_server.pydantic_models import DEFAULT_CTX_SIZE
|
|
15
|
+
|
|
13
16
|
from lemonade.version import __version__
|
|
14
17
|
from lemonade.tools.server.utils.system_tray import SystemTray, Menu, MenuItem
|
|
15
18
|
|
|
@@ -57,6 +60,7 @@ class LemonadeTray(SystemTray):
|
|
|
57
60
|
self.executor = ThreadPoolExecutor(max_workers=1)
|
|
58
61
|
self.log_file = log_file
|
|
59
62
|
self.port = port
|
|
63
|
+
self.ctx_size = DEFAULT_CTX_SIZE
|
|
60
64
|
self.server_factory = server_factory
|
|
61
65
|
self.debug_logs_enabled = log_level == "debug"
|
|
62
66
|
|
|
@@ -282,6 +286,41 @@ class LemonadeTray(SystemTray):
|
|
|
282
286
|
self.logger.error(f"Error changing port: {str(e)}")
|
|
283
287
|
self.show_balloon_notification("Error", f"Failed to change port: {str(e)}")
|
|
284
288
|
|
|
289
|
+
def change_context_size(self, _, __, new_ctx_size):
|
|
290
|
+
"""
|
|
291
|
+
Change the server context size and restart the server.
|
|
292
|
+
"""
|
|
293
|
+
try:
|
|
294
|
+
# Stop the current server
|
|
295
|
+
if self.server_thread and self.server_thread.is_alive():
|
|
296
|
+
# Set should_exit flag on the uvicorn server instance
|
|
297
|
+
if (
|
|
298
|
+
hasattr(self.server, "uvicorn_server")
|
|
299
|
+
and self.server.uvicorn_server
|
|
300
|
+
):
|
|
301
|
+
self.server.uvicorn_server.should_exit = True
|
|
302
|
+
self.server_thread.join(timeout=2)
|
|
303
|
+
# Update the context size in both the tray and the server instance
|
|
304
|
+
self.ctx_size = new_ctx_size
|
|
305
|
+
if self.server:
|
|
306
|
+
self.server.ctx_size = new_ctx_size
|
|
307
|
+
# Restart the server
|
|
308
|
+
self.server_thread = threading.Thread(target=self.start_server, daemon=True)
|
|
309
|
+
self.server_thread.start()
|
|
310
|
+
# Show notification
|
|
311
|
+
ctx_size_label = (
|
|
312
|
+
f"{new_ctx_size//1024}K" if new_ctx_size >= 1024 else str(new_ctx_size)
|
|
313
|
+
)
|
|
314
|
+
self.show_balloon_notification(
|
|
315
|
+
"Context Size Changed",
|
|
316
|
+
f"Lemonade Server context size is now {ctx_size_label}",
|
|
317
|
+
)
|
|
318
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
|
319
|
+
self.logger.error(f"Error changing context size: {str(e)}")
|
|
320
|
+
self.show_balloon_notification(
|
|
321
|
+
"Error", f"Failed to change context size: {str(e)}"
|
|
322
|
+
)
|
|
323
|
+
|
|
285
324
|
def _using_installer(self):
|
|
286
325
|
"""
|
|
287
326
|
Check if the user is using the NSIS installer by checking for embeddable python
|
|
@@ -438,6 +477,30 @@ class LemonadeTray(SystemTray):
|
|
|
438
477
|
|
|
439
478
|
port_submenu = Menu(*port_menu_items)
|
|
440
479
|
|
|
480
|
+
# Create context size selection submenu with 6 options
|
|
481
|
+
ctx_size_menu_items = []
|
|
482
|
+
ctx_size_options = [
|
|
483
|
+
("4K", 4096),
|
|
484
|
+
("8K", 8192),
|
|
485
|
+
("16K", 16384),
|
|
486
|
+
("32K", 32768),
|
|
487
|
+
("64K", 65536),
|
|
488
|
+
("128K", 131072),
|
|
489
|
+
]
|
|
490
|
+
|
|
491
|
+
for ctx_label, ctx_value in ctx_size_options:
|
|
492
|
+
# Create a function that returns the lambda to properly capture the ctx_size variable
|
|
493
|
+
def create_ctx_handler(ctx_size):
|
|
494
|
+
return lambda icon, item: self.change_context_size(icon, item, ctx_size)
|
|
495
|
+
|
|
496
|
+
ctx_item = MenuItem(
|
|
497
|
+
f"Context size {ctx_label}", create_ctx_handler(ctx_value)
|
|
498
|
+
)
|
|
499
|
+
ctx_item.checked = ctx_value == self.ctx_size
|
|
500
|
+
ctx_size_menu_items.append(ctx_item)
|
|
501
|
+
|
|
502
|
+
ctx_size_submenu = Menu(*ctx_size_menu_items)
|
|
503
|
+
|
|
441
504
|
# Create the Logs submenu
|
|
442
505
|
debug_log_text = "Enable Debug Logs"
|
|
443
506
|
debug_log_item = MenuItem(debug_log_text, self.toggle_debug_logs)
|
|
@@ -452,6 +515,7 @@ class LemonadeTray(SystemTray):
|
|
|
452
515
|
if status_successfully_checked:
|
|
453
516
|
items.append(MenuItem("Load Model", None, submenu=load_submenu))
|
|
454
517
|
items.append(MenuItem("Port", None, submenu=port_submenu))
|
|
518
|
+
items.append(MenuItem("Context Size", None, submenu=ctx_size_submenu))
|
|
455
519
|
items.append(Menu.SEPARATOR)
|
|
456
520
|
|
|
457
521
|
# Only show upgrade option if newer version is available
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.1.9"
|
|
1
|
+
__version__ = "8.1.10"
|
|
@@ -4,17 +4,17 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
|
|
|
4
4
|
lemonade/cli.py,sha256=qU5bW7RQAUKNSpvrhVyzn68NMxyi-336Ke_JU4bsv1Q,5708
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=BSj3P5N0EwzL0-jahgtLzMLFGfZHooaYa76BeUiW2wc,23
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
11
11
|
lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
|
|
12
12
|
lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
|
|
13
|
-
lemonade/common/inference_engines.py,sha256=
|
|
13
|
+
lemonade/common/inference_engines.py,sha256=3bUGQe9wtfTiwt8kvI_ry077uyc9lid2G1fJX95kN1A,12969
|
|
14
14
|
lemonade/common/network.py,sha256=qXpUjDYQEYM_gH3JwTtU-pu_yCKcaa1IeohJRPy91-A,2903
|
|
15
15
|
lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
|
|
16
16
|
lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
|
|
17
|
-
lemonade/common/system_info.py,sha256=
|
|
17
|
+
lemonade/common/system_info.py,sha256=Msa0pCSj3ZN3nerjY8wdqjjJLg6GPhbWf2htSNcFIHc,49607
|
|
18
18
|
lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
|
|
19
19
|
lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
|
|
20
20
|
lemonade/profilers/agt_power.py,sha256=t_37VEg8LPapjSKSjJln-jFznZtTIf5UpzlAXcVGOrc,16771
|
|
@@ -36,7 +36,7 @@ lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1
|
|
|
36
36
|
lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
|
|
37
37
|
lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
|
|
38
38
|
lemonade/tools/llamacpp/load.py,sha256=DFCvQN548Ch9H8U_rHOiYviinzw6vixb5-V7xLj7XE4,6499
|
|
39
|
-
lemonade/tools/llamacpp/utils.py,sha256=
|
|
39
|
+
lemonade/tools/llamacpp/utils.py,sha256=WEjdGmVxl30rt0a62MNo_X8ndFQ2SIrqtA2uF3klE6g,33090
|
|
40
40
|
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
41
|
lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
|
|
42
42
|
lemonade/tools/oga/load.py,sha256=x-A-nhoni-WyDpVCLcWRAMfs5ouac9MJzxT-rsnLPw8,34226
|
|
@@ -46,9 +46,9 @@ lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTgu
|
|
|
46
46
|
lemonade/tools/report/table.py,sha256=Kv_Epd8a6KIrdzSC2EgIl6uTKw7E5eMq10Tg16O0WxM,27996
|
|
47
47
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
48
|
lemonade/tools/server/llamacpp.py,sha256=8HdTkrU2ht8L1ldXqkfYfYhXiA8TvySuaslinAMqr-c,9002
|
|
49
|
-
lemonade/tools/server/serve.py,sha256=
|
|
49
|
+
lemonade/tools/server/serve.py,sha256=W6wugCEaRNsDpWKIcsACrKZRFEwt7H1qWr6kG07WCek,62739
|
|
50
50
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
51
|
-
lemonade/tools/server/tray.py,sha256=
|
|
51
|
+
lemonade/tools/server/tray.py,sha256=2PQxoEWH-zzUGlveTW4heij4UC9SzxXcFlGs0JtFjF0,22226
|
|
52
52
|
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
53
53
|
lemonade/tools/server/wrapped_server.py,sha256=DlzsGUwLQzjOFRfTTxfnhvmM_9lvAki96jWIAz7Czds,16713
|
|
54
54
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
@@ -63,15 +63,15 @@ lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu
|
|
|
63
63
|
lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
|
|
64
64
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
65
65
|
lemonade_install/install.py,sha256=Dow7kt-K9WI4PH15hBwkKtOxede3dAaOmH4I1y_P5H4,27008
|
|
66
|
-
lemonade_sdk-8.1.
|
|
67
|
-
lemonade_sdk-8.1.
|
|
68
|
-
lemonade_server/cli.py,sha256=
|
|
66
|
+
lemonade_sdk-8.1.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
67
|
+
lemonade_sdk-8.1.10.dist-info/licenses/NOTICE.md,sha256=RSca9LE5e6pvdWA_LXAUCcACIHPmINKqkRX-AVRqBGo,3499
|
|
68
|
+
lemonade_server/cli.py,sha256=0Ht82D1z4Z2MuxTc07jF2glaegX4jT3Jh--sS6ZO6Qc,19302
|
|
69
69
|
lemonade_server/model_manager.py,sha256=V8QRf1nlh3wAFtUHoSF_JeAXeR7sfaZE1uTfppcIfcw,20492
|
|
70
70
|
lemonade_server/pydantic_models.py,sha256=49MyOlb5feLUlKsGcI75tWaflWckrItqcSVkdCY4e3A,3269
|
|
71
71
|
lemonade_server/server_models.json,sha256=0H_G6Jw6Yuz6t0RZnFnq0SbBCsw_cQLe9j24TkyF2eI,12344
|
|
72
|
-
lemonade_server/settings.py,sha256=
|
|
73
|
-
lemonade_sdk-8.1.
|
|
74
|
-
lemonade_sdk-8.1.
|
|
75
|
-
lemonade_sdk-8.1.
|
|
76
|
-
lemonade_sdk-8.1.
|
|
77
|
-
lemonade_sdk-8.1.
|
|
72
|
+
lemonade_server/settings.py,sha256=JOlZmirUXO9rA6BCODVFwyXrrHtYoH_LiKYm49lGm_c,1260
|
|
73
|
+
lemonade_sdk-8.1.10.dist-info/METADATA,sha256=EYovzTHGnvWEZI-v_Gg1X0ajXeXiQPydqOkisnh08ME,15023
|
|
74
|
+
lemonade_sdk-8.1.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
75
|
+
lemonade_sdk-8.1.10.dist-info/entry_points.txt,sha256=7sRvpNhi1E7amnM7RZo57e8yFF9iA5uuRaIeJ1Xre6w,193
|
|
76
|
+
lemonade_sdk-8.1.10.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
77
|
+
lemonade_sdk-8.1.10.dist-info/RECORD,,
|
lemonade_server/cli.py
CHANGED
|
@@ -375,9 +375,11 @@ def is_lemonade_server(pid):
|
|
|
375
375
|
if process_name in [ # Windows
|
|
376
376
|
"lemonade-server-dev.exe",
|
|
377
377
|
"lemonade-server.exe",
|
|
378
|
+
"lsdev.exe",
|
|
378
379
|
] or process_name in [ # Linux
|
|
379
380
|
"lemonade-server-dev",
|
|
380
381
|
"lemonade-server",
|
|
382
|
+
"lsdev",
|
|
381
383
|
]:
|
|
382
384
|
return True
|
|
383
385
|
elif "llama-server" in process_name:
|
lemonade_server/settings.py
CHANGED
|
@@ -1,39 +1,39 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import os
|
|
3
|
-
from lemonade.cache import DEFAULT_CACHE_DIR
|
|
4
|
-
|
|
5
|
-
# Define the path for the user settings file, placing it in the cache directory
|
|
6
|
-
USER_SETTINGS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_settings.json")
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def save_setting(key, value):
|
|
10
|
-
"""Save a setting to the user_settings.json file."""
|
|
11
|
-
# Ensure the cache directory exists
|
|
12
|
-
os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True)
|
|
13
|
-
|
|
14
|
-
settings = {}
|
|
15
|
-
if os.path.exists(USER_SETTINGS_FILE):
|
|
16
|
-
with open(USER_SETTINGS_FILE, "r") as f:
|
|
17
|
-
try:
|
|
18
|
-
settings = json.load(f)
|
|
19
|
-
except json.JSONDecodeError:
|
|
20
|
-
# If the file is empty or corrupt, start with a fresh dictionary
|
|
21
|
-
pass
|
|
22
|
-
|
|
23
|
-
settings[key] = value
|
|
24
|
-
with open(USER_SETTINGS_FILE, "w") as f:
|
|
25
|
-
json.dump(settings, f, indent=4)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def load_setting(key, default=None):
|
|
29
|
-
"""Load a setting from the user_settings.json file."""
|
|
30
|
-
if not os.path.exists(USER_SETTINGS_FILE):
|
|
31
|
-
return default
|
|
32
|
-
|
|
33
|
-
with open(USER_SETTINGS_FILE, "r") as f:
|
|
34
|
-
try:
|
|
35
|
-
settings = json.load(f)
|
|
36
|
-
return settings.get(key, default)
|
|
37
|
-
except json.JSONDecodeError:
|
|
38
|
-
# Return default if the file is empty or corrupt
|
|
39
|
-
return default
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from lemonade.cache import DEFAULT_CACHE_DIR
|
|
4
|
+
|
|
5
|
+
# Define the path for the user settings file, placing it in the cache directory
|
|
6
|
+
USER_SETTINGS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_settings.json")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def save_setting(key, value):
|
|
10
|
+
"""Save a setting to the user_settings.json file."""
|
|
11
|
+
# Ensure the cache directory exists
|
|
12
|
+
os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True)
|
|
13
|
+
|
|
14
|
+
settings = {}
|
|
15
|
+
if os.path.exists(USER_SETTINGS_FILE):
|
|
16
|
+
with open(USER_SETTINGS_FILE, "r") as f:
|
|
17
|
+
try:
|
|
18
|
+
settings = json.load(f)
|
|
19
|
+
except json.JSONDecodeError:
|
|
20
|
+
# If the file is empty or corrupt, start with a fresh dictionary
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
settings[key] = value
|
|
24
|
+
with open(USER_SETTINGS_FILE, "w") as f:
|
|
25
|
+
json.dump(settings, f, indent=4)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def load_setting(key, default=None):
|
|
29
|
+
"""Load a setting from the user_settings.json file."""
|
|
30
|
+
if not os.path.exists(USER_SETTINGS_FILE):
|
|
31
|
+
return default
|
|
32
|
+
|
|
33
|
+
with open(USER_SETTINGS_FILE, "r") as f:
|
|
34
|
+
try:
|
|
35
|
+
settings = json.load(f)
|
|
36
|
+
return settings.get(key, default)
|
|
37
|
+
except json.JSONDecodeError:
|
|
38
|
+
# Return default if the file is empty or corrupt
|
|
39
|
+
return default
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|