lemonade-sdk 8.1.1__tar.gz → 8.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- {lemonade_sdk-8.1.1/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.2}/PKG-INFO +1 -1
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/setup.py +1 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/inference_engines.py +1 -1
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/llamacpp/utils.py +114 -14
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/management_tools.py +1 -1
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/serve.py +7 -3
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/static/webapp.html +2 -1
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/tray.py +1 -1
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/utils/port.py +2 -2
- lemonade_sdk-8.1.2/src/lemonade/version.py +1 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2/src/lemonade_sdk.egg-info}/PKG-INFO +1 -1
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/entry_points.txt +1 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/cli.py +37 -2
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/model_manager.py +1 -1
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/server_models.json +45 -0
- lemonade_sdk-8.1.1/src/lemonade/version.py +0 -1
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/LICENSE +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/NOTICE.md +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/README.md +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/setup.cfg +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/__init__.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/api.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/cache.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/cli.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/__init__.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/build.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/cli_helpers.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/exceptions.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/filesystem.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/network.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/printing.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/status.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/system_info.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/common/test_helpers.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/profilers/__init__.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/profilers/memory_tracker.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/profilers/profiler.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/sequence.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/state.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/__init__.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/accuracy.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/adapter.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/bench.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/huggingface/bench.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/huggingface/load.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/huggingface/utils.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/humaneval.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/llamacpp/bench.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/llamacpp/load.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/mmlu.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/oga/__init__.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/oga/bench.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/oga/load.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/oga/utils.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/perplexity.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/prompt.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/quark/__init__.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/quark/quark_load.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/quark/quark_quantize.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/report/__init__.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/report/llm_report.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/report/table.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/__init__.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/llamacpp.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/static/favicon.ico +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/static/styles.css +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/tool_calls.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/utils/system_tray.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/utils/thread.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/server/webapp.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade/tools/tool.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_install/__init__.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_install/install.py +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/SOURCES.txt +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/requires.txt +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
- {lemonade_sdk-8.1.1 → lemonade_sdk-8.1.2}/src/lemonade_server/pydantic_models.py +0 -0
|
@@ -5,7 +5,6 @@ import importlib.metadata
|
|
|
5
5
|
import subprocess
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
7
7
|
from typing import Dict, Optional
|
|
8
|
-
import transformers
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
class InferenceEngineDetector:
|
|
@@ -352,6 +351,7 @@ class TransformersDetector(BaseEngineDetector):
|
|
|
352
351
|
|
|
353
352
|
try:
|
|
354
353
|
import torch
|
|
354
|
+
import transformers
|
|
355
355
|
|
|
356
356
|
if device_type == "cpu":
|
|
357
357
|
result = {
|
|
@@ -57,7 +57,7 @@ def identify_rocm_arch_from_name(device_name: str) -> str | None:
|
|
|
57
57
|
return None
|
|
58
58
|
|
|
59
59
|
|
|
60
|
-
def
|
|
60
|
+
def identify_rocm_arch() -> str:
|
|
61
61
|
"""
|
|
62
62
|
Identify the appropriate ROCm target architecture based on the device info
|
|
63
63
|
Returns tuple of (architecture, gpu_type) where gpu_type is 'igpu' or 'dgpu'
|
|
@@ -68,21 +68,54 @@ def identify_rocm_arch_and_hip_id() -> tuple[str, str]:
|
|
|
68
68
|
amd_igpu = system_info.get_amd_igpu_device()
|
|
69
69
|
amd_dgpu = system_info.get_amd_dgpu_devices()
|
|
70
70
|
target_arch = None
|
|
71
|
-
gpu_count = 0
|
|
72
71
|
for gpu in [amd_igpu] + amd_dgpu:
|
|
73
72
|
if gpu.get("available") and gpu.get("name"):
|
|
74
|
-
gpu_count += 1
|
|
75
73
|
target_arch = identify_rocm_arch_from_name(gpu["name"].lower())
|
|
76
74
|
if target_arch:
|
|
77
75
|
break
|
|
78
76
|
|
|
79
|
-
|
|
80
|
-
# Here, we assume that the iGPU will always show up before the dGPUs (if available)
|
|
81
|
-
# We also assume that selecting the dGPU is preferred over the iGPU
|
|
82
|
-
# Multiple GPUs are not supported at the moment
|
|
83
|
-
hip_id = str(gpu_count - 1)
|
|
77
|
+
return target_arch
|
|
84
78
|
|
|
85
|
-
|
|
79
|
+
|
|
80
|
+
def identify_hip_id() -> str:
    """
    Identify the HIP device ID that should be exposed to llama.cpp.

    The devices reported by ``get_hip_devices()`` are filtered down to those
    whose name is recognised by ``identify_rocm_arch_from_name()``. The last
    recognised device is selected. If no device name is recognised, the last
    HIP device is used as a fallback, because HIP may report a generic name.

    Returns:
        The ID of the selected device, exactly as reported by
        ``get_hip_devices()``.
        NOTE(review): the annotation says ``str``, but ``get_hip_devices()``
        as written in this file yields integer indices; callers should keep
        converting with ``str()`` before writing it to an environment file.

    Raises:
        ValueError: if ``get_hip_devices()`` returns no devices.
    """
    # Get HIP devices
    hip_devices = get_hip_devices()
    logging.debug(f"HIP devices found: {hip_devices}")
    if not hip_devices:
        raise ValueError("No HIP devices found when identifying HIP ID")

    # Identify HIP devices that are compatible with our ROCm builds.
    # The name is lowercased before matching so that this lookup agrees with
    # identify_rocm_arch(), which also passes lowercased names to the helper.
    rocm_devices = [
        [device_id, device_name]
        for device_id, device_name in hip_devices
        if identify_rocm_arch_from_name(device_name.lower())
    ]
    logging.debug(f"ROCm devices found: {rocm_devices}")

    # If no ROCm devices are found, use the last HIP device
    # This might be needed in some scenarios where HIP reports generic device names
    # Example: "AMD Radeon Graphics" for STX Halo iGPU on Ubuntu 24.04
    if not rocm_devices:
        rocm_devices = [hip_devices[-1]]
        logging.warning(
            "No ROCm devices found when identifying HIP ID. "
            f"Falling back to the following device: {rocm_devices[0]}"
        )
    elif len(rocm_devices) > 1:
        # The two message fragments need an explicit separator; without it the
        # device list runs straight into the following sentence.
        logging.warning(
            f"Multiple ROCm devices found when identifying HIP ID: {rocm_devices}. "
            "The last device will be used."
        )

    # Select the last device
    device_selected = rocm_devices[-1]
    logging.debug(f"Selected ROCm device: {device_selected}")

    # Return the device ID
    return device_selected[0]
|
|
86
119
|
|
|
87
120
|
|
|
88
121
|
def get_llama_version(backend: str) -> str:
|
|
@@ -277,7 +310,7 @@ def install_llamacpp(backend):
|
|
|
277
310
|
target_arch = None
|
|
278
311
|
if backend == "rocm":
|
|
279
312
|
# Identify the target architecture
|
|
280
|
-
target_arch
|
|
313
|
+
target_arch = identify_rocm_arch()
|
|
281
314
|
if not target_arch:
|
|
282
315
|
system = platform.system().lower()
|
|
283
316
|
if system == "linux":
|
|
@@ -293,10 +326,6 @@ def install_llamacpp(backend):
|
|
|
293
326
|
f"for supported configurations. {hint}"
|
|
294
327
|
)
|
|
295
328
|
|
|
296
|
-
# Set HIP_VISIBLE_DEVICES=0 for igpu, =1 for dgpu
|
|
297
|
-
env_file_path = os.path.join(llama_server_exe_dir, ".env")
|
|
298
|
-
set_key(env_file_path, "HIP_VISIBLE_DEVICES", hip_id)
|
|
299
|
-
|
|
300
329
|
# Direct download for Vulkan/ROCm
|
|
301
330
|
llama_archive_url, filename = get_binary_url_and_filename(backend, target_arch)
|
|
302
331
|
llama_archive_path = os.path.join(llama_server_exe_dir, filename)
|
|
@@ -315,6 +344,12 @@ def install_llamacpp(backend):
|
|
|
315
344
|
else:
|
|
316
345
|
raise NotImplementedError(f"Unsupported archive format: {filename}")
|
|
317
346
|
|
|
347
|
+
# Identify and set HIP ID
|
|
348
|
+
if backend == "rocm":
|
|
349
|
+
hip_id = identify_hip_id()
|
|
350
|
+
env_file_path = os.path.join(llama_server_exe_dir, ".env")
|
|
351
|
+
set_key(env_file_path, "HIP_VISIBLE_DEVICES", str(hip_id))
|
|
352
|
+
|
|
318
353
|
# Make executable on Linux - need to update paths after extraction
|
|
319
354
|
if platform.system().lower() == "linux":
|
|
320
355
|
# Re-get the paths since extraction might have changed the directory structure
|
|
@@ -778,3 +813,68 @@ class LlamaCppAdapter(ModelAdapter):
|
|
|
778
813
|
error_msg = f"Failed to run llama.cpp command: {str(e)}\n"
|
|
779
814
|
error_msg += f"Command: {' '.join(cmd)}"
|
|
780
815
|
raise Exception(error_msg)
|
|
816
|
+
|
|
817
|
+
|
|
818
|
+
def get_hip_devices():
    """
    Get the list of HIP devices with their IDs and names.

    The HIP runtime shared library is searched for inside the ROCm llama.cpp
    folder (``get_llama_folder_path("rocm")``), loaded with ``ctypes``, and
    queried through ``hipGetDeviceCount`` and ``hipGetDeviceProperties``.

    Returns:
        A list of ``[device_index, device_name]`` pairs, one for every device
        whose properties could be read. Devices whose property query fails are
        logged and skipped. An empty list is returned when
        ``hipGetDeviceCount`` itself reports an error.

    Raises:
        RuntimeError: if no HIP runtime library matches the search pattern, or
            if the matching library cannot be loaded.
    """
    import ctypes
    import sys
    import os
    import glob
    from ctypes import c_int, POINTER

    # Get llama.cpp path
    rocm_path = get_llama_folder_path("rocm")

    # Load HIP library
    hip_library_pattern = (
        "amdhip64*.dll" if sys.platform.startswith("win") else "libamdhip64*.so"
    )
    search_pattern = os.path.join(rocm_path, hip_library_pattern)
    matching_files = glob.glob(search_pattern)
    if not matching_files:
        raise RuntimeError(
            f"Could not find HIP runtime library matching pattern: {search_pattern}"
        )
    hip_library_path = matching_files[0]
    try:
        libhip = ctypes.CDLL(hip_library_path)
    except OSError as err:
        # Report the path that was actually tried and keep the original cause.
        raise RuntimeError(
            f"Could not load HIP runtime library from {hip_library_path}"
        ) from err

    # Setup function signatures
    hipError_t = c_int
    # Opaque byte buffer standing in for the C struct; only its leading bytes
    # (the device name) are read below.
    # NOTE(review): 2048 bytes is assumed to be at least sizeof(hipDeviceProp_t)
    # for the bundled HIP runtime - confirm when the runtime is upgraded.
    hipDeviceProp_t = ctypes.c_char * 2048
    libhip.hipGetDeviceCount.restype = hipError_t
    libhip.hipGetDeviceCount.argtypes = [POINTER(c_int)]
    libhip.hipGetDeviceProperties.restype = hipError_t
    libhip.hipGetDeviceProperties.argtypes = [POINTER(hipDeviceProp_t), c_int]
    libhip.hipGetErrorString.restype = ctypes.c_char_p
    libhip.hipGetErrorString.argtypes = [hipError_t]

    # Get device count
    device_count = c_int()
    err = libhip.hipGetDeviceCount(ctypes.byref(device_count))
    if err != 0:
        # logging formats lazily with `msg % args`, so the message needs a
        # placeholder for the error text.
        logging.error(
            "hipGetDeviceCount failed: %s", libhip.hipGetErrorString(err).decode()
        )
        return []

    # Get device properties
    devices = []
    for i in range(device_count.value):
        prop = hipDeviceProp_t()
        err = libhip.hipGetDeviceProperties(ctypes.byref(prop), i)
        if err != 0:
            logging.error(
                "hipGetDeviceProperties failed for device %s: %s",
                i,
                libhip.hipGetErrorString(err).decode(),
            )
            continue

        # Extract device name from HIP device properties: read the first 256
        # bytes and cut at the first NUL so that nothing following the C-string
        # terminator leaks into the name.
        raw_name = ctypes.string_at(prop, 256)
        device_name = raw_name.split(b"\x00", 1)[0].decode("utf-8")
        devices.append([i, device_name])

    return devices
|
|
@@ -109,7 +109,7 @@ class Cache(ManagementTool):
|
|
|
109
109
|
# pylint: disable=pointless-statement,f-string-without-interpolation
|
|
110
110
|
f"""
|
|
111
111
|
A set of functions for managing the lemonade build cache. The default
|
|
112
|
-
cache location is {lemonade_cache.DEFAULT_CACHE_DIR}, and can also be
|
|
112
|
+
cache location is {lemonade_cache.DEFAULT_CACHE_DIR}, and can also be
|
|
113
113
|
selected with
|
|
114
114
|
the global --cache-dir option or the LEMONADE_CACHE_DIR environment variable.
|
|
115
115
|
|
|
@@ -72,6 +72,7 @@ if platform.system() == "Windows":
|
|
|
72
72
|
|
|
73
73
|
|
|
74
74
|
DEFAULT_PORT = 8000
|
|
75
|
+
DEFAULT_HOST = "localhost"
|
|
75
76
|
DEFAULT_LOG_LEVEL = "info"
|
|
76
77
|
DEFAULT_LLAMACPP_BACKEND = "vulkan"
|
|
77
78
|
DEFAULT_CTX_SIZE = 4096
|
|
@@ -150,6 +151,7 @@ class Server:
|
|
|
150
151
|
def __init__(
|
|
151
152
|
self,
|
|
152
153
|
port: int = DEFAULT_PORT,
|
|
154
|
+
host: str = DEFAULT_HOST,
|
|
153
155
|
log_level: str = DEFAULT_LOG_LEVEL,
|
|
154
156
|
ctx_size: int = DEFAULT_CTX_SIZE,
|
|
155
157
|
tray: bool = False,
|
|
@@ -160,6 +162,7 @@ class Server:
|
|
|
160
162
|
|
|
161
163
|
# Save args as members
|
|
162
164
|
self.port = port
|
|
165
|
+
self.host = host
|
|
163
166
|
self.log_level = log_level
|
|
164
167
|
self.ctx_size = ctx_size
|
|
165
168
|
self.tray = tray
|
|
@@ -332,6 +335,9 @@ class Server:
|
|
|
332
335
|
# Let the app know what port it's running on, so
|
|
333
336
|
# that the lifespan can access it
|
|
334
337
|
self.app.port = self.port
|
|
338
|
+
# FastAPI already has a `host` function and we cannot use `_host` as
|
|
339
|
+
# PyLint will believe it's private
|
|
340
|
+
self.app.host_ = self.host
|
|
335
341
|
|
|
336
342
|
def run(self):
|
|
337
343
|
# Common setup
|
|
@@ -340,9 +346,7 @@ class Server:
|
|
|
340
346
|
tray=self.tray,
|
|
341
347
|
)
|
|
342
348
|
|
|
343
|
-
uvicorn.run(
|
|
344
|
-
self.app, host="localhost", port=self.port, log_level=self.log_level
|
|
345
|
-
)
|
|
349
|
+
uvicorn.run(self.app, host=self.host, port=self.port, log_level=self.log_level)
|
|
346
350
|
|
|
347
351
|
def run_in_thread(self, host: str = "localhost"):
|
|
348
352
|
"""
|
|
@@ -369,7 +369,8 @@
|
|
|
369
369
|
// Helper to get server base URL
|
|
370
370
|
function getServerBaseUrl() {
|
|
371
371
|
const port = window.SERVER_PORT || 8000;
|
|
372
|
-
|
|
372
|
+
const host = window.location.hostname || 'localhost';
|
|
373
|
+
return `http://${host}:${port}`;
|
|
373
374
|
}
|
|
374
375
|
|
|
375
376
|
// Check if current model supports vision
|
|
@@ -427,7 +427,7 @@ class LemonadeTray(SystemTray):
|
|
|
427
427
|
Start the uvicorn server.
|
|
428
428
|
"""
|
|
429
429
|
self.server = self.server_factory()
|
|
430
|
-
self.server.uvicorn_server = self.server.run_in_thread()
|
|
430
|
+
self.server.uvicorn_server = self.server.run_in_thread(self.server.host)
|
|
431
431
|
self.server.uvicorn_server.run()
|
|
432
432
|
|
|
433
433
|
def run(self):
|
|
@@ -43,7 +43,7 @@ async def lifespan(app: FastAPI):
|
|
|
43
43
|
"\n"
|
|
44
44
|
"\n"
|
|
45
45
|
"🍋 Lemonade Server Ready!\n"
|
|
46
|
-
f"🍋 Open http://
|
|
46
|
+
f"🍋 Open http://{app.host_}:{app.port} in your browser for:\n"
|
|
47
47
|
"🍋 💬 chat\n"
|
|
48
48
|
"🍋 💻 model management\n"
|
|
49
49
|
"🍋 📄 docs\n"
|
|
@@ -53,7 +53,7 @@ async def lifespan(app: FastAPI):
|
|
|
53
53
|
"\n"
|
|
54
54
|
"\n"
|
|
55
55
|
"[Lemonade] Lemonade Server Ready!\n"
|
|
56
|
-
f"[Lemonade] Open http://
|
|
56
|
+
f"[Lemonade] Open http://{app.host_}:{app.port} in your browser for:\n"
|
|
57
57
|
"[Lemonade] chat\n"
|
|
58
58
|
"[Lemonade] model management\n"
|
|
59
59
|
"[Lemonade] docs\n"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "8.1.2"
|
|
@@ -47,6 +47,7 @@ class ModelLoadError(Exception):
|
|
|
47
47
|
|
|
48
48
|
def serve(
|
|
49
49
|
port: int = None,
|
|
50
|
+
host: str = "localhost",
|
|
50
51
|
log_level: str = None,
|
|
51
52
|
tray: bool = False,
|
|
52
53
|
use_thread: bool = False,
|
|
@@ -79,6 +80,7 @@ def serve(
|
|
|
79
80
|
# Start the server
|
|
80
81
|
server = Server(
|
|
81
82
|
port=port,
|
|
83
|
+
host=host,
|
|
82
84
|
log_level=log_level,
|
|
83
85
|
ctx_size=ctx_size,
|
|
84
86
|
tray=tray,
|
|
@@ -259,7 +261,9 @@ def delete(model_names: List[str]):
|
|
|
259
261
|
def run(
|
|
260
262
|
model_name: str,
|
|
261
263
|
port: int = None,
|
|
264
|
+
host: str = "localhost",
|
|
262
265
|
log_level: str = None,
|
|
266
|
+
tray: bool = False,
|
|
263
267
|
llamacpp_backend: str = None,
|
|
264
268
|
ctx_size: int = None,
|
|
265
269
|
):
|
|
@@ -275,8 +279,9 @@ def run(
|
|
|
275
279
|
if not server_previously_running:
|
|
276
280
|
port, server_thread = serve(
|
|
277
281
|
port=port,
|
|
282
|
+
host=host,
|
|
278
283
|
log_level=log_level,
|
|
279
|
-
tray=
|
|
284
|
+
tray=tray,
|
|
280
285
|
use_thread=True,
|
|
281
286
|
llamacpp_backend=llamacpp_backend,
|
|
282
287
|
ctx_size=ctx_size,
|
|
@@ -291,7 +296,7 @@ def run(
|
|
|
291
296
|
load(model_name, port)
|
|
292
297
|
|
|
293
298
|
# Open the webapp with the specified model
|
|
294
|
-
url = f"http://
|
|
299
|
+
url = f"http://{host}:{port}/?model={model_name}#llm-chat"
|
|
295
300
|
print(f"You can now chat with {model_name} at {url}")
|
|
296
301
|
webbrowser.open(url)
|
|
297
302
|
|
|
@@ -440,9 +445,36 @@ def list_models():
|
|
|
440
445
|
print(tabulate(table_data, headers=headers, tablefmt="simple"))
|
|
441
446
|
|
|
442
447
|
|
|
448
|
+
def developer_entrypoint():
    """
    Developer entry point: launch the server with debug logging enabled.

    Behaves like: lemonade-server-dev serve --log-level debug [additional args]

    Whatever arguments were given to the lsdev command are forwarded after the
    injected "serve --log-level debug" prefix. ``sys.argv`` is put back to its
    original contents once ``main()`` returns or raises.
    """
    # Snapshot the real command line so it can be restored afterwards
    saved_argv = list(sys.argv)

    try:
        # Everything after the program name is forwarded untouched
        forwarded_args = sys.argv[1:]

        # Rewrite sys.argv as "<prog> serve --log-level debug <forwarded args>"
        injected_prefix = [sys.argv[0], "serve", "--log-level", "debug"]
        sys.argv = injected_prefix + forwarded_args
        main()
    finally:
        # Put the original command line back
        sys.argv = saved_argv
|
|
470
|
+
|
|
471
|
+
|
|
443
472
|
def _add_server_arguments(parser):
|
|
444
473
|
"""Add common server arguments to a parser"""
|
|
445
474
|
parser.add_argument("--port", type=int, help="Port number to serve on")
|
|
475
|
+
parser.add_argument(
|
|
476
|
+
"--host", type=str, help="Address to bind for connections", default="localhost"
|
|
477
|
+
)
|
|
446
478
|
parser.add_argument(
|
|
447
479
|
"--log-level",
|
|
448
480
|
type=str,
|
|
@@ -578,6 +610,7 @@ def main():
|
|
|
578
610
|
sys.exit(ExitCodes.SERVER_ALREADY_RUNNING)
|
|
579
611
|
serve(
|
|
580
612
|
port=args.port,
|
|
613
|
+
host=args.host,
|
|
581
614
|
log_level=args.log_level,
|
|
582
615
|
tray=not args.no_tray,
|
|
583
616
|
llamacpp_backend=args.llamacpp,
|
|
@@ -603,7 +636,9 @@ def main():
|
|
|
603
636
|
run(
|
|
604
637
|
args.model,
|
|
605
638
|
port=args.port,
|
|
639
|
+
host=args.host,
|
|
606
640
|
log_level=args.log_level,
|
|
641
|
+
tray=not args.no_tray,
|
|
607
642
|
llamacpp_backend=args.llamacpp,
|
|
608
643
|
ctx_size=args.ctx_size,
|
|
609
644
|
)
|
|
@@ -114,6 +114,51 @@
|
|
|
114
114
|
"recipe": "oga-npu",
|
|
115
115
|
"suggested": true
|
|
116
116
|
},
|
|
117
|
+
"DeepSeek-R1-Distill-Llama-8B-NPU": {
|
|
118
|
+
"checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
119
|
+
"recipe": "oga-npu",
|
|
120
|
+
"suggested": true
|
|
121
|
+
},
|
|
122
|
+
"DeepSeek-R1-Distill-Qwen-7B-NPU": {
|
|
123
|
+
"checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
124
|
+
"recipe": "oga-npu",
|
|
125
|
+
"suggested": false
|
|
126
|
+
},
|
|
127
|
+
"DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
|
|
128
|
+
"checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
129
|
+
"recipe": "oga-npu",
|
|
130
|
+
"suggested": false
|
|
131
|
+
},
|
|
132
|
+
"Llama-3.2-3B-Instruct-NPU": {
|
|
133
|
+
"checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
134
|
+
"recipe": "oga-npu",
|
|
135
|
+
"suggested": false
|
|
136
|
+
},
|
|
137
|
+
"Llama-3.2-1B-Instruct-NPU": {
|
|
138
|
+
"checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
139
|
+
"recipe": "oga-npu",
|
|
140
|
+
"suggested": false
|
|
141
|
+
},
|
|
142
|
+
"Mistral-7B-v0.3-Instruct-NPU": {
|
|
143
|
+
"checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
144
|
+
"recipe": "oga-npu",
|
|
145
|
+
"suggested": true
|
|
146
|
+
},
|
|
147
|
+
"Phi-3.5-Mini-Instruct-NPU": {
|
|
148
|
+
"checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
149
|
+
"recipe": "oga-npu",
|
|
150
|
+
"suggested": true
|
|
151
|
+
},
|
|
152
|
+
"ChatGLM-3-6b-Instruct-NPU": {
|
|
153
|
+
"checkpoint": "amd/chatglm3-6b-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
154
|
+
"recipe": "oga-npu",
|
|
155
|
+
"suggested": false
|
|
156
|
+
},
|
|
157
|
+
"AMD-OLMo-1B-Instruct-NPU": {
|
|
158
|
+
"checkpoint": "amd/AMD-OLMo-1B-SFT-DPO-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
159
|
+
"recipe": "oga-npu",
|
|
160
|
+
"suggested": false
|
|
161
|
+
},
|
|
117
162
|
"Llama-3.2-1B-Instruct-DirectML": {
|
|
118
163
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
|
|
119
164
|
"recipe": "oga-igpu",
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "8.1.1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|