lemonade-sdk 8.0.6__py3-none-any.whl → 8.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lemonade-sdk might be problematic.

@@ -10,21 +10,105 @@ import requests
  import lemonade.common.printing as printing
  from lemonade.tools.adapter import PassthroughTokenizer, ModelAdapter

- LLAMA_VERSION = "b5902"
+ from lemonade.common.system_info import get_system_info

+ from dotenv import set_key, load_dotenv

- def get_llama_folder_path():
+ LLAMA_VERSION_VULKAN = "b6097"
+ LLAMA_VERSION_ROCM = "b1021"
+
+
+ def identify_rocm_arch_from_name(device_name: str) -> str | None:
+     """
+     Identify the appropriate ROCm target architecture based on the device name
+     """
+     device_name_lower = device_name.lower()
+     if "radeon" not in device_name_lower:
+         return None
+
+     # Check iGPUs
+     # STX Halo iGPUs (gfx1151 architecture)
+     # Radeon 8050S Graphics / Radeon 8060S Graphics
+     target_arch = None
+     if any(halo_igpu in device_name_lower for halo_igpu in ["8050s", "8060s"]):
+         return "gfx1151"
+
+     # Check dGPUs
+     # RDNA4 GPUs (gfx120X architecture)
+     # AMD Radeon AI PRO R9700, AMD Radeon RX 9070 XT, AMD Radeon RX 9070 GRE,
+     # AMD Radeon RX 9070, AMD Radeon RX 9060 XT
+     if any(
+         rdna4_gpu in device_name_lower
+         for rdna4_gpu in ["r9700", "9060", "9070"]
+     ):
+         return "gfx120X"
+
+     # RDNA3 GPUs (gfx110X architecture)
+     # AMD Radeon PRO V710, AMD Radeon PRO W7900 Dual Slot, AMD Radeon PRO W7900,
+     # AMD Radeon PRO W7800 48GB, AMD Radeon PRO W7800, AMD Radeon PRO W7700,
+     # AMD Radeon RX 7900 XTX, AMD Radeon RX 7900 XT, AMD Radeon RX 7900 GRE,
+     # AMD Radeon RX 7800 XT, AMD Radeon RX 7700 XT
+     elif any(
+         rdna3_gpu in device_name_lower
+         for rdna3_gpu in ["7700", "7800", "7900", "v710"]
+     ):
+         return "gfx110X"
+
+     return None
+
+
+ def identify_rocm_arch_and_hip_id() -> tuple[str, str]:
+     """
+     Identify the appropriate ROCm target architecture based on the device info
+     Returns a tuple of (architecture, hip_id), where hip_id is the index of the selected GPU
+     """
+
+     # Check for integrated and discrete AMD GPUs
+     system_info = get_system_info()
+     amd_igpu = system_info.get_amd_igpu_device()
+     amd_dgpu = system_info.get_amd_dgpu_devices()
+     target_arch = None
+     gpu_count = 0
+     for gpu in [amd_igpu] + amd_dgpu:
+         if gpu.get("available") and gpu.get("name"):
+             gpu_count += 1
+             target_arch = identify_rocm_arch_from_name(gpu["name"].lower())
+             if target_arch:
+                 break
+
+     # Get HIP ID based on the number of GPUs available
+     # Here, we assume that the iGPU will always show up before the dGPUs (if available)
+     # We also assume that selecting the dGPU is preferred over the iGPU
+     # Multiple GPUs are not supported at the moment
+     hip_id = str(gpu_count - 1)
+
+     return target_arch, hip_id
+
+
+ def get_llama_version(backend: str) -> str:
+     """
+     Select the appropriate llama.cpp version based on the backend
+     """
+     if backend == "rocm":
+         return LLAMA_VERSION_ROCM
+     elif backend == "vulkan":
+         return LLAMA_VERSION_VULKAN
+     else:
+         raise ValueError(f"Unsupported backend: {backend}")
+
+
+ def get_llama_folder_path(backend: str):
      """
      Get path for llama.cpp platform-specific executables folder
      """
-     return os.path.join(os.path.dirname(sys.executable), "llamacpp")
+     return os.path.join(os.path.dirname(sys.executable), backend, "llama_server")


- def get_llama_exe_path(exe_name):
+ def get_llama_exe_path(exe_name: str, backend: str):
      """
      Get path to platform-specific llama-server executable
      """
-     base_dir = get_llama_folder_path()
+     base_dir = get_llama_folder_path(backend)
      if platform.system().lower() == "windows":
          return os.path.join(base_dir, f"{exe_name}.exe")
      else: # Linux/Ubuntu
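To make the new selection logic above concrete, here is a small illustrative sketch (not part of the package) of what the helpers return for a few device names. The import path is an assumption; the diff does not show which module these functions live in.

```python
# Illustrative only. The import path below is an assumption; the diff does not
# show which module these helpers live in.
from lemonade.tools.llamacpp.utils import (
    identify_rocm_arch_from_name,
    get_llama_version,
)

# RDNA3 discrete GPUs map to the gfx110X build
print(identify_rocm_arch_from_name("AMD Radeon RX 7900 XTX"))    # "gfx110X"
# Strix Halo integrated GPUs map to gfx1151
print(identify_rocm_arch_from_name("AMD Radeon 8060S Graphics"))  # "gfx1151"
# Anything that is not a Radeon device is rejected
print(identify_rocm_arch_from_name("Some Other GPU"))             # None

# Each backend pins its own llama.cpp release tag
print(get_llama_version("vulkan"))  # "b6097"
print(get_llama_version("rocm"))    # "b1021"
```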
@@ -37,33 +121,33 @@ def get_llama_exe_path(exe_name):
          return os.path.join(base_dir, exe_name)


- def get_llama_server_exe_path():
+ def get_llama_server_exe_path(backend: str):
      """
      Get path to platform-specific llama-server executable
      """
-     return get_llama_exe_path("llama-server")
+     return get_llama_exe_path("llama-server", backend)


- def get_llama_cli_exe_path():
+ def get_llama_cli_exe_path(backend: str):
      """
      Get path to platform-specific llama-cli executable
      """
-     return get_llama_exe_path("llama-cli")
+     return get_llama_exe_path("llama-cli", backend)


- def get_version_txt_path():
+ def get_version_txt_path(backend: str):
      """
      Get path to text file that contains version information
      """
-     return os.path.join(get_llama_folder_path(), "version.txt")
+     return os.path.join(get_llama_folder_path(backend), "version.txt")


- def get_llama_installed_version():
+ def get_llama_installed_version(backend: str):
      """
      Gets version of installed llama.cpp
      Returns None if llama.cpp is not installed
      """
-     version_txt_path = get_version_txt_path()
+     version_txt_path = get_version_txt_path(backend)
      if os.path.exists(version_txt_path):
          with open(version_txt_path, "r", encoding="utf-8") as f:
              llama_installed_version = f.read()
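For orientation, a rough sketch of the per-backend install layout implied by the path helpers above; the concrete prefix depends on where the Python interpreter lives, so the printed path is only illustrative.

```python
# Sketch of the per-backend layout implied by the helpers above.
import os
import sys

backend = "vulkan"  # or "rocm"
folder = os.path.join(os.path.dirname(sys.executable), backend, "llama_server")
# get_llama_folder_path(backend)     -> <python_dir>/<backend>/llama_server
# get_llama_server_exe_path(backend) -> <folder>/llama-server(.exe on Windows)
# get_llama_cli_exe_path(backend)    -> <folder>/llama-cli(.exe on Windows)
# get_version_txt_path(backend)      -> <folder>/version.txt
print(folder)
```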
@@ -71,24 +155,48 @@ def get_llama_installed_version():
      return None


- def get_binary_url_and_filename(version):
+ def get_binary_url_and_filename(backend: str, target_arch: str = None):
      """
-     Get the appropriate llama.cpp binary URL and filename based on platform
+     Get the appropriate binary URL and filename based on platform and backend
+
+     Args:
+         backend: Backend to use ("vulkan" or "rocm")
      """
      system = platform.system().lower()

-     if system == "windows":
-         filename = f"llama-{version}-bin-win-vulkan-x64.zip"
-     elif system == "linux":
-         filename = f"llama-{version}-bin-ubuntu-vulkan-x64.zip"
+     if backend == "rocm":
+
+         # ROCm support from lemonade-sdk/llamacpp-rocm
+         repo = "lemonade-sdk/llamacpp-rocm"
+         version = LLAMA_VERSION_ROCM
+         if system == "windows":
+             filename = f"llama-{version}-windows-rocm-{target_arch}-x64.zip"
+         elif system == "linux":
+             filename = f"llama-{version}-ubuntu-rocm-{target_arch}-x64.zip"
+         else:
+             raise NotImplementedError(
+                 f"Platform {system} not supported for ROCm llamacpp. Supported: Windows, Ubuntu Linux"
+             )
+
+     elif backend == "vulkan":
+         # Original Vulkan support from ggml-org/llama.cpp
+         repo = "ggml-org/llama.cpp"
+         version = LLAMA_VERSION_VULKAN
+         if system == "windows":
+             filename = f"llama-{version}-bin-win-vulkan-x64.zip"
+         elif system == "linux":
+             filename = f"llama-{version}-bin-ubuntu-vulkan-x64.zip"
+         else:
+             raise NotImplementedError(
+                 f"Platform {system} not supported for Vulkan llamacpp. Supported: Windows, Ubuntu Linux"
+             )
      else:
+         supported_backends = ["vulkan", "rocm"]
          raise NotImplementedError(
-             f"Platform {system} not supported for llamacpp. Supported: Windows, Ubuntu Linux"
+             f"Unsupported backend: {backend}. Supported backends: {supported_backends}"
          )

-     url = (
-         f"https://github.com/ggml-org/llama.cpp/releases/download/{version}/{filename}"
-     )
+     url = f"https://github.com/{repo}/releases/download/{version}/{filename}"
      return url, filename

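Purely as a derived example (nothing is fetched), the templates above produce download targets like the following on Windows, with gfx110X standing in as an example ROCm target architecture:

```python
# Values derived from the templates in get_binary_url_and_filename above;
# gfx110X is just an example target architecture for the ROCm case.
LLAMA_VERSION_VULKAN = "b6097"
LLAMA_VERSION_ROCM = "b1021"

vulkan_filename = f"llama-{LLAMA_VERSION_VULKAN}-bin-win-vulkan-x64.zip"
vulkan_url = (
    "https://github.com/ggml-org/llama.cpp/releases/download/"
    f"{LLAMA_VERSION_VULKAN}/{vulkan_filename}"
)

rocm_filename = f"llama-{LLAMA_VERSION_ROCM}-windows-rocm-gfx110X-x64.zip"
rocm_url = (
    "https://github.com/lemonade-sdk/llamacpp-rocm/releases/download/"
    f"{LLAMA_VERSION_ROCM}/{rocm_filename}"
)

print(vulkan_url)
print(rocm_url)
```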
@@ -122,7 +230,7 @@ def validate_platform_support():
          )


- def install_llamacpp():
+ def install_llamacpp(backend):
      """
      Installs or upgrades llama.cpp binaries if needed
      """
@@ -130,56 +238,108 @@ def install_llamacpp():
      # Exception will be thrown if platform is not supported
      validate_platform_support()

-     # Installation location for llama.cpp
-     llama_folder_path = get_llama_folder_path()
+     version = get_llama_version(backend)
+
+     # Get platform-specific paths at runtime
+     llama_server_exe_dir = get_llama_folder_path(backend)
+     llama_server_exe_path = get_llama_server_exe_path(backend)

      # Check whether the llamacpp install needs an upgrade
-     if os.path.exists(llama_folder_path):
-         if get_llama_installed_version() != LLAMA_VERSION:
+     version_txt_path = os.path.join(llama_server_exe_dir, "version.txt")
+     backend_txt_path = os.path.join(llama_server_exe_dir, "backend.txt")
+
+     logging.info(f"Using backend: {backend}")
+
+     if os.path.exists(version_txt_path) and os.path.exists(backend_txt_path):
+         with open(version_txt_path, "r", encoding="utf-8") as f:
+             llamacpp_installed_version = f.read().strip()
+         with open(backend_txt_path, "r", encoding="utf-8") as f:
+             llamacpp_installed_backend = f.read().strip()
+
+         if (
+             llamacpp_installed_version != version
+             or llamacpp_installed_backend != backend
+         ):
              # Remove the existing install, which will trigger a new install
              # in the next code block
-             shutil.rmtree(llama_folder_path)
+             shutil.rmtree(llama_server_exe_dir)
+     elif os.path.exists(version_txt_path):
+         # Old installation without backend tracking - remove to upgrade
+         shutil.rmtree(llama_server_exe_dir)

      # Download llama.cpp server if it isn't already available
-     if not os.path.exists(llama_folder_path):
-         # Download llama.cpp server zip
-         llama_zip_url, filename = get_binary_url_and_filename(LLAMA_VERSION)
-         llama_zip_path = os.path.join(os.path.dirname(sys.executable), filename)
-         logging.info(f"Downloading llama.cpp server from {llama_zip_url}")
+     if not os.path.exists(llama_server_exe_path):
+
+         # Create the directory
+         os.makedirs(llama_server_exe_dir, exist_ok=True)
+
+         # Identify the target architecture (only needed for ROCm)
+         target_arch = None
+         if backend == "rocm":
+             # Identify the target architecture
+             target_arch, hip_id = identify_rocm_arch_and_hip_id()
+             if not target_arch:
+                 system = platform.system().lower()
+                 if system == "linux":
+                     hint = (
+                         "Hint: If you think your device is supported, "
+                         "running `sudo update-pciids` may help identify your hardware."
+                     )
+                 else:
+                     hint = ""
+                 raise ValueError(
+                     "ROCm backend selected but no compatible ROCm target architecture found. "
+                     "See https://github.com/lemonade-sdk/lemonade?tab=readme-ov-file#supported-configurations "
+                     f"for supported configurations. {hint}"
+                 )
+
+             # Set HIP_VISIBLE_DEVICES=0 for igpu, =1 for dgpu
+             env_file_path = os.path.join(llama_server_exe_dir, ".env")
+             set_key(env_file_path, "HIP_VISIBLE_DEVICES", hip_id)
+
+         # Direct download for Vulkan/ROCm
+         llama_archive_url, filename = get_binary_url_and_filename(backend, target_arch)
+         llama_archive_path = os.path.join(llama_server_exe_dir, filename)
+         logging.info(f"Downloading llama.cpp server from {llama_archive_url}")

-         with requests.get(llama_zip_url, stream=True) as r:
+         with requests.get(llama_archive_url, stream=True) as r:
              r.raise_for_status()
-             with open(llama_zip_path, "wb") as f:
+             with open(llama_archive_path, "wb") as f:
                  for chunk in r.iter_content(chunk_size=8192):
                      f.write(chunk)

-         # Extract zip
-         logging.info(f"Extracting {llama_zip_path} to {llama_folder_path}")
-         with zipfile.ZipFile(llama_zip_path, "r") as zip_ref:
-             zip_ref.extractall(llama_folder_path)
+         logging.info(f"Extracting {filename} to {llama_server_exe_dir}")
+         if filename.endswith(".zip"):
+             with zipfile.ZipFile(llama_archive_path, "r") as zip_ref:
+                 zip_ref.extractall(llama_server_exe_dir)
+         else:
+             raise NotImplementedError(f"Unsupported archive format: {filename}")

          # Make executable on Linux - need to update paths after extraction
          if platform.system().lower() == "linux":
              # Re-get the paths since extraction might have changed the directory structure
-             for updated_exe_path in [
-                 get_llama_server_exe_path(),
-                 get_llama_cli_exe_path(),
-             ]:
-                 if os.path.exists(updated_exe_path):
-                     os.chmod(updated_exe_path, 0o755)
-                     logging.info(f"Set executable permissions for {updated_exe_path}")
+             exe_paths = [
+                 (get_llama_server_exe_path(backend), "llama-server"),
+                 (get_llama_cli_exe_path(backend), "llama-cli"),
+             ]
+
+             for exe_path, exe_name in exe_paths:
+                 if os.path.exists(exe_path):
+                     os.chmod(exe_path, 0o755)
+                     logging.info(f"Set executable permissions for {exe_path}")
                  else:
                      logging.warning(
-                         f"Could not find llama.cpp executable at {updated_exe_path}"
+                         f"Could not find {exe_name} executable at {exe_path}"
                      )

-         # Save version.txt
-         with open(get_version_txt_path(), "w", encoding="utf-8") as vf:
-             vf.write(LLAMA_VERSION)
+         # Save version and backend info
+         with open(version_txt_path, "w", encoding="utf-8") as vf:
+             vf.write(version)
+         with open(backend_txt_path, "w", encoding="utf-8") as bf:
+             bf.write(backend)

-         # Delete zip file
-         os.remove(llama_zip_path)
-         logging.info("Cleaned up zip file")
+         # Delete the archive file
+         os.remove(llama_archive_path)


  def parse_checkpoint(checkpoint: str) -> tuple[str, str | None]:
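The reinstall decision in install_llamacpp reduces to comparing version.txt and backend.txt against the requested configuration. A condensed sketch of that check follows; needs_reinstall is a hypothetical helper used only for illustration.

```python
# Condensed sketch of the upgrade check performed in install_llamacpp above.
# needs_reinstall() is a hypothetical helper, not part of the package.
import os


def needs_reinstall(install_dir: str, version: str, backend: str) -> bool:
    version_txt = os.path.join(install_dir, "version.txt")
    backend_txt = os.path.join(install_dir, "backend.txt")
    if not (os.path.exists(version_txt) and os.path.exists(backend_txt)):
        # Fresh install, or an old layout without backend tracking
        return True
    with open(version_txt, encoding="utf-8") as f:
        installed_version = f.read().strip()
    with open(backend_txt, encoding="utf-8") as f:
        installed_backend = f.read().strip()
    # Any mismatch removes the old install and triggers a re-download
    return installed_version != version or installed_backend != backend
```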
@@ -215,10 +375,10 @@ def get_local_checkpoint_path(base_checkpoint, variant):
      full_model_path = None
      model_to_use = None
      try:
-         from huggingface_hub import snapshot_download
+         from lemonade.common.network import custom_snapshot_download

-         snapshot_path = snapshot_download(
-             repo_id=base_checkpoint,
+         snapshot_path = custom_snapshot_download(
+             base_checkpoint,
              local_files_only=True,
          )

@@ -405,10 +565,10 @@ def download_gguf(config_checkpoint, config_mmproj=None) -> dict:
      core_files, sharded_files = identify_gguf_models(checkpoint, variant, config_mmproj)

      # Download the files
-     from huggingface_hub import snapshot_download
+     from lemonade.common.network import custom_snapshot_download

-     snapshot_folder = snapshot_download(
-         repo_id=checkpoint,
+     snapshot_folder = custom_snapshot_download(
+         checkpoint,
          allow_patterns=list(core_files.values()) + sharded_files,
      )

@@ -525,6 +685,14 @@ class LlamaCppAdapter(ModelAdapter):
          try:
              # Set up environment with library path for Linux
              env = os.environ.copy()
+
+             # Load environment variables from .env file in the executable directory
+             exe_dir = os.path.dirname(self.executable)
+             env_file_path = os.path.join(exe_dir, ".env")
+             if os.path.exists(env_file_path):
+                 load_dotenv(env_file_path, override=True)
+                 env.update(os.environ)
+
              if self.lib_dir and os.name != "nt": # Not Windows
                  current_ld_path = env.get("LD_LIBRARY_PATH", "")
                  if current_ld_path:
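The .env handling added above relies on python-dotenv writing into os.environ, after which the copied environment is refreshed before the server subprocess is launched. A minimal sketch of the pattern, with an illustrative directory path:

```python
# Minimal sketch of the .env handling added above. load_dotenv() mutates
# os.environ, so the copied dict is refreshed afterwards. The path is illustrative.
import os

from dotenv import load_dotenv

env = os.environ.copy()
exe_dir = "/opt/python/vulkan/llama_server"  # illustrative executable directory
env_file_path = os.path.join(exe_dir, ".env")
if os.path.exists(env_file_path):
    load_dotenv(env_file_path, override=True)  # e.g. picks up HIP_VISIBLE_DEVICES
    env.update(os.environ)
# `env` is then passed to the llama-server subprocess when it is launched
```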
@@ -573,7 +741,7 @@ class LlamaCppAdapter(ModelAdapter):
              #
              if "llama_perf_context_print: eval time =" in line:
                  parts = line.split("=")[1].split()
-                 self.response_tokens = int(parts[3])
+                 self.response_tokens = int(parts[3]) + 1 # include first token
                  response_time_ms = float(parts[0])
                  self.tokens_per_second = (
                      1000 * self.response_tokens / response_time_ms
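For the metrics change above (counting the first generated token), here is a worked example of the parsing; the log line and numbers are illustrative of llama.cpp's eval-time output format.

```python
# Worked example of the parsing above; the log line and numbers are illustrative.
line = (
    "llama_perf_context_print: eval time =    1234.56 ms /    63 runs   "
    "(   19.60 ms per token,    51.02 tokens per second)"
)
parts = line.split("=")[1].split()
response_tokens = int(parts[3]) + 1        # 63 decoded runs + the first token = 64
response_time_ms = float(parts[0])         # 1234.56 ms
tokens_per_second = 1000 * response_tokens / response_time_ms
print(response_tokens, round(tokens_per_second, 2))  # 64 51.84
```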