lemonade-sdk 8.1.8__py3-none-any.whl → 8.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic; see the registry's advisory for more details.

@@ -0,0 +1,429 @@
1
+ #
2
+ # This power profiler uses an external tool called HWiNFO.
3
+ # Please see the power profiling documentation for download and install instructions.
4
+ #
5
+ # The power profiling functionality is currently not part of our continuous integration
6
+ # testing framework, primarily due to the setup overhead required from the above three items.
7
+ # We will revisit in the future if we face issues.
8
+ #
9
+
10
+ import ctypes
11
+ from datetime import datetime
12
+ import os
13
+ import platform
14
+ import textwrap
15
+ import time
16
+ import subprocess
17
+ import psutil
18
+ import matplotlib.pyplot as plt
19
+ import numpy as np
20
+ import pandas as pd
21
+ import lemonade.common.printing as printing
22
+ from lemonade.profilers import Profiler
23
+ from lemonade.tools.report.table import LemonadePerfTable, DictListStat
24
+
25
# Polling interval (ms) passed to HWiNFO via its -poll_rate option
DEFAULT_TRACK_POWER_INTERVAL_MS = 500
# Seconds slept after starting the logger and again before stopping it
DEFAULT_TRACK_POWER_WARMUP_PERIOD = 5

# Environment variable that overrides the HWiNFO executable location
HWINFO_PATH_ENV_VAR = "HWINFO_PATH"
# Default install location of HWiNFO64 on Windows
DEFAULT_HWINFO_PATH = r"C:\Program Files\HWiNFO64\HWiNFO64.exe"
# File names (within the build directory) for the logged data and the plot
POWER_USAGE_CSV_FILENAME = "power_usage_hwinfo.csv"
POWER_USAGE_PNG_FILENAME = "power_usage_hwinfo.png"
32
+
33
+
34
class Keys:
    """Stat keys used to store power-profiling results in the build state."""

    # Path to the file containing the power usage plot
    POWER_USAGE_PLOT = "power_usage_plot_hwinfo"
    # Per-stage power data: a list of dicts with name/duration/energy fields
    POWER_USAGE_DATA = "power_usage_data_hwinfo"
    # Path to the CSV file containing the raw HWiNFO measurements
    POWER_USAGE_DATA_CSV = "power_usage_data_file_hwinfo"
    # Maximum power consumed by the CPU processor package during the tools sequence
    PEAK_PROCESSOR_PACKAGE_POWER = "peak_processor_package_power_hwinfo"
43
+
44
+
45
# Add a column to the Lemonade performance report table that renders the
# per-stage power data recorded by this profiler (one "name: duration, energy"
# entry per stage, formatted from the dicts stored under POWER_USAGE_DATA).
LemonadePerfTable.table_descriptor["stat_columns"].append(
    DictListStat(
        "Power Usage (HWiNFO)",
        Keys.POWER_USAGE_DATA,
        [
            ("name", "{0}:"),
            ("duration", "{0:.1f}s,"),
            ("energy consumed", "{0:.1f} J"),
        ],
    )
)
57
+
58
+
59
def is_user_admin() -> bool:
    """Return True only on Windows when the current user has Administrator rights."""
    if platform.system() != "Windows":
        return False
    try:
        # ctypes.windll exists only on Windows; IsUserAnAdmin() returns 1 for admins
        admin_flag = ctypes.windll.shell32.IsUserAnAdmin()
    except AttributeError:
        return False
    return admin_flag == 1
68
+
69
+
70
def is_process_running(executable_name):
    """Return True if a process with the given executable name is running.

    The comparison is case-insensitive. Processes whose name cannot be read
    are skipped rather than crashing the scan.
    """
    target = executable_name.lower()
    for process in psutil.process_iter(["pid", "name"]):
        name = process.info["name"]
        # name can be None for processes we lack permission to inspect
        # (access denied / zombie entries); the original .lower() call
        # would raise AttributeError in that case.
        if name is not None and name.lower() == target:
            return True
    return False
77
+
78
+
79
def read_data_from_csv(csv_path, columns_dict, encoding="utf-8") -> pd.DataFrame:
    """Load the HWiNFO CSV, keeping only the columns named in columns_dict.

    columns_dict maps short names to the full column headers found in the
    CSV; the returned DataFrame is renamed to use the short names. Returns
    None if the file is missing or cannot be parsed.
    """
    wanted = set(columns_dict.values())
    try:
        # Read just the header row to discover which wanted columns exist
        header = pd.read_csv(csv_path, nrows=0, encoding=encoding).columns
        present = list(wanted & set(header))
        frame = pd.read_csv(csv_path, usecols=present, encoding=encoding)
    except FileNotFoundError as e:
        printing.log_info(f"Power profiler file not found: {e.filename}")
        return None
    except ValueError as e:
        printing.log_info(f"Error reading power data from {csv_path}: {e}")
        return None

    # Map the full column headers back to their short names
    short_names = {full: short for short, full in columns_dict.items() if full in present}
    frame.rename(columns=short_names, inplace=True)

    return frame
98
+
99
+
100
class HWINFOPowerProfiler(Profiler):
    """Profiler that records system power data by running the HWiNFO tool.

    Launches HWiNFO64 as a subprocess with CSV logging enabled, then parses
    the logged CSV with pandas and renders a three-panel matplotlib plot
    (package power, NPU/GPU clocks + CPU usage, and limit/throttle signals).
    Windows-only; requires Administrator privileges.
    """

    unique_name = "power-hwinfo"

    # mapping from short name to full name of the measurement in the CSV file produced by HWiNFO
    columns_dict = {
        "time": "Time",
        "cpu_package_power": "CPU Package Power [W]",
        "npu_clock": "NPU Clock [MHz]",
        "gpu_clock": "GPU Clock [MHz]",
        "total_cpu_usage": "Total CPU Usage [%]",
        "apu_stapm_limit": "APU STAPM Limit [%]",
        "cpu_tdc_limit": "CPU TDC Limit [%]",
        "cpu_edc_limit": "CPU EDC Limit [%]",
        "cpu_ppt_fast_limit": "CPU PPT FAST Limit [%]",
        "cpu_ppt_slow_limit": "CPU PPT SLOW Limit [%]",
        "thermal_limit": "Thermal Limit [%]",
    }

    @staticmethod
    def time_to_seconds(time_str):
        """Convert an "HH:MM:SS.fraction" time string to seconds since midnight.

        Raises ValueError if time_str is not a string (the TypeError from
        strptime is re-raised as ValueError so the caller can catch one type).
        """
        # Parse the time string
        try:
            time_obj = datetime.strptime(time_str, "%H:%M:%S.%f")
        except TypeError:
            raise ValueError(f"Could not parse {time_str}")

        # Calculate the total seconds
        total_seconds = (
            time_obj.hour * 3600
            + time_obj.minute * 60
            + time_obj.second
            + time_obj.microsecond / 1_000_000
        )
        return total_seconds

    @staticmethod
    def add_arguments_to_parser(parser):
        """Register the --power-hwinfo CLI flag with an optional warmup period."""
        parser.add_argument(
            f"--{HWINFOPowerProfiler.unique_name}",
            nargs="?",
            metavar="WARMUP_PERIOD",
            type=int,
            default=None,
            # Value used when the flag is given without an argument
            const=DEFAULT_TRACK_POWER_WARMUP_PERIOD,
            help="Track power consumption using the HWiNFO application and plot the results. "
            "HWiNFO is a commercial product from a third party (https://www.hwinfo.com/) "
            "and should be acquired/licensed appropriately. "
            "Optionally, set the warmup period in seconds "
            f"(default: {DEFAULT_TRACK_POWER_WARMUP_PERIOD}). If the application is not "
            f"installed at {DEFAULT_HWINFO_PATH}, set the {HWINFO_PATH_ENV_VAR} environment "
            f"variable to point at it. This is a Windows only feature and Lemonade must be run "
            f"from a CMD window with Administrator privileges.",
        )

    def __init__(self, parser_arg_value):
        """Store the warmup period (seconds) and initialize tracking state."""
        super().__init__()
        self.warmup_period = parser_arg_value
        self.status_stats += [Keys.PEAK_PROCESSOR_PACKAGE_POWER, Keys.POWER_USAGE_PLOT]
        self.tracking_active = False
        self.build_dir = None
        self.csv_path = None
        self.hwinfo_process = None
        self.data = None

    def start(self, build_dir):
        """Launch HWiNFO logging to a CSV in build_dir, then sleep the warmup period.

        Raises RuntimeError if already tracking, not on Windows, not an
        Administrator, the CSV path contains a space, or HWiNFO is already
        running; raises FileNotFoundError if the executable is missing.
        """
        if self.tracking_active:
            raise RuntimeError("Cannot start power tracking while already tracking")

        if platform.system() != "Windows":
            raise RuntimeError("Power usage tracking is only enabled in Windows.")

        # Check that the user is running in Admin mode
        if not is_user_admin():
            raise RuntimeError(
                "For power usage tracking, run Lemonade as an Administrator."
            )

        # Save the folder where data and plot will be stored
        self.build_dir = build_dir

        # The csv file where power data will be stored
        self.csv_path = os.path.join(build_dir, POWER_USAGE_CSV_FILENAME)
        if " " in self.csv_path:
            raise RuntimeError(
                "Can't log HWiNFO data to a file with a <space> in the path. "
                "Please use the `-d` flag to specify a Lemonade cache path with no spaces."
            )

        # See if the HWINFO_PATH environment variable exists
        # If so, use it instead of the default path
        if HWINFO_PATH_ENV_VAR in os.environ:
            hwinfo_path = os.getenv(HWINFO_PATH_ENV_VAR)
        else:
            hwinfo_path = DEFAULT_HWINFO_PATH

        # Check the HWINFO executable exists
        if not os.path.isfile(hwinfo_path):
            raise FileNotFoundError(hwinfo_path)

        # Check that executable is not already running
        executable = hwinfo_path.split(os.sep)[-1]
        if is_process_running(executable):
            raise RuntimeError(
                f"{executable} is already running. Quit it and try again."
            )

        # Start HWiNFO executable
        # NOTE(review): -l<file> and -poll_rate=<ms> are presumed HWiNFO CLI
        # options for CSV logging and sampling rate -- confirm against the
        # HWiNFO documentation if the tool version changes.
        try:
            command = [
                hwinfo_path,
                f"-l{self.csv_path}",
                f"-poll_rate={DEFAULT_TRACK_POWER_INTERVAL_MS}",
            ]
            self.hwinfo_process = subprocess.Popen(
                command,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except OSError as e:
            # WinError 740 = "requested operation requires elevation"
            if "[WinError 740]" in str(e):
                print(
                    "\nTo avoid `requested operation requires elevation` error, please make sure"
                )
                print(
                    "HWiNFO.exe has Properties->Compatibility->`Run this program as an "
                    "administrator` checked."
                )
                print(
                    "You may also need to set Windows User Account Control to `Never notify`.\n"
                )
            raise
        self.tracking_active = True
        # Warmup: let HWiNFO settle and collect baseline samples before tools run
        time.sleep(self.warmup_period)

    def stop(self):
        """Stop tracking: sleep a symmetric cool-down period, then terminate HWiNFO."""
        if self.tracking_active:
            self.tracking_active = False
            time.sleep(self.warmup_period)
            self.hwinfo_process.terminate()
            self.hwinfo_process.wait()

    def generate_results(self, state, timestamp, start_times):
        """Parse the logged CSV, plot power/clock/limit data, and save stats.

        state: build state object used to persist stats.
        timestamp: string prefixed to the saved PNG file names.
        start_times: mapping of tool name -> wall-clock start time (epoch
        seconds), used to split the power trace into per-tool stages.
        """
        # Nothing to report if HWiNFO was never launched
        if self.hwinfo_process is None:
            return

        if self.tracking_active:
            self.stop()

        # HWiNFO writes latin1-encoded CSV
        df = read_data_from_csv(self.csv_path, self.columns_dict, encoding="latin1")
        if df is None:
            state.save_stat(Keys.POWER_USAGE_PLOT, "NONE")
            return

        # Remap time to seconds from start of profiling data
        # Remap csv data time to elapsed seconds (i.e., subtract out initial time)
        try:
            initial_data_time = self.time_to_seconds(df["time"].iloc[0])
            df["time"] = df["time"].apply(
                lambda x: (self.time_to_seconds(x) - initial_data_time)
            )
        except ValueError as e:
            printing.log_info(
                f"Badly formatted time data in {self.csv_path}: {e}. "
                f"HWiNFO may have closed unexpectedly."
            )
            state.save_stat(Keys.POWER_USAGE_PLOT, "NONE")
            return

        # Make time 0 the time of the first tool starting (after the warmup period)
        if start_times:
            tool_start_times = sorted(start_times.values())
            # First tool after warmup (if no tools, then will be time of start of cool down)
            # NOTE(review): index [1] assumes start_times always contains the
            # warmup entry first; raises IndexError with fewer than 2 entries
            # -- confirm against the caller's contract.
            first_tool_time = tool_start_times[1]

            # Map the measurement data so that zero in the measurement data aligns with
            # the first_tool_time
            #
            # Find the difference between the timestamp first_tool_time and initial_data_time
            # which is a count of seconds since midnight
            #
            # Find midnight prior to first_tool_time
            t = time.localtime(first_tool_time)
            since_midnight = (
                t.tm_hour * 3600 + t.tm_min * 60 + t.tm_sec + (first_tool_time % 1)
            )
            delta = since_midnight - initial_data_time
            df["time"] = df["time"] - delta

        peak_power = max(df["cpu_package_power"])

        # Create a figure with three stacked panels
        fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(16, 8))

        if start_times:
            # Order tools by their start time
            tool_starts = sorted(start_times.items(), key=lambda item: item[1])
            tool_name_list = [item[0] for item in tool_starts]

            # Adjust to common time frame as power measurements
            tool_start_list = [
                max(df["time"].iloc[0], item[1] - first_tool_time)
                for item in tool_starts
            ]
            # Each stage ends where the next begins; the last ends at the final sample
            tool_stop_list = tool_start_list[1:] + [df["time"].values[-1]]

            # Extract power data time series
            x_time = df["time"].to_numpy()
            y_power = df["cpu_package_power"].to_numpy()

            # Extract data for each stage in the build
            self.data = []
            for name, t0, tf in zip(tool_name_list, tool_start_list, tool_stop_list):
                # Samples inside [t0, tf], with interpolated endpoints added
                x = x_time[(x_time >= t0) * (x_time <= tf)]
                x = np.insert(x, 0, t0)
                x = np.insert(x, len(x), tf)
                y = np.interp(x, x_time, y_power)
                # Energy (J) = integral of power (W) over time (s)
                energy = np.trapz(y, x)
                avg_power = energy / (tf - t0)
                stage = {
                    "name": name,
                    "t": x.tolist(),
                    "power": y.tolist(),
                    "duration": float(tf - t0),
                    "energy consumed": float(energy),
                    "average power": float(avg_power),
                }
                self.data.append(stage)

            for stage in self.data:
                # Plot power usage time series
                p = ax1.plot(
                    stage["t"],
                    stage["power"],
                    label=f"{stage['name']} ({stage['duration']:.1f}s, "
                    f"{stage['energy consumed']:0.1f} J)",
                )
                # Add a dashed line to show average power
                ax1.plot(
                    [stage["t"][0], stage["t"][-1]],
                    [stage["average power"], stage["average power"]],
                    linestyle="--",
                    c=p[0].get_c(),
                )
                # Add average power text to plot
                ax1.text(
                    stage["t"][0],
                    stage["average power"],
                    f"{stage['average power']:.1f} W ",
                    horizontalalignment="right",
                    verticalalignment="center",
                    c=p[0].get_c(),
                )
        else:
            # No per-tool breakdown available; plot the whole trace
            ax1.plot(
                df["time"],
                df["cpu_package_power"],
            )
        # Add title and labels to plots
        ax1.set_ylabel(self.columns_dict["cpu_package_power"])
        title_str = "HWiNFO Stats\n" + "\n".join(textwrap.wrap(state.build_name, 60))
        ax1.set_title(title_str)
        ax1.legend()
        ax1.grid(True)

        # Create second plot: NPU/GPU clocks, with CPU usage on a twin axis
        ax2.plot(
            df["time"],
            df["npu_clock"],
            label=self.columns_dict["npu_clock"],
        )
        ax2.plot(
            df["time"],
            df["gpu_clock"],
            label=self.columns_dict["gpu_clock"],
        )
        ax2.set_xlabel("Time [s]")
        ax2.set_ylabel("Clock Frequency [MHz]")
        ax2.legend(loc=2)
        ax2.grid(True)
        # Add second y-axis for %
        ax2_twin = ax2.twinx()
        ax2_twin.plot(
            df["time"],
            df["total_cpu_usage"],
            label=self.columns_dict["total_cpu_usage"],
            c="g",
        )
        ax2_twin.set_ylim([0, 100])
        vals = ax2_twin.get_yticks()
        ax2_twin.set_yticks(vals)
        ax2_twin.set_yticklabels([f"{v:.0f}%" for v in vals])
        ax2_twin.legend(loc=1)

        # Create third plot (all remaining columns)
        plot3_columns = [
            "apu_stapm_limit",
            "cpu_tdc_limit",
            "cpu_edc_limit",
            "cpu_ppt_fast_limit",
            "cpu_ppt_slow_limit",
            "thermal_limit",
        ]
        for col_str in plot3_columns:
            # Limit columns may be absent depending on the HWiNFO configuration
            if col_str in df.columns:
                ax3.plot(
                    df["time"],
                    df[col_str],
                    label=self.columns_dict[col_str],
                )
        ax3.set_xlabel("Time [s]")
        ax3.set_ylim([0, 100])
        vals = ax3.get_yticks()
        ax3.set_yticks(vals)
        ax3.set_yticklabels([f"{v:.0f}%" for v in vals])
        # Only show a legend if at least one limit column was plotted
        if len(ax3.lines):
            ax3.legend()
        ax3.grid(True)

        # Save plot to current folder AND save to cache
        plot_path = os.path.join(
            self.build_dir, f"{timestamp}_{POWER_USAGE_PNG_FILENAME}"
        )
        fig.savefig(plot_path, dpi=300, bbox_inches="tight")
        plot_path = os.path.join(os.getcwd(), f"{timestamp}_{POWER_USAGE_PNG_FILENAME}")
        fig.savefig(plot_path, dpi=300, bbox_inches="tight")

        state.save_stat(Keys.POWER_USAGE_PLOT, plot_path)
        state.save_stat(Keys.POWER_USAGE_DATA, self.data)
        state.save_stat(Keys.POWER_USAGE_DATA_CSV, self.csv_path)
        state.save_stat(Keys.PEAK_PROCESSOR_PACKAGE_POWER, f"{peak_power:0.1f} W")
@@ -14,8 +14,8 @@ from lemonade.common.system_info import get_system_info
14
14
 
15
15
  from dotenv import set_key, load_dotenv
16
16
 
17
- LLAMA_VERSION_VULKAN = "b6097"
18
- LLAMA_VERSION_ROCM = "b1021"
17
+ LLAMA_VERSION_VULKAN = "b6431"
18
+ LLAMA_VERSION_ROCM = "b1057"
19
19
 
20
20
 
21
21
  def identify_rocm_arch_from_name(device_name: str) -> str | None:
@@ -500,7 +500,7 @@ def get_local_checkpoint_path(base_checkpoint, variant):
500
500
 
501
501
 
502
502
  def identify_gguf_models(
503
- checkpoint: str, variant: str, mmproj: str
503
+ checkpoint: str, variant: Optional[str], mmproj: str
504
504
  ) -> tuple[dict, list[str]]:
505
505
  """
506
506
  Identifies the GGUF model files in the repository that match the variant.
@@ -510,12 +510,14 @@ def identify_gguf_models(
510
510
  The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
511
511
 
512
512
  The VARIANT format can be one of several types:
513
+ 0. wildcard (*): download all files in the repo
513
514
  1. Full filename: exact file to download
514
515
  2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
515
516
  3. Quantization variant: find a single file ending with the variant name (case insensitive)
516
517
  4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
517
518
 
518
519
  Examples:
520
+ - "ggml-org/gpt-oss-120b-GGUF:*" -> downloads all files in repo
519
521
  - "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
520
522
  - "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
521
523
  - "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
@@ -527,8 +529,17 @@ def identify_gguf_models(
527
529
  repo_files = list_repo_files(checkpoint)
528
530
  sharded_files = []
529
531
 
532
+ # (case 0) Wildcard, download everything
533
+ if variant and variant == "*":
534
+ sharded_files = repo_files
535
+
536
+ # Sort to ensure consistent ordering
537
+ sharded_files.sort()
538
+
539
+ # Use first file as primary (this is how llamacpp handles it)
540
+ variant_name = sharded_files[0]
530
541
  # (case 1) If variant ends in .gguf, use it directly
531
- if variant and variant.endswith(".gguf"):
542
+ elif variant and variant.endswith(".gguf"):
532
543
  variant_name = variant
533
544
  if variant_name not in repo_files:
534
545
  raise ValueError(
@@ -74,6 +74,17 @@ def _get_npu_driver_version():
74
74
  return None
75
75
 
76
76
 
77
+ def _compare_driver_versions(current_version, required_version):
78
+ """
79
+ Compare two driver version strings.
80
+ Returns True if current_version >= required_version, False otherwise.
81
+ Uses packaging.version for proper semantic version comparison.
82
+ """
83
+ from packaging.version import Version
84
+
85
+ return Version(current_version) >= Version(required_version)
86
+
87
+
77
88
  def import_error_heler(e: Exception):
78
89
  """
79
90
  Print a helpful message in the event of an import error
@@ -343,11 +354,13 @@ class OgaLoad(FirstTool):
343
354
  )
344
355
  _open_driver_install_page()
345
356
 
346
- elif current_driver_version != required_driver_version:
357
+ elif not _compare_driver_versions(
358
+ current_driver_version, required_driver_version
359
+ ):
347
360
  printing.log_warning(
348
361
  f"Incorrect NPU driver version detected: {current_driver_version}\n"
349
362
  f"{device.upper()} inference with RyzenAI 1.5.0 requires driver "
350
- f"version {required_driver_version}.\n"
363
+ f"version {required_driver_version} or higher.\n"
351
364
  "Please download and install the correct NPU Driver from:\n"
352
365
  f"{NPU_DRIVER_DOWNLOAD_URL}\n"
353
366
  "NPU functionality may not work properly."
@@ -162,21 +162,12 @@ class LlamaServer(WrappedServer):
162
162
  # Add port and jinja to enable tool use
163
163
  base_command.extend(["--port", str(self.port), "--jinja"])
164
164
 
165
- # Disable jinja for gpt-oss-120b on Vulkan
166
- if (
167
- self.backend == "vulkan"
168
- and "gpt-oss-120b" in snapshot_files["variant"].lower()
169
- ):
170
- base_command.remove("--jinja")
171
- logging.warning(
172
- "Jinja is disabled for gpt-oss-120b on Vulkan due to a llama.cpp bug "
173
- "(see https://github.com/ggml-org/llama.cpp/issues/15274). "
174
- "The model cannot use tools. If needed, use the ROCm backend instead."
175
- )
165
+ # Enable context shift and avoid attention sink issues by preserving the initial tokens
166
+ base_command.extend(["--context-shift", "--keep", "16"])
176
167
 
177
168
  # Use legacy reasoning formatting, since not all apps support the new
178
169
  # reasoning_content field
179
- base_command.extend(["--reasoning-format", "none"])
170
+ base_command.extend(["--reasoning-format", "auto"])
180
171
 
181
172
  # Add embeddings support if the model supports it
182
173
  if supports_embeddings: