nsight-python 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nsight/utils.py ADDED
@@ -0,0 +1,320 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import functools
5
+ import re
6
+ import subprocess
7
+ import sys
8
+ from dataclasses import dataclass
9
+ from itertools import islice
10
+ from typing import Any, Iterator
11
+
12
+ from nsight.exceptions import CUDA_CORE_UNAVAILABLE_MSG, NCUErrorContext
13
+
14
+ # Try to import cuda-core (optional dependency)
15
# cuda-core is an *optional* dependency. Record whether it imported cleanly and
# stub every name to None on failure so that guarded call sites can still
# reference them without a NameError.
try:
    from cuda.core.experimental import (
        Device,
        LaunchConfig,
        Program,
        ProgramOptions,
        launch,
    )

    CUDA_CORE_AVAILABLE = True
except ImportError:
    CUDA_CORE_AVAILABLE = False
    Device = LaunchConfig = Program = ProgramOptions = launch = None

# NVTX domain name under which this package emits ranges/markers.
NVTX_DOMAIN = "nsight-python"
34
+
35
+
36
class row_panel:
    """Marker class with no behavior of its own.

    NOTE(review): presumably used as a sentinel selecting a row-wise panel
    layout elsewhere in the package — confirm at call sites.
    """

    pass
38
+
39
+
40
class col_panel:
    """Marker class with no behavior of its own.

    NOTE(review): presumably used as a sentinel selecting a column-wise panel
    layout elsewhere in the package — confirm at call sites.
    """

    pass
42
+
43
+
44
class Colors:
    """For colorful printing."""

    # ANSI SGR escape sequences for terminal colors/attributes.
    # ENDC resets all attributes back to the terminal default.
    HEADER = "\033[95m"
    BLUE = "\033[0;34m"
    CYAN = "\033[0;36m"
    GREEN = "\033[0;32m"
    ORANGE = "\033[0;33m"
    RED = "\033[0;31m"
    PURPLE = "\033[0;35m"
    ENDC = "\033[0m"
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
57
+
58
+
59
def purple(msg: str) -> str:  # pragma: no cover
    """Return ``msg`` wrapped in ANSI purple color codes."""
    return f"{Colors.PURPLE}{msg}{Colors.ENDC}"
62
+
63
+
64
+ # ------------------------------------------------------------------------------
65
+
66
+
67
@functools.lru_cache
def get_dummy_kernel_module() -> Any:
    """
    Return a compiled module containing a no-op kernel (cached after first call).

    When a provider fails but benchmarking should continue, this dummy kernel is
    launched so the later analysis of the ncu-report still finds the expected
    number of measured kernels per provider. The measured runtime of this kernel
    is ignored; the failed run is ultimately reported as NaN.

    Raises:
        ImportError: If cuda-core is not installed.
    """
    if not CUDA_CORE_AVAILABLE:
        raise ImportError(CUDA_CORE_UNAVAILABLE_MSG)
    source = "__global__ void dummy_kernel_failure() {}"
    options = ProgramOptions(std="c++17")
    program = Program(source, code_type="c++", options=options)
    # Compile to a cubin, keeping the mangled name resolvable via the expression.
    return program.compile("cubin", name_expressions=("dummy_kernel_failure",))
86
+
87
+
88
def launch_dummy_kernel_module() -> None:
    """
    Launch the no-op dummy kernel once on the current device and wait for it.

    Raises:
        ImportError: If cuda-core is not installed.
    """
    if not CUDA_CORE_AVAILABLE:
        raise ImportError(CUDA_CORE_UNAVAILABLE_MSG)
    device = Device()
    device.set_current()
    work_stream = device.create_stream()
    module = get_dummy_kernel_module()
    dummy_kernel = module.get_kernel("dummy_kernel_failure")
    cfg = LaunchConfig(grid=1, block=256)
    launch(work_stream, cfg, dummy_kernel)
    # Block until the kernel has actually executed.
    work_stream.sync()
105
+
106
+
107
def format_time(seconds: float) -> str:
    """Render ``seconds`` as a zero-padded ``HH:MM:SS`` string (fraction truncated)."""
    total = int(seconds)
    hours = total // 3600
    minutes = (total % 3600) // 60
    secs = total % 60
    return f"{hours:02}:{minutes:02}:{secs:02}"
112
+
113
+
114
+ # Sincerely stolen (and adjusted) from attention-gym
115
def print_header(*lines: str) -> None:
    """Print ``lines`` centered inside a purple box of at least 80 columns."""
    longest = max(len(line) for line in lines)
    width = max(80, longest + 4)
    horizontal = "═" * (width - 2)
    print(purple("╔" + horizontal + "╗"))
    for line in lines:
        print(purple(f"║ {line.center(width - 4)} ║"))
    print(purple("╚" + horizontal + "╝"))
121
+
122
+
123
@dataclass
class NCUActionData:
    """One measured NCU action plus the clock/GPU configuration it was captured under."""

    name: str           # action (kernel) name
    value: Any          # measured metric value
    compute_clock: int  # compute clock during capture
    memory_clock: int   # memory clock during capture
    gpu: str            # GPU identifier

    @staticmethod
    def combine(value_reduce_op: Any) -> Any:
        """
        Return a binary function that merges two NCUActionData objects into a
        new one, reducing their values with ``value_reduce_op``.
        """

        def merge(lhs: "NCUActionData", rhs: "NCUActionData") -> "NCUActionData":
            # Only actions captured under identical clock/GPU state may be combined.
            assert lhs.compute_clock == rhs.compute_clock
            assert lhs.memory_clock == rhs.memory_clock
            assert lhs.gpu == rhs.gpu
            return NCUActionData(
                f"{lhs.name}|{rhs.name}",
                value_reduce_op(lhs.value, rhs.value),
                lhs.compute_clock,
                lhs.memory_clock,
                lhs.gpu,
            )

        return merge
151
+
152
+
153
def print_progress_bar(
    total_runs: int,
    curr_run: int,
    bar_length: int,
    avg_time_per_run: float,
    overwrite_output: bool,
) -> None:
    """
    Prints a dynamic progress bar to the terminal.

    Args:
        total_runs: Total number of runs to execute.
        curr_run: Current run index.
        bar_length: Length of the progress bar in characters.
        avg_time_per_run: Average time taken per run, used to estimate remaining time.
        overwrite_output: Controls how configurations are printed:
            - **True**: Overwrites the existing progress bar
            - **False**: Writes a new progress bar
    """
    eta = format_time(avg_time_per_run * (total_runs - curr_run))
    fraction = curr_run / total_runs
    filled = int(bar_length * fraction)
    bar = "█" * filled + "-" * (bar_length - filled)
    status = f"Progress: [{bar}] {fraction * 100:.2f}% | Estimated time remaining: {eta}"

    if overwrite_output:
        # Move the cursor up one line, clear it, then redraw the bar in place.
        sys.stdout.write("\033[1A")
        sys.stdout.write("\033[2K\r")
        sys.stdout.write(status + "\n")
        sys.stdout.flush()
    else:
        print(status)
191
+
192
+
193
def print_config(
    total_configs: int, curr_config: int, c: Any, overwrite_output: bool
) -> None:
    """
    Prints the current configuration being profiled.

    Args:
        total_configs: Total number of configurations.
        curr_config: Current configuration index.
        c: The current configuration parameters.
        overwrite_output: Controls how configurations are printed:
            - **True**: The configuration is updated in-place
            - **False**: Each configuration is printed on a new line
    """
    rendered = f"Config {curr_config}/{total_configs}: {str([str(item) for item in c])}"

    if not overwrite_output:
        print_header(rendered)
        return

    # Move the cursor up two lines, clear, and rewrite the config line in place.
    sys.stdout.write("\033[2F")
    sys.stdout.write("\033[2K\r")
    sys.stdout.write(rendered + "\n\n")
    sys.stdout.flush()
217
+
218
+
219
def batched(iterable: Any, n: int) -> Iterator[tuple[Any, ...]]:
    """
    Batch an iterable into tuples of at most ``n`` elements.

    This is a minimal backport of :func:`itertools.batched` for Python 3.10
    and 3.11, where the standard library implementation is not available.
    The final batch may be shorter than ``n``.

    Args:
        iterable: Any iterable to split into batches.
        n: Maximum batch size; must be at least 1.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        # Fixed typo in the user-facing message ("atleast" -> "at least"),
        # matching itertools.batched's wording.
        raise ValueError("n must be at least 1")

    iterator = iter(iterable)
    # islice yields up to n items per pass; the walrus loop stops on the
    # first empty tuple (iterator exhausted).
    while batch := tuple(islice(iterator, n)):
        yield batch
232
+
233
+
234
class LogParser:
    """
    Base class for parsing the log files
    """

    def parse_logs(self, log_file_path: str) -> dict[str, list[str]]:
        """
        Parses the log file and returns a list of log entries.

        The base implementation parses nothing and returns an empty mapping;
        subclasses override this with format-specific parsing.

        Args:
            log_file_path: Path to the log file.
        """
        empty: dict[str, list[str]] = {}
        return empty
247
+
248
+
249
class NCULogParser(LogParser):
    """
    Parse NCU log file.
    """

    def parse_logs(self, log_file_path: str) -> dict[str, list[str]]:
        """
        Parses the NCU log file and returns a dictionary of log entries categorized by their type.

        Args:
            log_file_path: Path to the NCU log file.
        """
        # One compiled pattern per NCU message category; insertion order
        # (ERROR, PROF, WARNING) also fixes the match priority per line.
        patterns = {
            "ERROR": re.compile(r"^==ERROR==\s+(.*)$"),
            "PROF": re.compile(r"^==PROF==\s+(.*)$"),
            "WARNING": re.compile(r"^==WARNING==\s+(.*)$"),
        }
        entries: dict[str, list[str]] = {category: [] for category in patterns}

        with open(log_file_path, "r") as log_file:
            for raw_line in log_file:
                stripped = raw_line.strip()
                for category, pattern in patterns.items():
                    matched = pattern.match(stripped)
                    if matched:
                        entries[category].append(matched.group(1))
                        break

        return entries

    def get_logs(self, log_file_path: str, category: str) -> list[str]:
        """
        Returns log entries of a specific category from the NCU log file.

        Args:
            log_file_path: Path to the NCU log file.
            category: Category of logs (e.g., "ERROR", "PROF").
        """
        return self.parse_logs(log_file_path).get(category, [])
293
+
294
+
295
def format_ncu_error_message(context: NCUErrorContext) -> str:
    """
    Format NCU error context into user-friendly error message.

    Args:
        context: The error context containing all relevant information.
    """
    # Substring NCU emits when a requested metric name is unknown.
    invalid_metric_hint = "Failed to find metric"

    # FIXME: To support multiple metrics in future, parse error message itself to extract the invalid metric name and display appropriate messages.
    parts = ["PROFILING FAILED \nErrors:"]

    first_error_is_invalid_metric = bool(context.errors) and (
        invalid_metric_hint in context.errors[0]
    )
    if first_error_is_invalid_metric:
        parts.append(
            f"Invalid value '{context.metric}' for 'metric' parameter for nsight.analyze.kernel(). "
            f"\nPlease refer ncu --query-metrics for list of supported metrics."
        )
    else:
        parts.append("\n".join(f"- {error}" for error in context.errors))

    parts.append(
        f"\nRefer Nsight Compute CLI log file: {context.log_file_path} for more details."
    )

    return "\n".join(parts)