gpufl 0.1.0.dev0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. gpufl/.gitignore +159 -0
  2. gpufl/__init__.py +83 -0
  3. gpufl/_gpufl_client.cp313-win_amd64.pyd +0 -0
  4. gpufl/analyzer/__init__.py +1 -0
  5. gpufl/analyzer/analyzer.py +359 -0
  6. gpufl/utils.py +19 -0
  7. gpufl/viz/__init__.py +27 -0
  8. gpufl/viz/reader.py +48 -0
  9. gpufl/viz/timeline.py +380 -0
  10. gpufl/viz/visualizer.py +194 -0
  11. gpufl-0.1.0.dev0.dist-info/METADATA +192 -0
  12. gpufl-0.1.0.dev0.dist-info/RECORD +113 -0
  13. gpufl-0.1.0.dev0.dist-info/WHEEL +5 -0
  14. gpufl-0.1.0.dev0.dist-info/licenses/LICENSE +201 -0
  15. include/gmock/gmock-actions.h +2297 -0
  16. include/gmock/gmock-cardinalities.h +159 -0
  17. include/gmock/gmock-function-mocker.h +518 -0
  18. include/gmock/gmock-matchers.h +5623 -0
  19. include/gmock/gmock-more-actions.h +658 -0
  20. include/gmock/gmock-more-matchers.h +120 -0
  21. include/gmock/gmock-nice-strict.h +277 -0
  22. include/gmock/gmock-spec-builders.h +2148 -0
  23. include/gmock/gmock.h +96 -0
  24. include/gmock/internal/custom/README.md +18 -0
  25. include/gmock/internal/custom/gmock-generated-actions.h +7 -0
  26. include/gmock/internal/custom/gmock-matchers.h +37 -0
  27. include/gmock/internal/custom/gmock-port.h +40 -0
  28. include/gmock/internal/gmock-internal-utils.h +487 -0
  29. include/gmock/internal/gmock-port.h +139 -0
  30. include/gmock/internal/gmock-pp.h +279 -0
  31. include/gpufl/backends/amd/rocm_collector.cpp +10 -0
  32. include/gpufl/backends/amd/rocm_collector.hpp +18 -0
  33. include/gpufl/backends/host_collector.hpp +150 -0
  34. include/gpufl/backends/nvidia/cuda_collector.cpp +43 -0
  35. include/gpufl/backends/nvidia/cuda_collector.hpp +16 -0
  36. include/gpufl/backends/nvidia/cupti_backend.cpp +806 -0
  37. include/gpufl/backends/nvidia/cupti_backend.hpp +164 -0
  38. include/gpufl/backends/nvidia/cupti_common.hpp +146 -0
  39. include/gpufl/backends/nvidia/cupti_utils.cpp +73 -0
  40. include/gpufl/backends/nvidia/cupti_utils.hpp +37 -0
  41. include/gpufl/backends/nvidia/kernel_launch_handler.cpp +282 -0
  42. include/gpufl/backends/nvidia/kernel_launch_handler.hpp +26 -0
  43. include/gpufl/backends/nvidia/mem_transfer_handler.cpp +237 -0
  44. include/gpufl/backends/nvidia/mem_transfer_handler.hpp +26 -0
  45. include/gpufl/backends/nvidia/nvml_collector.cpp +188 -0
  46. include/gpufl/backends/nvidia/nvml_collector.hpp +38 -0
  47. include/gpufl/backends/nvidia/resource_handler.cpp +63 -0
  48. include/gpufl/backends/nvidia/resource_handler.hpp +25 -0
  49. include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +222 -0
  50. include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +42 -0
  51. include/gpufl/core/common.cpp +45 -0
  52. include/gpufl/core/common.hpp +109 -0
  53. include/gpufl/core/debug_logger.cpp +9 -0
  54. include/gpufl/core/debug_logger.hpp +43 -0
  55. include/gpufl/core/events.hpp +253 -0
  56. include/gpufl/core/gpufl.cpp +365 -0
  57. include/gpufl/core/logger.cpp +437 -0
  58. include/gpufl/core/logger.hpp +88 -0
  59. include/gpufl/core/monitor.hpp +100 -0
  60. include/gpufl/core/monitor_backend.hpp +46 -0
  61. include/gpufl/core/ring_buffer.hpp +75 -0
  62. include/gpufl/core/runtime.cpp +6 -0
  63. include/gpufl/core/runtime.hpp +30 -0
  64. include/gpufl/core/sampler.cpp +73 -0
  65. include/gpufl/core/sampler.hpp +51 -0
  66. include/gpufl/core/scope_registry.cpp +10 -0
  67. include/gpufl/core/scope_registry.hpp +8 -0
  68. include/gpufl/core/stack_registry.hpp +47 -0
  69. include/gpufl/core/stack_trace.cpp +112 -0
  70. include/gpufl/core/stack_trace.hpp +12 -0
  71. include/gpufl/core/trace_type.hpp +13 -0
  72. include/gpufl/cuda/monitor.cpp +380 -0
  73. include/gpufl/gpufl.hpp +80 -0
  74. include/gpufl.hpp +3 -0
  75. include/gtest/gtest-assertion-result.h +237 -0
  76. include/gtest/gtest-death-test.h +345 -0
  77. include/gtest/gtest-matchers.h +923 -0
  78. include/gtest/gtest-message.h +252 -0
  79. include/gtest/gtest-param-test.h +546 -0
  80. include/gtest/gtest-printers.h +1161 -0
  81. include/gtest/gtest-spi.h +250 -0
  82. include/gtest/gtest-test-part.h +192 -0
  83. include/gtest/gtest-typed-test.h +331 -0
  84. include/gtest/gtest.h +2321 -0
  85. include/gtest/gtest_pred_impl.h +279 -0
  86. include/gtest/gtest_prod.h +60 -0
  87. include/gtest/internal/custom/README.md +44 -0
  88. include/gtest/internal/custom/gtest-port.h +37 -0
  89. include/gtest/internal/custom/gtest-printers.h +42 -0
  90. include/gtest/internal/custom/gtest.h +37 -0
  91. include/gtest/internal/gtest-death-test-internal.h +307 -0
  92. include/gtest/internal/gtest-filepath.h +227 -0
  93. include/gtest/internal/gtest-internal.h +1560 -0
  94. include/gtest/internal/gtest-param-util.h +1026 -0
  95. include/gtest/internal/gtest-port-arch.h +122 -0
  96. include/gtest/internal/gtest-port.h +2481 -0
  97. include/gtest/internal/gtest-string.h +178 -0
  98. include/gtest/internal/gtest-type-util.h +220 -0
  99. lib/cmake/GTest/GTestConfig.cmake +33 -0
  100. lib/cmake/GTest/GTestConfigVersion.cmake +43 -0
  101. lib/cmake/GTest/GTestTargets-release.cmake +49 -0
  102. lib/cmake/GTest/GTestTargets.cmake +136 -0
  103. lib/cmake/gpufl_client/gpufl_clientTargets-release.cmake +19 -0
  104. lib/cmake/gpufl_client/gpufl_clientTargets.cmake +109 -0
  105. lib/gmock.lib +0 -0
  106. lib/gmock_main.lib +0 -0
  107. lib/gpufl.lib +0 -0
  108. lib/gtest.lib +0 -0
  109. lib/gtest_main.lib +0 -0
  110. lib/pkgconfig/gmock.pc +10 -0
  111. lib/pkgconfig/gmock_main.pc +10 -0
  112. lib/pkgconfig/gtest.pc +9 -0
  113. lib/pkgconfig/gtest_main.pc +10 -0
gpufl/viz/reader.py ADDED
@@ -0,0 +1,48 @@
1
+ import glob
2
+ import json
3
+ import os
4
+ import pandas as pd
5
+ from typing import List
6
+
7
+ def _parse_line(line: str) -> dict:
8
+ try: return json.loads(line)
9
+ except: return {}
10
+
11
def read_events(file_pattern: "str | List[str]") -> List[dict]:
    """Collect every JSON-object line from the files matching a glob pattern.

    Args:
        file_pattern: A glob pattern (or path), or a list/tuple of them.

    Returns:
        A list of decoded event dicts. Files are visited in sorted order per
        pattern and each file is read at most once, even if several patterns
        match it. Lines that are blank, do not start with ``{`` or fail to
        decode are skipped.
    """
    # A single pattern (text or path-like) is treated as a one-item list so
    # that callers may also hand over several patterns at once.
    if isinstance(file_pattern, (str, os.PathLike)):
        patterns = [file_pattern]
    else:
        patterns = list(file_pattern)

    all_events = []
    visited = set()
    for pattern in patterns:
        # glob.glob returns matches in arbitrary order; sort for determinism.
        for fpath in sorted(glob.glob(os.fspath(pattern))):
            if fpath in visited or not os.path.isfile(fpath):
                continue
            visited.add(fpath)
            with open(fpath, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    # An empty string never starts with "{", so this single
                    # check also skips blank lines.
                    if not line.startswith("{"):
                        continue
                    evt = _parse_line(line)
                    if evt:
                        all_events.append(evt)
    return all_events
23
+
24
def read_df(file_pattern: str) -> pd.DataFrame:
    """Load matching log files into a DataFrame ordered by ``ts_ns``.

    Args:
        file_pattern: Glob pattern forwarded to ``read_events``.

    Returns:
        An empty DataFrame when no events were read. Otherwise one row per
        event, with ``ts_ns`` back-filled from ``ts_start_ns`` and then
        ``start_ns``, the known timestamp columns coerced to numeric, and the
        rows sorted by ``ts_ns`` with a fresh index.
    """
    records = read_events(file_pattern)
    if len(records) == 0:
        return pd.DataFrame()

    frame = pd.DataFrame(records)

    # Guarantee a primary timestamp column, even if no event carried one.
    if "ts_ns" not in frame.columns:
        frame["ts_ns"] = pd.Series([None] * len(frame), dtype="float64")

    # Back-fill the primary timestamp so interval-style events can be sorted.
    for fallback in ("ts_start_ns", "start_ns"):
        if fallback in frame.columns:
            frame["ts_ns"] = frame["ts_ns"].fillna(frame[fallback])

    # Unparseable timestamp values become NaN rather than raising.
    for column in ("ts_ns", "start_ns", "end_ns", "ts_start_ns", "ts_end_ns"):
        if column in frame.columns:
            frame[column] = pd.to_numeric(frame[column], errors="coerce")

    ordered = frame.sort_values("ts_ns")
    return ordered.reset_index(drop=True)
gpufl/viz/timeline.py ADDED
@@ -0,0 +1,380 @@
1
+ from __future__ import annotations
2
+ import json
3
+ from typing import Iterable, Optional
4
+
5
+ def _require_matplotlib():
6
+ try:
7
+ import matplotlib.pyplot as plt
8
+ return plt
9
+ except ImportError:
10
+ raise ImportError("Visualization requires matplotlib.")
11
+
12
+ def _require_pandas():
13
+ try:
14
+ import pandas as pd
15
+ return pd
16
+ except ImportError:
17
+ raise ImportError("Visualization requires pandas.")
18
+
19
+ # ==========================================
20
+ # 1. HELPERS
21
+ # ==========================================
22
+
23
+ def _ensure_event_type_col(df):
24
+ if df is None: return df
25
+ if "event_type" not in df.columns and "type" in df.columns:
26
+ df = df.copy()
27
+ df["event_type"] = df["type"]
28
+ return df
29
+
30
+ def _coerce_devices_cell(x):
31
+ if isinstance(x, list): return x
32
+ if isinstance(x, str):
33
+ try: return json.loads(x)
34
+ except: return []
35
+ return []
36
+
37
+ def _coerce_host_cell(x):
38
+ if isinstance(x, dict): return x
39
+ if isinstance(x, str):
40
+ try: return json.loads(x)
41
+ except: return {}
42
+ return {}
43
+
44
def _explode_device_samples(df, gpu_id=0):
    """Flatten per-device metrics for one GPU into a sample table.

    Args:
        df: Event frame with an ``event_type`` (or ``type``) column and,
            optionally, a ``devices`` column holding a list of per-device
            dicts (or its JSON text).
        gpu_id: Value matched against each device dict's ``"id"`` entry.

    Returns:
        A DataFrame with one row per event that carried an entry for
        ``gpu_id``: ``ts_ns``, ``util_gpu``, ``util_mem``, ``used_mib``,
        ``temp_c``, ``power_mw``, ``clk_sm``, ``pcie_rx_gbps``,
        ``pcie_tx_gbps`` and ``t_s_abs`` (seconds since the first returned
        sample). An empty, column-less DataFrame is returned when there is
        nothing to report, including when ``df`` is ``None``.
    """
    pd = _require_pandas()
    df = _ensure_event_type_col(df)
    # _ensure_event_type_col passes None through, so guard before .columns.
    if df is None or "event_type" not in df.columns:
        return pd.DataFrame()

    target_types = ["scope_sample", "system_sample", "system_start", "system_stop",
                    "kernel_start", "kernel_end", "kernel_event", "init"]
    d = df[df["event_type"].isin(target_types)].copy()
    if d.empty:
        return pd.DataFrame()

    if "devices" in d.columns:
        d["devices"] = d["devices"].apply(_coerce_devices_cell)

    def _to_gbps(value):
        # Divide by 1e9 as the original did (its comment describes this as
        # B/s -> GB/s); a null or non-numeric value counts as 0 instead of
        # raising on the division.
        try:
            return float(value) / 1e9
        except (TypeError, ValueError):
            return 0.0

    rows = []
    for _, r in d.iterrows():
        ts = r.get("ts_ns")
        # If ts_ns is missing, try start_ns (common for kernel_event).
        if pd.isna(ts):
            ts = r.get("start_ns")

        devs = r.get("devices", [])
        if not isinstance(devs, list):
            continue
        found = next(
            (dev for dev in devs if isinstance(dev, dict) and dev.get("id") == gpu_id),
            None,
        )
        if not found:
            continue

        rows.append({
            "ts_ns": ts,
            "util_gpu": found.get("util_gpu", 0),
            "util_mem": found.get("util_mem", 0),
            "used_mib": found.get("used_mib", 0),
            "temp_c": found.get("temp_c", 0),
            "power_mw": found.get("power_mw", 0),
            "clk_sm": found.get("clk_sm", 0),
            "pcie_rx_gbps": _to_gbps(found.get("pcie_rx_bw", 0)),
            "pcie_tx_gbps": _to_gbps(found.get("pcie_tx_bw", 0)),
        })

    out = pd.DataFrame(rows)
    if not out.empty:
        out = out.dropna(subset=["ts_ns"]).sort_values("ts_ns")
        min_ts = out["ts_ns"].min()
        out["t_s_abs"] = (out["ts_ns"] - min_ts) / 1e9
    return out
91
+
92
def _explode_host_samples(df):
    """Flatten host (CPU / RAM) metrics into a sample table.

    Args:
        df: Event frame with an ``event_type`` (or ``type``) column and a
            ``host`` column holding a dict (or its JSON text).

    Returns:
        A DataFrame with ``ts_ns``, ``cpu_pct``, ``ram_used_mib`` and
        ``t_s_abs`` (seconds since the first returned sample), sorted by
        time. An empty, column-less DataFrame is returned when there is
        nothing to report, including when ``df`` is ``None``.
    """
    pd = _require_pandas()
    df = _ensure_event_type_col(df)
    # _ensure_event_type_col passes None through, so guard before .columns.
    if df is None or "event_type" not in df.columns:
        return pd.DataFrame()

    target_types = ["scope_sample", "system_sample", "system_start", "system_stop",
                    "kernel_start", "kernel_event", "init", "shutdown"]
    d = df[df["event_type"].isin(target_types)].copy()
    if d.empty or "host" not in d.columns:
        return pd.DataFrame()

    d["host"] = d["host"].apply(_coerce_host_cell)

    rows = []
    for _, r in d.iterrows():
        h = r["host"]
        if not h:
            continue

        # Pick the first timestamp that is actually present. A chained
        # ``a or b or c`` does not work here: NaN is truthy, so a missing
        # ts_ns would never fall through to the alternatives.
        ts = None
        for col in ("ts_ns", "ts_start_ns", "start_ns"):
            candidate = r.get(col)
            if candidate is not None and not pd.isna(candidate):
                ts = candidate
                break

        rows.append({
            "ts_ns": ts,
            "cpu_pct": h.get("cpu_pct", 0),
            "ram_used_mib": h.get("ram_used_mib", 0),
        })

    out = pd.DataFrame(rows)
    if not out.empty:
        out = out.dropna(subset=["ts_ns"]).sort_values("ts_ns")
        out["t_s_abs"] = (out["ts_ns"] - out["ts_ns"].min()) / 1e9
    return out
120
+
121
def _reconstruct_intervals(df, start_type, end_type, name_col="name", fallback_name="Scope"):
    """Pair start/end events into ``(start_sec, dur_sec, name, metrics)`` tuples.

    Args:
        df: Event frame containing an ``event_type`` column.
        start_type: Event type that opens an interval. ``"scope_start"`` also
            accepts ``"scope_begin"``. ``"kernel_start"`` and
            ``"kernel_event"`` additionally pick up self-contained
            ``kernel_event`` rows that carry both ``start_ns`` and ``end_ns``.
        end_type: Event type that closes an interval.
        name_col: Column used to match a start with its end.
        fallback_name: Name used when ``name_col`` is absent or null.

    Returns:
        A list of tuples. ``start_sec`` is measured from the smallest
        ``ts_ns`` in ``df`` (falling back to ``start_ns``, then 0).
        ``metrics`` is populated only for self-contained ``kernel_event``
        rows and is ``{}`` otherwise. Starts with the same name are matched
        last-in-first-out; unmatched starts and ends are dropped.
    """
    pd = _require_pandas()

    # Support both "scope_start" and "scope_begin" for compatibility.
    start_types = [start_type]
    if start_type == "scope_start":
        start_types.append("scope_begin")

    target_types = start_types + [end_type]
    # kernel_event rows describe a whole interval on their own.
    if start_type in ("kernel_start", "kernel_event") and "kernel_event" not in target_types:
        target_types.append("kernel_event")

    subset = df[df["event_type"].isin(target_types)].copy()
    if subset.empty:
        return []

    # Time origin: first usable timestamp column. Tolerate frames that lack
    # ts_ns altogether instead of raising KeyError.
    min_ts = float("nan")
    for col in ("ts_ns", "start_ns"):
        if col in df.columns:
            min_ts = df[col].min()
            if not pd.isna(min_ts):
                break
    if pd.isna(min_ts):
        min_ts = 0

    intervals = []
    # Per-name stacks of open start timestamps, so nested intervals that
    # share a name are matched innermost-first.
    stacks = {}

    for _, r in subset.iterrows():
        etype = r["event_type"]
        name = r.get(name_col, fallback_name)
        if pd.isna(name):
            name = fallback_name

        if etype == "kernel_event" and "start_ns" in r and "end_ns" in r:
            start_ns = r["start_ns"]
            end_ns = r["end_ns"]
            if not pd.isna(start_ns) and not pd.isna(end_ns):
                metrics = {
                    "occupancy": r.get("occupancy", 0),
                    "grid": r.get("grid", ""),
                    "block": r.get("block", ""),
                    "num_regs": r.get("num_regs", 0),
                    "dyn_shared": r.get("dyn_shared_bytes", 0),
                    "static_shared": r.get("static_shared_bytes", 0),
                }
                intervals.append(
                    ((start_ns - min_ts) / 1e9, (end_ns - start_ns) / 1e9, name, metrics)
                )
                continue
            # Incomplete kernel_event: fall through to start/end handling.

        ts = r.get("ts_ns")
        if pd.isna(ts):
            ts = r.get("ts_start_ns")
        if pd.isna(ts):
            ts = r.get("start_ns")
        if pd.isna(ts):
            continue

        if etype in start_types:
            stacks.setdefault(name, []).append(ts)
        elif etype == end_type:
            open_starts = stacks.get(name)
            if open_starts:
                start_ns = open_starts.pop()
                intervals.append(((start_ns - min_ts) / 1e9, (ts - start_ns) / 1e9, name, {}))
    return intervals
191
+
192
+ # ==========================================
193
+ # 2. PLOTTERS
194
+ # ==========================================
195
+
196
def plot_combined_timeline(df, title="GPUFL Timeline"):
    """Draw a three-row figure: GPU metrics, PCIe bandwidth and host metrics.

    Scope starts are overlaid as red dashed lines and kernel starts/ends as
    orange lines on every row. Kernel labels are hidden until the mouse
    hovers within a few pixels of the kernel's start line.

    Args:
        df: Event frame with an ``event_type`` (or ``type``) column.
        title: Figure title.

    Returns:
        The matplotlib figure, or ``None`` (after printing an error) when the
        frame has no event type column.
    """
    pd = _require_pandas()
    plt = _require_matplotlib()

    df = _ensure_event_type_col(df)
    if df is None or "event_type" not in df.columns:
        print("[Viz] Error: No event_type column found.")
        return None

    # Shared time origin, chosen the same way _reconstruct_intervals does, so
    # that sample curves and scope/kernel markers line up on the shared
    # x-axis. (The "t_s_abs" column of the sample tables is relative to the
    # first *sample*, which may be later than the first event.)
    min_ts = float("nan")
    for col in ("ts_ns", "start_ns"):
        if col in df.columns:
            min_ts = df[col].min()
            if not pd.isna(min_ts):
                break
    if pd.isna(min_ts):
        min_ts = 0

    def _rel_seconds(samples):
        """Seconds from the shared origin for every row of a sample table."""
        return (samples["ts_ns"] - min_ts) / 1e9

    def _occupancy_of(metrics):
        """Occupancy as a positive float, or 0.0 when absent/null/NaN."""
        value = metrics.get("occupancy", 0) if metrics else 0
        try:
            value = float(value)
        except (TypeError, ValueError):
            return 0.0
        return value if value > 0 else 0.0

    # --- Prepare Data ---
    # The "scope_start" lookup already accepts "scope_begin" events, so no
    # separate retry with "scope_begin" is needed.
    scope_data = _reconstruct_intervals(df, "scope_start", "scope_end")
    if not scope_data:
        # No scopes at all: fall back to the application lifetime.
        scope_data = _reconstruct_intervals(df, "init", "shutdown", name_col="app", fallback_name="App")

    kernel_data = _reconstruct_intervals(df, "kernel_event", "kernel_end")
    if not kernel_data:
        kernel_data = _reconstruct_intervals(df, "kernel_start", "kernel_end")

    gpu_samples = _explode_device_samples(df, gpu_id=0)
    host_samples = _explode_host_samples(df)

    # --- Plotting (3 Rows) ---
    # Heights: GPU=2, PCIe=1.5, Host=2
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 10), sharex=True,
                                        gridspec_kw={'height_ratios': [2, 1.5, 2]})

    # (start line, annotation) pairs consulted by the hover handler below.
    kernel_markers = []

    def overlay_markers(ax, y_lim_ref=None):
        """Draw vertical lines for scopes and kernels on the given axis."""
        # Reference height used to position the labels.
        if y_lim_ref:
            y_top = y_lim_ref
        else:
            y_top = ax.get_ylim()[1] if len(ax.get_lines()) > 0 else 100

        # Scopes (red dashed)
        for start_sec, _dur_sec, name, _ in scope_data:
            ax.axvline(x=start_sec, color='tab:red', linestyle='--', alpha=0.6, linewidth=1)
            ax.text(start_sec, y_top * 0.95, name, rotation=90, va='top', ha='center', fontsize=7,
                    color='tab:red', alpha=0.9,
                    bbox=dict(boxstyle='round,pad=0.1', facecolor='white', alpha=0.3, edgecolor='none'))

        # Kernels (orange: solid start, dashed end)
        for start_sec, dur_sec, name, metrics in kernel_data:
            end_sec = start_sec + (dur_sec if dur_sec is not None else 0)
            vl = ax.axvline(x=start_sec, color='tab:orange', linestyle='-', linewidth=1.2, picker=True)

            # Enrich the label with occupancy if available.
            display_name = name
            occupancy = _occupancy_of(metrics)
            if occupancy > 0:
                display_name += f" ({occupancy*100:.1f}%)"

            # Created invisible; on_hover toggles visibility.
            ann = ax.annotate(display_name, xy=(start_sec, y_top * 0.85),
                              xytext=(5, 0), textcoords="offset points",
                              rotation=90, va='top', ha='left', fontsize=7,
                              color='tab:orange', fontweight='bold',
                              bbox=dict(boxstyle='round,pad=0.2', fc='yellow', alpha=0.8),
                              visible=False)
            kernel_markers.append((vl, ann))

            if dur_sec and dur_sec > 0:
                ax.axvline(x=end_sec, color='tab:orange', linestyle='--', linewidth=1.2)

    # --- Row 1: GPU Metrics ---
    if not gpu_samples.empty:
        t = _rel_seconds(gpu_samples)
        ax1.plot(t, gpu_samples["util_gpu"], label="GPU %", color='tab:green')
        ax1.plot(t, gpu_samples["util_mem"], label="Mem %", color='tab:purple', linestyle="--")

        # Optional metrics, drawn only when any non-zero value was recorded.
        if "temp_c" in gpu_samples.columns and gpu_samples["temp_c"].max() > 0:
            ax1.plot(t, gpu_samples["temp_c"], label="Temp (C)", color='tab:red', alpha=0.3)
        if "clk_sm" in gpu_samples.columns and gpu_samples["clk_sm"].max() > 0:
            # Divided by 10 so the clock shares the percentage axis.
            ax1.plot(t, gpu_samples["clk_sm"] / 10, label="SM Clock (x10 MHz)", color='tab:orange', alpha=0.3)

        # Kernel occupancy points on the timeline.
        occupancy_points = [(k[0], _occupancy_of(k[3]) * 100) for k in kernel_data if _occupancy_of(k[3]) > 0]
        if occupancy_points:
            k_t = [p[0] for p in occupancy_points]
            k_occ = [p[1] for p in occupancy_points]
            ax1.scatter(k_t, k_occ, color='tab:orange', marker='o', s=20, label="Kernel Occupancy", zorder=5)

        ax1.set_ylabel("GPU Util %")
        ax1.set_ylim(-5, 105)
        ax1.legend(loc="upper left", fontsize='x-small')

        ax1b = ax1.twinx()
        ax1b.fill_between(t, gpu_samples["used_mib"], color='tab:gray', alpha=0.1, label="VRAM Used")
        ax1b.set_ylabel("VRAM (MiB)", color='gray')
        ax1b.set_ylim(bottom=0)

    ax1.grid(True, alpha=0.3)
    ax1.set_title("GPU Metrics", fontsize=10)
    overlay_markers(ax1, y_lim_ref=105)

    # --- Row 2: PCIe Bandwidth ---
    if not gpu_samples.empty:
        t = _rel_seconds(gpu_samples)
        ax2.plot(t, gpu_samples["pcie_rx_gbps"], label="PCIe RX (Upload)", color='tab:blue')
        ax2.plot(t, gpu_samples["pcie_tx_gbps"], label="PCIe TX (Download)", color='tab:cyan', linestyle="--")

        ax2.set_ylabel("BW (GB/s)")
        # Let the upper limit autoscale but keep the minimum at 0.
        ax2.set_ylim(bottom=0)
        ax2.legend(loc="upper left", fontsize='x-small')

    ax2.grid(True, alpha=0.3)
    ax2.set_title("PCIe Bandwidth", fontsize=10)
    # No reference height: the helper derives it from the axis itself.
    overlay_markers(ax2)

    # --- Row 3: Host Metrics ---
    if not host_samples.empty:
        t_host = _rel_seconds(host_samples)
        ax3.plot(t_host, host_samples["cpu_pct"], label="CPU %", color='tab:red')
        ax3.set_ylabel("CPU Util %", color='tab:red')
        ax3.set_ylim(-5, 105)
        ax3.tick_params(axis='y', labelcolor='tab:red')
        ax3.legend(loc="upper left", fontsize='x-small')

        ax3b = ax3.twinx()
        ax3b.plot(t_host, host_samples["ram_used_mib"] / 1024, label="RAM (GiB)", color='tab:blue', linestyle="--")
        ax3b.set_ylabel("Sys RAM (GiB)", color='tab:blue')
        ax3b.tick_params(axis='y', labelcolor='tab:blue')
        ax3b.set_ylim(bottom=0)
        ax3b.legend(loc="upper right", fontsize='x-small')

    ax3.set_xlabel("Time (seconds)")
    ax3.grid(True, alpha=0.3)
    ax3.set_title("Host Metrics", fontsize=10)
    overlay_markers(ax3, y_lim_ref=105)

    # --- Hover Interaction ---
    def on_hover(event):
        if event.inaxes is None:
            return

        changed = False
        for vl, ann in kernel_markers:
            if vl.axes != event.inaxes:
                continue
            try:
                # Compare in display (pixel) space so the tolerance does not
                # depend on the zoom level.
                x_display = vl.axes.transData.transform((vl.get_xdata()[0], 0))[0]
                is_near = abs(x_display - event.x) < 5  # 5 pixels tolerance
            except Exception:
                # Best effort: a marker that cannot be hit-tested is skipped
                # rather than breaking the event callback.
                continue
            if ann.get_visible() != is_near:
                ann.set_visible(is_near)
                changed = True

        if changed:
            fig.canvas.draw_idle()

    fig.canvas.mpl_connect("motion_notify_event", on_hover)

    fig.suptitle(title, fontsize=14)
    plt.tight_layout()
    plt.subplots_adjust(hspace=0.25)
    return fig
374
+
375
+ # Legacy wrappers
376
def plot_kernel_timeline(df, title="Kernels"):
    """Legacy entry point; delegates to ``plot_combined_timeline``."""
    return plot_combined_timeline(df, title)


def plot_scope_timeline(df, title="Scopes"):
    """Legacy entry point; delegates to ``plot_combined_timeline``."""
    return plot_combined_timeline(df, title)


def plot_host_timeline(df, title="Host"):
    """Legacy entry point; delegates to ``plot_combined_timeline``."""
    return plot_combined_timeline(df, title)


def plot_memory_timeline(df, gpu_id=0, title="Mem"):
    """Legacy stub kept for API compatibility; draws nothing, returns ``None``."""
    return None


def plot_utilization_timeline(df, gpu_id=0, title="Util"):
    """Legacy stub kept for API compatibility; draws nothing, returns ``None``."""
    return None
@@ -0,0 +1,194 @@
1
+ import os
2
+ import glob
3
+ import re
4
+ import json
5
+ from typing import Optional, List, Union, Any
6
+ import pandas as pd
7
+
8
+ from .reader import read_df
9
+ from .timeline import (
10
+ plot_combined_timeline,
11
+ _explode_device_samples,
12
+ _explode_host_samples
13
+ )
14
+
15
+ # --- Global State ---
16
+ _GLOBAL_DF = None
17
+
18
+ def _require_matplotlib():
19
+ try:
20
+ import matplotlib.pyplot as plt
21
+ return plt
22
+ except ImportError as e:
23
+ raise ImportError("Visualization requires matplotlib. Run: pip install pandas matplotlib") from e
24
+
25
def _check_init():
    """Raise ``RuntimeError`` unless the module-level frame has been loaded."""
    data_loaded = _GLOBAL_DF is not None
    if data_loaded:
        return
    raise RuntimeError("Global data not loaded. Call viz.init('path/to/logs') first.")
28
+
29
def init(log_pattern: Union[str, List[str]]):
    """Load log files into the module-level DataFrame.

    Args:
        log_pattern: File path, directory, or glob pattern
            (e.g., "logs/*.log"), or a list of those. A directory is
            expanded to ``<dir>/*.log``.

    Side effects:
        Replaces the module-level frame with the loaded events sorted by
        ``ts_ns``, or resets it to ``None`` when nothing was found. Progress
        is reported with ``print``.
    """
    global _GLOBAL_DF

    def _expand(entry):
        # A directory means "every .log file directly inside it".
        return os.path.join(entry, "*.log") if os.path.isdir(entry) else entry

    if isinstance(log_pattern, (str, os.PathLike)):
        pattern = _expand(log_pattern)
        patterns = [pattern]
    else:
        # The signature allows a list: load each entry on its own so the
        # reader is always handed a single pattern.
        patterns = [_expand(entry) for entry in log_pattern]
        pattern = patterns

    print(f"Loading logs from: {pattern} ...")
    frames = [read_df(p) for p in patterns]
    frames = [frame for frame in frames if len(frame) > 0]

    if not frames:
        print("[Warn] No events found.")
        _GLOBAL_DF = None
        return

    df = frames[0] if len(frames) == 1 else pd.concat(frames, ignore_index=True)

    # Pre-convert timestamps to numeric for speed.
    for col in ["ts_ns", "start_ns", "end_ns"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    # Sort by time immediately.
    if "ts_ns" in df.columns:
        df = df.sort_values("ts_ns")

    _GLOBAL_DF = df
    print(f"Loaded {len(df)} events.")
59
+
60
def get_data() -> Any:
    """Return the module-level DataFrame of loaded events.

    Raises:
        RuntimeError: If ``init`` has not loaded any data yet.
    """
    _check_init()
    loaded_frame = _GLOBAL_DF
    return loaded_frame
64
+
65
+ def _parse_duration(duration_str: str) -> int:
66
+ """Convert '1m', '1h', '30s' to nanoseconds."""
67
+ if not duration_str: return 0
68
+ units = {"s": 1e9, "m": 60 * 1e9, "h": 3600 * 1e9, "d": 86400 * 1e9}
69
+ match = re.match(r"(\d+)([smhd])", duration_str)
70
+ if match:
71
+ val, unit = match.groups()
72
+ return int(float(val) * units[unit])
73
+ return 0
74
+
75
def show(last: Optional[str] = None, **filters):
    """
    Visualize the Stacked Timeline.

    Args:
        last: Time window from the end of the log (e.g., "1m", "30s", "1h").
            If None, shows all data. A value that cannot be parsed is
            reported and ignored.
        **filters: Key-value pairs to filter data (e.g., app="MyApp").
            A list value keeps rows matching any of its items. Keys that
            are not columns of the loaded data are ignored.

    Raises:
        RuntimeError: If ``init`` has not loaded any data yet.
        ImportError: If matplotlib is not installed.
    """
    _check_init()
    plt = _require_matplotlib()

    df = _GLOBAL_DF.copy()

    # 1. Apply Tag/App Filters
    for k, v in filters.items():
        if k in df.columns:
            df = df[df[k].isin(v)] if isinstance(v, list) else df[df[k] == v]

    if len(df) == 0:
        print("No data matching filters.")
        return

    # 2. Apply Time Window (Tail)
    if last:
        duration_ns = _parse_duration(last)
        if duration_ns > 0:
            # Not every log carries start_ns/end_ns; use what is there.
            time_cols = [c for c in ("ts_ns", "start_ns", "end_ns") if c in df.columns]

            max_ts = df["ts_ns"].max() if "ts_ns" in df.columns else float("nan")
            if pd.isna(max_ts):
                interval_cols = [c for c in ("start_ns", "end_ns") if c in df.columns]
                if interval_cols:
                    max_ts = df[interval_cols].max().max()

            # With no usable timestamp the cutoff is NaN, every comparison is
            # False and the "no data" message below is printed.
            cutoff = max_ts - duration_ns

            # Keep rows with any timestamp inside the window.
            keep = pd.Series(False, index=df.index)
            for col in time_cols:
                keep = keep | (df[col] >= cutoff)
            df = df[keep]
        else:
            print(f"[Warn] Could not parse time window '{last}'; showing all data.")

    if len(df) == 0:
        print(f"No data found in the last {last}.")
        return

    # 3. Generate Combined Plot
    fig = plot_combined_timeline(df, title=f"Timeline (last={last})" if last else "Full Timeline")

    if fig:
        plt.show()
    else:
        print("Not enough data to generate plot.")
127
+
128
def compare(group_by="app", metric="gpu", **filters):
    """
    Compare a specific metric across different groups (e.g. apps, tags).

    Each group's curve is shifted so that its own first sample is at t=0.

    Args:
        group_by: Column to group by (default: "app").
        metric: "cpu" (host CPU %), "gpu" (GPU 0 utilization %) or
            "ram" (host RAM used, MiB).
        **filters: Additional filters. A list value keeps rows matching any
            of its items. Keys that are not columns are ignored.

    Raises:
        RuntimeError: If ``init`` has not loaded any data yet.
        ImportError: If matplotlib is not installed.
    """
    _check_init()
    plt = _require_matplotlib()

    # 1. Filter
    df = _GLOBAL_DF.copy()
    for k, v in filters.items():
        if k in df.columns:
            df = df[df[k].isin(v)] if isinstance(v, list) else df[df[k] == v]

    if len(df) == 0:
        print("No data matches filters.")
        return

    if group_by not in df.columns:
        print(f"Cannot group by '{group_by}': column not found.")
        return

    groups = df[group_by].unique()
    if len(groups) == 0:
        print("No groups found.")
        return

    # metric -> (sample extractor, value column, y-axis label)
    sources = {
        "gpu": (lambda sub: _explode_device_samples(sub, gpu_id=0), "util_gpu", "Utilization %"),
        "cpu": (_explode_host_samples, "cpu_pct", "Utilization %"),
        "ram": (_explode_host_samples, "ram_used_mib", "RAM Used (MiB)"),
    }

    # 2. Collect one curve per group before touching matplotlib, so that no
    #    empty figure is created (and left open) when there is nothing to draw.
    curves = []
    source = sources.get(metric)
    if source is not None:
        extract, value_col, y_label = source
        for g in groups:
            s = extract(df[df[group_by] == g])
            if s.empty:
                continue
            # Normalize time to start at 0 for comparison.
            t_axis = (s["ts_ns"] - s["ts_ns"].min()) / 1e9
            curves.append((t_axis, s[value_col], str(g)))

    if not curves:
        print(f"No data found for metric '{metric}' in the selected groups.")
        return

    # 3. Plot
    plt.figure(figsize=(10, 5))
    for t_axis, values, label in curves:
        plt.plot(t_axis, values, label=label)

    plt.title(f"Comparison: {metric.upper()} by {group_by}")
    plt.xlabel("Time (s) [Relative start]")
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()