gpufl 0.1.0.dev0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpufl/.gitignore +159 -0
- gpufl/__init__.py +83 -0
- gpufl/_gpufl_client.cp313-win_amd64.pyd +0 -0
- gpufl/analyzer/__init__.py +1 -0
- gpufl/analyzer/analyzer.py +359 -0
- gpufl/utils.py +19 -0
- gpufl/viz/__init__.py +27 -0
- gpufl/viz/reader.py +48 -0
- gpufl/viz/timeline.py +380 -0
- gpufl/viz/visualizer.py +194 -0
- gpufl-0.1.0.dev0.dist-info/METADATA +192 -0
- gpufl-0.1.0.dev0.dist-info/RECORD +113 -0
- gpufl-0.1.0.dev0.dist-info/WHEEL +5 -0
- gpufl-0.1.0.dev0.dist-info/licenses/LICENSE +201 -0
- include/gmock/gmock-actions.h +2297 -0
- include/gmock/gmock-cardinalities.h +159 -0
- include/gmock/gmock-function-mocker.h +518 -0
- include/gmock/gmock-matchers.h +5623 -0
- include/gmock/gmock-more-actions.h +658 -0
- include/gmock/gmock-more-matchers.h +120 -0
- include/gmock/gmock-nice-strict.h +277 -0
- include/gmock/gmock-spec-builders.h +2148 -0
- include/gmock/gmock.h +96 -0
- include/gmock/internal/custom/README.md +18 -0
- include/gmock/internal/custom/gmock-generated-actions.h +7 -0
- include/gmock/internal/custom/gmock-matchers.h +37 -0
- include/gmock/internal/custom/gmock-port.h +40 -0
- include/gmock/internal/gmock-internal-utils.h +487 -0
- include/gmock/internal/gmock-port.h +139 -0
- include/gmock/internal/gmock-pp.h +279 -0
- include/gpufl/backends/amd/rocm_collector.cpp +10 -0
- include/gpufl/backends/amd/rocm_collector.hpp +18 -0
- include/gpufl/backends/host_collector.hpp +150 -0
- include/gpufl/backends/nvidia/cuda_collector.cpp +43 -0
- include/gpufl/backends/nvidia/cuda_collector.hpp +16 -0
- include/gpufl/backends/nvidia/cupti_backend.cpp +806 -0
- include/gpufl/backends/nvidia/cupti_backend.hpp +164 -0
- include/gpufl/backends/nvidia/cupti_common.hpp +146 -0
- include/gpufl/backends/nvidia/cupti_utils.cpp +73 -0
- include/gpufl/backends/nvidia/cupti_utils.hpp +37 -0
- include/gpufl/backends/nvidia/kernel_launch_handler.cpp +282 -0
- include/gpufl/backends/nvidia/kernel_launch_handler.hpp +26 -0
- include/gpufl/backends/nvidia/mem_transfer_handler.cpp +237 -0
- include/gpufl/backends/nvidia/mem_transfer_handler.hpp +26 -0
- include/gpufl/backends/nvidia/nvml_collector.cpp +188 -0
- include/gpufl/backends/nvidia/nvml_collector.hpp +38 -0
- include/gpufl/backends/nvidia/resource_handler.cpp +63 -0
- include/gpufl/backends/nvidia/resource_handler.hpp +25 -0
- include/gpufl/backends/nvidia/sampler/cupti_sass.cpp +222 -0
- include/gpufl/backends/nvidia/sampler/cupti_sass.hpp +42 -0
- include/gpufl/core/common.cpp +45 -0
- include/gpufl/core/common.hpp +109 -0
- include/gpufl/core/debug_logger.cpp +9 -0
- include/gpufl/core/debug_logger.hpp +43 -0
- include/gpufl/core/events.hpp +253 -0
- include/gpufl/core/gpufl.cpp +365 -0
- include/gpufl/core/logger.cpp +437 -0
- include/gpufl/core/logger.hpp +88 -0
- include/gpufl/core/monitor.hpp +100 -0
- include/gpufl/core/monitor_backend.hpp +46 -0
- include/gpufl/core/ring_buffer.hpp +75 -0
- include/gpufl/core/runtime.cpp +6 -0
- include/gpufl/core/runtime.hpp +30 -0
- include/gpufl/core/sampler.cpp +73 -0
- include/gpufl/core/sampler.hpp +51 -0
- include/gpufl/core/scope_registry.cpp +10 -0
- include/gpufl/core/scope_registry.hpp +8 -0
- include/gpufl/core/stack_registry.hpp +47 -0
- include/gpufl/core/stack_trace.cpp +112 -0
- include/gpufl/core/stack_trace.hpp +12 -0
- include/gpufl/core/trace_type.hpp +13 -0
- include/gpufl/cuda/monitor.cpp +380 -0
- include/gpufl/gpufl.hpp +80 -0
- include/gpufl.hpp +3 -0
- include/gtest/gtest-assertion-result.h +237 -0
- include/gtest/gtest-death-test.h +345 -0
- include/gtest/gtest-matchers.h +923 -0
- include/gtest/gtest-message.h +252 -0
- include/gtest/gtest-param-test.h +546 -0
- include/gtest/gtest-printers.h +1161 -0
- include/gtest/gtest-spi.h +250 -0
- include/gtest/gtest-test-part.h +192 -0
- include/gtest/gtest-typed-test.h +331 -0
- include/gtest/gtest.h +2321 -0
- include/gtest/gtest_pred_impl.h +279 -0
- include/gtest/gtest_prod.h +60 -0
- include/gtest/internal/custom/README.md +44 -0
- include/gtest/internal/custom/gtest-port.h +37 -0
- include/gtest/internal/custom/gtest-printers.h +42 -0
- include/gtest/internal/custom/gtest.h +37 -0
- include/gtest/internal/gtest-death-test-internal.h +307 -0
- include/gtest/internal/gtest-filepath.h +227 -0
- include/gtest/internal/gtest-internal.h +1560 -0
- include/gtest/internal/gtest-param-util.h +1026 -0
- include/gtest/internal/gtest-port-arch.h +122 -0
- include/gtest/internal/gtest-port.h +2481 -0
- include/gtest/internal/gtest-string.h +178 -0
- include/gtest/internal/gtest-type-util.h +220 -0
- lib/cmake/GTest/GTestConfig.cmake +33 -0
- lib/cmake/GTest/GTestConfigVersion.cmake +43 -0
- lib/cmake/GTest/GTestTargets-release.cmake +49 -0
- lib/cmake/GTest/GTestTargets.cmake +136 -0
- lib/cmake/gpufl_client/gpufl_clientTargets-release.cmake +19 -0
- lib/cmake/gpufl_client/gpufl_clientTargets.cmake +109 -0
- lib/gmock.lib +0 -0
- lib/gmock_main.lib +0 -0
- lib/gpufl.lib +0 -0
- lib/gtest.lib +0 -0
- lib/gtest_main.lib +0 -0
- lib/pkgconfig/gmock.pc +10 -0
- lib/pkgconfig/gmock_main.pc +10 -0
- lib/pkgconfig/gtest.pc +9 -0
- lib/pkgconfig/gtest_main.pc +10 -0
gpufl/viz/reader.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import glob
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from typing import List
|
|
6
|
+
|
|
7
|
+
def _parse_line(line: str) -> dict:
|
|
8
|
+
try: return json.loads(line)
|
|
9
|
+
except: return {}
|
|
10
|
+
|
|
11
|
+
def read_events(file_pattern: str) -> List[dict]:
    """Collect the JSON-object lines of every file matching a glob pattern.

    Args:
        file_pattern: A glob pattern, or a list/tuple of glob patterns.

    Returns:
        The decoded events.  Files are visited in sorted path order and each
        file is read once even if several patterns match it.  Blank lines,
        lines that do not start with ``{`` and lines that fail to decode are
        skipped.
    """
    if isinstance(file_pattern, (str, bytes)):
        patterns = [file_pattern]
    else:
        # Callers such as the visualizer advertise a list of patterns.
        patterns = list(file_pattern)

    matched = set()
    for pattern in patterns:
        matched.update(glob.glob(pattern))

    events: List[dict] = []
    # glob order is OS dependent; sorting keeps the result reproducible.
    for path in sorted(matched):
        if not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as handle:
            for raw in handle:
                text = raw.strip()
                # Also rejects empty lines: "" does not start with "{".
                if not text.startswith("{"):
                    continue
                event = _parse_line(text)
                if event:
                    events.append(event)
    return events
|
|
23
|
+
|
|
24
|
+
def read_df(file_pattern: str) -> pd.DataFrame:
    """Load matching log files into a DataFrame ordered by ``ts_ns``.

    Args:
        file_pattern: Glob pattern forwarded to ``read_events``.

    Returns:
        An empty DataFrame when no events were read.  Otherwise one row per
        event with the timestamp columns coerced to numbers, a ``ts_ns``
        column that falls back to ``ts_start_ns`` and then ``start_ns`` where
        missing, rows sorted by ``ts_ns`` and the index reset.
    """
    events = read_events(file_pattern)
    if not events:
        return pd.DataFrame()

    df = pd.DataFrame(events)

    # Coerce before filling: a non-numeric ts_ns must become NaN first,
    # otherwise it blocks the fallback below and is lost on coercion anyway.
    for col in ("ts_ns", "start_ns", "end_ns", "ts_start_ns", "ts_end_ns"):
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    if "ts_ns" not in df.columns:
        df["ts_ns"] = pd.Series(float("nan"), index=df.index, dtype="float64")

    # Interval-style events carry no ts_ns; borrow their start time so they
    # still take part in the sort.
    for fallback in ("ts_start_ns", "start_ns"):
        if fallback in df.columns:
            df["ts_ns"] = df["ts_ns"].fillna(df[fallback])

    # A stable sort keeps the file order of events sharing a timestamp
    # (for example a start/end pair logged in the same nanosecond).
    return df.sort_values("ts_ns", kind="mergesort").reset_index(drop=True)
|
gpufl/viz/timeline.py
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import json
|
|
3
|
+
from typing import Iterable, Optional
|
|
4
|
+
|
|
5
|
+
def _require_matplotlib():
|
|
6
|
+
try:
|
|
7
|
+
import matplotlib.pyplot as plt
|
|
8
|
+
return plt
|
|
9
|
+
except ImportError:
|
|
10
|
+
raise ImportError("Visualization requires matplotlib.")
|
|
11
|
+
|
|
12
|
+
def _require_pandas():
|
|
13
|
+
try:
|
|
14
|
+
import pandas as pd
|
|
15
|
+
return pd
|
|
16
|
+
except ImportError:
|
|
17
|
+
raise ImportError("Visualization requires pandas.")
|
|
18
|
+
|
|
19
|
+
# ==========================================
|
|
20
|
+
# 1. HELPERS
|
|
21
|
+
# ==========================================
|
|
22
|
+
|
|
23
|
+
def _ensure_event_type_col(df):
|
|
24
|
+
if df is None: return df
|
|
25
|
+
if "event_type" not in df.columns and "type" in df.columns:
|
|
26
|
+
df = df.copy()
|
|
27
|
+
df["event_type"] = df["type"]
|
|
28
|
+
return df
|
|
29
|
+
|
|
30
|
+
def _coerce_devices_cell(x):
|
|
31
|
+
if isinstance(x, list): return x
|
|
32
|
+
if isinstance(x, str):
|
|
33
|
+
try: return json.loads(x)
|
|
34
|
+
except: return []
|
|
35
|
+
return []
|
|
36
|
+
|
|
37
|
+
def _coerce_host_cell(x):
|
|
38
|
+
if isinstance(x, dict): return x
|
|
39
|
+
if isinstance(x, str):
|
|
40
|
+
try: return json.loads(x)
|
|
41
|
+
except: return {}
|
|
42
|
+
return {}
|
|
43
|
+
|
|
44
|
+
def _explode_device_samples(df, gpu_id=0):
    """Flatten the per-device metrics of one GPU into a sample table.

    Args:
        df: Event DataFrame; rows may carry a ``devices`` cell holding a list
            of per-device dicts (or its JSON text).
        gpu_id: Value matched against each device dict's ``id`` key.

    Returns:
        A DataFrame with one row per event that reports the requested device:
        ``ts_ns``, ``util_gpu``, ``util_mem``, ``used_mib``, ``temp_c``,
        ``power_mw``, ``clk_sm``, ``pcie_rx_gbps``, ``pcie_tx_gbps`` and
        ``t_s_abs`` (seconds since the first returned sample).  Rows without
        a usable timestamp are dropped.  An empty DataFrame is returned when
        nothing matches.
    """
    pd = _require_pandas()
    df = _ensure_event_type_col(df)
    if df is None or "event_type" not in df.columns:
        return pd.DataFrame()

    target_types = ["scope_sample", "system_sample", "system_start", "system_stop", "kernel_start", "kernel_end", "kernel_event", "init"]
    d = df[df["event_type"].isin(target_types)]
    if len(d) == 0 or "devices" not in d.columns:
        return pd.DataFrame()

    def _timestamp(record):
        # Same fallback chain the other helpers in this module use.
        for key in ("ts_ns", "ts_start_ns", "start_ns"):
            value = record.get(key)
            if not pd.isna(value):
                return value
        return None

    def _scaled(value):
        # Divide by 1e9 (the original comment describes this as B/s -> GB/s);
        # a None value counts as 0 instead of raising.
        return (0 if value is None else value) / 1e9

    rows = []
    for _, r in d.iterrows():
        devs = _coerce_devices_cell(r.get("devices"))
        if not isinstance(devs, list):
            continue
        found = next(
            (dev for dev in devs if isinstance(dev, dict) and dev.get("id") == gpu_id),
            None,
        )
        if not found:
            continue
        rows.append({
            "ts_ns": _timestamp(r),
            "util_gpu": found.get("util_gpu", 0),
            "util_mem": found.get("util_mem", 0),
            "used_mib": found.get("used_mib", 0),
            "temp_c": found.get("temp_c", 0),
            "power_mw": found.get("power_mw", 0),
            "clk_sm": found.get("clk_sm", 0),
            "pcie_rx_gbps": _scaled(found.get("pcie_rx_bw", 0)),
            "pcie_tx_gbps": _scaled(found.get("pcie_tx_bw", 0)),
        })

    out = pd.DataFrame(rows)
    if not out.empty:
        out = out.dropna(subset=["ts_ns"]).sort_values("ts_ns")
        min_ts = out["ts_ns"].min()
        out["t_s_abs"] = (out["ts_ns"] - min_ts) / 1e9
    return out
|
|
91
|
+
|
|
92
|
+
def _explode_host_samples(df):
    """Flatten the ``host`` metrics of sample events into a table.

    Args:
        df: Event DataFrame; rows may carry a ``host`` cell holding a dict
            (or its JSON text).

    Returns:
        A DataFrame with ``ts_ns``, ``cpu_pct``, ``ram_used_mib`` and
        ``t_s_abs`` (seconds since the first returned sample), sorted by
        time.  Rows without host data or without a usable timestamp are
        dropped.  An empty DataFrame is returned when nothing matches.
    """
    pd = _require_pandas()
    df = _ensure_event_type_col(df)
    if df is None or "event_type" not in df.columns:
        return pd.DataFrame()

    target_types = ["scope_sample", "system_sample", "system_start", "system_stop", "kernel_start", "kernel_event", "init", "shutdown"]
    d = df[df["event_type"].isin(target_types)]
    if len(d) == 0 or "host" not in d.columns:
        return pd.DataFrame()

    def _timestamp(record):
        # An `a or b or c` chain is wrong here: NaN is truthy, so a missing
        # ts_ns would never fall back, and a legitimate 0 would be skipped.
        for key in ("ts_ns", "ts_start_ns", "start_ns"):
            value = record.get(key)
            if not pd.isna(value):
                return value
        return None

    rows = []
    for _, r in d.iterrows():
        h = _coerce_host_cell(r["host"])
        if not h or not isinstance(h, dict):
            continue
        rows.append({
            "ts_ns": _timestamp(r),
            "cpu_pct": h.get("cpu_pct", 0),
            "ram_used_mib": h.get("ram_used_mib", 0)
        })

    out = pd.DataFrame(rows)
    if not out.empty:
        out = out.dropna(subset=["ts_ns"]).sort_values("ts_ns")
        out["t_s_abs"] = (out["ts_ns"] - out["ts_ns"].min()) / 1e9
    return out
|
|
120
|
+
|
|
121
|
+
def _reconstruct_intervals(df, start_type, end_type, name_col="name", fallback_name="Scope"):
    """Pair start/end events into ``(start_sec, dur_sec, name, metrics)`` tuples.

    Args:
        df: Event DataFrame with an ``event_type`` column.
        start_type: Event type that opens an interval.  ``"scope_start"``
            also accepts ``"scope_begin"``; ``"kernel_start"`` and
            ``"kernel_event"`` additionally consume ``kernel_event`` rows.
        end_type: Event type that closes an interval.
        name_col: Column holding the interval name.
        fallback_name: Name used when ``name_col`` is absent or NaN.

    Returns:
        A list of tuples.  ``start_sec`` is measured from the smallest
        ``ts_ns`` in ``df`` (falling back to the smallest ``start_ns``, then
        0).  ``metrics`` is filled only for ``kernel_event`` rows that carry
        both ``start_ns`` and ``end_ns``.  Start events that never see a
        matching end are not reported.
    """
    pd = _require_pandas()
    if df is None or "event_type" not in df.columns:
        return []

    # Support both "scope_start" and "scope_begin" for compatibility.
    start_types = [start_type]
    if start_type == "scope_start":
        start_types.append("scope_begin")

    target_types = start_types + [end_type]
    if start_type in ("kernel_start", "kernel_event"):
        # kernel_event is a single record that carries both start and end.
        target_types.append("kernel_event")

    subset = df[df["event_type"].isin(target_types)]
    if subset.empty:
        return []

    # Time origin: first column that exists and yields a real minimum.
    min_ts = float("nan")
    for col in ("ts_ns", "start_ns"):
        if col in df.columns:
            min_ts = df[col].min()
            if not pd.isna(min_ts):
                break
    if pd.isna(min_ts):
        min_ts = 0

    intervals = []
    # name -> stack of open start timestamps, so same-named intervals can nest.
    stacks = {}

    for _, r in subset.iterrows():
        etype = r["event_type"]
        name = r.get(name_col, fallback_name)
        if pd.isna(name):
            name = fallback_name

        if etype == "kernel_event" and "start_ns" in r and "end_ns" in r:
            start_ns = r["start_ns"]
            end_ns = r["end_ns"]
            if not pd.isna(start_ns) and not pd.isna(end_ns):
                metrics = {
                    "occupancy": r.get("occupancy", 0),
                    "grid": r.get("grid", ""),
                    "block": r.get("block", ""),
                    "num_regs": r.get("num_regs", 0),
                    "dyn_shared": r.get("dyn_shared_bytes", 0),
                    "static_shared": r.get("static_shared_bytes", 0),
                }
                intervals.append(((start_ns - min_ts) / 1e9, (end_ns - start_ns) / 1e9, name, metrics))
                continue

        ts = r.get("ts_ns")
        if pd.isna(ts):
            ts = r.get("ts_start_ns")
        if pd.isna(ts):
            ts = r.get("start_ns")
        if pd.isna(ts):
            continue

        if etype in start_types:
            stacks.setdefault(name, []).append(ts)
        elif etype == end_type:
            open_starts = stacks.get(name)
            if open_starts:
                # Most recent unmatched start wins (LIFO), which handles nesting.
                start_ns = open_starts.pop()
                intervals.append(((start_ns - min_ts) / 1e9, (ts - start_ns) / 1e9, name, {}))
                if not open_starts:
                    del stacks[name]
    return intervals
|
|
191
|
+
|
|
192
|
+
# ==========================================
|
|
193
|
+
# 2. PLOTTERS
|
|
194
|
+
# ==========================================
|
|
195
|
+
|
|
196
|
+
def plot_combined_timeline(df, title="GPUFL Timeline"):
    """Draw GPU, PCIe and host metrics as three stacked panels sharing one x axis.

    Scope starts (red, dashed) and kernel starts/ends (orange) are drawn as
    vertical markers on every panel.  Kernel labels are hidden until the
    mouse comes within 5 pixels of the kernel's start line.

    Args:
        df: Event DataFrame with an ``event_type`` (or ``type``) column.
        title: Figure title.

    Returns:
        The matplotlib Figure, or None (after printing an error) when no
        event type column is available.
    """
    _require_pandas()  # fail early with a clear message; the helpers below need pandas
    plt = _require_matplotlib()

    df = _ensure_event_type_col(df)
    if df is None or "event_type" not in df.columns:
        print("[Viz] Error: No event_type column found.")
        return None

    # --- Prepare Data ---
    # Try both "scope_start" and "scope_begin"
    scope_data = _reconstruct_intervals(df, "scope_start", "scope_end")
    if not scope_data:
        scope_data = _reconstruct_intervals(df, "scope_begin", "scope_end")

    if not scope_data:
        # No scopes at all: fall back to the whole app lifetime (init -> shutdown).
        app_data = _reconstruct_intervals(df, "init", "shutdown", name_col="app", fallback_name="App")
        scope_data.extend(app_data)

    kernel_data = _reconstruct_intervals(df, "kernel_event", "kernel_end")
    if not kernel_data:
        kernel_data = _reconstruct_intervals(df, "kernel_start", "kernel_end")

    gpu_samples = _explode_device_samples(df, gpu_id=0)
    host_samples = _explode_host_samples(df)

    # NOTE(review): sample curves use each table's own `t_s_abs` origin while
    # the interval markers are measured from the frame-wide minimum timestamp;
    # confirm these origins coincide for the logs this is used with.

    # --- Plotting (3 Rows) ---
    # Heights: GPU=2, PCIe=1.5, Host=2
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 10), sharex=True,
                                        gridspec_kw={'height_ratios': [2, 1.5, 2]})

    kernel_markers = []  # List of (vline, annotation)

    def occupancy_of(metrics):
        """Return the occupancy metric as a positive float, else 0.0."""
        try:
            value = float((metrics or {}).get("occupancy", 0))
        except (TypeError, ValueError):
            return 0.0
        # NaN fails the comparison and therefore also maps to 0.0.
        return value if value > 0 else 0.0

    def overlay_markers(ax, y_lim_ref=None):
        """Draws vertical lines for Scopes and Kernels on the given axis."""
        # Y position for the labels: explicit reference, else the axis' own
        # upper limit when it already has data, else 100.
        if y_lim_ref:
            y_top = y_lim_ref
        elif len(ax.get_lines()) > 0:
            y_top = ax.get_ylim()[1]
        else:
            y_top = 100

        # Scopes (Red dashed)
        for start_sec, _dur, name, _ in scope_data:
            ax.axvline(x=start_sec, color='tab:red', linestyle='--', alpha=0.6, linewidth=1)
            ax.text(start_sec, y_top * 0.95, name, rotation=90, va='top', ha='center', fontsize=7,
                    color='tab:red', alpha=0.9,
                    bbox=dict(boxstyle='round,pad=0.1', facecolor='white', alpha=0.3, edgecolor='none'))

        # Kernels (Orange solid start / dashed end)
        for start_sec, dur_sec, name, metrics in kernel_data:
            end_sec = start_sec + (dur_sec if dur_sec is not None else 0)
            vl = ax.axvline(x=start_sec, color='tab:orange', linestyle='-', linewidth=1.2, picker=True)

            # Enrich text with occupancy if available
            display_name = name
            occupancy = occupancy_of(metrics)
            if occupancy > 0:
                display_name += f" ({occupancy*100:.1f}%)"

            # Create annotation but keep it invisible until hovered
            ann = ax.annotate(display_name, xy=(start_sec, y_top * 0.85),
                              xytext=(5, 0), textcoords="offset points",
                              rotation=90, va='top', ha='left', fontsize=7,
                              color='tab:orange', fontweight='bold',
                              bbox=dict(boxstyle='round,pad=0.2', fc='yellow', alpha=0.8),
                              visible=False)

            kernel_markers.append((vl, ann))

            if dur_sec and dur_sec > 0:
                ax.axvline(x=end_sec, color='tab:orange', linestyle='--', linewidth=1.2)

    # --- Row 1: GPU Metrics ---
    if not gpu_samples.empty:
        t = gpu_samples["t_s_abs"]
        ax1.plot(t, gpu_samples["util_gpu"], label="GPU %", color='tab:green')
        ax1.plot(t, gpu_samples["util_mem"], label="Mem %", color='tab:purple', linestyle="--")

        # Optional metrics from system log
        if "temp_c" in gpu_samples.columns and gpu_samples["temp_c"].max() > 0:
            ax1.plot(t, gpu_samples["temp_c"], label="Temp (C)", color='tab:red', alpha=0.3)
        if "clk_sm" in gpu_samples.columns and gpu_samples["clk_sm"].max() > 0:
            # Divided by 10 so the clock fits the 0-100 percentage axis.
            ax1.plot(t, gpu_samples["clk_sm"] / 10, label="SM Clock (x10 MHz)", color='tab:orange', alpha=0.3)

        # Kernel occupancy points on the timeline
        occ_points = [(k[0], occupancy_of(k[3]) * 100) for k in kernel_data if occupancy_of(k[3]) > 0]
        if occ_points:
            k_t = [point[0] for point in occ_points]
            k_occ = [point[1] for point in occ_points]
            ax1.scatter(k_t, k_occ, color='tab:orange', marker='o', s=20, label="Kernel Occupancy", zorder=5)

        ax1.set_ylabel("GPU Util %")
        ax1.set_ylim(-5, 105)
        ax1.legend(loc="upper left", fontsize='x-small')

        ax1b = ax1.twinx()
        ax1b.fill_between(t, gpu_samples["used_mib"], color='tab:gray', alpha=0.1, label="VRAM Used")
        ax1b.set_ylabel("VRAM (MiB)", color='gray')
        ax1b.set_ylim(bottom=0)

    ax1.grid(True, alpha=0.3)
    ax1.set_title("GPU Metrics", fontsize=10)
    overlay_markers(ax1, y_lim_ref=105)

    # --- Row 2: PCIe Bandwidth ---
    if not gpu_samples.empty:
        t = gpu_samples["t_s_abs"]
        # Plot RX (Host -> Device) and TX (Device -> Host)
        ax2.plot(t, gpu_samples["pcie_rx_gbps"], label="PCIe RX (Upload)", color='tab:blue')
        ax2.plot(t, gpu_samples["pcie_tx_gbps"], label="PCIe TX (Download)", color='tab:cyan', linestyle="--")

        ax2.set_ylabel("BW (GB/s)")
        # Dynamically scale Y-axis but keep min at 0
        ax2.set_ylim(bottom=0)
        ax2.legend(loc="upper left", fontsize='x-small')

    ax2.grid(True, alpha=0.3)
    ax2.set_title("PCIe Bandwidth", fontsize=10)
    # No reference passed: the helper derives the label height from the axis.
    overlay_markers(ax2)

    # --- Row 3: Host Metrics ---
    if not host_samples.empty:
        t_host = host_samples["t_s_abs"]
        ax3.plot(t_host, host_samples["cpu_pct"], label="CPU %", color='tab:red')
        ax3.set_ylabel("CPU Util %", color='tab:red')
        ax3.set_ylim(-5, 105)
        ax3.tick_params(axis='y', labelcolor='tab:red')
        ax3.legend(loc="upper left", fontsize='x-small')

        ax3b = ax3.twinx()
        ax3b.plot(t_host, host_samples["ram_used_mib"] / 1024, label="RAM (GiB)", color='tab:blue', linestyle="--")
        ax3b.set_ylabel("Sys RAM (GiB)", color='tab:blue')
        ax3b.tick_params(axis='y', labelcolor='tab:blue')
        ax3b.set_ylim(bottom=0)
        ax3b.legend(loc="upper right", fontsize='x-small')

    ax3.set_xlabel("Time (seconds)")
    ax3.grid(True, alpha=0.3)
    ax3.set_title("Host Metrics", fontsize=10)
    overlay_markers(ax3, y_lim_ref=105)

    # --- Hover Interaction ---
    def on_hover(event):
        if event.inaxes is None:
            return

        changed = False
        for vl, ann in kernel_markers:
            # Only markers on the axis under the mouse are considered.
            if vl.axes != event.inaxes:
                continue
            try:
                # Compare in display (pixel) space so the tolerance does not
                # depend on the zoom level.
                x_display = vl.axes.transData.transform((vl.get_xdata()[0], 0))[0]
                is_near = abs(x_display - event.x) < 5  # 5 pixels tolerance
            except Exception:
                # Hover is best effort: skip a marker that cannot be mapped,
                # but let KeyboardInterrupt/SystemExit through.
                continue

            if ann.get_visible() != is_near:
                ann.set_visible(is_near)
                changed = True

        if changed:
            fig.canvas.draw_idle()

    fig.canvas.mpl_connect("motion_notify_event", on_hover)

    fig.suptitle(title, fontsize=14)
    plt.tight_layout()
    plt.subplots_adjust(hspace=0.25)
    return fig
|
|
374
|
+
|
|
375
|
+
# Legacy wrappers
|
|
376
|
+
def plot_kernel_timeline(df, title="Kernels"):
    """Legacy entry point; delegates to ``plot_combined_timeline``."""
    figure = plot_combined_timeline(df, title)
    return figure
|
|
377
|
+
def plot_scope_timeline(df, title="Scopes"):
    """Legacy entry point; delegates to ``plot_combined_timeline``."""
    figure = plot_combined_timeline(df, title)
    return figure
|
|
378
|
+
def plot_host_timeline(df, title="Host"):
    """Legacy entry point; delegates to ``plot_combined_timeline``."""
    figure = plot_combined_timeline(df, title)
    return figure
|
|
379
|
+
def plot_memory_timeline(df, gpu_id=0, title="Mem"):
    """Legacy stub kept for API compatibility; draws nothing and returns None."""
    result = None
    return result
|
|
380
|
+
def plot_utilization_timeline(df, gpu_id=0, title="Util"):
    """Legacy stub kept for API compatibility; draws nothing and returns None."""
    result = None
    return result
|
gpufl/viz/visualizer.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import glob
|
|
3
|
+
import re
|
|
4
|
+
import json
|
|
5
|
+
from typing import Optional, List, Union, Any
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
from .reader import read_df
|
|
9
|
+
from .timeline import (
|
|
10
|
+
plot_combined_timeline,
|
|
11
|
+
_explode_device_samples,
|
|
12
|
+
_explode_host_samples
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# --- Global State ---
|
|
16
|
+
_GLOBAL_DF = None
|
|
17
|
+
|
|
18
|
+
def _require_matplotlib():
|
|
19
|
+
try:
|
|
20
|
+
import matplotlib.pyplot as plt
|
|
21
|
+
return plt
|
|
22
|
+
except ImportError as e:
|
|
23
|
+
raise ImportError("Visualization requires matplotlib. Run: pip install pandas matplotlib") from e
|
|
24
|
+
|
|
25
|
+
def _check_init():
    """Ensure the module-level DataFrame has been loaded.

    Raises:
        RuntimeError: If ``_GLOBAL_DF`` is still None.
    """
    data_loaded = _GLOBAL_DF is not None
    if data_loaded:
        return
    raise RuntimeError("Global data not loaded. Call viz.init('path/to/logs') first.")
|
|
28
|
+
|
|
29
|
+
def init(log_pattern: Union[str, List[str]]):
    """
    Load log files into memory.

    The result is stored in the module-level ``_GLOBAL_DF``; it is reset to
    None (with a warning printed) when no events are found.

    Args:
        log_pattern: File path, directory, or glob pattern (e.g., "logs/*.log"),
            or a list/tuple of those.  A directory is expanded to its
            "*.log" files.
    """
    global _GLOBAL_DF

    def _resolve(entry):
        # A directory means "every .log file directly inside it".
        if isinstance(entry, str) and os.path.isdir(entry):
            return os.path.join(entry, "*.log")
        return entry

    multiple = isinstance(log_pattern, (list, tuple))
    patterns = [_resolve(entry) for entry in (log_pattern if multiple else [log_pattern])]

    shown = patterns if multiple else patterns[0]
    print(f"Loading logs from: {shown} ...")

    # Read each pattern on its own so a list works regardless of whether the
    # reader itself accepts lists.
    frames = [frame for frame in (read_df(p) for p in patterns) if len(frame) > 0]
    if not frames:
        print("[Warn] No events found.")
        _GLOBAL_DF = None
        return

    df = frames[0] if len(frames) == 1 else pd.concat(frames, ignore_index=True)

    # Pre-convert timestamps to numeric for speed
    for col in ["ts_ns", "start_ns", "end_ns"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    # Sort by time immediately; a stable sort keeps the logged order of
    # events that share a timestamp.
    if "ts_ns" in df.columns:
        df = df.sort_values("ts_ns", kind="mergesort")

    _GLOBAL_DF = df
    print(f"Loaded {len(df)} events.")
|
|
59
|
+
|
|
60
|
+
def get_data() -> Any:
    """Return the DataFrame loaded by ``init`` (the shared object, not a copy).

    Raises:
        RuntimeError: If no data has been loaded yet.
    """
    _check_init()
    loaded = _GLOBAL_DF
    return loaded
|
|
64
|
+
|
|
65
|
+
def _parse_duration(duration_str: str) -> int:
|
|
66
|
+
"""Convert '1m', '1h', '30s' to nanoseconds."""
|
|
67
|
+
if not duration_str: return 0
|
|
68
|
+
units = {"s": 1e9, "m": 60 * 1e9, "h": 3600 * 1e9, "d": 86400 * 1e9}
|
|
69
|
+
match = re.match(r"(\d+)([smhd])", duration_str)
|
|
70
|
+
if match:
|
|
71
|
+
val, unit = match.groups()
|
|
72
|
+
return int(float(val) * units[unit])
|
|
73
|
+
return 0
|
|
74
|
+
|
|
75
|
+
def show(last: Optional[str] = None, **filters):
    """
    Visualize the Stacked Timeline.

    Prints a message and returns without plotting when the filters or the
    time window leave no rows, or when no figure could be produced.

    Args:
        last: Time window from the end of the log (e.g., "1m", "30s", "1h").
              If None, shows all data.
        **filters: Key-value pairs to filter data (e.g., app="MyApp").
              A list value keeps rows matching any element.  Keys that are
              not columns are ignored.

    Raises:
        RuntimeError: If no data has been loaded.
    """
    _check_init()
    plt = _require_matplotlib()

    df = _GLOBAL_DF.copy()

    # 1. Apply Tag/App Filters
    for key, wanted in filters.items():
        if key not in df.columns:
            continue
        if isinstance(wanted, list):
            df = df[df[key].isin(wanted)]
        else:
            df = df[df[key] == wanted]

    if len(df) == 0:
        print("No data matching filters.")
        return

    # 2. Apply Time Window (Tail)
    if last:
        duration_ns = _parse_duration(last)
        if duration_ns > 0:
            # Logs without kernel events have no start_ns/end_ns columns, so
            # every column access below is guarded.
            max_ts = df["ts_ns"].max() if "ts_ns" in df.columns else float("nan")
            if pd.isna(max_ts):
                span_cols = [c for c in ("start_ns", "end_ns") if c in df.columns]
                if span_cols:
                    max_ts = df[span_cols].max().max()

            # A NaN cutoff makes every comparison False and falls through to
            # the "no data" message below.
            cutoff = max_ts - duration_ns

            # Keep rows overlapping the window on any available time column.
            keep = pd.Series(False, index=df.index)
            for col in ("ts_ns", "start_ns", "end_ns"):
                if col in df.columns:
                    keep = keep | (df[col] >= cutoff)
            df = df[keep]

            if len(df) == 0:
                print(f"No data found in the last {last}.")
                return

    # 3. Generate Combined Plot
    fig = plot_combined_timeline(df, title=f"Timeline (last={last})" if last else "Full Timeline")

    if fig:
        plt.show()
    else:
        print("Not enough data to generate plot.")
|
|
127
|
+
|
|
128
|
+
def compare(group_by="app", metric="gpu", **filters):
    """
    Compare a specific metric across different groups (e.g. apps, tags).

    One line is drawn per distinct value of ``group_by``, each on a time
    axis that starts at that group's first sample.

    Args:
        group_by: Column to group by (default: "app").
        metric: "cpu", "gpu", "ram"
        **filters: Additional filters.  A list value keeps rows matching any
            element.  Keys that are not columns are ignored.

    Raises:
        RuntimeError: If no data has been loaded.
    """
    _check_init()
    plt = _require_matplotlib()

    # 1. Filter
    df = _GLOBAL_DF.copy()
    for key, wanted in filters.items():
        if key not in df.columns:
            continue
        if isinstance(wanted, list):
            df = df[df[key].isin(wanted)]
        else:
            df = df[df[key] == wanted]

    if len(df) == 0:
        print("No data matches filters.")
        return

    if group_by not in df.columns:
        print(f"Cannot group by '{group_by}': column not found.")
        return

    groups = df[group_by].unique()
    if len(groups) == 0:
        print("No groups found.")
        return

    # metric -> (sample extractor, value column, y-axis label)
    sources = {
        "gpu": (lambda frame: _explode_device_samples(frame, gpu_id=0), "util_gpu", "Utilization %"),
        "cpu": (_explode_host_samples, "cpu_pct", "Utilization %"),
        "ram": (_explode_host_samples, "ram_used_mib", "RAM used (MiB)"),
    }
    if metric not in sources:
        # Checked before a figure exists so nothing is left open.
        print(f"No data found for metric '{metric}' in the selected groups.")
        return
    extract, value_col, y_label = sources[metric]

    fig = plt.figure(figsize=(10, 5))
    has_plot = False

    # 2. Plot lines for each group
    for g in groups:
        samples = extract(df[df[group_by] == g])
        if samples.empty:
            continue
        # Normalize time to start at 0 for comparison
        t_axis = (samples["ts_ns"] - samples["ts_ns"].min()) / 1e9
        plt.plot(t_axis, samples[value_col], label=str(g))
        has_plot = True

    if has_plot:
        plt.title(f"Comparison: {metric.upper()} by {group_by}")
        plt.xlabel("Time (s) [Relative start]")
        plt.ylabel(y_label)
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.show()
    else:
        # Release the empty figure instead of leaving it registered with pyplot.
        plt.close(fig)
        print(f"No data found for metric '{metric}' in the selected groups.")
|