gnvitop 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gnvitop-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Linwei94
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
gnvitop-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: gnvitop
3
+ Version: 0.1.0
4
+ Summary: Global nvitop: web-based GPU monitoring dashboard for all your remote servers via SSH
5
+ Author: Linwei94
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Linwei94/gnvitop
8
+ Project-URL: Repository, https://github.com/Linwei94/gnvitop
9
+ Project-URL: Issues, https://github.com/Linwei94/gnvitop/issues
10
+ Keywords: gpu,monitor,nvidia,ssh,nvitop,dashboard,nvidia-smi
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Environment :: Web Environment
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: Intended Audience :: System Administrators
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Topic :: System :: Monitoring
19
+ Requires-Python: >=3.7
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: flask>=2.0
23
+ Requires-Dist: paramiko>=2.0
24
+ Dynamic: license-file
25
+
26
+ # gnvitop
27
+
28
+ **Global nvitop** -- a web-based GPU monitoring dashboard that monitors **all** your remote GPU servers from a single page.
29
+
30
+ Like [nvitop](https://github.com/XuehaiPan/nvitop), but for **all your servers at once**, displayed as a beautiful web dashboard.
31
+
32
+ ```
33
+ pip install gnvitop
34
+ gnvitop
35
+ ```
36
+
37
+ ## How It Works
38
+
39
+ 1. Reads your `~/.ssh/config` automatically
40
+ 2. Connects to each server over SSH and runs `nvidia-smi`
41
+ 3. Displays everything in a real-time web dashboard
42
+ 4. Auto-refreshes every 30 seconds
43
+
44
+ ```
45
+                         ┌──> Server A (nvidia-smi) ──> 4x A100
46
+ gnvitop ──> Browser ──> ├──> Server B (nvidia-smi) ──> 8x V100
47
+                         ├──> Server C (nvidia-smi) ──> 2x RTX 4090
48
+                         └──> Server D ──> offline
49
+ ```
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
+ pip install gnvitop
55
+ ```
56
+
57
+ ## Usage
58
+
59
+ ```bash
60
+ gnvitop # start and auto-open browser
61
+ gnvitop -p 8080 # custom port
62
+ gnvitop --host 0.0.0.0 # expose to LAN
63
+ gnvitop --no-browser # don't auto-open browser
64
+ gnvitop --ssh-config /path/to/config # custom SSH config
65
+ gnvitop -v # show version
66
+ ```
67
+
68
+ Or run as a module:
69
+
70
+ ```bash
71
+ python -m gnvitop
72
+ ```
73
+
74
+ ## Prerequisites
75
+
76
+ 1. **SSH config** -- your `~/.ssh/config` should have server entries:
77
+
78
+ ```
79
+ Host gpu-server-01
80
+ HostName 192.168.1.101
81
+ User alice
82
+ IdentityFile ~/.ssh/id_rsa
83
+
84
+ Host gpu-server-02
85
+ HostName 192.168.1.102
86
+ User bob
87
+ ```
88
+
89
+ 2. **SSH key auth** -- password-less login should be set up
90
+ 3. **nvidia-smi** -- must be installed on the remote servers
91
+
92
+ ## Features
93
+
94
+ - **Zero config** -- reads `~/.ssh/config` automatically, no setup needed
95
+ - **One command** -- `pip install gnvitop && gnvitop`, that's it
96
+ - **Auto browser** -- opens dashboard in your browser on start
97
+ - **Real-time** -- 30s auto-refresh with manual refresh button
98
+ - **Concurrent** -- queries all servers in parallel (10 workers)
99
+ - **Cached** -- 30s cache to avoid hammering your servers
100
+ - **Dark UI** -- clean, responsive dark-themed dashboard
101
+ - **At a glance** -- summary bar shows online hosts, total GPUs, idle GPUs, free memory
102
+ - **Color coded** -- green (online), yellow (no GPU), red (offline)
103
+ - **GPU details** -- utilization bars, memory bars, temperature with color alerts
104
+
105
+ ## Comparison with nvitop
106
+
107
+ | Feature | nvitop | gnvitop |
108
+ |---------|--------|---------|
109
+ | Monitor local GPU | Yes | No |
110
+ | Monitor remote GPUs | No | Yes |
111
+ | Multiple servers | No | Yes |
112
+ | Interface | Terminal | Web browser |
113
+ | Setup | Run on each server | Run once, reads SSH config |
114
+
115
+ **gnvitop** is not a replacement for nvitop -- it's a complement. Use nvitop for detailed local GPU monitoring, use gnvitop to get an overview of all your GPU servers from one place.
116
+
117
+ ## License
118
+
119
+ MIT
@@ -0,0 +1,94 @@
1
+ # gnvitop
2
+
3
+ **Global nvitop** -- a web-based GPU monitoring dashboard that monitors **all** your remote GPU servers from a single page.
4
+
5
+ Like [nvitop](https://github.com/XuehaiPan/nvitop), but for **all your servers at once**, displayed as a beautiful web dashboard.
6
+
7
+ ```
8
+ pip install gnvitop
9
+ gnvitop
10
+ ```
11
+
12
+ ## How It Works
13
+
14
+ 1. Reads your `~/.ssh/config` automatically
15
+ 2. Connects to each server over SSH and runs `nvidia-smi`
16
+ 3. Displays everything in a real-time web dashboard
17
+ 4. Auto-refreshes every 30 seconds
18
+
19
+ ```
20
+                         ┌──> Server A (nvidia-smi) ──> 4x A100
21
+ gnvitop ──> Browser ──> ├──> Server B (nvidia-smi) ──> 8x V100
22
+                         ├──> Server C (nvidia-smi) ──> 2x RTX 4090
23
+                         └──> Server D ──> offline
24
+ ```
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install gnvitop
30
+ ```
31
+
32
+ ## Usage
33
+
34
+ ```bash
35
+ gnvitop # start and auto-open browser
36
+ gnvitop -p 8080 # custom port
37
+ gnvitop --host 0.0.0.0 # expose to LAN
38
+ gnvitop --no-browser # don't auto-open browser
39
+ gnvitop --ssh-config /path/to/config # custom SSH config
40
+ gnvitop -v # show version
41
+ ```
42
+
43
+ Or run as a module:
44
+
45
+ ```bash
46
+ python -m gnvitop
47
+ ```
48
+
49
+ ## Prerequisites
50
+
51
+ 1. **SSH config** -- your `~/.ssh/config` should have server entries:
52
+
53
+ ```
54
+ Host gpu-server-01
55
+ HostName 192.168.1.101
56
+ User alice
57
+ IdentityFile ~/.ssh/id_rsa
58
+
59
+ Host gpu-server-02
60
+ HostName 192.168.1.102
61
+ User bob
62
+ ```
63
+
64
+ 2. **SSH key auth** -- password-less login should be set up
65
+ 3. **nvidia-smi** -- must be installed on the remote servers
66
+
67
+ ## Features
68
+
69
+ - **Zero config** -- reads `~/.ssh/config` automatically, no setup needed
70
+ - **One command** -- `pip install gnvitop && gnvitop`, that's it
71
+ - **Auto browser** -- opens dashboard in your browser on start
72
+ - **Real-time** -- 30s auto-refresh with manual refresh button
73
+ - **Concurrent** -- queries all servers in parallel (10 workers)
74
+ - **Cached** -- 30s cache to avoid hammering your servers
75
+ - **Dark UI** -- clean, responsive dark-themed dashboard
76
+ - **At a glance** -- summary bar shows online hosts, total GPUs, idle GPUs, free memory
77
+ - **Color coded** -- green (online), yellow (no GPU), red (offline)
78
+ - **GPU details** -- utilization bars, memory bars, temperature with color alerts
79
+
80
+ ## Comparison with nvitop
81
+
82
+ | Feature | nvitop | gnvitop |
83
+ |---------|--------|---------|
84
+ | Monitor local GPU | Yes | No |
85
+ | Monitor remote GPUs | No | Yes |
86
+ | Multiple servers | No | Yes |
87
+ | Interface | Terminal | Web browser |
88
+ | Setup | Run on each server | Run once, reads SSH config |
89
+
90
+ **gnvitop** is not a replacement for nvitop -- it's a complement. Use nvitop for detailed local GPU monitoring, use gnvitop to get an overview of all your GPU servers from one place.
91
+
92
+ ## License
93
+
94
+ MIT
@@ -0,0 +1,3 @@
1
+ """gnvitop - Global nvitop: web-based GPU monitoring dashboard for remote servers via SSH."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,4 @@
1
+ """Allow running with `python -m gnvitop`."""
2
+ from .cli import main
3
+
4
+ main()
@@ -0,0 +1,76 @@
1
+ #!/usr/bin/env python3
2
+ """CLI entry point for gnvitop."""
3
+
4
+ import argparse
5
+ import os
6
+ import webbrowser
7
+ import threading
8
+
9
+
10
+ def main():
11
+ parser = argparse.ArgumentParser(
12
+ prog="gnvitop",
13
+ description="Global nvitop: web-based GPU monitoring dashboard for remote servers via SSH.",
14
+ )
15
+ parser.add_argument(
16
+ "-p", "--port",
17
+ type=int,
18
+ default=5050,
19
+ help="Port to run the server on (default: 5050)",
20
+ )
21
+ parser.add_argument(
22
+ "--host",
23
+ default="127.0.0.1",
24
+ help="Host to bind to (default: 127.0.0.1)",
25
+ )
26
+ parser.add_argument(
27
+ "--no-browser",
28
+ action="store_true",
29
+ help="Do not open browser automatically",
30
+ )
31
+ parser.add_argument(
32
+ "--ssh-config",
33
+ default=None,
34
+ help="Path to SSH config file (default: ~/.ssh/config)",
35
+ )
36
+ parser.add_argument(
37
+ "-v", "--version",
38
+ action="store_true",
39
+ help="Show version and exit",
40
+ )
41
+
42
+ args = parser.parse_args()
43
+
44
+ from . import __version__
45
+
46
+ if args.version:
47
+ print(f"gnvitop {__version__}")
48
+ return
49
+
50
+ # Check SSH config exists
51
+ ssh_config = args.ssh_config or os.path.expanduser("~/.ssh/config")
52
+ if not os.path.exists(ssh_config):
53
+ print(f"Warning: SSH config not found at {ssh_config}")
54
+ print("gnvitop will start but no hosts will be queried.")
55
+ print("Create ~/.ssh/config or use --ssh-config to specify a path.\n")
56
+
57
+ # Set custom SSH config path if provided
58
+ if args.ssh_config:
59
+ from . import server
60
+ server.SSH_CONFIG_PATH = args.ssh_config
61
+
62
+ from .server import app
63
+
64
+ url = f"http://{args.host}:{args.port}"
65
+ print(f"gnvitop v{__version__} starting on {url}")
66
+ print(f"Reading SSH config from: {ssh_config}")
67
+ print("Press Ctrl+C to stop.\n")
68
+
69
+ if not args.no_browser:
70
+ threading.Timer(1.0, lambda: webbrowser.open(url)).start()
71
+
72
+ app.run(host=args.host, port=args.port, debug=False)
73
+
74
+
75
+ if __name__ == "__main__":
76
+ main()
@@ -0,0 +1,477 @@
1
+ """Embedded dashboard HTML."""
2
+
3
+ DASHBOARD_HTML = r"""<!DOCTYPE html>
4
+ <html lang="zh-CN">
5
+ <head>
6
+ <meta charset="UTF-8">
7
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
8
+ <title>GPU Monitor</title>
9
+ <style>
10
+ * { margin: 0; padding: 0; box-sizing: border-box; }
11
+
12
+ body {
13
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
14
+ background: #0f172a;
15
+ color: #e2e8f0;
16
+ min-height: 100vh;
17
+ padding: 24px;
18
+ }
19
+
20
+ .header {
21
+ display: flex;
22
+ justify-content: space-between;
23
+ align-items: center;
24
+ margin-bottom: 28px;
25
+ flex-wrap: wrap;
26
+ gap: 12px;
27
+ }
28
+
29
+ .header h1 {
30
+ font-size: 28px;
31
+ font-weight: 700;
32
+ color: #f1f5f9;
33
+ letter-spacing: -0.5px;
34
+ }
35
+
36
+ .header-right {
37
+ display: flex;
38
+ align-items: center;
39
+ gap: 16px;
40
+ }
41
+
42
+ .status-text {
43
+ font-size: 13px;
44
+ color: #94a3b8;
45
+ }
46
+
47
+ .btn-refresh {
48
+ padding: 8px 20px;
49
+ border: 1px solid #334155;
50
+ background: #1e293b;
51
+ color: #e2e8f0;
52
+ border-radius: 8px;
53
+ cursor: pointer;
54
+ font-size: 14px;
55
+ transition: all 0.2s;
56
+ }
57
+ .btn-refresh:hover { background: #334155; border-color: #475569; }
58
+ .btn-refresh:disabled { opacity: 0.5; cursor: not-allowed; }
59
+
60
+ .summary-bar {
61
+ display: flex;
62
+ gap: 16px;
63
+ margin-bottom: 24px;
64
+ flex-wrap: wrap;
65
+ }
66
+
67
+ .summary-card {
68
+ background: #1e293b;
69
+ border: 1px solid #334155;
70
+ border-radius: 10px;
71
+ padding: 16px 24px;
72
+ min-width: 160px;
73
+ flex: 1;
74
+ }
75
+
76
+ .summary-card .label {
77
+ font-size: 12px;
78
+ color: #94a3b8;
79
+ text-transform: uppercase;
80
+ letter-spacing: 0.5px;
81
+ margin-bottom: 4px;
82
+ }
83
+
84
+ .summary-card .value {
85
+ font-size: 28px;
86
+ font-weight: 700;
87
+ }
88
+
89
+ .host-grid {
90
+ display: grid;
91
+ grid-template-columns: repeat(auto-fill, minmax(420px, 1fr));
92
+ gap: 16px;
93
+ }
94
+
95
+ .host-card {
96
+ background: #1e293b;
97
+ border: 1px solid #334155;
98
+ border-radius: 12px;
99
+ overflow: hidden;
100
+ transition: border-color 0.2s;
101
+ }
102
+ .host-card:hover { border-color: #475569; }
103
+
104
+ .host-card.status-ok { border-left: 3px solid #22c55e; }
105
+ .host-card.status-no_gpu { border-left: 3px solid #eab308; }
106
+ .host-card.status-error { border-left: 3px solid #ef4444; }
107
+
108
+ .host-header {
109
+ padding: 16px 20px;
110
+ display: flex;
111
+ justify-content: space-between;
112
+ align-items: center;
113
+ border-bottom: 1px solid #334155;
114
+ }
115
+
116
+ .host-name {
117
+ font-size: 16px;
118
+ font-weight: 600;
119
+ color: #f1f5f9;
120
+ }
121
+
122
+ .host-info {
123
+ font-size: 12px;
124
+ color: #64748b;
125
+ }
126
+
127
+ .status-badge {
128
+ font-size: 11px;
129
+ padding: 3px 10px;
130
+ border-radius: 12px;
131
+ font-weight: 600;
132
+ text-transform: uppercase;
133
+ letter-spacing: 0.3px;
134
+ }
135
+ .badge-ok { background: #052e16; color: #4ade80; }
136
+ .badge-no_gpu { background: #422006; color: #facc15; }
137
+ .badge-error { background: #450a0a; color: #f87171; }
138
+
139
+ .host-body { padding: 16px 20px; }
140
+
141
+ .error-msg {
142
+ color: #f87171;
143
+ font-size: 13px;
144
+ padding: 8px 0;
145
+ }
146
+
147
+ .no-gpu-msg {
148
+ color: #facc15;
149
+ font-size: 13px;
150
+ padding: 8px 0;
151
+ }
152
+
153
+ .gpu-item {
154
+ padding: 12px 0;
155
+ }
156
+ .gpu-item + .gpu-item { border-top: 1px solid #1e293b; }
157
+
158
+ .gpu-title {
159
+ display: flex;
160
+ justify-content: space-between;
161
+ align-items: center;
162
+ margin-bottom: 10px;
163
+ }
164
+
165
+ .gpu-name {
166
+ font-size: 14px;
167
+ font-weight: 600;
168
+ color: #cbd5e1;
169
+ }
170
+
171
+ .gpu-temp {
172
+ font-size: 12px;
173
+ padding: 2px 8px;
174
+ border-radius: 6px;
175
+ font-weight: 600;
176
+ }
177
+ .temp-cool { background: #052e16; color: #4ade80; }
178
+ .temp-warm { background: #422006; color: #facc15; }
179
+ .temp-hot { background: #450a0a; color: #f87171; }
180
+
181
+ .bar-container {
182
+ margin-bottom: 8px;
183
+ }
184
+
185
+ .bar-label {
186
+ display: flex;
187
+ justify-content: space-between;
188
+ font-size: 12px;
189
+ color: #94a3b8;
190
+ margin-bottom: 4px;
191
+ }
192
+
193
+ .bar-track {
194
+ height: 8px;
195
+ background: #0f172a;
196
+ border-radius: 4px;
197
+ overflow: hidden;
198
+ }
199
+
200
+ .bar-fill {
201
+ height: 100%;
202
+ border-radius: 4px;
203
+ transition: width 0.5s ease;
204
+ }
205
+
206
+ .bar-fill.usage-low { background: linear-gradient(90deg, #22c55e, #4ade80); }
207
+ .bar-fill.usage-mid { background: linear-gradient(90deg, #eab308, #facc15); }
208
+ .bar-fill.usage-high { background: linear-gradient(90deg, #ef4444, #f87171); }
209
+
210
+ .gpu-stats {
211
+ display: grid;
212
+ grid-template-columns: repeat(3, 1fr);
213
+ gap: 8px;
214
+ margin-top: 8px;
215
+ }
216
+
217
+ .stat {
218
+ text-align: center;
219
+ background: #0f172a;
220
+ border-radius: 6px;
221
+ padding: 8px;
222
+ }
223
+
224
+ .stat .stat-value {
225
+ font-size: 16px;
226
+ font-weight: 700;
227
+ color: #f1f5f9;
228
+ }
229
+
230
+ .stat .stat-label {
231
+ font-size: 10px;
232
+ color: #64748b;
233
+ margin-top: 2px;
234
+ }
235
+
236
+ .loading {
237
+ text-align: center;
238
+ padding: 80px 20px;
239
+ color: #94a3b8;
240
+ font-size: 16px;
241
+ }
242
+
243
+ .spinner {
244
+ display: inline-block;
245
+ width: 28px;
246
+ height: 28px;
247
+ border: 3px solid #334155;
248
+ border-top-color: #60a5fa;
249
+ border-radius: 50%;
250
+ animation: spin 0.8s linear infinite;
251
+ margin-bottom: 12px;
252
+ }
253
+
254
+ @keyframes spin { to { transform: rotate(360deg); } }
255
+
256
+ .auto-refresh-toggle {
257
+ display: flex;
258
+ align-items: center;
259
+ gap: 6px;
260
+ font-size: 13px;
261
+ color: #94a3b8;
262
+ cursor: pointer;
263
+ user-select: none;
264
+ }
265
+
266
+ .auto-refresh-toggle input { cursor: pointer; }
267
+ </style>
268
+ </head>
269
+ <body>
270
+
271
+ <div class="header">
272
+ <h1>GPU Monitor</h1>
273
+ <div class="header-right">
274
+ <span class="status-text" id="update-time"></span>
275
+ <label class="auto-refresh-toggle">
276
+ <input type="checkbox" id="auto-refresh" checked>
277
+ Auto (30s)
278
+ </label>
279
+ <button class="btn-refresh" id="btn-refresh" onclick="refresh()">Refresh</button>
280
+ </div>
281
+ </div>
282
+
283
+ <div class="summary-bar" id="summary-bar"></div>
284
+ <div id="content">
285
+ <div class="loading"><div class="spinner"></div><br>Connecting to hosts...</div>
286
+ </div>
287
+
288
+ <script>
289
+ let autoRefreshTimer = null;
290
+
291
+ function usageClass(pct) {
292
+ if (pct < 50) return 'usage-low';
293
+ if (pct < 80) return 'usage-mid';
294
+ return 'usage-high';
295
+ }
296
+
297
+ function tempClass(t) {
298
+ if (t < 50) return 'temp-cool';
299
+ if (t < 75) return 'temp-warm';
300
+ return 'temp-hot';
301
+ }
302
+
303
+ function formatMB(mb) {
304
+ if (mb >= 1024) return (mb / 1024).toFixed(1) + ' GB';
305
+ return mb.toFixed(0) + ' MB';
306
+ }
307
+
308
+ function renderSummary(hosts) {
309
+ const online = hosts.filter(h => h.status === 'ok');
310
+ const totalGPUs = online.reduce((s, h) => s + h.gpus.length, 0);
311
+ const totalFree = online.reduce((s, h) => s + h.gpus.reduce((gs, g) => gs + g.memory_free_mb, 0), 0);
312
+ const idleGPUs = online.reduce((s, h) => s + h.gpus.filter(g => g.gpu_utilization_pct < 10).length, 0);
313
+
314
+ document.getElementById('summary-bar').innerHTML = `
315
+ <div class="summary-card">
316
+ <div class="label">Online Hosts</div>
317
+ <div class="value" style="color:#4ade80">${online.length}<span style="color:#64748b;font-size:16px"> / ${hosts.length}</span></div>
318
+ </div>
319
+ <div class="summary-card">
320
+ <div class="label">Total GPUs</div>
321
+ <div class="value" style="color:#60a5fa">${totalGPUs}</div>
322
+ </div>
323
+ <div class="summary-card">
324
+ <div class="label">Idle GPUs (< 10%)</div>
325
+ <div class="value" style="color:#4ade80">${idleGPUs}</div>
326
+ </div>
327
+ <div class="summary-card">
328
+ <div class="label">Total Free Memory</div>
329
+ <div class="value" style="color:#a78bfa">${formatMB(totalFree)}</div>
330
+ </div>
331
+ `;
332
+ }
333
+
334
+ function renderGPU(gpu) {
335
+ const memPct = gpu.memory_usage_pct;
336
+ const gpuPct = gpu.gpu_utilization_pct;
337
+ return `
338
+ <div class="gpu-item">
339
+ <div class="gpu-title">
340
+ <span class="gpu-name">GPU ${gpu.index}: ${gpu.name}</span>
341
+ <span class="gpu-temp ${tempClass(gpu.temperature_c)}">${gpu.temperature_c}&deg;C</span>
342
+ </div>
343
+ <div class="bar-container">
344
+ <div class="bar-label">
345
+ <span>GPU Utilization</span>
346
+ <span>${gpuPct}%</span>
347
+ </div>
348
+ <div class="bar-track">
349
+ <div class="bar-fill ${usageClass(gpuPct)}" style="width:${gpuPct}%"></div>
350
+ </div>
351
+ </div>
352
+ <div class="bar-container">
353
+ <div class="bar-label">
354
+ <span>Memory</span>
355
+ <span>${formatMB(gpu.memory_used_mb)} / ${formatMB(gpu.memory_total_mb)}</span>
356
+ </div>
357
+ <div class="bar-track">
358
+ <div class="bar-fill ${usageClass(memPct)}" style="width:${memPct}%"></div>
359
+ </div>
360
+ </div>
361
+ <div class="gpu-stats">
362
+ <div class="stat">
363
+ <div class="stat-value" style="color:${gpuPct < 10 ? '#4ade80' : gpuPct < 50 ? '#facc15' : '#f87171'}">${gpuPct}%</div>
364
+ <div class="stat-label">Utilization</div>
365
+ </div>
366
+ <div class="stat">
367
+ <div class="stat-value">${formatMB(gpu.memory_free_mb)}</div>
368
+ <div class="stat-label">Free Memory</div>
369
+ </div>
370
+ <div class="stat">
371
+ <div class="stat-value">${gpu.temperature_c}&deg;C</div>
372
+ <div class="stat-label">Temperature</div>
373
+ </div>
374
+ </div>
375
+ </div>
376
+ `;
377
+ }
378
+
379
+ function renderHosts(hosts) {
380
+ const container = document.getElementById('content');
381
+ if (!hosts.length) {
382
+ container.innerHTML = '<div class="loading">No hosts found in SSH config.</div>';
383
+ return;
384
+ }
385
+
386
+ container.innerHTML = '<div class="host-grid">' + hosts.map(host => {
387
+ let body = '';
388
+ if (host.status === 'ok') {
389
+ body = host.gpus.map(renderGPU).join('');
390
+ } else if (host.status === 'no_gpu') {
391
+ body = `<div class="no-gpu-msg">${host.error || 'No NVIDIA GPU detected'}</div>`;
392
+ } else {
393
+ body = `<div class="error-msg">${host.error || 'Unknown error'}</div>`;
394
+ }
395
+
396
+ const badgeClass = host.status === 'ok' ? 'badge-ok' : host.status === 'no_gpu' ? 'badge-no_gpu' : 'badge-error';
397
+ const badgeText = host.status === 'ok' ? 'Online' : host.status === 'no_gpu' ? 'No GPU' : 'Offline';
398
+
399
+ return `
400
+ <div class="host-card status-${host.status}">
401
+ <div class="host-header">
402
+ <div>
403
+ <div class="host-name">${host.alias}</div>
404
+ <div class="host-info">${host.user}@${host.hostname}:${host.port}</div>
405
+ </div>
406
+ <span class="status-badge ${badgeClass}">${badgeText}</span>
407
+ </div>
408
+ <div class="host-body">${body}</div>
409
+ </div>
410
+ `;
411
+ }).join('') + '</div>';
412
+ }
413
+
414
+ async function fetchData(force) {
415
+ const url = force ? '/api/refresh' : '/api/gpus';
416
+ const resp = await fetch(url);
417
+ return await resp.json();
418
+ }
419
+
420
+ async function refresh() {
421
+ const btn = document.getElementById('btn-refresh');
422
+ btn.disabled = true;
423
+ btn.textContent = 'Refreshing...';
424
+ try {
425
+ const data = await fetchData(true);
426
+ renderSummary(data.hosts);
427
+ renderHosts(data.hosts);
428
+ updateTime(data.updated_at);
429
+ } catch (e) {
430
+ console.error(e);
431
+ } finally {
432
+ btn.disabled = false;
433
+ btn.textContent = 'Refresh';
434
+ }
435
+ }
436
+
437
+ function updateTime(ts) {
438
+ const d = new Date(ts * 1000);
439
+ document.getElementById('update-time').textContent = 'Updated: ' + d.toLocaleTimeString();
440
+ }
441
+
442
+ async function init() {
443
+ try {
444
+ const data = await fetchData(false);
445
+ renderSummary(data.hosts);
446
+ renderHosts(data.hosts);
447
+ updateTime(data.updated_at);
448
+ } catch (e) {
449
+ document.getElementById('content').innerHTML =
450
+ '<div class="loading" style="color:#f87171">Failed to connect to server.</div>';
451
+ }
452
+ }
453
+
454
+ function setupAutoRefresh() {
455
+ const checkbox = document.getElementById('auto-refresh');
456
+ function doRefresh() {
457
+ fetchData(false).then(data => {
458
+ renderSummary(data.hosts);
459
+ renderHosts(data.hosts);
460
+ updateTime(data.updated_at);
461
+ }).catch(() => {});
462
+ }
463
+ checkbox.addEventListener('change', () => {
464
+ if (checkbox.checked) {
465
+ autoRefreshTimer = setInterval(doRefresh, 30000);
466
+ } else {
467
+ clearInterval(autoRefreshTimer);
468
+ }
469
+ });
470
+ autoRefreshTimer = setInterval(doRefresh, 30000);
471
+ }
472
+
473
+ init();
474
+ setupAutoRefresh();
475
+ </script>
476
+ </body>
477
+ </html>"""
@@ -0,0 +1,197 @@
1
+ #!/usr/bin/env python3
2
+ """GPU Monitor - Flask server that reads SSH config and queries remote GPUs."""
3
+
4
+ import os
5
+ import re
6
+ import time
7
+ import threading
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+
10
+ from flask import Flask, jsonify, Response
11
+ import paramiko
12
+
13
+ from . import __version__
14
+ from .dashboard import DASHBOARD_HTML
15
+
16
+ app = Flask(__name__)
17
+
18
+ SSH_CONFIG_PATH = os.path.expanduser("~/.ssh/config")
19
+ SSH_TIMEOUT = 8
20
+ GPU_QUERY_CMD = (
21
+ "nvidia-smi --query-gpu=index,name,memory.total,memory.used,memory.free,"
22
+ "utilization.gpu,temperature.gpu --format=csv,noheader,nounits 2>/dev/null"
23
+ )
24
+
25
+ cache = {"data": [], "last_update": 0}
26
+ cache_lock = threading.Lock()
27
+ CACHE_TTL = 30
28
+
29
+
30
+ def parse_ssh_config(path):
31
+ """Parse ~/.ssh/config and return a list of hosts."""
32
+ hosts = []
33
+ current = None
34
+
35
+ if not os.path.exists(path):
36
+ return hosts
37
+
38
+ with open(path, "r") as f:
39
+ for line in f:
40
+ line = line.strip()
41
+ if not line or line.startswith("#"):
42
+ continue
43
+
44
+ key_match = re.match(r"^(\w+)\s+(.+)$", line)
45
+ if not key_match:
46
+ continue
47
+
48
+ key, value = key_match.group(1), key_match.group(2)
49
+
50
+ if key.lower() == "host":
51
+ if "*" in value or "?" in value:
52
+ current = None
53
+ continue
54
+ current = {
55
+ "alias": value,
56
+ "hostname": None,
57
+ "user": None,
58
+ "port": 22,
59
+ "identity_file": None,
60
+ }
61
+ hosts.append(current)
62
+ elif current is not None:
63
+ if key.lower() == "hostname":
64
+ current["hostname"] = value
65
+ elif key.lower() == "user":
66
+ current["user"] = value
67
+ elif key.lower() == "port":
68
+ current["port"] = int(value)
69
+ elif key.lower() == "identityfile":
70
+ current["identity_file"] = os.path.expanduser(value)
71
+
72
+ return hosts
73
+
74
+
75
+ def query_gpu(host_info):
76
+ """SSH into a host and query GPU information."""
77
+ alias = host_info["alias"]
78
+ hostname = host_info["hostname"] or alias
79
+ user = host_info["user"]
80
+ port = host_info["port"]
81
+
82
+ result = {
83
+ "alias": alias,
84
+ "hostname": hostname,
85
+ "user": user or "unknown",
86
+ "port": port,
87
+ "status": "error",
88
+ "error": None,
89
+ "gpus": [],
90
+ }
91
+
92
+ try:
93
+ client = paramiko.SSHClient()
94
+ client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
95
+
96
+ connect_kwargs = {
97
+ "hostname": hostname,
98
+ "port": port,
99
+ "username": user,
100
+ "timeout": SSH_TIMEOUT,
101
+ "banner_timeout": SSH_TIMEOUT,
102
+ "auth_timeout": SSH_TIMEOUT,
103
+ "allow_agent": True,
104
+ "look_for_keys": True,
105
+ }
106
+ if host_info.get("identity_file"):
107
+ connect_kwargs["key_filename"] = host_info["identity_file"]
108
+
109
+ client.connect(**connect_kwargs)
110
+
111
+ stdin, stdout, stderr = client.exec_command(GPU_QUERY_CMD, timeout=SSH_TIMEOUT)
112
+ output = stdout.read().decode("utf-8").strip()
113
+
114
+ if not output:
115
+ result["status"] = "no_gpu"
116
+ result["error"] = "No NVIDIA GPU found or nvidia-smi not available"
117
+ else:
118
+ gpus = []
119
+ for line in output.split("\n"):
120
+ parts = [p.strip() for p in line.split(",")]
121
+ if len(parts) >= 7:
122
+ mem_total = float(parts[2])
123
+ mem_used = float(parts[3])
124
+ mem_free = float(parts[4])
125
+ utilization = float(parts[5])
126
+ gpus.append({
127
+ "index": int(parts[0]),
128
+ "name": parts[1],
129
+ "memory_total_mb": mem_total,
130
+ "memory_used_mb": mem_used,
131
+ "memory_free_mb": mem_free,
132
+ "memory_usage_pct": round(mem_used / mem_total * 100, 1) if mem_total > 0 else 0,
133
+ "gpu_utilization_pct": utilization,
134
+ "temperature_c": float(parts[6]),
135
+ })
136
+ result["gpus"] = gpus
137
+ if gpus:
138
+ result["status"] = "ok"
139
+ else:
140
+ result["status"] = "no_gpu"
141
+ result["error"] = "nvidia-smi returned no valid GPU data"
142
+
143
+ client.close()
144
+
145
+ except paramiko.AuthenticationException:
146
+ result["error"] = "Authentication failed"
147
+ except paramiko.SSHException as e:
148
+ result["error"] = f"SSH error: {e}"
149
+ except TimeoutError:
150
+ result["error"] = "Connection timed out"
151
+ except OSError as e:
152
+ result["error"] = f"Connection failed: {e}"
153
+ except Exception as e:
154
+ result["error"] = f"{type(e).__name__}: {e}"
155
+
156
+ return result
157
+
158
+
159
+ def fetch_all_gpu_info():
160
+ """Query all hosts concurrently."""
161
+ hosts = parse_ssh_config(SSH_CONFIG_PATH)
162
+ results = []
163
+
164
+ if not hosts:
165
+ return results
166
+
167
+ with ThreadPoolExecutor(max_workers=10) as executor:
168
+ futures = {executor.submit(query_gpu, h): h for h in hosts}
169
+ for future in as_completed(futures):
170
+ results.append(future.result())
171
+
172
+ order = {"ok": 0, "no_gpu": 1, "error": 2}
173
+ results.sort(key=lambda x: (order.get(x["status"], 3), x["alias"]))
174
+ return results
175
+
176
+
177
+ @app.route("/")
178
+ def index():
179
+ return Response(DASHBOARD_HTML, mimetype="text/html")
180
+
181
+
182
+ @app.route("/api/gpus")
183
+ def api_gpus():
184
+ now = time.time()
185
+ with cache_lock:
186
+ if now - cache["last_update"] > CACHE_TTL:
187
+ cache["data"] = fetch_all_gpu_info()
188
+ cache["last_update"] = now
189
+ return jsonify({"hosts": cache["data"], "updated_at": cache["last_update"]})
190
+
191
+
192
+ @app.route("/api/refresh")
193
+ def api_refresh():
194
+ with cache_lock:
195
+ cache["data"] = fetch_all_gpu_info()
196
+ cache["last_update"] = time.time()
197
+ return jsonify({"hosts": cache["data"], "updated_at": cache["last_update"]})
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: gnvitop
3
+ Version: 0.1.0
4
+ Summary: Global nvitop: web-based GPU monitoring dashboard for all your remote servers via SSH
5
+ Author: Linwei94
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Linwei94/gnvitop
8
+ Project-URL: Repository, https://github.com/Linwei94/gnvitop
9
+ Project-URL: Issues, https://github.com/Linwei94/gnvitop/issues
10
+ Keywords: gpu,monitor,nvidia,ssh,nvitop,dashboard,nvidia-smi
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Environment :: Web Environment
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: Intended Audience :: System Administrators
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Topic :: System :: Monitoring
19
+ Requires-Python: >=3.7
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: flask>=2.0
23
+ Requires-Dist: paramiko>=2.0
24
+ Dynamic: license-file
25
+
26
+ # gnvitop
27
+
28
+ **Global nvitop** -- a web-based GPU monitoring dashboard that monitors **all** your remote GPU servers from a single page.
29
+
30
+ Like [nvitop](https://github.com/XuehaiPan/nvitop), but for **all your servers at once**, displayed as a beautiful web dashboard.
31
+
32
+ ```
33
+ pip install gnvitop
34
+ gnvitop
35
+ ```
36
+
37
+ ## How It Works
38
+
39
+ 1. Reads your `~/.ssh/config` automatically
40
+ 2. Connects to each server over SSH and runs `nvidia-smi`
41
+ 3. Displays everything in a real-time web dashboard
42
+ 4. Auto-refreshes every 30 seconds
43
+
44
+ ```
45
+                         ┌──> Server A (nvidia-smi) ──> 4x A100
46
+ gnvitop ──> Browser ──> ├──> Server B (nvidia-smi) ──> 8x V100
47
+                         ├──> Server C (nvidia-smi) ──> 2x RTX 4090
48
+                         └──> Server D ──> offline
49
+ ```
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
+ pip install gnvitop
55
+ ```
56
+
57
+ ## Usage
58
+
59
+ ```bash
60
+ gnvitop # start and auto-open browser
61
+ gnvitop -p 8080 # custom port
62
+ gnvitop --host 0.0.0.0 # expose to LAN
63
+ gnvitop --no-browser # don't auto-open browser
64
+ gnvitop --ssh-config /path/to/config # custom SSH config
65
+ gnvitop -v # show version
66
+ ```
67
+
68
+ Or run as a module:
69
+
70
+ ```bash
71
+ python -m gnvitop
72
+ ```
73
+
74
+ ## Prerequisites
75
+
76
+ 1. **SSH config** -- your `~/.ssh/config` should have server entries:
77
+
78
+ ```
79
+ Host gpu-server-01
80
+ HostName 192.168.1.101
81
+ User alice
82
+ IdentityFile ~/.ssh/id_rsa
83
+
84
+ Host gpu-server-02
85
+ HostName 192.168.1.102
86
+ User bob
87
+ ```
88
+
89
+ 2. **SSH key auth** -- password-less login should be set up
90
+ 3. **nvidia-smi** -- must be installed on the remote servers
91
+
92
+ ## Features
93
+
94
+ - **Zero config** -- reads `~/.ssh/config` automatically, no setup needed
95
+ - **One command** -- `pip install gnvitop && gnvitop`, that's it
96
+ - **Auto browser** -- opens dashboard in your browser on start
97
+ - **Real-time** -- 30s auto-refresh with manual refresh button
98
+ - **Concurrent** -- queries all servers in parallel (10 workers)
99
+ - **Cached** -- 30s cache to avoid hammering your servers
100
+ - **Dark UI** -- clean, responsive dark-themed dashboard
101
+ - **At a glance** -- summary bar shows online hosts, total GPUs, idle GPUs, free memory
102
+ - **Color coded** -- green (online), yellow (no GPU), red (offline)
103
+ - **GPU details** -- utilization bars, memory bars, temperature with color alerts
104
+
105
+ ## Comparison with nvitop
106
+
107
+ | Feature | nvitop | gnvitop |
108
+ |---------|--------|---------|
109
+ | Monitor local GPU | Yes | No |
110
+ | Monitor remote GPUs | No | Yes |
111
+ | Multiple servers | No | Yes |
112
+ | Interface | Terminal | Web browser |
113
+ | Setup | Run on each server | Run once, reads SSH config |
114
+
115
+ **gnvitop** is not a replacement for nvitop -- it's a complement. Use nvitop for detailed local GPU monitoring, use gnvitop to get an overview of all your GPU servers from one place.
116
+
117
+ ## License
118
+
119
+ MIT
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ gnvitop/__init__.py
5
+ gnvitop/__main__.py
6
+ gnvitop/cli.py
7
+ gnvitop/dashboard.py
8
+ gnvitop/server.py
9
+ gnvitop.egg-info/PKG-INFO
10
+ gnvitop.egg-info/SOURCES.txt
11
+ gnvitop.egg-info/dependency_links.txt
12
+ gnvitop.egg-info/entry_points.txt
13
+ gnvitop.egg-info/requires.txt
14
+ gnvitop.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ gnvitop = gnvitop.cli:main
@@ -0,0 +1,2 @@
1
+ flask>=2.0
2
+ paramiko>=2.0
@@ -0,0 +1 @@
1
+ gnvitop
@@ -0,0 +1,37 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.3", "wheel"]  # the SPDX string form of `license` (PEP 639) needs setuptools 77+
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "gnvitop"
7
+ version = "0.1.0"
8
+ description = "Global nvitop: web-based GPU monitoring dashboard for all your remote servers via SSH"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.7"
12
+ authors = [
13
+ {name = "Linwei94"},
14
+ ]
15
+ keywords = ["gpu", "monitor", "nvidia", "ssh", "nvitop", "dashboard", "nvidia-smi"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Environment :: Console",
19
+ "Environment :: Web Environment",
20
+ "Intended Audience :: Developers",
21
+ "Intended Audience :: Science/Research",
22
+ "Intended Audience :: System Administrators",
23
+ "Programming Language :: Python :: 3",
24
+ "Topic :: System :: Monitoring",
25
+ ]
26
+ dependencies = [
27
+ "flask>=2.0",
28
+ "paramiko>=2.0",
29
+ ]
30
+
31
+ [project.scripts]
32
+ gnvitop = "gnvitop.cli:main"
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/Linwei94/gnvitop"
36
+ Repository = "https://github.com/Linwei94/gnvitop"
37
+ Issues = "https://github.com/Linwei94/gnvitop/issues"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+