hpc-stats-scripts 1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,278 @@
+ Metadata-Version: 2.1
+ Name: hpc-stats-scripts
+ Version: 1.2
+ Summary: Utilities for HPC clusters including PBS/Slurm job statistics and a psutil-based resource monitor.
+ Author: hpc-stats-scripts contributors
+ License: MIT License
+
+ Copyright (c) 2025
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Keywords: hpc,pbs,slurm,monitoring,cluster,psutil
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: psutil
+ Provides-Extra: plot
+ Requires-Dist: matplotlib; extra == "plot"
+ Requires-Dist: numpy; extra == "plot"
+ Provides-Extra: gpu
+ Requires-Dist: nvidia-ml-py3; extra == "gpu"
+ Provides-Extra: all
+ Requires-Dist: matplotlib; extra == "all"
+ Requires-Dist: numpy; extra == "all"
+ Requires-Dist: nvidia-ml-py3; extra == "all"
+
+ # hpc-stats-scripts
+
+ Utilities for working with high-performance computing (HPC) environments. The scripts
+ help inspect PBS/Slurm job efficiency and monitor CPU and memory usage on a
+ running system or process tree.
+
+ Made with Codex :)
+
+ ## Dependencies
+
+ Install the required Python packages with pip:
+
+ | Feature | Packages | Install command |
+ | ------- | -------- | ---------------- |
+ | Core utilities | psutil | `pip install psutil` |
+ | Plotting for `psutil-monitor` | matplotlib, numpy | `pip install matplotlib numpy` |
+ | GPU monitoring for `psutil-monitor --gpu` | nvidia-ml-py3 (pynvml) | `pip install nvidia-ml-py3` |
+ | Plot + GPU combo | psutil, matplotlib, numpy, nvidia-ml-py3 | `pip install psutil matplotlib numpy nvidia-ml-py3` |
+ | All extras via pip extras | plot + GPU | `pip install .[all]` |
+
+ The `pbs-bulk-user-stats` command also expects the PBS `qstat` utility to be
+ available in your environment.
+ The `slurm-bulk-user-stats` command expects Slurm's `sacct` utility to be
+ available in your environment.
+
+ ## Installation
+
+ Clone the repository and install with pip:
+
+ ```bash
+ # Core only
+ pip install .
+
+ # Core + plotting support
+ pip install .[plot]
+
+ # Core + GPU support
+ pip install .[gpu]
+
+ # Everything (plot + GPU)
+ pip install .[all]
+ ```
+
+ Install directly from GitHub:
+ ```bash
+ pip install "hpc-stats-scripts @ git+https://github.com/avnikonenko/hpc-stats-scripts.git"
+ pip install "hpc-stats-scripts[plot] @ git+https://github.com/avnikonenko/hpc-stats-scripts.git"
+ pip install "hpc-stats-scripts[gpu] @ git+https://github.com/avnikonenko/hpc-stats-scripts.git"
+ pip install "hpc-stats-scripts[all] @ git+https://github.com/avnikonenko/hpc-stats-scripts.git"
+ ```
+
+ The base installation depends on [psutil](https://pypi.org/project/psutil/).
+ The `plot` extra pulls in `matplotlib` and `numpy` for the `--plot` feature of `psutil-monitor`.
+ The `gpu` extra installs `nvidia-ml-py3` to enable `--gpu`.
+
+ ## CLI tools
+
+ ### `pbs-bulk-user-stats`
+
+ Summarize CPU and memory usage for PBS jobs and show which nodes the jobs are
+ allocated to. The command relies on `qstat` being available in your `PATH`.
+ The table now includes `NGPUS` (requested GPUs) when present.
+
+ Examples:
+
+ ```bash
+ # Summarize a specific job and write CSV output
+ pbs-bulk-user-stats --job 12345 --csv stats.csv
+
+ # Summarize all jobs (including finished) for the current user (the default)
+ pbs-bulk-user-stats --include-finished
+
+ # Summarize all jobs for a specific user
+ pbs-bulk-user-stats --user myuser --include-finished
+ ```
+
+ When invoked with no `--user` or `--job` options:
+ - On a login node (no `$PBS_JOBID` present), it summarizes all jobs for the current user.
+ - Inside a running PBS job (where `$PBS_JOBID` is set), it automatically summarizes that specific job.
+
+ ```
+ pbs-bulk-user-stats
+ ```
+
+ State codes (PBS):
+ - `R` running, `Q` queued/waiting, `X` finished (requires `--include-finished`); other codes are printed under “other” in the summary.
+
+ **Expected output (CPU/RAM only):**
+ ```
+ $ pbs-bulk-user-stats
+
+ JOBID STATE NAME NODES NCPUS WALL(h) CPUT(h) avgCPU CPUeff memUsed memReq memEff
+ -------------------------------------------------------------------------------------------------------
+ 0001 R run1 pbs-1 176 38.55 3632.12 163.6 93.53% 207.4 GiB 256.00 GiB 81.10%
+ 0002 R run2 pbs-2 176 38.59 3589.72 93.13 52.91% 50.02 GiB 256.00 GiB 19.54%
+ ...
+ Summary:
+ jobs: 5
+ unique nodes: 3
+ states: R=4 Q=1 X=0 other=0
+ mean CPUeff: 75.20%
+ mean avgCPU: 132.35
+ mean memEff: 82.50%
+ max memUsed: 230.16 GiB
+
+ ```
+ or, if run inside a running PBS job:
+ ```
+ JOBID STATE NAME NODES NCPUS WALL(h) CPUT(h) avgCPU CPUeff memUsed memReq memEff
+ -----------------------------------------------------------------------------------------------------
+ 0001 R STDIN pbs-5 100 0.03 0.01 0.22 0.22% 666.58 MiB 30.00 GiB 2.17%
+
+ Summary:
+ jobs: 1
+ mean CPUeff: 0.22%
+ mean avgCPU: 0.22
+ mean memEff: 2.17%
+ max memUsed: 666.58 MiB
+
+ ```
+
+ After the table, a summary reports the job count, mean CPU efficiency,
+ mean average CPU usage, mean memory efficiency, and the peak memory used
+ across all listed jobs.
+
+ ### `psutil-monitor`
+
+ Real-time CPU and memory monitor for the system or a process tree.
+ Use `--gpu` to also report aggregate GPU utilization and memory via NVML (requires `nvidia-ml-py3`).
+ When `--csv`/`--plot` are used, metrics stream live to the terminal during the run; CSV/PNG files are written when the monitor exits (Ctrl+C, duration reached, or the process tree ends).
+
+ Example output files (generated with `--plot` and `--csv`):
+
+ - Plot (CPU + GPU stacked):
+
+ ![psutil-monitor example plot](docs/psutil-monitor-example.jpg)
+
+ - CSV: `docs/psutil-monitor-example.csv`
+
+ GPU output fields (when `--gpu` is used):
+ - **GPU util**: Average utilization across visible GPUs.
+ - **busyGPUs**: Sum of utilization fractions (e.g., two GPUs at 50% each → 1.0).
+ - **GPU mem %**: Aggregate GPU memory usage percentage.
+ - **Per-GPU** (CSV `gpu_pergpu`): `index:util%/used/total` for each device.
+
+ Examples:
+
+ ```bash
+ # System-wide monitoring (the default) with console output only
+ psutil-monitor
+
+ # System-wide monitoring with CSV and PNG output
+ psutil-monitor --mode system --csv node.csv --plot node.png
+
+ # Monitor the current process tree (useful inside a PBS job)
+ psutil-monitor --mode proc --pid $$ --include-children --csv job.csv
+
+ # To monitor the resource usage of script.py:
+ python script.py & # launch the workload
+ target=$! # PID of script.py
+ echo $target
+ # psutil-monitor watches that PID and exits when the process tree is gone
+ psutil-monitor --mode proc --pid "$target" --include-children --csv stat.csv --plot plot.png
+
+ ```
+ **Expected output:**
+ ```
+ $ psutil-monitor
+
+ CPUs available (affinity): 384
+ Total memory available: 754.76 GiB
+ CPU basis for %: 384
+ Memory basis for %: 754.76 GiB
+ 2025-08-14T15:20:14 CPU 79.67% busyCPUs 305.93 (provided 384) MEM 9.93% used 74.96 GiB / total 754.76 GiB
+ 2025-08-14T15:20:16 CPU 69.30% busyCPUs 266.13 (provided 384) MEM 9.95% used 75.12 GiB / total 754.76 GiB
+ 2025-08-14T15:20:18 CPU 61.34% busyCPUs 235.53 (provided 384) MEM 10.05% used 75.82 GiB / total 754.76 GiB
+ 2025-08-14T15:20:20 CPU 61.32% busyCPUs 235.47 (provided 384) MEM 10.09% used 76.15 GiB / total 754.76 GiB
+ 2025-08-14T15:20:22 CPU 74.57% busyCPUs 286.33 (provided 384) MEM 9.94% used 74.99 GiB / total 754.76 GiB
+ 2025-08-14T15:20:24 CPU 85.94% busyCPUs 330.01 (provided 384) MEM 9.86% used 74.44 GiB / total 754.76 GiB
+ Average busy CPUs over run: 276.570
+ Peak memory (system): 76.15 GiB
+
+ ```
+ With GPUs (`--gpu` and NVIDIA GPUs present):
+ ```
+ $ psutil-monitor --gpu
+
+ CPUs available (affinity): 96
+ Total memory available: 503.70 GiB
+ CPU basis for %: 96
+ Memory basis for %: 503.70 GiB
+ GPUs detected (NVML): 4
+ 2026-02-03T10:00:14 CPU 45.12% busyCPUs 43.32 (provided 96) MEM 8.10% used 40.80 GiB / total 503.70 GiB GPU util 57.5% busyGPUs 2.30 mem 42.0%
+ 2026-02-03T10:00:16 CPU 48.33% busyCPUs 46.39 (provided 96) MEM 8.20% used 41.30 GiB / total 503.70 GiB GPU util 63.0% busyGPUs 2.52 mem 44.1%
+ 2026-02-03T10:00:18 CPU 52.10% busyCPUs 49.99 (provided 96) MEM 8.25% used 41.60 GiB / total 503.70 GiB GPU util 68.7% busyGPUs 2.75 mem 45.3%
+ Average busy CPUs over run: 46.567
+ Average busy GPUs over run: 2.523
+ Peak memory (system): 41.60 GiB
+
+ ```
+
+ Use the `--help` option of each command to see all available options.
+
+ ### `slurm-bulk-user-stats`
+
+ Summarize CPU and memory usage for Slurm jobs and show which nodes the jobs are
+ allocated to. The command relies on `sacct` being available in your `PATH`.
+ The table includes `NGPUS` based on AllocTRES/AllocGRES when present.
+ If TRES GPU usage metrics are available, the summary also reports mean GPU utilization and GPU-hours (used/requested).
+
+ State codes (Slurm):
+ - `R`/`RUNNING`, `PD`/`PENDING`, `CD`/`COMPLETED`; other states (e.g., `F`, `CG`, `S`, `TO`) are grouped under “other” in the summary and listed in the breakdown.
+
+ Examples:
+
+ ```bash
+ # Summarize a specific job and write CSV output
+ slurm-bulk-user-stats --job 12345 --csv stats.csv
+
+ # Summarize all running jobs for the current user (default)
+ slurm-bulk-user-stats
+
+ # Summarize all jobs (including finished) for a specific user
+ slurm-bulk-user-stats --user myuser --include-finished
+ ```
+
+ When invoked with no `--user` or `--job` options:
+ - On a login node (no `$SLURM_JOB_ID` present), it summarizes pending/running jobs for the current user.
+ - Inside a running Slurm job (where `$SLURM_JOB_ID` is set), it automatically summarizes that specific job.
+
+ ```
+ slurm-bulk-user-stats
+ ```
+
+ The output mirrors the PBS version, showing job state, node list, CPU/memory
+ usage, efficiency metrics, and a summary block with job counts and averages.
@@ -0,0 +1,236 @@
+ # hpc-stats-scripts
+
+ Utilities for working with high-performance computing (HPC) environments. The scripts
+ help inspect PBS/Slurm job efficiency and monitor CPU and memory usage on a
+ running system or process tree.
+
+ Made with Codex :)
+
+ ## Dependencies
+
+ Install the required Python packages with pip:
+
+ | Feature | Packages | Install command |
+ | ------- | -------- | ---------------- |
+ | Core utilities | psutil | `pip install psutil` |
+ | Plotting for `psutil-monitor` | matplotlib, numpy | `pip install matplotlib numpy` |
+ | GPU monitoring for `psutil-monitor --gpu` | nvidia-ml-py3 (pynvml) | `pip install nvidia-ml-py3` |
+ | Plot + GPU combo | psutil, matplotlib, numpy, nvidia-ml-py3 | `pip install psutil matplotlib numpy nvidia-ml-py3` |
+ | All extras via pip extras | plot + GPU | `pip install .[all]` |
+
+ The `pbs-bulk-user-stats` command also expects the PBS `qstat` utility to be
+ available in your environment.
+ The `slurm-bulk-user-stats` command expects Slurm's `sacct` utility to be
+ available in your environment.
+
+ ## Installation
+
+ Clone the repository and install with pip:
+
+ ```bash
+ # Core only
+ pip install .
+
+ # Core + plotting support
+ pip install .[plot]
+
+ # Core + GPU support
+ pip install .[gpu]
+
+ # Everything (plot + GPU)
+ pip install .[all]
+ ```
+
+ Install directly from GitHub:
+ ```bash
+ pip install "hpc-stats-scripts @ git+https://github.com/avnikonenko/hpc-stats-scripts.git"
+ pip install "hpc-stats-scripts[plot] @ git+https://github.com/avnikonenko/hpc-stats-scripts.git"
+ pip install "hpc-stats-scripts[gpu] @ git+https://github.com/avnikonenko/hpc-stats-scripts.git"
+ pip install "hpc-stats-scripts[all] @ git+https://github.com/avnikonenko/hpc-stats-scripts.git"
+ ```
+
+ The base installation depends on [psutil](https://pypi.org/project/psutil/).
+ The `plot` extra pulls in `matplotlib` and `numpy` for the `--plot` feature of `psutil-monitor`.
+ The `gpu` extra installs `nvidia-ml-py3` to enable `--gpu`.
+
+ ## CLI tools
+
+ ### `pbs-bulk-user-stats`
+
+ Summarize CPU and memory usage for PBS jobs and show which nodes the jobs are
+ allocated to. The command relies on `qstat` being available in your `PATH`.
+ The table now includes `NGPUS` (requested GPUs) when present.
+
+ Examples:
+
+ ```bash
+ # Summarize a specific job and write CSV output
+ pbs-bulk-user-stats --job 12345 --csv stats.csv
+
+ # Summarize all jobs (including finished) for the current user (the default)
+ pbs-bulk-user-stats --include-finished
+
+ # Summarize all jobs for a specific user
+ pbs-bulk-user-stats --user myuser --include-finished
+ ```
+
+ When invoked with no `--user` or `--job` options:
+ - On a login node (no `$PBS_JOBID` present), it summarizes all jobs for the current user.
+ - Inside a running PBS job (where `$PBS_JOBID` is set), it automatically summarizes that specific job.
+
+ ```
+ pbs-bulk-user-stats
+ ```
+
+ State codes (PBS):
+ - `R` running, `Q` queued/waiting, `X` finished (requires `--include-finished`); other codes are printed under “other” in the summary.
+
+ **Expected output (CPU/RAM only):**
+ ```
+ $ pbs-bulk-user-stats
+
+ JOBID STATE NAME NODES NCPUS WALL(h) CPUT(h) avgCPU CPUeff memUsed memReq memEff
+ -------------------------------------------------------------------------------------------------------
+ 0001 R run1 pbs-1 176 38.55 3632.12 163.6 93.53% 207.4 GiB 256.00 GiB 81.10%
+ 0002 R run2 pbs-2 176 38.59 3589.72 93.13 52.91% 50.02 GiB 256.00 GiB 19.54%
+ ...
+ Summary:
+ jobs: 5
+ unique nodes: 3
+ states: R=4 Q=1 X=0 other=0
+ mean CPUeff: 75.20%
+ mean avgCPU: 132.35
+ mean memEff: 82.50%
+ max memUsed: 230.16 GiB
+
+ ```
+ or, if run inside a running PBS job:
+ ```
+ JOBID STATE NAME NODES NCPUS WALL(h) CPUT(h) avgCPU CPUeff memUsed memReq memEff
+ -----------------------------------------------------------------------------------------------------
+ 0001 R STDIN pbs-5 100 0.03 0.01 0.22 0.22% 666.58 MiB 30.00 GiB 2.17%
+
+ Summary:
+ jobs: 1
+ mean CPUeff: 0.22%
+ mean avgCPU: 0.22
+ mean memEff: 2.17%
+ max memUsed: 666.58 MiB
+
+ ```
+
+ After the table, a summary reports the job count, mean CPU efficiency,
+ mean average CPU usage, mean memory efficiency, and the peak memory used
+ across all listed jobs.
+
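+ For reference, the efficiency columns boil down to ratios of job attributes
+ that `qstat -f` reports. The sketch below illustrates how `avgCPU`, `CPUeff`,
+ and `memEff` can be derived from fields such as `resources_used.cput`,
+ `resources_used.walltime`, `Resource_List.ncpus`, `resources_used.mem`, and
+ `Resource_List.mem`; it is illustrative only (the tool's own parsing may
+ differ) and all values are hypothetical.
+
+ ```python
+ # Illustrative sketch only; pbs-bulk-user-stats' own parsing may differ.
+
+ def hhmmss_to_hours(value: str) -> float:
+     """Convert a PBS duration such as '96:30:00' to hours."""
+     h, m, s = (int(part) for part in value.split(":"))
+     return h + m / 60 + s / 3600
+
+
+ def pbs_mem_to_kib(value: str) -> float:
+     """Convert a PBS memory string such as '262144kb' or '16gb' to KiB.
+     Simplified: only the common kb/mb/gb/tb suffixes are handled."""
+     units = {"kb": 1, "mb": 1024, "gb": 1024 ** 2, "tb": 1024 ** 3}
+     for suffix, factor in units.items():
+         if value.endswith(suffix):
+             return float(value[: -len(suffix)]) * factor
+     raise ValueError(f"unrecognized PBS memory string: {value!r}")
+
+
+ # Hypothetical job attributes in qstat -f notation.
+ cput_h = hhmmss_to_hours("96:00:00")    # resources_used.cput
+ wall_h = hhmmss_to_hours("12:00:00")    # resources_used.walltime
+ ncpus = 16                              # Resource_List.ncpus
+ mem_used = pbs_mem_to_kib("8388608kb")  # resources_used.mem (8 GiB)
+ mem_req = pbs_mem_to_kib("16gb")        # Resource_List.mem
+
+ avg_cpu = cput_h / wall_h               # average busy CPUs -> 8.00
+ cpu_eff = 100 * avg_cpu / ncpus         # CPUeff -> 50.00%
+ mem_eff = 100 * mem_used / mem_req      # memEff -> 50.00%
+ print(f"avgCPU {avg_cpu:.2f}  CPUeff {cpu_eff:.2f}%  memEff {mem_eff:.2f}%")
+ ```
+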
+ ### `psutil-monitor`
+
+ Real-time CPU and memory monitor for the system or a process tree.
+ Use `--gpu` to also report aggregate GPU utilization and memory via NVML (requires `nvidia-ml-py3`).
+ When `--csv`/`--plot` are used, metrics stream live to the terminal during the run; CSV/PNG files are written when the monitor exits (Ctrl+C, duration reached, or the process tree ends).
+
+ Example output files (generated with `--plot` and `--csv`):
+
+ - Plot (CPU + GPU stacked):
+
+ ![psutil-monitor example plot](docs/psutil-monitor-example.jpg)
+
+ - CSV: `docs/psutil-monitor-example.csv`
+
+ GPU output fields (when `--gpu` is used):
+ - **GPU util**: Average utilization across visible GPUs.
+ - **busyGPUs**: Sum of utilization fractions (e.g., two GPUs at 50% each → 1.0); see the aggregation sketch after this list.
+ - **GPU mem %**: Aggregate GPU memory usage percentage.
+ - **Per-GPU** (CSV `gpu_pergpu`): `index:util%/used/total` for each device.
+
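+ The sketch below illustrates how such aggregates can be computed with plain
+ `pynvml` calls (the module installed by `nvidia-ml-py3`); it is an
+ illustration of the definitions above, not necessarily the exact code that
+ `psutil-monitor` runs.
+
+ ```python
+ # Illustrative sketch of the GPU aggregation idea using pynvml.
+ import pynvml
+
+ pynvml.nvmlInit()
+ try:
+     count = pynvml.nvmlDeviceGetCount()
+     if count == 0:
+         raise SystemExit("no NVIDIA GPUs visible to NVML")
+     utils, mem_used, mem_total = [], 0, 0
+     for i in range(count):
+         handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+         utils.append(pynvml.nvmlDeviceGetUtilizationRates(handle).gpu)  # percent
+         info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+         mem_used += info.used
+         mem_total += info.total
+
+     gpu_util = sum(utils) / count             # "GPU util": mean utilization (%)
+     busy_gpus = sum(u / 100 for u in utils)   # "busyGPUs": sum of fractions
+     gpu_mem_pct = 100 * mem_used / mem_total  # "GPU mem %": aggregate memory use
+     print(f"GPU util {gpu_util:.1f}%  busyGPUs {busy_gpus:.2f}  mem {gpu_mem_pct:.1f}%")
+ finally:
+     pynvml.nvmlShutdown()
+ ```
+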
+ Examples:
+
+ ```bash
+ # System-wide monitoring (the default) with console output only
+ psutil-monitor
+
+ # System-wide monitoring with CSV and PNG output
+ psutil-monitor --mode system --csv node.csv --plot node.png
+
+ # Monitor the current process tree (useful inside a PBS job)
+ psutil-monitor --mode proc --pid $$ --include-children --csv job.csv
+
+ # To monitor the resource usage of script.py:
+ python script.py & # launch the workload
+ target=$! # PID of script.py
+ echo $target
+ # psutil-monitor watches that PID and exits when the process tree is gone
+ psutil-monitor --mode proc --pid "$target" --include-children --csv stat.csv --plot plot.png
+
+ ```
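+
+ Conceptually, `--mode proc` monitoring means sampling a root process and its
+ children with psutil and summing the per-process numbers. The sketch below
+ illustrates that idea; it is not the tool's actual implementation, which also
+ handles CSV/plot output, durations, and GPU metrics.
+
+ ```python
+ # Minimal sketch of process-tree sampling with psutil (illustrative only).
+ import os
+ import time
+
+ import psutil
+
+
+ def sample_tree(pid: int) -> tuple[float, float]:
+     """Return (busy CPUs, RSS in GiB) summed over pid and its children."""
+     root = psutil.Process(pid)
+     procs = [root] + root.children(recursive=True)
+     for p in procs:
+         try:
+             p.cpu_percent(None)  # prime the per-process CPU counters
+         except psutil.NoSuchProcess:
+             pass
+     time.sleep(1.0)              # measurement interval
+     busy = rss = 0.0
+     for p in procs:
+         try:
+             busy += p.cpu_percent(None) / 100.0
+             rss += p.memory_info().rss
+         except psutil.NoSuchProcess:
+             pass                 # child exited during the interval
+     return busy, rss / 1024 ** 3
+
+
+ if __name__ == "__main__":
+     busy_cpus, rss_gib = sample_tree(os.getpid())
+     print(f"busyCPUs {busy_cpus:.2f}  RSS {rss_gib:.2f} GiB")
+ ```
+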
+ **Expected output:**
+ ```
+ $ psutil-monitor
+
+ CPUs available (affinity): 384
+ Total memory available: 754.76 GiB
+ CPU basis for %: 384
+ Memory basis for %: 754.76 GiB
+ 2025-08-14T15:20:14 CPU 79.67% busyCPUs 305.93 (provided 384) MEM 9.93% used 74.96 GiB / total 754.76 GiB
+ 2025-08-14T15:20:16 CPU 69.30% busyCPUs 266.13 (provided 384) MEM 9.95% used 75.12 GiB / total 754.76 GiB
+ 2025-08-14T15:20:18 CPU 61.34% busyCPUs 235.53 (provided 384) MEM 10.05% used 75.82 GiB / total 754.76 GiB
+ 2025-08-14T15:20:20 CPU 61.32% busyCPUs 235.47 (provided 384) MEM 10.09% used 76.15 GiB / total 754.76 GiB
+ 2025-08-14T15:20:22 CPU 74.57% busyCPUs 286.33 (provided 384) MEM 9.94% used 74.99 GiB / total 754.76 GiB
+ 2025-08-14T15:20:24 CPU 85.94% busyCPUs 330.01 (provided 384) MEM 9.86% used 74.44 GiB / total 754.76 GiB
+ Average busy CPUs over run: 276.570
+ Peak memory (system): 76.15 GiB
+
+ ```
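+
+ In this output, `busyCPUs` corresponds to the reported CPU percentage scaled
+ by the "CPU basis" printed in the header (and `busyGPUs` in the GPU example
+ below is the GPU utilization scaled by the GPU count in the same way):
+
+ ```python
+ # First sample above: CPU 79.67% against a CPU basis of 384 cores.
+ cpu_percent, cpu_basis = 79.67, 384
+ print(round(cpu_percent / 100 * cpu_basis, 2))  # 305.93 busy CPUs
+ ```
+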
+ With GPUs (`--gpu` and NVIDIA GPUs present):
+ ```
+ $ psutil-monitor --gpu
+
+ CPUs available (affinity): 96
+ Total memory available: 503.70 GiB
+ CPU basis for %: 96
+ Memory basis for %: 503.70 GiB
+ GPUs detected (NVML): 4
+ 2026-02-03T10:00:14 CPU 45.12% busyCPUs 43.32 (provided 96) MEM 8.10% used 40.80 GiB / total 503.70 GiB GPU util 57.5% busyGPUs 2.30 mem 42.0%
+ 2026-02-03T10:00:16 CPU 48.33% busyCPUs 46.39 (provided 96) MEM 8.20% used 41.30 GiB / total 503.70 GiB GPU util 63.0% busyGPUs 2.52 mem 44.1%
+ 2026-02-03T10:00:18 CPU 52.10% busyCPUs 49.99 (provided 96) MEM 8.25% used 41.60 GiB / total 503.70 GiB GPU util 68.7% busyGPUs 2.75 mem 45.3%
+ Average busy CPUs over run: 46.567
+ Average busy GPUs over run: 2.523
+ Peak memory (system): 41.60 GiB
+
+ ```
+
+ Use the `--help` option of each command to see all available options.
+
+ ### `slurm-bulk-user-stats`
+
+ Summarize CPU and memory usage for Slurm jobs and show which nodes the jobs are
+ allocated to. The command relies on `sacct` being available in your `PATH`.
+ The table includes `NGPUS` based on AllocTRES/AllocGRES when present.
+ If TRES GPU usage metrics are available, the summary also reports mean GPU utilization and GPU-hours (used/requested).
+ A sketch of how such accounting fields can be pulled from `sacct` appears at the end of this section.
+
+ State codes (Slurm):
+ - `R`/`RUNNING`, `PD`/`PENDING`, `CD`/`COMPLETED`; other states (e.g., `F`, `CG`, `S`, `TO`) are grouped under “other” in the summary and listed in the breakdown.
+
+ Examples:
+
+ ```bash
+ # Summarize a specific job and write CSV output
+ slurm-bulk-user-stats --job 12345 --csv stats.csv
+
+ # Summarize all running jobs for the current user (default)
+ slurm-bulk-user-stats
+
+ # Summarize all jobs (including finished) for a specific user
+ slurm-bulk-user-stats --user myuser --include-finished
+ ```
+
+ When invoked with no `--user` or `--job` options:
+ - On a login node (no `$SLURM_JOB_ID` present), it summarizes pending/running jobs for the current user.
+ - Inside a running Slurm job (where `$SLURM_JOB_ID` is set), it automatically summarizes that specific job.
+
+ ```
+ slurm-bulk-user-stats
+ ```
+
+ The output mirrors the PBS version, showing job state, node list, CPU/memory
+ usage, efficiency metrics, and a summary block with job counts and averages.
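+
+ For reference, the accounting data summarized here corresponds to standard
+ `sacct` format fields such as `JobID`, `State`, `NodeList`, `AllocCPUS`,
+ `Elapsed`, `TotalCPU`, `MaxRSS`, `ReqMem`, and `AllocTRES`. The sketch below
+ shows one way to pull those fields with `subprocess`; it is illustrative only
+ and not the exact query that `slurm-bulk-user-stats` runs.
+
+ ```python
+ # Illustrative sketch only: fetch per-job accounting fields from sacct.
+ import getpass
+ import subprocess
+ from typing import Optional
+
+ FIELDS = "JobID,JobName,State,NodeList,AllocCPUS,Elapsed,TotalCPU,MaxRSS,ReqMem,AllocTRES"
+
+
+ def fetch_jobs(user: Optional[str] = None) -> list:
+     """Return one dict per job allocation for the given (or current) user."""
+     cmd = [
+         "sacct",
+         f"--user={user or getpass.getuser()}",
+         f"--format={FIELDS}",
+         "--parsable2",    # '|'-delimited output without a trailing delimiter
+         "--noheader",
+         "--allocations",  # one line per job allocation, not per step
+     ]
+     out = subprocess.run(cmd, capture_output=True, text=True, check=True).stdout
+     keys = FIELDS.split(",")
+     return [dict(zip(keys, line.split("|"))) for line in out.splitlines() if line]
+
+
+ if __name__ == "__main__":
+     for job in fetch_jobs():
+         print(job["JobID"], job["State"], job["NodeList"], job["AllocTRES"])
+ ```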
@@ -0,0 +1,38 @@
+ [build-system]
+ requires = ["setuptools>=61.0"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "hpc-stats-scripts"
+ version = "1.2"
+ description = "Utilities for HPC clusters including PBS/Slurm job statistics and a psutil-based resource monitor."
+ readme = "README.md"
+ license = {file = "LICENSE"}
+ requires-python = ">=3.9"
+ authors = [{name = "hpc-stats-scripts contributors"}]
+ dependencies = [
+     "psutil",
+ ]
+ keywords = ["hpc", "pbs", "slurm", "monitoring", "cluster", "psutil"]
+
+ [project.optional-dependencies]
+ plot = [
+     "matplotlib",
+     "numpy",
+ ]
+ gpu = [
+     "nvidia-ml-py3",
+ ]
+ all = [
+     "matplotlib",
+     "numpy",
+     "nvidia-ml-py3",
+ ]
+
+ [project.scripts]
+ pbs-bulk-user-stats = "hpc_scripts.pbs_bulk_user_stats:main"
+ psutil-monitor = "hpc_scripts.psutil_monitor:main"
+ slurm-bulk-user-stats = "hpc_scripts.slurm_bulk_user_stats:main"
+
+ [tool.setuptools.packages.find]
+ where = ["src"]
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,34 @@
+ from setuptools import find_packages, setup
+ from pathlib import Path
+
+
+ def read_readme() -> str:
+     readme = Path(__file__).parent / "README.md"
+     return readme.read_text(encoding="utf-8") if readme.exists() else ""
+
+
+ setup(
+     name="hpc-stats-scripts",
+     version="1.2",
+     description="Utilities for HPC clusters including PBS/Slurm job statistics and a psutil-based resource monitor.",
+     long_description=read_readme(),
+     long_description_content_type="text/markdown",
+     author="hpc-stats-scripts contributors",
+     python_requires=">=3.9",
+     license="MIT",
+     package_dir={"": "src"},
+     packages=find_packages(where="src"),
+     install_requires=["psutil"],
+     extras_require={
+         "plot": ["matplotlib", "numpy"],
+         "gpu": ["nvidia-ml-py3"],
+         "all": ["matplotlib", "numpy", "nvidia-ml-py3"],
+     },
+     entry_points={
+         "console_scripts": [
+             "pbs-bulk-user-stats=hpc_scripts.pbs_bulk_user_stats:main",
+             "psutil-monitor=hpc_scripts.psutil_monitor:main",
+             "slurm-bulk-user-stats=hpc_scripts.slurm_bulk_user_stats:main",
+         ]
+     },
+ )
@@ -0,0 +1,8 @@
+ """Utility scripts for HPC clusters.
+
+ Provides command-line tools for monitoring jobs and resources.
+ """
+
+ __version__ = "1.2"
+
+ __all__ = ["pbs_bulk_user_stats", "psutil_monitor", "slurm_bulk_user_stats"]