slurm2sql 0.9.2__tar.gz → 0.9.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/.github/workflows/pyrelease.yaml +1 -1
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/.github/workflows/pytest.yml +1 -1
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/PKG-INFO +26 -26
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/README.rst +23 -24
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/pyproject.toml +2 -0
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/slurm2sql.py +132 -45
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/test.py +9 -8
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/.gitignore +0 -0
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/LICENSE +0 -0
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/requirements.txt +0 -0
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/tests/test-data1.csv +0 -0
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/tests/test-data2.csv +0 -0
- {slurm2sql-0.9.2 → slurm2sql-0.9.4}/tests/test-data3.csv +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: slurm2sql
|
|
3
|
-
Version: 0.9.
|
|
3
|
+
Version: 0.9.4
|
|
4
4
|
Summary: Import Slurm accounting database from sacct to sqlite3 database
|
|
5
5
|
Keywords: slurm,sqlite3
|
|
6
6
|
Author: Richard Darst
|
|
@@ -16,6 +16,7 @@ Classifier: Intended Audience :: System Administrators
|
|
|
16
16
|
Classifier: Topic :: Database
|
|
17
17
|
Classifier: Topic :: System :: Clustering
|
|
18
18
|
Classifier: Topic :: System :: Distributed Computing
|
|
19
|
+
License-File: LICENSE
|
|
19
20
|
Requires-Dist: tabulate
|
|
20
21
|
Requires-Dist: pytest ; extra == "test"
|
|
21
22
|
Project-URL: Repository, https://github.com/NordicHPC/slurm2sql
|
|
@@ -275,44 +276,44 @@ them. For other columns, check ``man sacct``.
|
|
|
275
276
|
stripped out and give invalid data. File an issue and this will
|
|
276
277
|
be added.
|
|
277
278
|
|
|
278
|
-
*
|
|
279
|
-
parsing this, you probably want to use one of the other values below.
|
|
279
|
+
* **Memory related**
|
|
280
280
|
|
|
281
|
-
* ``
|
|
282
|
-
|
|
283
|
-
to check our logic if you rely on this). In Slurm, you
|
|
284
|
-
can request memory either per-node or per-core, and this calculates
|
|
285
|
-
the other one for you.
|
|
281
|
+
* ``AllocMem``: The ``mem=`` value from ``AllocTRES`` field. You
|
|
282
|
+
probably want to use this.
|
|
286
283
|
|
|
287
|
-
* ``
|
|
288
|
-
|
|
289
|
-
nothing here, and the column value is null.
|
|
284
|
+
* ``TotalMem``: The ``mem=`` value from ``TRESUsageInTot`` field.
|
|
285
|
+
You probably want to use this.
|
|
290
286
|
|
|
291
|
-
* ``
|
|
292
|
-
``c`` or ``n``.
|
|
287
|
+
* ``ReqMem``: The raw slurm value from the ReqMem column.
|
|
293
288
|
|
|
294
|
-
* ``
|
|
289
|
+
* ``ReqMemNode``, ``ReqMemCPU``: Requested memory per node or CPU,
|
|
290
|
+
``ReqMem`` / ``NNodes``.
|
|
295
291
|
|
|
296
|
-
*
|
|
292
|
+
* ``MemEff``: Computed ``TotalMem / AllocMem``.
|
|
293
|
+
|
|
294
|
+
* **GPU information.** These use values from the ``TRESUsageInAve``
|
|
297
295
|
fields in modern Slurm
|
|
298
296
|
|
|
299
|
-
* ``
|
|
297
|
+
* ``ReqGPU``: Number of GPUs requested. Extracted from ``ReqTRES``.
|
|
298
|
+
|
|
299
|
+
* ``GpuMem``: ``gres/gpumem`` from ``TRESUsageInAve``
|
|
300
300
|
|
|
301
301
|
* ``GpuUtil``: ``gres/gpuutil`` (fraction 0.0-1.0).
|
|
302
302
|
|
|
303
|
-
* ``NGpus``: Number of GPUs
|
|
304
|
-
who knows.
|
|
303
|
+
* ``NGpus``: Number of GPUs from ``gres/gpu`` in ``AllocTRES``.
|
|
304
|
+
Should be the same as ``ReqGPU``, but who knows.
|
|
305
305
|
|
|
306
306
|
* ``GpuUtilTot``, ``GpuMemTot``: like above but using the
|
|
307
307
|
``TRESUsageInTot`` sacct field.
|
|
308
308
|
|
|
309
|
-
* ``
|
|
310
|
-
|
|
311
|
-
calculates this now.
|
|
309
|
+
* ``GpuEff``: ``gres/gpuutil`` (from ``TRESUsageInTot``) / (100 *
|
|
310
|
+
``gres/gpu`` (from ``AllocTRES``)).
|
|
312
311
|
|
|
313
312
|
* ``CPUEff``: CPU efficiency (0.0-1.0). All the same caveats as above
|
|
314
313
|
apply: test before trusting.
|
|
315
314
|
|
|
315
|
+
* And more, see the code for now.
|
|
316
|
+
|
|
316
317
|
Quick reference of the other most important columns from the
|
|
317
318
|
accounting database that are hardest to remember:
|
|
318
319
|
|
|
@@ -325,12 +326,11 @@ accounting database that are hardest to remember:
|
|
|
325
326
|
|
|
326
327
|
The ``eff`` table adds the following:
|
|
327
328
|
|
|
328
|
-
* ``CPUEff``:
|
|
329
|
+
* ``CPUEff``: Highest CPUEff for any job step
|
|
329
330
|
|
|
330
|
-
* ``MemEff``:
|
|
331
|
-
ReqMem)
|
|
331
|
+
* ``MemEff``: Highest MemEff for any job step
|
|
332
332
|
|
|
333
|
-
*
|
|
333
|
+
* ``GpuEff``: Highest GpuEff for any job step
|
|
334
334
|
|
|
335
335
|
|
|
336
336
|
|
|
@@ -252,44 +252,44 @@ them. For other columns, check ``man sacct``.
|
|
|
252
252
|
stripped out and give invalid data. File an issue and this will
|
|
253
253
|
be added.
|
|
254
254
|
|
|
255
|
-
*
|
|
256
|
-
parsing this, you probably want to use one of the other values below.
|
|
255
|
+
* **Memory related**
|
|
257
256
|
|
|
258
|
-
* ``
|
|
259
|
-
|
|
260
|
-
to check our logic if you rely on this). In Slurm, you
|
|
261
|
-
can request memory either per-node or per-core, and this calculates
|
|
262
|
-
the other one for you.
|
|
257
|
+
* ``AllocMem``: The ``mem=`` value from ``AllocTRES`` field. You
|
|
258
|
+
probably want to use this.
|
|
263
259
|
|
|
264
|
-
* ``
|
|
265
|
-
|
|
266
|
-
nothing here, and the column value is null.
|
|
260
|
+
* ``TotalMem``: The ``mem=`` value from ``TRESUsageInTot`` field.
|
|
261
|
+
You probably want to use this.
|
|
267
262
|
|
|
268
|
-
* ``
|
|
269
|
-
``c`` or ``n``.
|
|
263
|
+
* ``ReqMem``: The raw slurm value from the ReqMem column.
|
|
270
264
|
|
|
271
|
-
* ``
|
|
265
|
+
* ``ReqMemNode``, ``ReqMemCPU``: Requested memory per node or CPU,
|
|
266
|
+
``ReqMem`` / ``NNodes``.
|
|
272
267
|
|
|
273
|
-
*
|
|
268
|
+
* ``MemEff``: Computed ``TotalMem / AllocMem``.
|
|
269
|
+
|
|
270
|
+
* **GPU information.** These use values from the ``TRESUsageInAve``
|
|
274
271
|
fields in modern Slurm
|
|
275
272
|
|
|
276
|
-
* ``
|
|
273
|
+
* ``ReqGPU``: Number of GPUs requested. Extracted from ``ReqTRES``.
|
|
274
|
+
|
|
275
|
+
* ``GpuMem``: ``gres/gpumem`` from ``TRESUsageInAve``
|
|
277
276
|
|
|
278
277
|
* ``GpuUtil``: ``gres/gpuutil`` (fraction 0.0-1.0).
|
|
279
278
|
|
|
280
|
-
* ``NGpus``: Number of GPUs
|
|
281
|
-
who knows.
|
|
279
|
+
* ``NGpus``: Number of GPUs from ``gres/gpu`` in ``AllocTRES``.
|
|
280
|
+
Should be the same as ``ReqGPU``, but who knows.
|
|
282
281
|
|
|
283
282
|
* ``GpuUtilTot``, ``GpuMemTot``: like above but using the
|
|
284
283
|
``TRESUsageInTot`` sacct field.
|
|
285
284
|
|
|
286
|
-
* ``
|
|
287
|
-
|
|
288
|
-
calculates this now.
|
|
285
|
+
* ``GpuEff``: ``gres/gpuutil`` (from ``TRESUsageInTot``) / (100 *
|
|
286
|
+
``gres/gpu`` (from ``AllocTRES``)).
|
|
289
287
|
|
|
290
288
|
* ``CPUEff``: CPU efficiency (0.0-1.0). All the same caveats as above
|
|
291
289
|
apply: test before trusting.
|
|
292
290
|
|
|
291
|
+
* And more, see the code for now.
|
|
292
|
+
|
|
293
293
|
Quick reference of the other most important columns from the
|
|
294
294
|
accounting database that are hardest to remember:
|
|
295
295
|
|
|
@@ -302,12 +302,11 @@ accounting database that are hardest to remember:
|
|
|
302
302
|
|
|
303
303
|
The ``eff`` table adds the following:
|
|
304
304
|
|
|
305
|
-
* ``CPUEff``:
|
|
305
|
+
* ``CPUEff``: Highest CPUEff for any job step
|
|
306
306
|
|
|
307
|
-
* ``MemEff``:
|
|
308
|
-
ReqMem)
|
|
307
|
+
* ``MemEff``: Highest MemEff for any job step
|
|
309
308
|
|
|
310
|
-
*
|
|
309
|
+
* ``GpuEff``: Highest GpuEff for any job step
|
|
311
310
|
|
|
312
311
|
|
|
313
312
|
|
|
@@ -39,6 +39,8 @@ test = [
|
|
|
39
39
|
slurm2sql = "slurm2sql:main"
|
|
40
40
|
slurm2sql-sacct = "slurm2sql:sacct_cli"
|
|
41
41
|
slurm2sql-seff = "slurm2sql:seff_cli"
|
|
42
|
+
sacct2 = "slurm2sql:sacct_cli"
|
|
43
|
+
seff2 = "slurm2sql:seff_cli"
|
|
42
44
|
|
|
43
45
|
[project.urls]
|
|
44
46
|
Repository = "https://github.com/NordicHPC/slurm2sql"
|
|
@@ -18,7 +18,7 @@ import subprocess
|
|
|
18
18
|
import sys
|
|
19
19
|
import time
|
|
20
20
|
|
|
21
|
-
__version__ = '0.9.
|
|
21
|
+
__version__ = '0.9.4'
|
|
22
22
|
|
|
23
23
|
LOG = logging.getLogger('slurm2sql')
|
|
24
24
|
LOG.setLevel(logging.DEBUG)
|
|
@@ -383,6 +383,19 @@ class slurmGPUCount(linefunc):
|
|
|
383
383
|
if m:
|
|
384
384
|
return int(m.group(1))
|
|
385
385
|
|
|
386
|
+
RE_TRES_GPU = re.compile(rf'\bgres/gpu=([^,]*)\b')
|
|
387
|
+
RE_TRES_GPU_UTIL = re.compile(rf'\bgres/gpuutil=([^,]*)\b')
|
|
388
|
+
class slurmGPUEff2(linefunc):
|
|
389
|
+
"""Slurm GPU efficiency (using AllocTRES and TRESUsageInTot columns).
|
|
390
|
+
"""
|
|
391
|
+
type = 'real'
|
|
392
|
+
@staticmethod
|
|
393
|
+
def calc(row):
|
|
394
|
+
m_used = RE_TRES_GPU_UTIL.search(row['TRESUsageInTot'])
|
|
395
|
+
m_alloc = RE_TRES_GPU.search(row['AllocTRES'])
|
|
396
|
+
if m_alloc and m_used:
|
|
397
|
+
return (float_metric(m_used.group(1)) / 100.) / float_metric(m_alloc.group(1))
|
|
398
|
+
return None
|
|
386
399
|
|
|
387
400
|
# Job ID related stuff
|
|
388
401
|
jobidonly_re = re.compile(r'[0-9]+')
|
|
@@ -467,6 +480,23 @@ class slurmMemEff(linefunc):
|
|
|
467
480
|
raise ValueError('unknown memory type: %s'%reqmem_type)
|
|
468
481
|
return mem_max / nodemem
|
|
469
482
|
|
|
483
|
+
RE_TRES_MEM = re.compile(rf'\bmem=([^,]*)\b')
|
|
484
|
+
class slurmMemEff2(linefunc):
|
|
485
|
+
"""Slurm memory efficiency (using AllocTRES and TRESUsageInTot columns).
|
|
486
|
+
|
|
487
|
+
This *does* work in new enough Slurm.
|
|
488
|
+
"""
|
|
489
|
+
# https://github.com/SchedMD/slurm/blob/master/contribs/seff/seff
|
|
490
|
+
type = 'real'
|
|
491
|
+
@staticmethod
|
|
492
|
+
def calc(row):
|
|
493
|
+
m_used = RE_TRES_MEM.search(row['TRESUsageInTot'])
|
|
494
|
+
m_alloc = RE_TRES_MEM.search(row['AllocTRES'])
|
|
495
|
+
if m_alloc and m_used:
|
|
496
|
+
return float_bytes(m_used.group(1)) / float_bytes(m_alloc.group(1))
|
|
497
|
+
return None
|
|
498
|
+
|
|
499
|
+
|
|
470
500
|
class slurmCPUEff(linefunc):
|
|
471
501
|
# This matches the seff tool currently:
|
|
472
502
|
# https://github.com/SchedMD/slurm/blob/master/contribs/seff/seff
|
|
@@ -589,6 +619,9 @@ COLUMNS = {
|
|
|
589
619
|
'MinCPUTask': nullstr,
|
|
590
620
|
|
|
591
621
|
# Memory related
|
|
622
|
+
'_TotalMem': ExtractField('TotalMem', 'TRESUsageInTot', 'mem', float_bytes),
|
|
623
|
+
'_AllocMem': ExtractField('AllocMem', 'AllocTRES', 'mem', float_bytes),
|
|
624
|
+
'_MemEff': slurmMemEff2, # Calculated from AllocTRES and TRESUsageInTot
|
|
592
625
|
'ReqMem': float_bytes, # Requested mem, value from slurm. Sum across all nodes
|
|
593
626
|
'_ReqMemNode': slurmMemNode, # Mem per node, computed
|
|
594
627
|
'_ReqMemCPU': slurmMemCPU, # Mem per cpu, computed
|
|
@@ -598,7 +631,6 @@ COLUMNS = {
|
|
|
598
631
|
'MaxRSSTask': nullstr,
|
|
599
632
|
'MaxPages': int_metric,
|
|
600
633
|
'MaxVMSize': slurmmem,
|
|
601
|
-
#'_MemEff': slurmMemEff, # Slurm memory efficiency - see above for why this doesn't work
|
|
602
634
|
|
|
603
635
|
# Disk related
|
|
604
636
|
'AveDiskRead': int_bytes,
|
|
@@ -614,10 +646,10 @@ COLUMNS = {
|
|
|
614
646
|
'_ReqGPUS': ExtractField('ReqGpus', 'ReqTRES', 'gres/gpu', float_metric),
|
|
615
647
|
'Comment': nullstr_strip, # Slurm Comment field (at Aalto used for GPU stats)
|
|
616
648
|
#'_GPUMem': slurmGPUMem, # GPU mem extracted from comment field
|
|
617
|
-
|
|
649
|
+
'_GpuEff': slurmGPUEff2, # GPU utilization (0.0 to 1.0) from AllocTRES()
|
|
618
650
|
#'_NGPU': slurmGPUCount, # Number of GPUs, extracted from comment field
|
|
619
651
|
'_NGpus': ExtractField('NGpus', 'AllocTRES', 'gres/gpu', float_metric),
|
|
620
|
-
'_GpuUtil': ExtractField('GpuUtil', 'TRESUsageInAve', 'gres/gpuutil', float_metric, wrap=lambda x: x/100.),
|
|
652
|
+
'_GpuUtil': ExtractField('GpuUtil', 'TRESUsageInAve', 'gres/gpuutil', float_metric, wrap=lambda x: x/100.), # can be >100 for multi-GPU.
|
|
621
653
|
'_GpuMem': ExtractField('GpuMem2', 'TRESUsageInAve', 'gres/gpumem', float_metric),
|
|
622
654
|
'_GpuUtilTot': ExtractField('GpuUtilTot', 'TRESUsageInTot', 'gres/gpuutil', float_metric),
|
|
623
655
|
'_GpuMemTot': ExtractField('GpuMemTot', 'TRESUsageInTot', 'gres/gpumem', float_metric),
|
|
@@ -671,7 +703,7 @@ def main(argv=sys.argv[1:], db=None, raw_sacct=None, csv_input=None):
|
|
|
671
703
|
logging.lastResort.setLevel(logging.WARN)
|
|
672
704
|
LOG.debug(args)
|
|
673
705
|
|
|
674
|
-
sacct_filter =
|
|
706
|
+
sacct_filter = args_to_sacct_filter(args, sacct_filter)
|
|
675
707
|
|
|
676
708
|
# db is only given as an argument in tests (normally)
|
|
677
709
|
if db is None:
|
|
@@ -864,10 +896,11 @@ def slurm2sql(db, sacct_filter=['-a'], update=False, jobs_only=False,
|
|
|
864
896
|
db.execute('CREATE TABLE IF NOT EXISTS slurm (%s)'%create_columns)
|
|
865
897
|
db.execute('CREATE TABLE IF NOT EXISTS meta_slurm_lastupdate (id INTEGER PRIMARY KEY, update_time REAL)')
|
|
866
898
|
db.execute('CREATE VIEW IF NOT EXISTS allocations AS select * from slurm where JobStep is null;')
|
|
899
|
+
db.execute('CREATE VIEW IF NOT EXISTS steps AS select * from slurm where JobStep is not null;')
|
|
867
900
|
db.execute('CREATE VIEW IF NOT EXISTS eff AS select '
|
|
868
901
|
'JobIDnostep AS JobID, '
|
|
869
902
|
'max(User) AS User, '
|
|
870
|
-
'max(Partition), '
|
|
903
|
+
'max(Partition) AS Partition, '
|
|
871
904
|
'Account, '
|
|
872
905
|
'State, '
|
|
873
906
|
'Time, '
|
|
@@ -882,20 +915,23 @@ def slurm2sql(db, sacct_filter=['-a'], update=False, jobs_only=False,
|
|
|
882
915
|
'max(cputime) AS cpu_s_reserved, '
|
|
883
916
|
'max(totalcpu) AS cpu_s_used, '
|
|
884
917
|
'max(ReqMemNode) AS MemReq, '
|
|
885
|
-
'max(
|
|
918
|
+
'max(AllocMem) AS AllocMem, '
|
|
919
|
+
'max(TotalMem) AS TotalMem, '
|
|
886
920
|
'max(MaxRSS) AS MaxRSS, '
|
|
887
|
-
'max(
|
|
921
|
+
'max(MemEff) AS MemEff, '
|
|
922
|
+
'max(AllocMem*Elapsed) AS mem_s_reserved, ' # highest of any job
|
|
888
923
|
'max(NGpus) AS NGpus, '
|
|
889
924
|
'max(NGpus)*max(Elapsed) AS gpu_s_reserved, '
|
|
890
925
|
'max(NGpus)*max(Elapsed)*max(GPUutil) AS gpu_s_used, '
|
|
891
|
-
'max(GPUutil) AS GPUeff, ' # Individual job with highest use (check this)
|
|
926
|
+
#'max(GPUutil)/max(NGpus) AS GPUeff, ' # Individual job with highest use (check this)
|
|
927
|
+
'max(GPUEff) AS GPUeff, ' # Individual job with highest use (check this)
|
|
892
928
|
'max(GPUMem) AS GPUMem, '
|
|
893
929
|
'MaxDiskRead, '
|
|
894
930
|
'MaxDiskWrite, '
|
|
895
931
|
'sum(TotDiskRead) as TotDiskRead, '
|
|
896
932
|
'sum(TotDiskWrite) as TotDiskWrite '
|
|
897
933
|
'FROM slurm GROUP BY JobIDnostep')
|
|
898
|
-
db.execute('PRAGMA journal_mode = WAL;')
|
|
934
|
+
#db.execute('PRAGMA journal_mode = WAL;')
|
|
899
935
|
db.commit()
|
|
900
936
|
c = db.cursor()
|
|
901
937
|
|
|
@@ -946,7 +982,7 @@ def slurm2sql(db, sacct_filter=['-a'], update=False, jobs_only=False,
|
|
|
946
982
|
return errors[0]
|
|
947
983
|
|
|
948
984
|
|
|
949
|
-
def
|
|
985
|
+
def args_to_sacct_filter(args, sacct_filter):
|
|
950
986
|
"""Generate sacct filter args in a standard way
|
|
951
987
|
|
|
952
988
|
For example adding a --completed argument that translates into
|
|
@@ -958,8 +994,52 @@ def process_sacct_filter(args, sacct_filter):
|
|
|
958
994
|
# Set for completed jobs.
|
|
959
995
|
if getattr(args, 'completed', None):
|
|
960
996
|
sacct_filter[:0] = ['--endtime=now', f'--state={COMPLETED_STATES}']
|
|
997
|
+
if getattr(args, 'user', None):
|
|
998
|
+
sacct_filter[:0] = [f'--user={args.user}']
|
|
999
|
+
# Set args.user to None. We have already handled it here and
|
|
1000
|
+
# it shouldn't be re-handled in the future SQL code (future
|
|
1001
|
+
# SQL wouldn't handle multiple users, for example).
|
|
1002
|
+
args.user = None
|
|
1003
|
+
if getattr(args, 'partition', None):
|
|
1004
|
+
sacct_filter[:0] = [f'--partition={args.partition}']
|
|
1005
|
+
args.partition = None
|
|
1006
|
+
if getattr(args, 'running_at_time', None):
|
|
1007
|
+
sacct_filter[:0] = [f'--start={args.running_at_time}', f'--end={args.running_at_time}', '--state=RUNNING' ]
|
|
1008
|
+
args.running_at_time = None
|
|
961
1009
|
return sacct_filter
|
|
962
1010
|
|
|
1011
|
+
def args_to_sql_where(args):
|
|
1012
|
+
where = [ ]
|
|
1013
|
+
if getattr(args, 'user', None):
|
|
1014
|
+
where.append('and user=:user')
|
|
1015
|
+
if getattr(args, 'partition', None):
|
|
1016
|
+
where.append("and Partition like '%'||:partition||'%'")
|
|
1017
|
+
return ' '.join(where)
|
|
1018
|
+
|
|
1019
|
+
|
|
1020
|
+
def import_or_open_db(args, sacct_filter, csv_input=None):
|
|
1021
|
+
"""Helper function to either open a DB or generate a new in-mem one from sacct
|
|
1022
|
+
|
|
1023
|
+
The `args` should be an argparse argument option. This function
|
|
1024
|
+
will look at its arguments and do what it says. So, if you want
|
|
1025
|
+
various features, you need to define these arguments in argparse:
|
|
1026
|
+
|
|
1027
|
+
db: filename of a database to open
|
|
1028
|
+
|
|
1029
|
+
"""
|
|
1030
|
+
if args.db:
|
|
1031
|
+
db = sqlite3.connect(args.db)
|
|
1032
|
+
if sacct_filter:
|
|
1033
|
+
LOG.warn("Warning: reading from database. Any sacct filters are ignored.")
|
|
1034
|
+
else:
|
|
1035
|
+
# Import fresh
|
|
1036
|
+
sacct_filter = args_to_sacct_filter(args, sacct_filter)
|
|
1037
|
+
LOG.debug(f'sacct args: {sacct_filter}')
|
|
1038
|
+
db = sqlite3.connect(':memory:')
|
|
1039
|
+
errors = slurm2sql(db, sacct_filter=sacct_filter,
|
|
1040
|
+
csv_input=getattr(args, 'csv_input', False) or csv_input)
|
|
1041
|
+
return db
|
|
1042
|
+
|
|
963
1043
|
|
|
964
1044
|
def update_last_timestamp(db, update_time=None):
|
|
965
1045
|
"""Update the last update time in the database, for resuming.
|
|
@@ -1011,7 +1091,8 @@ def compact_table():
|
|
|
1011
1091
|
)
|
|
1012
1092
|
|
|
1013
1093
|
|
|
1014
|
-
SACCT_DEFAULT_FIELDS =
|
|
1094
|
+
SACCT_DEFAULT_FIELDS = "JobID,User,State,datetime(Start, 'unixepoch') AS Start,datetime(End, 'unixepoch') AS End,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqGPUS,GPUEff,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot"
|
|
1095
|
+
SACCT_DEFAULT_FIELDS_LONG = "JobID,User,State,datetime(Start, 'unixepoch') AS Start,datetime(End, 'unixepoch') AS End,Elapsed,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqMem,MaxRSS,ReqGPUS,GPUEff,GPUUtil,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot"
|
|
1015
1096
|
COMPLETED_STATES = 'CA,CD,DL,F,NF,OOM,PR,RV,TO'
|
|
1016
1097
|
def sacct_cli(argv=sys.argv[1:], csv_input=None):
|
|
1017
1098
|
"""A command line that uses slurm2sql to give an sacct-like interface."""
|
|
@@ -1026,13 +1107,11 @@ def sacct_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1026
1107
|
parser.add_argument('--db',
|
|
1027
1108
|
help="Read from this DB. Don't import new data.")
|
|
1028
1109
|
parser.add_argument('--output', '-o', default=SACCT_DEFAULT_FIELDS,
|
|
1029
|
-
help="Fields to output (comma separated list, use '*' for all fields). NOT safe from SQL injection")
|
|
1110
|
+
help="Fields to output (comma separated list, use '*' for all fields). NOT safe from SQL injection. If 'long' then some longer default list")
|
|
1030
1111
|
parser.add_argument('--format', '-f', default=compact_table(),
|
|
1031
1112
|
help="Output format (see tabulate formats: https://pypi.org/project/tabulate/ (default simple)")
|
|
1032
1113
|
parser.add_argument('--order',
|
|
1033
1114
|
help="SQL order by (arbitrary SQL expression using column names). NOT safe from SQL injection.")
|
|
1034
|
-
parser.add_argument('--completed', '-c', action='store_true',
|
|
1035
|
-
help=f"Select for completed job states ({COMPLETED_STATES}) You need to specify --starttime (-S) at some point in the past, due to how saccont default works (for example '-S now-1week'). This option automatically sets '-E now'")
|
|
1036
1115
|
parser.add_argument('--csv-input',
|
|
1037
1116
|
help="Don't parse sacct but import this CSV file. It's read with "
|
|
1038
1117
|
"Python's default csv reader (excel format). Beware badly "
|
|
@@ -1041,6 +1120,16 @@ def sacct_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1041
1120
|
help="Don't output anything unless errors")
|
|
1042
1121
|
parser.add_argument('--verbose', '-v', action='store_true',
|
|
1043
1122
|
help="Output more logging info")
|
|
1123
|
+
# No --db compatibility
|
|
1124
|
+
group = parser.add_argument_group(description="Selectors that only works when getting new data (not with --db):")
|
|
1125
|
+
group.add_argument('--completed', '-c', action='store_true',
|
|
1126
|
+
help=f"Select for completed job states ({COMPLETED_STATES}) You need to specify --starttime (-S) at some point in the past, due to how saccont default works (for example '-S now-1week'). This option automatically sets '-E now'. Not compatible with --db.")
|
|
1127
|
+
group.add_argument('--running-at-time', metavar='TIME', help="Only jobs running at this time. Not compatible with --db. Expanded to --start=TIME --end=TIME --state=R.")
|
|
1128
|
+
# --db compatibility
|
|
1129
|
+
group = parser.add_argument_group(description="Selectors that also work with --db:")
|
|
1130
|
+
group.add_argument('--user', '-u', help="Limit to this or these users. Compatible with --db.")
|
|
1131
|
+
group.add_argument('--partition', '-r', help="Jobs in this partition. Works with --db. Getting fresh data, an exact match and can be a comma separated list. With --db, a raw glob match.")
|
|
1132
|
+
|
|
1044
1133
|
args, sacct_filter = parser.parse_known_args(argv)
|
|
1045
1134
|
|
|
1046
1135
|
if args.verbose:
|
|
@@ -1048,20 +1137,17 @@ def sacct_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1048
1137
|
if args.quiet:
|
|
1049
1138
|
logging.lastResort.setLevel(logging.WARN)
|
|
1050
1139
|
LOG.debug(args)
|
|
1140
|
+
if args.output == 'long':
|
|
1141
|
+
args.output = SACCT_DEFAULT_FIELDS_LONG
|
|
1051
1142
|
|
|
1052
|
-
|
|
1143
|
+
db = import_or_open_db(args, sacct_filter, csv_input=csv_input)
|
|
1053
1144
|
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
db = sqlite3.connect(args.db)
|
|
1057
|
-
else:
|
|
1058
|
-
db = sqlite3.connect(':memory:')
|
|
1059
|
-
errors = slurm2sql(db, sacct_filter=sacct_filter,
|
|
1060
|
-
csv_input=args.csv_input or csv_input)
|
|
1145
|
+
# If we run sacct, then args.user is set to None so we don't do double filtering here
|
|
1146
|
+
where = args_to_sql_where(args)
|
|
1061
1147
|
|
|
1062
1148
|
from tabulate import tabulate
|
|
1063
|
-
|
|
1064
|
-
|
|
1149
|
+
cur = db.execute(f'select {args.output} from slurm WHERE true {where}',
|
|
1150
|
+
{'user':args.user, 'partition': args.partition})
|
|
1065
1151
|
headers = [ x[0] for x in cur.description ]
|
|
1066
1152
|
print(tabulate(cur, headers=headers, tablefmt=args.format))
|
|
1067
1153
|
|
|
@@ -1079,8 +1165,6 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1079
1165
|
jobs, use "--completed -S now-1week" (a start time must be
|
|
1080
1166
|
given with --completed because of how sacct works).
|
|
1081
1167
|
|
|
1082
|
-
MemReqGiB is amount requested per node (to compare with MaxRSSGiB).
|
|
1083
|
-
|
|
1084
1168
|
This only queries jobs with an End time (unlike most other commands).
|
|
1085
1169
|
|
|
1086
1170
|
If a single argument is given, and it
|
|
@@ -1097,8 +1181,6 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1097
1181
|
help="Aggregate data by user.")
|
|
1098
1182
|
parser.add_argument('--order',
|
|
1099
1183
|
help="SQL order by (arbitrary SQL expression using column names). NOT safe from SQL injection.")
|
|
1100
|
-
parser.add_argument('--completed', '-c', action='store_true',
|
|
1101
|
-
help=f"Select for completed job states ({COMPLETED_STATES}) You need to specify --starttime (-S) at some point in the past, due to how saccont default works (for example '-S now-1week'). This option automatically sets '-E now'.")
|
|
1102
1184
|
parser.add_argument('--csv-input',
|
|
1103
1185
|
help="Don't parse sacct but import this CSV file. It's read with "
|
|
1104
1186
|
"Python's default csv reader (excel format). Beware badly "
|
|
@@ -1107,6 +1189,16 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1107
1189
|
help="Don't output anything unless errors")
|
|
1108
1190
|
parser.add_argument('--verbose', '-v', action='store_true',
|
|
1109
1191
|
help="Output more logging info")
|
|
1192
|
+
# No --db compatibility
|
|
1193
|
+
group = parser.add_argument_group(description="Selectors that only works when getting new data (not with --db):")
|
|
1194
|
+
group.add_argument('--completed', '-c', action='store_true',
|
|
1195
|
+
help=f"Select for completed job states ({COMPLETED_STATES}) You need to specify --starttime (-S) at some point in the past, due to how saccont default works (for example '-S now-1week'). This option automatically sets '-E now'. Not compatible with --db.")
|
|
1196
|
+
group.add_argument('--running-at-time', metavar='TIME', help="Only jobs running at this time. Not compatible with --db. Expanded to --start=TIME --end=TIME --state=R.")
|
|
1197
|
+
# --db compatibility
|
|
1198
|
+
group = parser.add_argument_group(description="Selectors that also work with --db:")
|
|
1199
|
+
group.add_argument('--user', '-u', help="Limit to this or these users. Compatible with --db.")
|
|
1200
|
+
group.add_argument('--partition', '-r', help="Jobs in this partition. Works with --db. Getting fresh data, an exact match and can be a comma separated list. With --db, a raw glob match.")
|
|
1201
|
+
|
|
1110
1202
|
args, sacct_filter = parser.parse_known_args(argv)
|
|
1111
1203
|
|
|
1112
1204
|
if args.verbose:
|
|
@@ -1115,20 +1207,15 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1115
1207
|
logging.lastResort.setLevel(logging.WARN)
|
|
1116
1208
|
LOG.debug(args)
|
|
1117
1209
|
|
|
1118
|
-
sacct_filter = process_sacct_filter(args, sacct_filter)
|
|
1119
|
-
|
|
1120
1210
|
if args.order:
|
|
1121
1211
|
order_by = f'ORDER BY {args.order}'
|
|
1122
1212
|
else:
|
|
1123
1213
|
order_by = ''
|
|
1124
1214
|
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
db = sqlite3.connect(':memory:')
|
|
1130
|
-
errors = slurm2sql(db, sacct_filter=sacct_filter,
|
|
1131
|
-
csv_input=args.csv_input or csv_input)
|
|
1215
|
+
db = import_or_open_db(args, sacct_filter, csv_input=csv_input)
|
|
1216
|
+
|
|
1217
|
+
# If we run sacct, then args.user is set to None so we don't do double filtering here
|
|
1218
|
+
where = args_to_sql_where(args)
|
|
1132
1219
|
|
|
1133
1220
|
from tabulate import tabulate
|
|
1134
1221
|
|
|
@@ -1140,8 +1227,8 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1140
1227
|
round(sum(Elapsed*NCPUS)/86400,1) AS cpu_day,
|
|
1141
1228
|
printf("%2.0f%%", 100*sum(Elapsed*NCPUS*CPUEff)/sum(Elapsed*NCPUS)) AS CPUEff,
|
|
1142
1229
|
|
|
1143
|
-
round(sum(Elapsed*
|
|
1144
|
-
printf("%2.0f%%", 100*sum(Elapsed*
|
|
1230
|
+
round(sum(Elapsed*AllocMem)/1073741824/86400,1) AS mem_GiB_day,
|
|
1231
|
+
printf("%2.0f%%", 100*sum(Elapsed*AllocMem*MemEff)/sum(Elapsed*AllocMem)) AS MemEff,
|
|
1145
1232
|
|
|
1146
1233
|
round(sum(Elapsed*NGPUs)/86400,1) AS gpu_day,
|
|
1147
1234
|
iif(sum(NGpus), printf("%2.0f%%", 100*sum(Elapsed*NGPUs*GPUeff)/sum(Elapsed*NGPUs)), NULL) AS GPUEff,
|
|
@@ -1150,9 +1237,9 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1150
1237
|
round(sum(TotDiskWrite/1048576)/sum(Elapsed),2) AS write_MiBps
|
|
1151
1238
|
|
|
1152
1239
|
FROM eff
|
|
1153
|
-
WHERE End IS NOT NULL
|
|
1240
|
+
WHERE End IS NOT NULL {where}
|
|
1154
1241
|
GROUP BY user ) {order_by}
|
|
1155
|
-
""")
|
|
1242
|
+
""", {'user': args.user, 'partition': args.partition})
|
|
1156
1243
|
headers = [ x[0] for x in cur.description ]
|
|
1157
1244
|
data = cur.fetchall()
|
|
1158
1245
|
if len(data) == 0:
|
|
@@ -1169,8 +1256,8 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1169
1256
|
NCPUS,
|
|
1170
1257
|
printf("%3.0f%%",round(CPUeff, 2)*100) AS "CPUeff",
|
|
1171
1258
|
|
|
1172
|
-
round(
|
|
1173
|
-
round(
|
|
1259
|
+
round(AllocMem/1073741824,2) AS MemAllocGiB,
|
|
1260
|
+
round(TotalMem/1073741824,2) AS MemTotGiB,
|
|
1174
1261
|
printf("%3.0f%%",round(MemEff,2)*100) AS MemEff,
|
|
1175
1262
|
|
|
1176
1263
|
NGpus,
|
|
@@ -1181,7 +1268,7 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
|
|
|
1181
1268
|
round(TotDiskWrite/Elapsed/1048576,2) AS write_MiBps
|
|
1182
1269
|
|
|
1183
1270
|
FROM eff
|
|
1184
|
-
WHERE End IS NOT NULL ) {order_by}""")
|
|
1271
|
+
WHERE End IS NOT NULL {where} ) {order_by}""", {'user': args.user, 'partition': args.partition})
|
|
1185
1272
|
headers = [ x[0] for x in cur.description ]
|
|
1186
1273
|
data = cur.fetchall()
|
|
1187
1274
|
if len(data) == 0:
|
|
@@ -164,12 +164,12 @@ def test_cpueff(db):
|
|
|
164
164
|
|
|
165
165
|
def test_gpueff(db):
|
|
166
166
|
data = """
|
|
167
|
-
JobID,
|
|
168
|
-
1,gres/gpuutil=23
|
|
167
|
+
JobID,AllocTRES,TRESUsageInTot
|
|
168
|
+
1,gres/gpu=1,gres/gpuutil=23
|
|
169
169
|
"""
|
|
170
170
|
slurm2sql.slurm2sql(db, [], csv_input=csvdata(data))
|
|
171
171
|
print(db.execute('select * from eff;').fetchall())
|
|
172
|
-
assert fetch(db, 1, '
|
|
172
|
+
assert fetch(db, 1, 'GpuEff', table='eff') == 0.23
|
|
173
173
|
|
|
174
174
|
|
|
175
175
|
#
|
|
@@ -230,18 +230,19 @@ def test_seff(db, capsys):
|
|
|
230
230
|
|
|
231
231
|
def test_seff_mem(db, capsys):
|
|
232
232
|
data = """
|
|
233
|
-
JobID,End,NNodes,NCPUS,ReqMem,MaxRSS
|
|
234
|
-
111,1970-01-01T00:00:00,1,1,10G,
|
|
235
|
-
111.2,,1,1,,8G
|
|
233
|
+
JobID,End,NNodes,NCPUS,ReqMem,MaxRSS,AllocTRES,TRESUsageInTot
|
|
234
|
+
111,1970-01-01T00:00:00,1,1,10G,,mem=10G,
|
|
235
|
+
111.2,,1,1,,8G,mem=10G,mem=6G
|
|
236
236
|
"""
|
|
237
|
+
# Changed 2025-04-23: no longer uses ReqMem and MaxRSS but AllocTRES and TRESUsageInTot
|
|
237
238
|
slurm2sql.seff_cli(argv=[], csv_input=csvdata(data))
|
|
238
239
|
captured = capsys.readouterr()
|
|
239
240
|
assert '111' in captured.out
|
|
240
|
-
assert '
|
|
241
|
+
assert '60%' in captured.out
|
|
241
242
|
|
|
242
243
|
def test_seff_gpu(db, capsys):
|
|
243
244
|
data = """
|
|
244
|
-
JobID,End,Elapsed,TotalCPU,NCPUS,AllocTRES,
|
|
245
|
+
JobID,End,Elapsed,TotalCPU,NCPUS,AllocTRES,TRESUsageInTot
|
|
245
246
|
111,1970-01-01T00:00:00,,1,1,,
|
|
246
247
|
111.2,1970-01-01T00:00:00,100,1,1,gres/gpu=1,gres/gpuutil=23
|
|
247
248
|
"""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|