cmd-queue 0.2.1.tar.gz → 0.2.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cmd-queue might be problematic.

Files changed (65)
  1. {cmd_queue-0.2.1/cmd_queue.egg-info → cmd_queue-0.2.2}/PKG-INFO +13 -2
  2. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/__init__.py +1 -1
  3. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/serial_queue.py +7 -43
  4. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/slurm_queue.py +286 -40
  5. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/tmux_queue.py +1 -2
  6. cmd_queue-0.2.2/cmd_queue/util/util_bash.py +52 -0
  7. cmd_queue-0.2.2/cmd_queue/util/util_tmux.py +114 -0
  8. {cmd_queue-0.2.1 → cmd_queue-0.2.2/cmd_queue.egg-info}/PKG-INFO +13 -2
  9. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue.egg-info/SOURCES.txt +1 -0
  10. cmd_queue-0.2.1/cmd_queue/util/util_tmux.py +0 -38
  11. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/LICENSE +0 -0
  12. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/MANIFEST.in +0 -0
  13. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/README.rst +0 -0
  14. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/__main__.py +0 -0
  15. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/__main__.pyi +0 -0
  16. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/airflow_queue.py +0 -0
  17. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/airflow_queue.pyi +0 -0
  18. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/base_queue.py +0 -0
  19. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/base_queue.pyi +0 -0
  20. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/cli_boilerplate.py +0 -0
  21. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/cli_boilerplate.pyi +0 -0
  22. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/main.py +0 -0
  23. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/main.pyi +0 -0
  24. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/monitor_app.py +0 -0
  25. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/monitor_app.pyi +0 -0
  26. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/py.typed +0 -0
  27. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/serial_queue.pyi +0 -0
  28. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/slurm_queue.pyi +0 -0
  29. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/slurmify.py +0 -0
  30. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/tmux_queue.pyi +0 -0
  31. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/__init__.py +0 -0
  32. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/richer.py +0 -0
  33. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/richer.pyi +0 -0
  34. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/texter.py +0 -0
  35. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/texter.pyi +0 -0
  36. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/textual_extensions.py +0 -0
  37. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/textual_extensions.pyi +0 -0
  38. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_algo.py +0 -0
  39. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_algo.pyi +0 -0
  40. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_network_text.py +0 -0
  41. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_network_text.pyi +0 -0
  42. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_networkx.py +0 -0
  43. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_networkx.pyi +0 -0
  44. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_tags.py +0 -0
  45. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_tags.pyi +0 -0
  46. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_tmux.pyi +0 -0
  47. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_yaml.py +0 -0
  48. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue/util/util_yaml.pyi +0 -0
  49. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue.egg-info/dependency_links.txt +0 -0
  50. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue.egg-info/entry_points.txt +0 -0
  51. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue.egg-info/requires.txt +0 -0
  52. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/cmd_queue.egg-info/top_level.txt +0 -0
  53. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/pyproject.toml +0 -0
  54. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/requirements/docs.txt +0 -0
  55. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/requirements/linting.txt +0 -0
  56. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/requirements/optional.txt +0 -0
  57. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/requirements/runtime.txt +0 -0
  58. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/requirements/tests.txt +0 -0
  59. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/setup.cfg +0 -0
  60. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/setup.py +0 -0
  61. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/tests/test_bash_job_errors.py +0 -0
  62. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/tests/test_cli.py +0 -0
  63. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/tests/test_errors.py +0 -0
  64. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/tests/test_import.py +0 -0
  65. {cmd_queue-0.2.1 → cmd_queue-0.2.2}/tests/tests_mixed_hardware_tmux.py +0 -0
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: cmd_queue
- Version: 0.2.1
+ Version: 0.2.2
  Summary: The cmd_queue module for a DAG of bash commands
  Home-page: https://gitlab.kitware.com/computer-vision/cmd_queue
  Author: Kitware Inc., Jon Crall
@@ -224,6 +224,17 @@ Requires-Dist: myst_parser==0.18.0; extra == "docs-strict"
  Requires-Dist: sphinx-reredirects==0.0.1; extra == "docs-strict"
  Provides-Extra: linting-strict
  Requires-Dist: flake8==5.0.0; extra == "linting-strict"
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary

  Command Queue - cmd_queue
  =========================
@@ -306,7 +306,7 @@ Example:
  __mkinit__ = """
  mkinit -m cmd_queue
  """
- __version__ = '0.2.1'
+ __version__ = '0.2.2'


  __submodules__ = {
@@ -7,6 +7,7 @@ import ubelt as ub
  import uuid
  from cmd_queue import base_queue
  from cmd_queue.util import util_tags
+ from cmd_queue.util import util_bash


  class BashJob(base_queue.Job):
@@ -169,7 +170,8 @@ class BashJob(base_queue.Job):
          json_fmt_parts += [
              ('logs', '"%s"', self.log_fpath),
          ]
-         dump_pre_status = _bash_json_dump(json_fmt_parts, self.stat_fpath)
+         dump_pre_status = util_bash.bash_json_dump(json_fmt_parts,
+                                                    self.stat_fpath)
          script.append('# Mark job as running')
          script.append(dump_pre_status)

@@ -238,7 +240,8 @@ class BashJob(base_queue.Job):
          json_fmt_parts += [
              ('logs', '"%s"', self.log_fpath),
          ]
-         dump_post_status = _bash_json_dump(json_fmt_parts, self.stat_fpath)
+         dump_post_status = util_bash.bash_json_dump(json_fmt_parts,
+                                                     self.stat_fpath)

          on_pass_part = indent(_job_conditionals['on_pass'])
          on_fail_part = indent(_job_conditionals['on_fail'])
@@ -459,7 +462,8 @@ class SerialQueue(base_queue.Queue):
              ('name', '"%s"', self.name),
              ('rootid', '"%s"', self.rootid),
          ]
-         dump_code = _bash_json_dump(json_fmt_parts, self.state_fpath)
+         dump_code = util_bash.bash_json_dump(json_fmt_parts,
+                                              self.state_fpath)
          script.append('# Update queue status')
          script.append(dump_code)
          # script.append('cat ' + str(self.state_fpath))
@@ -670,46 +674,6 @@ class SerialQueue(base_queue.Queue):
          return state


- def _bash_json_dump(json_fmt_parts, fpath):
-     """
-     Make a printf command that dumps a json file indicating some status in a
-     bash environment.
-
-     Args:
-         List[Tuple[str, str, str]]: A list of 3-tupels indicating the name of
-             the json key, the printf code, and the bash expression to fill the
-             printf code.
-
-         fpath (str): where bash should write the json file
-
-     Returns:
-         str : the bash that will perform the printf
-
-     Example:
-         >>> from cmd_queue.serial_queue import _bash_json_dump
-         >>> json_fmt_parts = [
-         >>> ('home', '%s', '$HOME'),
-         >>> ('const', '%s', 'MY_CONSTANT'),
-         >>> ('ps2', '"%s"', '$PS2'),
-         >>> ]
-         >>> fpath = 'out.json'
-         >>> dump_code = _bash_json_dump(json_fmt_parts, fpath)
-         >>> print(dump_code)
-     """
-     printf_body_parts = [
-         '"{}": {}'.format(k, f) for k, f, v in json_fmt_parts
-     ]
-     printf_arg_parts = [
-         '"{}"'.format(v) for k, f, v in json_fmt_parts
-     ]
-     printf_body = r"'{" + ", ".join(printf_body_parts) + r"}\n'"
-     printf_args = ' '.join(printf_arg_parts)
-     redirect_part = '> ' + str(fpath)
-     printf_part = 'printf ' + printf_body + ' \\\n ' + printf_args
-     dump_code = printf_part + ' \\\n ' + redirect_part
-     return dump_code
-
-
  def indent(text, prefix=' '):
      r"""
      Indents a block of text
@@ -435,6 +435,8 @@ class SlurmQueue(base_queue.Queue):
          self.all_depends = None
          self._sbatch_kvargs = ub.udict(kwargs) & SLURM_SBATCH_KVARGS
          self._sbatch_flags = ub.udict(kwargs) & SLURM_SBATCH_FLAGS
+         self._include_monitor_metadata = True
+         self.jobid_fpath = None

      def __nice__(self):
          return self.queue_id
@@ -490,15 +492,29 @@ class SlurmQueue(base_queue.Queue):
              # Dont check in this case
              return True
          else:
-             sinfo = ub.cmd('sinfo --json')
+             import json
+             # sinfo --json changed between v22 and v23
+             # https://github.com/SchedMD/slurm/blob/slurm-23.02/RELEASE_NOTES#L230
+             if sinfo_major_version == 22:
+                 sinfo = ub.cmd('sinfo --json')
+             else:
+                 sinfo = ub.cmd('scontrol show nodes --json')
              if sinfo['ret'] == 0:
-                 import json
                  sinfo_out = json.loads(sinfo['out'])
-                 has_working_nodes = not all(
-                     node['state'] == 'down'
-                     for node in sinfo_out['nodes'])
+                 nodes = sinfo_out['nodes']
+                 # FIXME: this might be an incorrect check on v22
+                 # the v23 version seems different, but I don't have
+                 # v22 setup anymore. Might not be worth supporting.
+                 node_states = [node['state'] for node in nodes]
+                 if sinfo_major_version == 22:
+                     has_working_nodes = not all(
+                         'down' in str(state).lower() for state in node_states)
+                 else:
+                     has_working_nodes = not all(
+                         'DOWN' in state for state in node_states)
                  if has_working_nodes:
                      return True
+
          return False

      def submit(self, command, **kwargs):
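Note: the new availability check branches on sinfo_major_version, which is referenced here but computed elsewhere in slurm_queue.py. A minimal sketch of how such a version probe could look, assuming "sinfo --version" prints something like "slurm 23.02.3" (the helper name below is hypothetical and not part of the package):

import ubelt as ub


def guess_sinfo_major_version():
    # Hypothetical helper (not in cmd_queue): parse the major version out of
    # output like "slurm 23.02.3". Returns None when sinfo is missing or the
    # output is unexpected.
    try:
        info = ub.cmd('sinfo --version')
    except Exception:
        return None
    if info['ret'] != 0:
        return None
    parts = info['out'].strip().split()
    try:
        return int(parts[1].split('.')[0])
    except (IndexError, ValueError):
        return None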
@@ -540,6 +556,12 @@ class SlurmQueue(base_queue.Queue):
          self.header_commands.append(command)

      def order_jobs(self):
+         """
+         Get a topological sorting of the jobs in this DAG.
+
+         Returns:
+             List[SlurmJob]: ordered jobs
+         """
          import networkx as nx
          graph = self._dependency_graph()
          if 0:
@@ -551,6 +573,15 @@ class SlurmQueue(base_queue.Queue):
          return new_order

      def finalize_text(self, exclude_tags=None, **kwargs):
+         """
+         Serialize the state of the queue into a bash script.
+
+         Returns:
+             str
+         """
+         # generating the slurm bash script is straightforward because slurm
+         # will take of the hard stuff (like scheduling) for us. we just need
+         # to effectively encode the DAG as a list of sbatch commands.
          exclude_tags = util_tags.Tags.coerce(exclude_tags)
          new_order = self.order_jobs()
          commands = []
@@ -571,6 +602,20 @@ class SlurmQueue(base_queue.Queue):
              jobname_to_varname[job.name] = varname
              commands.append(command)
          self.jobname_to_varname = jobname_to_varname
+
+         self._include_monitor_metadata = True
+         if self._include_monitor_metadata:
+             # Build a command to dump the job-ids for this queue to disk to
+             # allow us to track them in the monitor.
+             from cmd_queue.util import util_bash
+             json_fmt_parts = [
+                 (job_varname, '%s', '$' + job_varname)
+                 for job_varname in self.jobname_to_varname.values()
+             ]
+             self.jobid_fpath = self.fpath.augment(ext='.jobids.json')
+             command = util_bash.bash_json_dump(json_fmt_parts, self.jobid_fpath)
+             commands.append(command)
+
          text = '\n'.join(commands)
          return text

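Note: the metadata block added above asks util_bash.bash_json_dump to write the sbatch job ids to a .jobids.json file next to the queue script, which the monitor later reads back. A hedged sketch of the shape of the generated command (the JOB_000/JOB_001 variable names are illustrative; the real names come from jobname_to_varname):

from cmd_queue.util import util_bash

# Illustrative variable names; the real ones come from jobname_to_varname.
json_fmt_parts = [
    ('JOB_000', '%s', '$JOB_000'),
    ('JOB_001', '%s', '$JOB_001'),
]
print(util_bash.bash_json_dump(json_fmt_parts, 'queue.jobids.json'))
# Produces a bash command roughly of this shape:
#   printf '{"JOB_000": %s, "JOB_001": %s}\n' \
#       "$JOB_000" "$JOB_001" \
#       > queue.jobids.json

When the generated script runs, those shell variables hold the ids returned by sbatch, so the file ends up mapping each job variable to its slurm job id.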
@@ -586,6 +631,25 @@ class SlurmQueue(base_queue.Queue):
      def monitor(self, refresh_rate=0.4):
          """
          Monitor progress until the jobs are done
+
+         CommandLine:
+             xdoctest -m cmd_queue.slurm_queue SlurmQueue.monitor --dev --run
+
+         Example:
+             >>> # xdoctest: +REQUIRES(--dev)
+             >>> from cmd_queue.slurm_queue import * # NOQA
+             >>> dpath = ub.Path.appdir('slurm_queue/tests/test-slurm-failed-monitor')
+             >>> queue = SlurmQueue()
+             >>> job0 = queue.submit(f'echo "here we go"', name='job0')
+             >>> job1 = queue.submit(f'echo "this job will pass, allowing dependencies to run" && true', depends=[job0])
+             >>> job2 = queue.submit(f'echo "this job will run and pass" && sleep 10 && true', depends=[job1])
+             >>> job3 = queue.submit(f'echo "this job will run and fail" && false', depends=[job1])
+             >>> job4 = queue.submit(f'echo "this job will fail, preventing dependencies from running" && false', depends=[job0])
+             >>> job5 = queue.submit(f'echo "this job will never run" && true', depends=[job4])
+             >>> job6 = queue.submit(f'echo "this job will also never run" && false', depends=[job4])
+             >>> queue.print_commands()
+             >>> # xdoctest: +REQUIRES(--run)
+             >>> queue.run()
          """


@@ -597,50 +661,154 @@ class SlurmQueue(base_queue.Queue):

          num_at_start = None

+         job_status_table = None
+         if self.jobid_fpath is not None:
+             class UnableToMonitor(Exception):
+                 ...
+             try:
+                 import json
+                 if not self.jobid_fpath.exists():
+                     raise UnableToMonitor
+                 jobid_lut = json.loads(self.jobid_fpath.read_text())
+                 job_status_table = [
+                     {
+                         'job_varname': job_varname,
+                         'job_id': job_id,
+                         'status': 'unknown',
+                         'needs_update': True,
+                     }
+                     for job_varname, job_id in jobid_lut.items()
+                 ]
+             except UnableToMonitor:
+                 print('ERROR: Unable to monitors jobids')
+
+         def update_jobid_status():
+             import rich
+             for row in job_status_table:
+                 if row['needs_update']:
+                     job_id = row['job_id']
+                     out = ub.cmd(f'scontrol show job "{job_id}"')
+                     info = parse_scontrol_output(out.stdout)
+                     row['JobState'] = info['JobState']
+                     row['ExitCode'] = info.get('ExitCode', None)
+                     # https://slurm.schedmd.com/job_state_codes.html
+                     if info['JobState'].startswith('FAILED'):
+                         row['status'] = 'failed'
+                         rich.print(f'[red] Failed job: {info["JobName"]}')
+                         if info["StdErr"] == info["StdOut"]:
+                             rich.print(f'[red] * Logs: {info["StdErr"]}')
+                         else:
+                             rich.print(f'[red] StdErr: {info["StdErr"]}')
+                             rich.print(f'[red] StdOut: {info["StdOut"]}')
+                         row['needs_update'] = False
+                     elif info['JobState'].startswith('CANCELLED'):
+                         rich.print(f'[yellow] Skip job: {info["JobName"]}')
+                         row['status'] = 'skipped'
+                         row['needs_update'] = False
+                     elif info['JobState'].startswith('COMPLETED'):
+                         rich.print(f'[green] Completed job: {info["JobName"]}')
+                         row['status'] = 'passed'
+                         row['needs_update'] = False
+                     elif info['JobState'].startswith('RUNNING'):
+                         row['status'] = 'running'
+                     elif info['JobState'].startswith('PENDING'):
+                         row['status'] = 'pending'
+                     else:
+                         row['status'] = 'unknown'
+             # print(f'job_status_table = {ub.urepr(job_status_table, nl=1)}')
+
          def update_status_table():
              nonlocal num_at_start
-             # https://rich.readthedocs.io/en/stable/live.html
-             info = ub.cmd('squeue --format="%i %P %j %u %t %M %D %R"')
-             stream = io.StringIO(info['out'])
-             df = pd.read_csv(stream, sep=' ')
-
-             # Only include job names that this queue created
-             job_names = [job.name for job in self.jobs]
-             df = df[df['NAME'].isin(job_names)]
-             jobid_history.update(df['JOBID'])
-
-             num_running = (df['ST'] == 'R').sum()
-             num_in_queue = len(df)
-             total_monitored = len(jobid_history)
-
-             HACK_KILL_BROKEN_JOBS = 1
-             if HACK_KILL_BROKEN_JOBS:
-                 # For whatever reason using kill-on-invalid-dep
-                 # kills jobs too fast and not when they are in a dependency state not a
-                 # a never satisfied state. Killing these jobs here seems to fix
-                 # it.
-                 broken_jobs = df[df['NODELIST(REASON)'] == '(DependencyNeverSatisfied)']
-                 if len(broken_jobs):
-                     for name in broken_jobs['NAME']:
-                         ub.cmd(f'scancel --name="{name}"')
+
+             # TODO: move this block into into the version where job status
+             # table is not available, and reimplement it for the per-job style
+             # of query. The reason we have it out here now is because we need
+             # to implement the HACK_KILL_BROKEN_JOBS in the alternate case.
+             if True:
+                 # https://rich.readthedocs.io/en/stable/live.html
+                 info = ub.cmd('squeue --format="%i %P %j %u %t %M %D %R"')
+                 stream = io.StringIO(info['out'])
+                 df = pd.read_csv(stream, sep=' ')
+
+                 # Only include job names that this queue created
+                 job_names = [job.name for job in self.jobs]
+                 df = df[df['NAME'].isin(job_names)]
+                 jobid_history.update(df['JOBID'])
+
+                 num_running = (df['ST'] == 'R').sum()
+                 num_in_queue = len(df)
+                 total_monitored = len(jobid_history)
+
+                 HACK_KILL_BROKEN_JOBS = 1
+                 if HACK_KILL_BROKEN_JOBS:
+                     # For whatever reason using kill-on-invalid-dep
+                     # kills jobs too fast and not when they are in a dependency state not a
+                     # a never satisfied state. Killing these jobs here seems to fix
+                     # it.
+                     broken_jobs = df[df['NODELIST(REASON)'] == '(DependencyNeverSatisfied)']
+                     if len(broken_jobs):
+                         for name in broken_jobs['NAME']:
+                             ub.cmd(f'scancel --name="{name}"')

              if num_at_start is None:
                  num_at_start = len(df)

-             table = Table(*['num_running', 'num_in_queue', 'total_monitored', 'num_at_start'],
+             if job_status_table is not None:
+                 update_jobid_status()
+                 state = ub.dict_hist([row['status'] for row in job_status_table])
+                 state.setdefault('passed', 0)
+                 state.setdefault('failed', 0)
+                 state.setdefault('skipped', 0)
+                 state.setdefault('pending', 0)
+                 state.setdefault('unknown', 0)
+                 state.setdefault('running', 0)
+                 state['total'] = len(job_status_table)
+
+                 state['other'] = state['total'] - (
+                     state['passed'] + state['failed'] + state['skipped'] +
+                     state['running'] + state['pending']
+                 )
+                 pass_color = ''
+                 fail_color = ''
+                 skip_color = ''
+                 finished = (state['pending'] + state['unknown'] + state['running'] == 0)
+                 if (state['failed'] > 0):
+                     fail_color = '[red]'
+                 if (state['skipped'] > 0):
+                     skip_color = '[yellow]'
+                 if finished:
+                     pass_color = '[green]'
+
+                 header = ['passed', 'failed', 'skipped', 'running', 'pending', 'other', 'total']
+                 row_values = [
+                     f"{pass_color}{state['passed']}",
+                     f"{fail_color}{state['failed']}",
+                     f"{skip_color}{state['skipped']}",
+                     f"{state['running']}",
+                     f"{state['pending']}",
+                     f"{state['other']}",
+                     f"{state['total']}",
+                 ]
+             else:
+                 # TODO: determine if slurm has accounting on, and if we can
+                 # figure out how many jobs errored / passed
+                 header = ['num_running', 'num_in_queue', 'total_monitored', 'num_at_start']
+                 row_values = [
+                     f'{num_running}',
+                     f'{num_in_queue}',
+                     f'{total_monitored}',
+                     f'{num_at_start}',
+                 ]
+                 # row_values.append(str(state.get('FAIL', 0)))
+                 # row_values.append(str(state.get('SKIPPED', 0)))
+                 # row_values.append(str(state.get('PENDING', 0)))
+                 finished = (num_in_queue == 0)
+
+             table = Table(*header,
                            title='slurm-monitor')

-             # TODO: determine if slurm has accounting on, and if we can
-             # figure out how many jobs errored / passed
-
-             table.add_row(
-                 f'{num_running}',
-                 f'{num_in_queue}',
-                 f'{total_monitored}',
-                 f'{num_at_start}',
-             )
+             table.add_row(*row_values)

-             finished = (num_in_queue == 0)
              return table, finished

          try:
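Note: the per-job branch above reduces to a small mapping from scontrol JobState prefixes to the monitor's status buckets. A standalone sketch of that mapping, for reference (the helper name below is not part of the package):

def classify_job_state(job_state: str) -> str:
    # Mirrors the branching in update_jobid_status above; see
    # https://slurm.schedmd.com/job_state_codes.html for the state names.
    if job_state.startswith('FAILED'):
        return 'failed'
    elif job_state.startswith('CANCELLED'):
        return 'skipped'
    elif job_state.startswith('COMPLETED'):
        return 'passed'
    elif job_state.startswith('RUNNING'):
        return 'running'
    elif job_state.startswith('PENDING'):
        return 'pending'
    else:
        return 'unknown'


assert classify_job_state('COMPLETED') == 'passed'
assert classify_job_state('CANCELLED by 1000') == 'skipped'
assert classify_job_state('NODE_FAIL') == 'unknown'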
@@ -680,6 +848,8 @@ class SlurmQueue(base_queue.Queue):
              style (str):
                  can be 'colors', 'rich', or 'plain'

+             **kwargs: extra backend-specific args passed to finalize_text
+
          CommandLine:
              xdoctest -m cmd_queue.slurm_queue SlurmQueue.print_commands

@@ -698,6 +868,82 @@ class SlurmQueue(base_queue.Queue):
      rprint = print_commands


+ def parse_scontrol_output(output: str) -> dict:
+     """
+     Parses the output of `scontrol show job` into a dictionary.
+
+     Example:
+         from cmd_queue.slurm_queue import * # NOQA
+         # Example usage
+         output = ub.codeblock(
+             '''
+             JobId=307 JobName=J0002-SQ-2025 with a space 0218T165929-9a50513a
+             UserId=joncrall(1000) GroupId=joncrall(1000) MCS_label=N/A
+             Priority=1 Nice=0 Account=(null) QOS=(null)
+             JobState=COMPLETED Reason=None Dependency=(null)
+             Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
+             RunTime=00:00:10 TimeLimit=365-00:00:00 TimeMin=N/A
+             SubmitTime=2025-02-18T16:59:30 EligibleTime=2025-02-18T16:59:33
+             AccrueTime=Unknown
+             StartTime=2025-02-18T16:59:33 EndTime=2025-02-18T16:59:43 Deadline=N/A
+             SuspendTime=None SecsPreSuspend=0 LastSchedEval=2025-02-18T16:59:33 Scheduler=Backfill
+             Partition=priority AllocNode:Sid=localhost:215414
+             ReqNodeList=(null) ExcNodeList=(null)
+             NodeList=toothbrush
+             BatchHost=toothbrush
+             NumNodes=1 NumCPUs=2 NumTasks=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
+             ReqTRES=cpu=1,mem=120445M,node=1,billing=1
+             AllocTRES=cpu=2,node=1,billing=2
+             Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
+             MinCPUsNode=1 MinMemoryNode=0 MinTmpDiskNode=0
+             Features=(null) DelayBoot=00:00:00
+             OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
+             Command=(null)
+             WorkDir=/home/joncrall/code/cmd_queue
+             StdErr="cmd_queue/slurm/SQ-2025021 with a space 8T165929-9a50513a/logs/J0002-SQ-20250218T165929-9a50513a.sh"
+             StdIn=/dev/null
+             StdOut="slurm/SQ-20 with and = 250218T165929-9a50513a/logs/J0002-SQ-20250218T165929-9a50513a.sh"
+             Power=
+             ''')
+         parse_scontrol_output(output)
+     """
+     import re
+     # These keys should be the last key on a line. They are allowed to contain
+     # space and equal characters.
+     special_keys = [
+         'JobName', 'WorkDir', 'StdErr', 'StdIn', 'StdOut', 'Command',
+         'NodeList', 'BatchHost', 'Partition'
+     ]
+     patterns = '(' + '|'.join(f' {re.escape(k)}=' for k in special_keys) + ')'
+     pat = re.compile(patterns)
+
+     # Initialize dictionary to store parsed key-value pairs
+     parsed_data = {}
+
+     # Split the input into lines
+     for line in output.splitlines():
+         # First, check for special keys (those with spaces before the equal sign)
+         match = pat.search(line)
+         if match:
+             # Special case: Key is a special key with a space
+             startpos = match.start()
+             leading_part = line[:startpos]
+             special_part = line[startpos + 1:]
+             key, value = special_part.split('=', 1)
+             parsed_data[key] = value.strip()
+             line = leading_part
+
+         # Now, handle the general case: split by spaces and then by "="
+         line = line.strip()
+         if line:
+             parts = line.split(' ')
+             for part in parts:
+                 key, value = part.split('=', 1)
+                 parsed_data[key] = value
+
+     return parsed_data
+
+
  SLURM_NOTES = r"""
  This shows a few things you can do with slurm

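Note: a more compact usage sketch for the parser added above, assuming it is imported from cmd_queue.slurm_queue as defined in this diff; it checks that a JobName containing spaces survives the special-key handling:

from cmd_queue.slurm_queue import parse_scontrol_output

# Two hand-written scontrol-style lines; values are illustrative.
output = '\n'.join([
    'JobId=307 JobName=demo job with spaces',
    '   JobState=COMPLETED ExitCode=0:0',
])
info = parse_scontrol_output(output)
assert info['JobId'] == '307'
assert info['JobName'] == 'demo job with spaces'
assert info['JobState'] == 'COMPLETED'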
@@ -724,7 +724,7 @@ class TMUXMultiQueue(base_queue.Queue):

          CommandLine:
              xdoctest -m cmd_queue.tmux_queue TMUXMultiQueue.monitor:0
-             xdoctest -m cmd_queue.tmux_queue TMUXMultiQueue.monitor:1 --interact
+             INTERACTIVE_TEST=1 xdoctest -m cmd_queue.tmux_queue TMUXMultiQueue.monitor:1

          Example:
              >>> # xdoctest: +REQUIRES(--interact)
@@ -855,7 +855,6 @@ class TMUXMultiQueue(base_queue.Queue):
                  finished &= (state['status'] == 'done')
                  if state['status'] == 'done':
                      pass_color = '[green]'
-
                  if (state['failed'] > 0):
                      fail_color = '[red]'
                  if (state['skipped'] > 0):
@@ -0,0 +1,52 @@
+ def bash_json_dump(json_fmt_parts, fpath):
+     r"""
+     Generate a printf bash command that dumps a JSON file indicating some
+     status in a bash environment.
+
+     Args:
+         List[Tuple[str, str, str]]: A list of 3-tuples where each tuple contains:
+             - The JSON key (str)
+             - The printf format string (str)
+             - The bash expression (str) that provides the value to be printed
+
+         fpath (str): Path where the bash script should write the JSON file.
+
+     Returns:
+         str: The bash command that will perform the printf.
+
+     Example:
+         >>> from cmd_queue.util.util_bash import * # NOQA
+         >>> json_fmt_parts = [
+         >>> ('home', '%s', '$HOME'),
+         >>> ('const', '%s', 'MY_CONSTANT'),
+         >>> ('ps2', '"%s"', '$PS2'),
+         >>> ]
+         >>> fpath = 'out.json'
+         >>> dump_code = bash_json_dump(json_fmt_parts, fpath)
+         >>> print(dump_code)
+         printf '{"home": %s, "const": %s, "ps2": "%s"}\n' \
+         "$HOME" "MY_CONSTANT" "$PS2" \
+         > out.json
+
+     Example:
+         >>> from cmd_queue.util.util_bash import * # NOQA
+         >>> json_fmt_parts = []
+         >>> fpath = 'out.json'
+         >>> dump_code = bash_json_dump(json_fmt_parts, fpath)
+         >>> print(dump_code)
+         printf '{}\n' \
+         \
+         > out.json
+     """
+     printf_body_parts = [
+         '"{}": {}'.format(k, f) for k, f, v in json_fmt_parts
+     ]
+     printf_arg_parts = [
+         '"{}"'.format(v) for k, f, v in json_fmt_parts
+     ]
+     printf_body = r"'{" + ", ".join(printf_body_parts) + r"}\n'"
+     printf_args = ' '.join(printf_arg_parts)
+     redirect_part = '> ' + str(fpath)
+     printf_part = 'printf ' + printf_body + ' \\\n ' + printf_args
+     dump_code = printf_part + ' \\\n ' + redirect_part
+     return dump_code
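Note: a quick way to sanity-check the generated command is to execute it with bash and read the JSON back. A minimal sketch, assuming a POSIX system with bash on the PATH (the key names and values are illustrative):

import json
import subprocess
import tempfile
from pathlib import Path
from cmd_queue.util.util_bash import bash_json_dump

with tempfile.TemporaryDirectory() as dpath:
    fpath = Path(dpath) / 'status.json'
    # 'MY_TAG' is emitted literally; "$HOME" is expanded by bash at dump time.
    dump_code = bash_json_dump([
        ('tag', '"%s"', 'MY_TAG'),
        ('home', '"%s"', '$HOME'),
    ], fpath)
    subprocess.run(['bash', '-c', dump_code], check=True)
    data = json.loads(fpath.read_text())
    assert data['tag'] == 'MY_TAG'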
@@ -0,0 +1,114 @@
+ """
+ Generic tmux helpers
+ """
+ import ubelt as ub
+
+
+ class tmux:
+     """
+     TODO:
+         - [ ] should use libtmux instead, or provide a compatible minimal API.
+
+     Example:
+         >>> # xdoctest: +SKIP
+         >>> from cmd_queue.util.util_tmux import tmux
+         >>> sessions = tmux.list_sessions()
+
+     """
+
+     @staticmethod
+     def list_sessions():
+         info = ub.cmd('tmux list-sessions')
+         sessions = []
+         for line in info['out'].split('\n'):
+             line = line.strip()
+             if line:
+                 session_id, rest = line.split(':', 1)
+                 sessions.append({
+                     'id': session_id,
+                     'rest': rest
+                 })
+         return sessions
+
+     @staticmethod
+     def _kill_session_command(target_session):
+         return f'tmux kill-session -t {target_session}'
+
+     @staticmethod
+     def _capture_pane_command(target_session):
+         # Relly should take a target pane argument
+         return f'tmux capture-pane -p -t "{target_session}:0.0"'
+
+     @staticmethod
+     def capture_pane(target_session, verbose=3):
+         return ub.cmd(tmux._capture_pane_command(target_session), verbose=verbose)
+
+     @staticmethod
+     def kill_session(target_session, verbose=3):
+         return ub.cmd(tmux._kill_session_command(target_session), verbose=verbose)
+
+     @staticmethod
+     def kill_pane(pane_id, verbose=3):
+         return ub.cmd(f'tmux kill-pane -t {pane_id}', verbose=verbose)
+
+     @staticmethod
+     def list_panes(target_session):
+         """
+         Ignore:
+             from cmd_queue.util.util_tmux import tmux
+             sessions = tmux.list_sessions()
+             rows = []
+             for session in tmux.list_sessions():
+                 target_session = session['id']
+                 rows.extend(tmux.list_panes(target_session))
+             print(f'rows = {ub.urepr(rows, nl=1)}')
+         """
+         import json
+         # References:
+         # https://github.com/tmux-python/libtmux/blob/f705713c7aff1b14e8f8f3ca53d1b0b6ba6e98d0/src/libtmux/formats.py#L80
+         PANE_FORMATS = [
+             "pane_id",
+             "pane_index",
+             "pane_pid",
+
+             "pane_active",
+             "pane_dead",
+             "pane_in_mode",
+             "pane_synchronized",
+             "pane_tty",
+             "pane_start_command",
+             "pane_start_path",
+             "pane_current_path",
+             "pane_current_command",
+             "cursor_x",
+             "cursor_y",
+             "scroll_region_upper",
+             "scroll_region_lower",
+             "saved_cursor_x",
+             "saved_cursor_y",
+             "alternate_on",
+             "alternate_saved_x",
+             "alternate_saved_y",
+             "cursor_flag",
+             "insert_flag",
+             "keypad_cursor_flag",
+             "keypad_flag",
+             "wrap_flag",
+             "mouse_standard_flag",
+             "mouse_button_flag",
+             "mouse_any_flag",
+             "mouse_utf8_flag",
+             "history_size",
+             "history_limit",
+             "history_bytes",
+             "pane_width",
+             "pane_height",
+             # "pane_title", # removed in 3.1+
+         ]
+         format_code = json.dumps({k: '#{' + k + '}' for k in PANE_FORMATS})
+         rows = []
+         out = ub.cmd(['tmux', 'list-panes', '-t', str(target_session), '-F', format_code], verbose=0)
+         for line in out.stdout.strip().split('\n'):
+             row = json.loads(line)
+             rows.append(row)
+         return rows
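Note: the class acts as a namespace of static helpers around the tmux CLI. A hedged usage sketch, assuming a tmux server with at least one session is already running:

from cmd_queue.util.util_tmux import tmux

# Requires a running tmux server; each session row is {'id': ..., 'rest': ...}.
sessions = tmux.list_sessions()
for session in sessions:
    panes = tmux.list_panes(session['id'])
    print(session['id'], 'has', len(panes), 'pane(s)')

if sessions:
    target = sessions[0]['id']
    captured = tmux.capture_pane(target, verbose=0)
    print(captured['out'])
    # tmux.kill_session(target)  # would tear the whole session down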
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: cmd_queue
- Version: 0.2.1
+ Version: 0.2.2
  Summary: The cmd_queue module for a DAG of bash commands
  Home-page: https://gitlab.kitware.com/computer-vision/cmd_queue
  Author: Kitware Inc., Jon Crall
@@ -224,6 +224,17 @@ Requires-Dist: myst_parser==0.18.0; extra == "docs-strict"
  Requires-Dist: sphinx-reredirects==0.0.1; extra == "docs-strict"
  Provides-Extra: linting-strict
  Requires-Dist: flake8==5.0.0; extra == "linting-strict"
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary

  Command Queue - cmd_queue
  =========================
@@ -39,6 +39,7 @@ cmd_queue/util/textual_extensions.py
  cmd_queue/util/textual_extensions.pyi
  cmd_queue/util/util_algo.py
  cmd_queue/util/util_algo.pyi
+ cmd_queue/util/util_bash.py
  cmd_queue/util/util_network_text.py
  cmd_queue/util/util_network_text.pyi
  cmd_queue/util/util_networkx.py
@@ -1,38 +0,0 @@
- """
- Generic tmux helpers
- """
- import ubelt as ub
-
-
- class tmux:
-
-     @staticmethod
-     def list_sessions():
-         info = ub.cmd('tmux list-sessions')
-         sessions = []
-         for line in info['out'].split('\n'):
-             line = line.strip()
-             if line:
-                 session_id, rest = line.split(':', 1)
-                 sessions.append({
-                     'id': session_id,
-                     'rest': rest
-                 })
-         return sessions
-
-     @staticmethod
-     def _kill_session_command(target_session):
-         return f'tmux kill-session -t {target_session}'
-
-     @staticmethod
-     def _capture_pane_command(target_session):
-         # Relly should take a target pane argument
-         return f'tmux capture-pane -p -t "{target_session}:0.0"'
-
-     @staticmethod
-     def capture_pane(target_session, verbose=3):
-         return ub.cmd(tmux._capture_pane_command(target_session), verbose=verbose)
-
-     @staticmethod
-     def kill_session(target_session, verbose=3):
-         return ub.cmd(tmux._kill_session_command(target_session), verbose=verbose)