cmd-queue 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cmd-queue might be problematic.
- cmd_queue/__init__.py +1 -1
- cmd_queue/serial_queue.py +55 -59
- cmd_queue/slurm_queue.py +353 -44
- cmd_queue/slurmify.py +116 -0
- cmd_queue/tmux_queue.py +15 -2
- cmd_queue/util/util_bash.py +52 -0
- cmd_queue/util/util_tmux.py +76 -0
- {cmd_queue-0.2.0.dist-info → cmd_queue-0.2.2.dist-info}/METADATA +213 -175
- {cmd_queue-0.2.0.dist-info → cmd_queue-0.2.2.dist-info}/RECORD +13 -11
- {cmd_queue-0.2.0.dist-info → cmd_queue-0.2.2.dist-info}/WHEEL +1 -1
- {cmd_queue-0.2.0.dist-info → cmd_queue-0.2.2.dist-info}/LICENSE +0 -0
- {cmd_queue-0.2.0.dist-info → cmd_queue-0.2.2.dist-info}/entry_points.txt +0 -0
- {cmd_queue-0.2.0.dist-info → cmd_queue-0.2.2.dist-info}/top_level.txt +0 -0
cmd_queue/slurm_queue.py
CHANGED
@@ -264,10 +264,6 @@ class SlurmJob(base_queue.Job):
         return ' \\\n '.join(args)

     def _build_sbatch_args(self, jobname_to_varname=None):
-        # job_name = 'todo'
-        # output_fpath = '$HOME/.cache/slurm/logs/job-%j-%x.out'
-        # command = "python -c 'import sys; sys.exit(1)'"
-        # -c 2 -p priority --gres=gpu:1
         sbatch_args = ['sbatch']
         if self.name:
             sbatch_args.append(f'--job-name="{self.name}"')
@@ -306,7 +302,8 @@ class SlurmJob(base_queue.Job):

         for key, value in self._sbatch_kvargs.items():
             key = key.replace('_', '-')
-            sbatch_args.append(f'--{key}="{value}"')
+            if value is not None:
+                sbatch_args.append(f'--{key}="{value}"')

         for key, flag in self._sbatch_flags.items():
             if flag:
@@ -371,6 +368,19 @@ class SlurmQueue(base_queue.Queue):
     CommandLine:
         xdoctest -m cmd_queue.slurm_queue SlurmQueue

+    Example:
+        >>> from cmd_queue.slurm_queue import * # NOQA
+        >>> self = SlurmQueue()
+        >>> job0 = self.submit('echo "hi from $SLURM_JOBID"')
+        >>> job1 = self.submit('echo "hi from $SLURM_JOBID"', depends=[job0])
+        >>> job2 = self.submit('echo "hi from $SLURM_JOBID"', depends=[job1])
+        >>> job3 = self.submit('echo "hi from $SLURM_JOBID"', depends=[job1, job2])
+        >>> self.write()
+        >>> self.print_commands()
+        >>> # xdoctest: +REQUIRES(--run)
+        >>> if not self.is_available():
+        >>>     self.run()
+
     Example:
         >>> from cmd_queue.slurm_queue import * # NOQA
         >>> self = SlurmQueue()
@@ -413,6 +423,11 @@ class SlurmQueue(base_queue.Queue):
         self.unused_kwargs = kwargs
         self.queue_id = name + '-' + stamp + '-' + ub.hash_data(uuid.uuid4())[0:8]
         self.dpath = ub.Path.appdir('cmd_queue/slurm') / self.queue_id
+        if 0:
+            # hack for submission on different systems, probably dont want to
+            # do this.
+            self.dpath = self.dpath.shrinkuser(home='$HOME')
+
         self.log_dpath = self.dpath / 'logs'
         self.fpath = self.dpath / (self.queue_id + '.sh')
         self.shell = shell
@@ -420,10 +435,43 @@ class SlurmQueue(base_queue.Queue):
         self.all_depends = None
         self._sbatch_kvargs = ub.udict(kwargs) & SLURM_SBATCH_KVARGS
         self._sbatch_flags = ub.udict(kwargs) & SLURM_SBATCH_FLAGS
+        self._include_monitor_metadata = True
+        self.jobid_fpath = None

     def __nice__(self):
         return self.queue_id

+    @classmethod
+    def _slurm_checks(cls):
+        status = {}
+        info = {}
+        info['squeue_fpath'] = ub.find_exe('squeue')
+        status['has_squeue'] = bool(info['squeue_fpath'])
+        status['slurmd_running'] = False
+        import psutil
+        for p in psutil.process_iter():
+            if p.name() == 'slurmd':
+                status['slurmd_running'] = True
+                info['slurmd_info'] = {
+                    'pid': p.pid,
+                    'name': p.name(),
+                    'status': p.status(),
+                    'create_time': p.create_time(),
+                }
+                break
+        status['squeue_working'] = (ub.cmd('squeue')['ret'] == 0)
+
+        sinfo = ub.cmd('sinfo --json')
+        status['sinfo_working'] = False
+        if sinfo['ret'] == 0:
+            status['sinfo_working'] = True
+            import json
+            sinfo_out = json.loads(sinfo['out'])
+            has_working_nodes = not all(
+                node['state'] == 'down'
+                for node in sinfo_out['nodes'])
+            status['has_working_nodes'] = has_working_nodes
+
     @classmethod
     def is_available(cls):
         """
@@ -436,15 +484,37 @@ class SlurmQueue(base_queue.Queue):
         squeue_working = (ub.cmd('squeue')['ret'] == 0)
         if squeue_working:
             # Check if nodes are available or down
-
-
+            # note: the --json command is not available in
+            # slurm-wlm 19.05.5, but it is in slurm-wlm 21.08.5
+            sinfo_version_str = ub.cmd('sinfo --version').stdout.strip().split(' ')[1]
+            sinfo_major_version = int(sinfo_version_str.split('.')[0])
+            if sinfo_major_version < 21:
+                # Dont check in this case
+                return True
+            else:
                 import json
-
-
-
-
-
-
+                # sinfo --json changed between v22 and v23
+                # https://github.com/SchedMD/slurm/blob/slurm-23.02/RELEASE_NOTES#L230
+                if sinfo_major_version == 22:
+                    sinfo = ub.cmd('sinfo --json')
+                else:
+                    sinfo = ub.cmd('scontrol show nodes --json')
+                if sinfo['ret'] == 0:
+                    sinfo_out = json.loads(sinfo['out'])
+                    nodes = sinfo_out['nodes']
+                    # FIXME: this might be an incorrect check on v22
+                    # the v23 version seems different, but I don't have
+                    # v22 setup anymore. Might not be worth supporting.
+                    node_states = [node['state'] for node in nodes]
+                    if sinfo_major_version == 22:
+                        has_working_nodes = not all(
+                            'down' in str(state).lower() for state in node_states)
+                    else:
+                        has_working_nodes = not all(
+                            'DOWN' in state for state in node_states)
+                    if has_working_nodes:
+                        return True
+
         return False

     def submit(self, command, **kwargs):
@@ -486,6 +556,12 @@ class SlurmQueue(base_queue.Queue):
             self.header_commands.append(command)

     def order_jobs(self):
+        """
+        Get a topological sorting of the jobs in this DAG.
+
+        Returns:
+            List[SlurmJob]: ordered jobs
+        """
         import networkx as nx
         graph = self._dependency_graph()
         if 0:
@@ -497,6 +573,15 @@ class SlurmQueue(base_queue.Queue):
         return new_order

     def finalize_text(self, exclude_tags=None, **kwargs):
+        """
+        Serialize the state of the queue into a bash script.
+
+        Returns:
+            str
+        """
+        # generating the slurm bash script is straightforward because slurm
+        # will take of the hard stuff (like scheduling) for us. we just need
+        # to effectively encode the DAG as a list of sbatch commands.
         exclude_tags = util_tags.Tags.coerce(exclude_tags)
         new_order = self.order_jobs()
         commands = []
@@ -517,6 +602,20 @@ class SlurmQueue(base_queue.Queue):
                 jobname_to_varname[job.name] = varname
             commands.append(command)
         self.jobname_to_varname = jobname_to_varname
+
+        self._include_monitor_metadata = True
+        if self._include_monitor_metadata:
+            # Build a command to dump the job-ids for this queue to disk to
+            # allow us to track them in the monitor.
+            from cmd_queue.util import util_bash
+            json_fmt_parts = [
+                (job_varname, '%s', '$' + job_varname)
+                for job_varname in self.jobname_to_varname.values()
+            ]
+            self.jobid_fpath = self.fpath.augment(ext='.jobids.json')
+            command = util_bash.bash_json_dump(json_fmt_parts, self.jobid_fpath)
+            commands.append(command)
+
         text = '\n'.join(commands)
         return text

@@ -532,6 +631,25 @@ class SlurmQueue(base_queue.Queue):
     def monitor(self, refresh_rate=0.4):
         """
         Monitor progress until the jobs are done
+
+        CommandLine:
+            xdoctest -m cmd_queue.slurm_queue SlurmQueue.monitor --dev --run
+
+        Example:
+            >>> # xdoctest: +REQUIRES(--dev)
+            >>> from cmd_queue.slurm_queue import * # NOQA
+            >>> dpath = ub.Path.appdir('slurm_queue/tests/test-slurm-failed-monitor')
+            >>> queue = SlurmQueue()
+            >>> job0 = queue.submit(f'echo "here we go"', name='job0')
+            >>> job1 = queue.submit(f'echo "this job will pass, allowing dependencies to run" && true', depends=[job0])
+            >>> job2 = queue.submit(f'echo "this job will run and pass" && sleep 10 && true', depends=[job1])
+            >>> job3 = queue.submit(f'echo "this job will run and fail" && false', depends=[job1])
+            >>> job4 = queue.submit(f'echo "this job will fail, preventing dependencies from running" && false', depends=[job0])
+            >>> job5 = queue.submit(f'echo "this job will never run" && true', depends=[job4])
+            >>> job6 = queue.submit(f'echo "this job will also never run" && false', depends=[job4])
+            >>> queue.print_commands()
+            >>> # xdoctest: +REQUIRES(--run)
+            >>> queue.run()
         """

         import time
@@ -543,46 +661,154 @@ class SlurmQueue(base_queue.Queue):

         num_at_start = None

+        job_status_table = None
+        if self.jobid_fpath is not None:
+            class UnableToMonitor(Exception):
+                ...
+            try:
+                import json
+                if not self.jobid_fpath.exists():
+                    raise UnableToMonitor
+                jobid_lut = json.loads(self.jobid_fpath.read_text())
+                job_status_table = [
+                    {
+                        'job_varname': job_varname,
+                        'job_id': job_id,
+                        'status': 'unknown',
+                        'needs_update': True,
+                    }
+                    for job_varname, job_id in jobid_lut.items()
+                ]
+            except UnableToMonitor:
+                print('ERROR: Unable to monitors jobids')
+
+        def update_jobid_status():
+            import rich
+            for row in job_status_table:
+                if row['needs_update']:
+                    job_id = row['job_id']
+                    out = ub.cmd(f'scontrol show job "{job_id}"')
+                    info = parse_scontrol_output(out.stdout)
+                    row['JobState'] = info['JobState']
+                    row['ExitCode'] = info.get('ExitCode', None)
+                    # https://slurm.schedmd.com/job_state_codes.html
+                    if info['JobState'].startswith('FAILED'):
+                        row['status'] = 'failed'
+                        rich.print(f'[red] Failed job: {info["JobName"]}')
+                        if info["StdErr"] == info["StdOut"]:
+                            rich.print(f'[red] * Logs: {info["StdErr"]}')
+                        else:
+                            rich.print(f'[red] StdErr: {info["StdErr"]}')
+                            rich.print(f'[red] StdOut: {info["StdOut"]}')
+                        row['needs_update'] = False
+                    elif info['JobState'].startswith('CANCELLED'):
+                        rich.print(f'[yellow] Skip job: {info["JobName"]}')
+                        row['status'] = 'skipped'
+                        row['needs_update'] = False
+                    elif info['JobState'].startswith('COMPLETED'):
+                        rich.print(f'[green] Completed job: {info["JobName"]}')
+                        row['status'] = 'passed'
+                        row['needs_update'] = False
+                    elif info['JobState'].startswith('RUNNING'):
+                        row['status'] = 'running'
+                    elif info['JobState'].startswith('PENDING'):
+                        row['status'] = 'pending'
+                    else:
+                        row['status'] = 'unknown'
+            # print(f'job_status_table = {ub.urepr(job_status_table, nl=1)}')
+
         def update_status_table():
             nonlocal num_at_start
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+            # TODO: move this block into into the version where job status
+            # table is not available, and reimplement it for the per-job style
+            # of query. The reason we have it out here now is because we need
+            # to implement the HACK_KILL_BROKEN_JOBS in the alternate case.
+            if True:
+                # https://rich.readthedocs.io/en/stable/live.html
+                info = ub.cmd('squeue --format="%i %P %j %u %t %M %D %R"')
+                stream = io.StringIO(info['out'])
+                df = pd.read_csv(stream, sep=' ')
+
+                # Only include job names that this queue created
+                job_names = [job.name for job in self.jobs]
+                df = df[df['NAME'].isin(job_names)]
+                jobid_history.update(df['JOBID'])
+
+                num_running = (df['ST'] == 'R').sum()
+                num_in_queue = len(df)
+                total_monitored = len(jobid_history)
+
+                HACK_KILL_BROKEN_JOBS = 1
+                if HACK_KILL_BROKEN_JOBS:
+                    # For whatever reason using kill-on-invalid-dep
+                    # kills jobs too fast and not when they are in a dependency state not a
+                    # a never satisfied state. Killing these jobs here seems to fix
+                    # it.
+                    broken_jobs = df[df['NODELIST(REASON)'] == '(DependencyNeverSatisfied)']
+                    if len(broken_jobs):
+                        for name in broken_jobs['NAME']:
+                            ub.cmd(f'scancel --name="{name}"')

             if num_at_start is None:
                 num_at_start = len(df)

-
+            if job_status_table is not None:
+                update_jobid_status()
+                state = ub.dict_hist([row['status'] for row in job_status_table])
+                state.setdefault('passed', 0)
+                state.setdefault('failed', 0)
+                state.setdefault('skipped', 0)
+                state.setdefault('pending', 0)
+                state.setdefault('unknown', 0)
+                state.setdefault('running', 0)
+                state['total'] = len(job_status_table)
+
+                state['other'] = state['total'] - (
+                    state['passed'] + state['failed'] + state['skipped'] +
+                    state['running'] + state['pending']
+                )
+                pass_color = ''
+                fail_color = ''
+                skip_color = ''
+                finished = (state['pending'] + state['unknown'] + state['running'] == 0)
+                if (state['failed'] > 0):
+                    fail_color = '[red]'
+                if (state['skipped'] > 0):
+                    skip_color = '[yellow]'
+                if finished:
+                    pass_color = '[green]'
+
+                header = ['passed', 'failed', 'skipped', 'running', 'pending', 'other', 'total']
+                row_values = [
+                    f"{pass_color}{state['passed']}",
+                    f"{fail_color}{state['failed']}",
+                    f"{skip_color}{state['skipped']}",
+                    f"{state['running']}",
+                    f"{state['pending']}",
+                    f"{state['other']}",
+                    f"{state['total']}",
+                ]
+            else:
+                # TODO: determine if slurm has accounting on, and if we can
+                # figure out how many jobs errored / passed
+                header = ['num_running', 'num_in_queue', 'total_monitored', 'num_at_start']
+                row_values = [
+                    f'{num_running}',
+                    f'{num_in_queue}',
+                    f'{total_monitored}',
+                    f'{num_at_start}',
+                ]
+                # row_values.append(str(state.get('FAIL', 0)))
+                # row_values.append(str(state.get('SKIPPED', 0)))
+                # row_values.append(str(state.get('PENDING', 0)))
+                finished = (num_in_queue == 0)
+
+            table = Table(*header,
                           title='slurm-monitor')

-
-            # figure out how many jobs errored / passed
-
-            table.add_row(
-                f'{num_running}',
-                f'{num_in_queue}',
-                f'{total_monitored}',
-                f'{num_at_start}',
-            )
+            table.add_row(*row_values)

-            finished = (num_in_queue == 0)
             return table, finished

         try:
@@ -622,6 +848,8 @@ class SlurmQueue(base_queue.Queue):
            style (str):
                can be 'colors', 'rich', or 'plain'

+           **kwargs: extra backend-specific args passed to finalize_text
+
        CommandLine:
            xdoctest -m cmd_queue.slurm_queue SlurmQueue.print_commands

@@ -640,6 +868,82 @@ class SlurmQueue(base_queue.Queue):
     rprint = print_commands


+def parse_scontrol_output(output: str) -> dict:
+    """
+    Parses the output of `scontrol show job` into a dictionary.
+
+    Example:
+        from cmd_queue.slurm_queue import * # NOQA
+        # Example usage
+        output = ub.codeblock(
+            '''
+            JobId=307 JobName=J0002-SQ-2025 with a space 0218T165929-9a50513a
+            UserId=joncrall(1000) GroupId=joncrall(1000) MCS_label=N/A
+            Priority=1 Nice=0 Account=(null) QOS=(null)
+            JobState=COMPLETED Reason=None Dependency=(null)
+            Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
+            RunTime=00:00:10 TimeLimit=365-00:00:00 TimeMin=N/A
+            SubmitTime=2025-02-18T16:59:30 EligibleTime=2025-02-18T16:59:33
+            AccrueTime=Unknown
+            StartTime=2025-02-18T16:59:33 EndTime=2025-02-18T16:59:43 Deadline=N/A
+            SuspendTime=None SecsPreSuspend=0 LastSchedEval=2025-02-18T16:59:33 Scheduler=Backfill
+            Partition=priority AllocNode:Sid=localhost:215414
+            ReqNodeList=(null) ExcNodeList=(null)
+            NodeList=toothbrush
+            BatchHost=toothbrush
+            NumNodes=1 NumCPUs=2 NumTasks=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
+            ReqTRES=cpu=1,mem=120445M,node=1,billing=1
+            AllocTRES=cpu=2,node=1,billing=2
+            Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
+            MinCPUsNode=1 MinMemoryNode=0 MinTmpDiskNode=0
+            Features=(null) DelayBoot=00:00:00
+            OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
+            Command=(null)
+            WorkDir=/home/joncrall/code/cmd_queue
+            StdErr="cmd_queue/slurm/SQ-2025021 with a space 8T165929-9a50513a/logs/J0002-SQ-20250218T165929-9a50513a.sh"
+            StdIn=/dev/null
+            StdOut="slurm/SQ-20 with and = 250218T165929-9a50513a/logs/J0002-SQ-20250218T165929-9a50513a.sh"
+            Power=
+            ''')
+        parse_scontrol_output(output)
+    """
+    import re
+    # These keys should be the last key on a line. They are allowed to contain
+    # space and equal characters.
+    special_keys = [
+        'JobName', 'WorkDir', 'StdErr', 'StdIn', 'StdOut', 'Command',
+        'NodeList', 'BatchHost', 'Partition'
+    ]
+    patterns = '(' + '|'.join(f' {re.escape(k)}=' for k in special_keys) + ')'
+    pat = re.compile(patterns)
+
+    # Initialize dictionary to store parsed key-value pairs
+    parsed_data = {}
+
+    # Split the input into lines
+    for line in output.splitlines():
+        # First, check for special keys (those with spaces before the equal sign)
+        match = pat.search(line)
+        if match:
+            # Special case: Key is a special key with a space
+            startpos = match.start()
+            leading_part = line[:startpos]
+            special_part = line[startpos + 1:]
+            key, value = special_part.split('=', 1)
+            parsed_data[key] = value.strip()
+            line = leading_part
+
+        # Now, handle the general case: split by spaces and then by "="
+        line = line.strip()
+        if line:
+            parts = line.split(' ')
+            for part in parts:
+                key, value = part.split('=', 1)
+                parsed_data[key] = value
+
+    return parsed_data
+
+
 SLURM_NOTES = r"""
 This shows a few things you can do with slurm

@@ -729,4 +1033,9 @@ sbatch \
 squeue

+
+References:
+    https://stackoverflow.com/questions/74164136/slurm-accessing-stdout-stderr-location-of-a-completed-job
+
+
 """
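Taken together, the slurm_queue.py changes wire per-job tracking into the monitor: finalize_text now appends a bash snippet (via the new util_bash.bash_json_dump helper) that writes a mapping from each job's variable name to its SLURM job id into a <queue>.jobids.json file, and monitor() reads that file back, polls each id with `scontrol show job`, and classifies the result with the new parse_scontrol_output function. A minimal standalone sketch of that read-back loop, assuming a jobids file shaped like {"<job varname>": "<slurm job id>", ...} (the helper function name below is illustrative, not part of the package):

    # Sketch only: poll the job ids recorded by SlurmQueue.finalize_text.
    import json
    import ubelt as ub
    from cmd_queue.slurm_queue import parse_scontrol_output

    def poll_queue_jobids(jobid_fpath):
        """Return {job_varname: JobState} for every job recorded in the jobids file."""
        # The queue script dumps a {varname: slurm_job_id} mapping next to the .sh file.
        jobid_lut = json.loads(ub.Path(jobid_fpath).read_text())
        states = {}
        for varname, job_id in jobid_lut.items():
            # Query slurm for the current state of each submitted job.
            out = ub.cmd(f'scontrol show job "{job_id}"')
            info = parse_scontrol_output(out.stdout)
            states[varname] = info.get('JobState', 'UNKNOWN')
        return states
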
cmd_queue/slurmify.py
ADDED
@@ -0,0 +1,116 @@
+r"""
+Helper script to wrap a command with sbatch, but using a more srun like syntax.
+
+.. code:: bash
+
+    python -m cmd_queue.slurmify \
+        --jobname="my_job" \
+        --depends=None \
+        --gpus=1 \
+        --mem=16GB \
+        --cpus_per_task=5 \
+        --ntasks=1 \
+        --ntasks-per-node=1 \
+        --partition=community \
+        -- \
+        python -c 'import sys; print("hello world"); sys.exit(0)'
+"""
+#!/usr/bin/env python3
+import scriptconfig as scfg
+import ubelt as ub
+
+
+class SlurmifyCLI(scfg.DataConfig):
+    __command__ = 'slurmify'
+
+    jobname = scfg.Value(None, help='for submit, this is the name of the new job')
+    depends = scfg.Value(None, help='comma separated jobnames to depend on')
+
+    command = scfg.Value(None, type=str, position=1, nargs='*', help=ub.paragraph(
+        '''
+        Specifies the bash command to queue.
+        Care must be taken when specifying this argument. If specifying as a
+        key/value pair argument, it is important to quote and escape the bash
+        command properly. A more convinient way to specify this command is as
+        a positional argument. End all of the options to this CLI with `--` and
+        then specify your full command.
+        '''))
+
+    gpus = scfg.Value(None, help='a comma separated list of the gpu numbers to spread across. tmux backend only.')
+    workers = scfg.Value(1, help='number of concurrent queues for the tmux backend.')
+
+    mem = scfg.Value(None, help='')
+    partition = scfg.Value(1, help='slurm partition')
+
+    ntasks = scfg.Value(None, help='')
+    ntasks_per_node = scfg.Value(None, help='')
+    cpus_per_task = scfg.Value(None, help='')
+
+    @classmethod
+    def main(cls, cmdline=1, **kwargs):
+        """
+        Example:
+            >>> # xdoctest: +SKIP
+            >>> from cmd_queue.slurmify import * # NOQA
+            >>> cmdline = 0
+            >>> kwargs = dict()
+            >>> cls = SlurmifyCLI
+            >>> cls.main(cmdline=cmdline, **kwargs)
+        """
+        import rich
+        from rich.markup import escape
+        config = cls.cli(cmdline=cmdline, data=kwargs, strict=True)
+        rich.print('config = ' + escape(ub.urepr(config, nl=1)))
+
+        # import json
+        # Run a new CLI queue
+        row = {'type': 'command', 'command': config['command']}
+        if config.jobname:
+            row['name'] = config.jobname
+        if config.depends:
+            row['depends'] = config.depends
+
+        import cmd_queue
+        queue = cmd_queue.Queue.create(
+            size=max(1, config['workers']),
+            backend='slurm',
+            name='slurmified',
+            gpus=config['gpus'],
+            mem=config['mem'],
+            partition=config['partition'],
+            ntasks=config['ntasks'],
+            ntasks_per_node=config['ntasks_per_node'],
+        )
+        try:
+            bash_command = row['command']
+            if isinstance(bash_command, list):
+                if len(bash_command) == 1:
+                    # hack
+                    import shlex
+                    if shlex.quote(bash_command[0]) == bash_command[0]:
+                        bash_command = bash_command[0]
+                    else:
+                        bash_command = shlex.quote(bash_command[0])
+                else:
+                    import shlex
+                    bash_command = ' '.join([shlex.quote(str(p)) for p in bash_command])
+            submitkw = ub.udict(row) & {'name', 'depends'}
+            queue.submit(bash_command, log=False, **submitkw)
+        except Exception:
+            print('row = {}'.format(ub.urepr(row, nl=1)))
+            raise
+        queue.print_commands()
+
+        # config.cli_queue_fpath.write_text(json.dumps(row))
+        # 'sbatch --job-name="test_job1" --output="$HOME/.cache/slurm/logs/job-%j-%x.out" --wrap=""
+
+__cli__ = SlurmifyCLI
+
+if __name__ == '__main__':
+    """
+
+    CommandLine:
+        python ~/code/cmd_queue/cmd_queue/slurmify.py
+        python -m cmd_queue.slurmify
+    """
+    __cli__.main()
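The new slurmify entry point hands everything after `--` to a slurm-backed queue as a single job. Because scriptconfig delivers the positional command as a list of argv tokens, SlurmifyCLI.main shlex-quotes each token before joining them into one bash string. A small sketch of that quoting step in isolation (the helper name is illustrative, not part of the package):

    import shlex

    def join_positional_command(command):
        # Mirrors the quoting logic in SlurmifyCLI.main: a lone token that is
        # already shell-safe passes through unchanged; everything else is
        # quoted token-by-token and joined into a single bash string.
        if not isinstance(command, list):
            return command
        if len(command) == 1:
            tok = command[0]
            return tok if shlex.quote(tok) == tok else shlex.quote(tok)
        return ' '.join(shlex.quote(str(p)) for p in command)

    # ['python', '-c', 'import sys; print("hello world")'] becomes
    # python -c 'import sys; print("hello world")'
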
cmd_queue/tmux_queue.py
CHANGED
@@ -724,7 +724,7 @@ class TMUXMultiQueue(base_queue.Queue):

         CommandLine:
             xdoctest -m cmd_queue.tmux_queue TMUXMultiQueue.monitor:0
-            xdoctest -m cmd_queue.tmux_queue TMUXMultiQueue.monitor:1
+            INTERACTIVE_TEST=1 xdoctest -m cmd_queue.tmux_queue TMUXMultiQueue.monitor:1

         Example:
             >>> # xdoctest: +REQUIRES(--interact)
@@ -855,7 +855,6 @@ class TMUXMultiQueue(base_queue.Queue):
             finished &= (state['status'] == 'done')
             if state['status'] == 'done':
                 pass_color = '[green]'
-
             if (state['failed'] > 0):
                 fail_color = '[red]'
             if (state['skipped'] > 0):
@@ -1074,5 +1073,19 @@ if 0:
     tmux send -t my_session_id1 "tmux select-pane -t 3" Enter
     tmux send -t my_session_id1 "echo pane3" Enter

+    # https://stackoverflow.com/questions/54954177/how-to-write-a-tmux-script-so-that-it-automatically-split-windows-and-opens-a-se
+    # https://tmuxcheatsheet.com/
+    # https://gist.github.com/Starefossen/5955406
+
+    # List the bindings
+    tmux list-keys
+
+    # Can arange the splits in a session via a preset layout
+    # Preset layouts are:
+    # even-horizontal, even-vertical, main-horizontal, main-vertical, or tiled.
+    tmux select-layout -t "${SESSION_NAME}" even-vertical
+
+    # switch to an existing session
+    tmux switch -t "${SESSION_NAME}"

 """