oh-my-batch 0.1.0.dev0__tar.gz → 0.1.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: oh-my-batch
3
- Version: 0.1.0.dev0
3
+ Version: 0.1.0.dev1
4
4
  Summary:
5
5
  License: GPL
6
6
  Author: weihong.xu
@@ -118,13 +118,13 @@ You can use `omb job` to track the state of the jobs.
118
118
  ```bash
119
119
 
120
120
  omb job slurm \
121
- submit tmp/*.slurm --max_tries 3 --wait --recovery lammps.recovery
121
+ submit tmp/*.slurm --max_tries 3 --wait --recovery lammps-jobs.json
122
122
  ```
123
123
 
124
124
  The above command will submit the batch scripts to the job scheduler,
125
125
  and wait for the jobs to finish. If a job fails, it will be retried at most 3 times.
126
126
 
127
- The `--recovery` option will save the job information to `lammps.recovery` file,
127
+ The `--recovery` option will save the job information to `lammps-jobs.json` file,
128
128
  if `omb job` is interrupted, you can run the exact same command to recover the job status,
129
129
  so that you don't need to resubmit jobs that have already been submitted.
130
130
 
@@ -100,12 +100,12 @@ You can use `omb job` to track the state of the jobs.
100
100
  ```bash
101
101
 
102
102
  omb job slurm \
103
- submit tmp/*.slurm --max_tries 3 --wait --recovery lammps.recovery
103
+ submit tmp/*.slurm --max_tries 3 --wait --recovery lammps-jobs.json
104
104
  ```
105
105
 
106
106
  The above command will submit the batch scripts to the job scheduler,
107
107
  and wait for the jobs to finish. If a job fails, it will be retried at most 3 times.
108
108
 
109
- The `--recovery` option will save the job information to `lammps.recovery` file,
109
+ The `--recovery` option will save the job information to `lammps-jobs.json` file,
110
110
  if `omb job` is interrupted, you can run the exact same command to recover the job status,
111
111
  so that you don't need to resubmit jobs that have already been submitted.
@@ -69,9 +69,6 @@ class BaseJobManager:
69
69
  scripts = set(os.path.normpath(s) for s in expand_globs(script))
70
70
  logger.info('Scripts to submit: %s', scripts)
71
71
 
72
- if recovery and recover_scripts != scripts:
73
- raise ValueError('Scripts to submit are different from scripts in recovery file')
74
-
75
72
  for script_file in scripts:
76
73
  if script_file not in recover_scripts:
77
74
  jobs.append(new_job(script_file))
@@ -108,19 +105,21 @@ class Slurm(BaseJobManager):
108
105
 
109
106
  def _update_jobs(self, jobs: List[dict], max_tries: int, submit_opts: str):
110
107
  # query job status
111
- job_ids = ','.join(j['id'] for j in jobs if j['id'])
112
- query_cmd = f'{self._sacct_bin} -X -P -j {job_ids} --format=JobID,JobName,State'
113
-
114
- user = os.environ.get('USER')
115
- if user:
116
- query_cmd += f' -u {user}'
117
-
118
- cp = shell_run(query_cmd)
119
- if cp.returncode != 0:
120
- logger.error('Failed to query job status: %s', cp.stderr.decode('utf-8'))
121
- return jobs
122
- logger.info('Job status: %s', cp.stdout.decode('utf-8'))
123
- new_state = parse_csv(cp.stdout.decode('utf-8'))
108
+ job_ids = [j['id'] for j in jobs if j['id']]
109
+ if job_ids:
110
+ query_cmd = f'{self._sacct_bin} -X -P --format=JobID,JobName,State -j {",".join(job_ids)}'
111
+ user = os.environ.get('USER')
112
+ if user:
113
+ query_cmd += f' -u {user}'
114
+
115
+ cp = shell_run(query_cmd)
116
+ if cp.returncode != 0:
117
+ logger.error('Failed to query job status: %s', cp.stderr.decode('utf-8'))
118
+ return jobs
119
+ logger.info('Job status: %s', cp.stdout.decode('utf-8'))
120
+ new_state = parse_csv(cp.stdout.decode('utf-8'))
121
+ else:
122
+ new_state = []
124
123
 
125
124
  for job in jobs:
126
125
  for row in new_state:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "oh-my-batch"
3
- version = "0.1.0.dev0"
3
+ version = "0.1.0.dev1"
4
4
  description = ""
5
5
  authors = ["weihong.xu <xuweihong.cn@gmail.com>"]
6
6
  license = "GPL"