oh-my-batch 0.1.0.dev0__tar.gz → 0.1.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/PKG-INFO +3 -3
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/README.md +2 -2
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/oh_my_batch/job.py +15 -16
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/pyproject.toml +1 -1
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/LICENSE +0 -0
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/oh_my_batch/__init__.py +0 -0
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/oh_my_batch/__main__.py +0 -0
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/oh_my_batch/assets/__init__.py +0 -0
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/oh_my_batch/assets/functions.sh +0 -0
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/oh_my_batch/batch.py +0 -0
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/oh_my_batch/cli.py +0 -0
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/oh_my_batch/combo.py +0 -0
- {oh_my_batch-0.1.0.dev0 → oh_my_batch-0.1.0.dev1}/oh_my_batch/util.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: oh-my-batch
|
3
|
-
Version: 0.1.0.dev0
|
3
|
+
Version: 0.1.0.dev1
|
4
4
|
Summary:
|
5
5
|
License: GPL
|
6
6
|
Author: weihong.xu
|
@@ -118,13 +118,13 @@ You can use `omb job` to track the state of the jobs.
|
|
118
118
|
```bash
|
119
119
|
|
120
120
|
omb job slurm \
|
121
|
-
submit tmp/*.slurm --max_tries 3 --wait --recovery lammps.
|
121
|
+
submit tmp/*.slurm --max_tries 3 --wait --recovery lammps-jobs.json
|
122
122
|
```
|
123
123
|
|
124
124
|
The above command will submit the batch scripts to the job scheduler,
|
125
125
|
and wait for the jobs to finish. If the job fails, it will retry for at most 3 times.
|
126
126
|
|
127
|
-
The `--recovery` option will save the job information to `lammps.
|
127
|
+
The `--recovery` option will save the job information to `lammps-jobs.json` file,
|
128
128
|
if `omb job` is interrupted, you can run the exact same command to recover the job status,
|
129
129
|
so that you don't need to resubmit the jobs that are already submitted.
|
130
130
|
|
@@ -100,12 +100,12 @@ You can use `omb job` to track the state of the jobs.
|
|
100
100
|
```bash
|
101
101
|
|
102
102
|
omb job slurm \
|
103
|
-
submit tmp/*.slurm --max_tries 3 --wait --recovery lammps.
|
103
|
+
submit tmp/*.slurm --max_tries 3 --wait --recovery lammps-jobs.json
|
104
104
|
```
|
105
105
|
|
106
106
|
The above command will submit the batch scripts to the job scheduler,
|
107
107
|
and wait for the jobs to finish. If the job fails, it will retry for at most 3 times.
|
108
108
|
|
109
|
-
The `--recovery` option will save the job information to `lammps.
|
109
|
+
The `--recovery` option will save the job information to `lammps-jobs.json` file,
|
110
110
|
if `omb job` is interrupted, you can run the exact same command to recover the job status,
|
111
111
|
so that you don't need to resubmit the jobs that are already submitted.
|
@@ -69,9 +69,6 @@ class BaseJobManager:
|
|
69
69
|
scripts = set(os.path.normpath(s) for s in expand_globs(script))
|
70
70
|
logger.info('Scripts to submit: %s', scripts)
|
71
71
|
|
72
|
-
if recovery and recover_scripts != scripts:
|
73
|
-
raise ValueError('Scripts to submit are different from scripts in recovery file')
|
74
|
-
|
75
72
|
for script_file in scripts:
|
76
73
|
if script_file not in recover_scripts:
|
77
74
|
jobs.append(new_job(script_file))
|
@@ -108,19 +105,21 @@ class Slurm(BaseJobManager):
|
|
108
105
|
|
109
106
|
def _update_jobs(self, jobs: List[dict], max_tries: int, submit_opts: str):
|
110
107
|
# query job status
|
111
|
-
job_ids =
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
108
|
+
job_ids = [j['id'] for j in jobs if j['id']]
|
109
|
+
if job_ids:
|
110
|
+
query_cmd = f'{self._sacct_bin} -X -P --format=JobID,JobName,State -j {",".join(job_ids)}'
|
111
|
+
user = os.environ.get('USER')
|
112
|
+
if user:
|
113
|
+
query_cmd += f' -u {user}'
|
114
|
+
|
115
|
+
cp = shell_run(query_cmd)
|
116
|
+
if cp.returncode != 0:
|
117
|
+
logger.error('Failed to query job status: %s', cp.stderr.decode('utf-8'))
|
118
|
+
return jobs
|
119
|
+
logger.info('Job status: %s', cp.stdout.decode('utf-8'))
|
120
|
+
new_state = parse_csv(cp.stdout.decode('utf-8'))
|
121
|
+
else:
|
122
|
+
new_state = []
|
124
123
|
|
125
124
|
for job in jobs:
|
126
125
|
for row in new_state:
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|