oh-my-batch 0.1.0.dev3__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/PKG-INFO +11 -11
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/README.md +10 -10
- oh_my_batch-0.1.1/oh_my_batch/__init__.py +4 -0
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/oh_my_batch/combo.py +0 -1
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/oh_my_batch/job.py +10 -9
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/oh_my_batch/util.py +1 -1
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/pyproject.toml +1 -1
- oh_my_batch-0.1.0.dev3/oh_my_batch/__init__.py +0 -0
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/LICENSE +0 -0
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/oh_my_batch/__main__.py +0 -0
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/oh_my_batch/assets/__init__.py +0 -0
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/oh_my_batch/assets/functions.sh +0 -0
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/oh_my_batch/batch.py +0 -0
- {oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/oh_my_batch/cli.py +0 -0
{oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: oh-my-batch
-Version: 0.1.0.dev3
+Version: 0.1.1
 Summary:
 License: GPL
 Author: weihong.xu
@@ -17,6 +17,11 @@ Requires-Dist: fire (>=0.7.0,<0.8.0)
 Description-Content-Type: text/markdown
 
 # oh-my-batch
+
+[](https://badge.fury.io/py/oh-my-batch)
+[](https://pypi.org/project/oh-my-batch/)
+[](https://pypi.org/project/oh-my-batch/)
+
 A simple tool to manipulate batch tasks designed for scientific computing community.
 
 ## Features
@@ -41,7 +46,6 @@ for example, different temperatures 300K, 400K, 500K, against each data file.
 In this case, you can use `omb combo` command to generate a series of input files for you.
 
 ```bash
-#! /bin/bash
 # prepare fake data files
 mkdir -p tmp/
 touch tmp/1.data tmp/2.data tmp/3.data
@@ -87,7 +91,6 @@ You want to package them into 2 batch scripts to submit to a job scheduler.
 You can use `omb batch` to generate batch scripts for you like this:
 
 ```bash
-#! /bin/bash
 cat > tmp/lammps_header.sh <<EOF
 #!/bin/bash
 #SBATCH -J lmp
@@ -112,19 +115,16 @@ You can run the above script by `./examples/omb-batch.sh`,
 ### Track the state of job in job schedular
 
 Let's continue the above example, now you have submitted the batch scripts to the job scheduler.
-
-You can use `omb job` to track the state of the jobs.
+In this case, you can use `omb job` to track the state of the jobs.
 
 ```bash
-
-omb job slurm \
-    submit tmp/*.slurm --max_tries 3 --wait --recovery lammps-jobs.json
+omb job slurm submit tmp/*.slurm --max_tries 3 --wait --recovery lammps-jobs.json
 ```
 
 The above command will submit the batch scripts to the job scheduler,
 and wait for the jobs to finish. If the job fails, it will retry for at most 3 times.
 
-The `--recovery` option will save the job information to `lammps-jobs.json` file
-
-so that you don't need to resubmit the jobs that are
+The `--recovery` option will save the job information to `lammps-jobs.json` file.
+If `omb job` is interrupted, you can rerun the exact same command to recover the job status,
+so that you don't need to resubmit the jobs that are still running or completed.
 
````
{oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/README.md

````diff
@@ -1,4 +1,9 @@
 # oh-my-batch
+
+[](https://badge.fury.io/py/oh-my-batch)
+[](https://pypi.org/project/oh-my-batch/)
+[](https://pypi.org/project/oh-my-batch/)
+
 A simple tool to manipulate batch tasks designed for scientific computing community.
 
 ## Features
@@ -23,7 +28,6 @@ for example, different temperatures 300K, 400K, 500K, against each data file.
 In this case, you can use `omb combo` command to generate a series of input files for you.
 
 ```bash
-#! /bin/bash
 # prepare fake data files
 mkdir -p tmp/
 touch tmp/1.data tmp/2.data tmp/3.data
@@ -69,7 +73,6 @@ You want to package them into 2 batch scripts to submit to a job scheduler.
 You can use `omb batch` to generate batch scripts for you like this:
 
 ```bash
-#! /bin/bash
 cat > tmp/lammps_header.sh <<EOF
 #!/bin/bash
 #SBATCH -J lmp
@@ -94,18 +97,15 @@ You can run the above script by `./examples/omb-batch.sh`,
 ### Track the state of job in job schedular
 
 Let's continue the above example, now you have submitted the batch scripts to the job scheduler.
-
-You can use `omb job` to track the state of the jobs.
+In this case, you can use `omb job` to track the state of the jobs.
 
 ```bash
-
-omb job slurm \
-    submit tmp/*.slurm --max_tries 3 --wait --recovery lammps-jobs.json
+omb job slurm submit tmp/*.slurm --max_tries 3 --wait --recovery lammps-jobs.json
 ```
 
 The above command will submit the batch scripts to the job scheduler,
 and wait for the jobs to finish. If the job fails, it will retry for at most 3 times.
 
-The `--recovery` option will save the job information to `lammps-jobs.json` file
-
-so that you don't need to resubmit the jobs that are
+The `--recovery` option will save the job information to `lammps-jobs.json` file.
+If `omb job` is interrupted, you can rerun the exact same command to recover the job status,
+so that you don't need to resubmit the jobs that are still running or completed.
````
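
The README text above says rerunning the same `omb job` command recovers job status from `lammps-jobs.json`. A hedged sketch of what that recovery data plausibly looks like, inferred only from the job-dict fields (`script`, `id`, `state`, `tries`) visible in the job.py hunks below; the JSON layout itself is an assumption:

```python
import json

# Hypothetical contents of lammps-jobs.json, inferred from the job-dict
# fields (script, id, state, tries) visible in the job.py hunks below.
recovery = json.loads("""
[
  {"script": "tmp/lammps-0.slurm", "id": "123", "state": 2, "tries": 1},
  {"script": "tmp/lammps-1.slurm", "id": "", "state": 0, "tries": 0}
]
""")

# Jobs with no scheduler id yet are candidates for (re)submission when the
# same `omb job` command is rerun.
pending = [j for j in recovery if not j['id']]
print([j['script'] for j in pending])  # -> ['tmp/lammps-1.slurm']
```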
{oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/oh_my_batch/job.py

```diff
@@ -1,5 +1,4 @@
 from typing import List
-from enum import Enum
 
 import logging
 import json
@@ -12,6 +11,7 @@ from .util import expand_globs, shell_run, parse_csv
 
 logger = logging.getLogger(__name__)
 
+
 class JobState:
     NULL = 0
     PENDING = 1
@@ -59,7 +59,7 @@ class BaseJobManager:
         recover_scripts = set(j['script'] for j in jobs)
         logger.info('Scripts in recovery files: %s', recover_scripts)
 
-        scripts = set(expand_globs(script, raise_invalid=True))
+        scripts = set(norm_path(s) for s in expand_globs(script, raise_invalid=True))
         logger.info('Scripts to submit: %s', scripts)
 
         for script_file in scripts:
```
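
The `norm_path(...)` wrapper added above seems intended to make scripts expanded from the command line compare equal to the `script` paths already recorded in the recovery file. A minimal sketch of the effect, with hypothetical file names:

```python
import os

def norm_path(path: str) -> str:
    # Mirrors the new helper added at the bottom of job.py:
    # absolutize, then normalize.
    return os.path.normpath(os.path.abspath(path))

# Two hypothetical spellings of the same script file:
a = norm_path('tmp/task-0.slurm')
b = norm_path('./tmp/../tmp/task-0.slurm')
assert a == b  # both collapse to one canonical absolute path
```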
```diff
@@ -101,20 +101,18 @@ class Slurm(BaseJobManager):
         job_ids = [j['id'] for j in jobs if j['id']]
         if job_ids:
             query_cmd = f'{self._sacct_bin} -X -P --format=JobID,JobName,State -j {",".join(job_ids)}'
-            user = os.environ.get('USER')
-            if user:
-                query_cmd += f' -u {user}'
-
             cp = shell_run(query_cmd)
             if cp.returncode != 0:
                 logger.error('Failed to query job status: %s', cp.stderr.decode('utf-8'))
                 return jobs
-            logger.info('Job status: %s', cp.stdout.decode('utf-8'))
+            logger.info('Job status:\n%s', cp.stdout.decode('utf-8'))
             new_state = parse_csv(cp.stdout.decode('utf-8'))
         else:
             new_state = []
 
         for job in jobs:
+            if not job['id']:
+                continue
             for row in new_state:
                 if job['id'] == row['JobID']:
                     job['state'] = self._map_state(row['State'])
```
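
For context, `sacct -P` emits pipe-delimited records with a header row, which is presumably the format `parse_csv` consumes; the removed `-u {user}` filter also means the query now relies on the explicit `-j` job-id list alone. A hedged sketch of that parsing step — `parse_sacct` and the sample output are illustrative, not the package's actual `parse_csv`:

```python
import csv
import io

# Made-up sample of what `sacct -X -P --format=JobID,JobName,State` prints:
stdout = """JobID|JobName|State
123|lmp|COMPLETED
124|lmp|RUNNING
"""

def parse_sacct(text: str) -> list:
    # `-P` means pipe-delimited output with a header row.
    return list(csv.DictReader(io.StringIO(text), delimiter='|'))

rows = parse_sacct(stdout)
assert rows[0]['JobID'] == '123' and rows[1]['State'] == 'RUNNING'
```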
```diff
@@ -122,8 +120,7 @@ class Slurm(BaseJobManager):
                         logger.warning('Unknown job %s state: %s',row['JobID'], row['State'])
                     break
             else:
-
-                logger.error('Job %s not found in sacct output', job['id'])
+                logger.error('Job %s not found in sacct output', job['id'])
 
         # check if there are jobs to be (re)submitted
         for job in jobs:
```
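
The `else:` retained here belongs to the `for row in new_state:` loop: in Python, a `for`/`else` body runs only when the loop completes without `break`, i.e. when a job id never matched any `sacct` row. A standalone illustration (names are made up):

```python
def lookup(job_id, rows):
    for row in rows:
        if row['JobID'] == job_id:
            print('found', job_id)
            break
    else:
        # Runs only when the loop finished without `break`,
        # i.e. the id was missing from the output.
        print(job_id, 'not found in output')

lookup('123', [{'JobID': '123'}])  # found 123
lookup('999', [{'JobID': '123'}])  # 999 not found in output
```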
```diff
@@ -169,3 +166,7 @@ def should_submit(job: dict, max_tries: int):
     if job['tries'] >= max_tries:
         return False
     return state != JobState.COMPLETED
+
+
+def norm_path(path: str):
+    return os.path.normpath(os.path.abspath(path))
```
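
Reading the visible tail of `should_submit` together with the `JobState` constants above: a job is resubmitted only while it has tries left and is not completed. A hedged reconstruction — the `COMPLETED` value and the binding of `state` are assumptions, since both fall outside the hunks shown:

```python
class JobState:
    # Values visible in the job.py hunk above; COMPLETED's value is assumed.
    NULL = 0
    PENDING = 1
    COMPLETED = 2  # assumption: not shown in this diff

def should_submit(job: dict, max_tries: int) -> bool:
    state = job['state']  # assumption: how `state` is bound is outside the hunk
    if job['tries'] >= max_tries:
        return False
    return state != JobState.COMPLETED

assert should_submit({'state': JobState.NULL, 'tries': 0}, max_tries=3)
assert not should_submit({'state': JobState.NULL, 'tries': 3}, max_tries=3)
assert not should_submit({'state': JobState.COMPLETED, 'tries': 1}, max_tries=3)
```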
{oh_my_batch-0.1.0.dev3 → oh_my_batch-0.1.1}/oh_my_batch/util.py

```diff
@@ -19,7 +19,7 @@ def expand_globs(patterns: Iterable[str], raise_invalid=False) -> List[str]:
     """
     paths = []
     for pattern in patterns:
-        result = glob.glob(pattern, recursive=True)
+        result = glob.glob(pattern, recursive=True)
         if raise_invalid and len(result) == 0:
             raise FileNotFoundError(f'No file found for {pattern}')
         for p in result:
```

(The changed line in `util.py` appears to differ only in whitespace.)
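
From the signature and loop shown here, `expand_globs` evaluates each glob pattern and can fail fast when a pattern matches nothing. A hedged sketch of equivalent behavior; the real helper's de-duplication details are not shown in this diff:

```python
import glob
from typing import Iterable, List

def expand_globs_sketch(patterns: Iterable[str], raise_invalid: bool = False) -> List[str]:
    """Expand each glob pattern; optionally fail fast when one matches nothing."""
    paths: List[str] = []
    for pattern in patterns:
        result = glob.glob(pattern, recursive=True)
        if raise_invalid and len(result) == 0:
            raise FileNotFoundError(f'No file found for {pattern}')
        for p in result:
            if p not in paths:  # assumption: the real helper also de-duplicates
                paths.append(p)
    return paths

# Usage mirroring `omb job slurm submit tmp/*.slurm ...`:
# expand_globs_sketch(['tmp/*.slurm'], raise_invalid=True)
```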