oh-my-batch 0.1.0.dev0__py3-none-any.whl → 0.1.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oh_my_batch/job.py CHANGED
@@ -12,29 +12,22 @@ from .util import expand_globs, shell_run, parse_csv
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
15
+ class JobState:
16
+ NULL = 0
17
+ PENDING = 1
18
+ RUNNING = 2
19
+ CANCELLED = 3
20
+ COMPLETED = 4
21
+ FAILED = 5
22
+ UNKNOWN = 6
15
23
 
16
- class JobState(bytes, Enum):
17
- """
18
- Job state enumeration
19
- """
20
- def __new__(cls, value: int, terminal: bool, status_name: str) -> "JobState":
21
- obj = bytes.__new__(cls, [value])
22
- obj._value_ = value
23
- obj.terminal = terminal
24
- obj.status_name = status_name
25
- return obj
26
-
27
- value: int # type: ignore
28
- terminal: bool
29
- status_name: str
30
-
31
- NULL = (0, True, "NULL")
32
- PENDING = (1, False, "PENDING")
33
- RUNNING = (2, False, "RUNNING")
34
- CANCELLED = (3, True, "CANCELLED")
35
- COMPLETED = (4, True, "COMPLETED")
36
- FAILED = (5, True, "FAILED")
37
- UNKNOWN = (6, False, "UNKNOWN")
24
+ @classmethod
25
+ def is_terminal(cls, state: int):
26
+ return state in (JobState.NULL, JobState.COMPLETED, JobState.FAILED, JobState.CANCELLED)
27
+
28
+ @classmethod
29
+ def is_success(cls, state: int):
30
+ return state == JobState.COMPLETED
38
31
 
39
32
 
40
33
  def new_job(script: str):
@@ -69,9 +62,6 @@ class BaseJobManager:
69
62
  scripts = set(os.path.normpath(s) for s in expand_globs(script))
70
63
  logger.info('Scripts to submit: %s', scripts)
71
64
 
72
- if recovery and recover_scripts != scripts:
73
- raise ValueError('Scripts to submit are different from scripts in recovery file')
74
-
75
65
  for script_file in scripts:
76
66
  if script_file not in recover_scripts:
77
67
  jobs.append(new_job(script_file))
@@ -87,7 +77,7 @@ class BaseJobManager:
87
77
  break
88
78
 
89
79
  # stop if all jobs are terminal and no job is left to be submitted
90
- if (all(j['state'].terminal for j in jobs) and
80
+ if (all(JobState.is_terminal(j['state']) for j in jobs) and
91
81
  not any(should_submit(j, max_tries) for j in jobs)):
92
82
  break
93
83
 
@@ -108,19 +98,21 @@ class Slurm(BaseJobManager):
108
98
 
109
99
  def _update_jobs(self, jobs: List[dict], max_tries: int, submit_opts: str):
110
100
  # query job status
111
- job_ids = ','.join(j['id'] for j in jobs if j['id'])
112
- query_cmd = f'{self._sacct_bin} -X -P -j {job_ids} --format=JobID,JobName,State'
113
-
114
- user = os.environ.get('USER')
115
- if user:
116
- query_cmd += f' -u {user}'
117
-
118
- cp = shell_run(query_cmd)
119
- if cp.returncode != 0:
120
- logger.error('Failed to query job status: %s', cp.stderr.decode('utf-8'))
121
- return jobs
122
- logger.info('Job status: %s', cp.stdout.decode('utf-8'))
123
- new_state = parse_csv(cp.stdout.decode('utf-8'))
101
+ job_ids = [j['id'] for j in jobs if j['id']]
102
+ if job_ids:
103
+ query_cmd = f'{self._sacct_bin} -X -P --format=JobID,JobName,State -j {",".join(job_ids)}'
104
+ user = os.environ.get('USER')
105
+ if user:
106
+ query_cmd += f' -u {user}'
107
+
108
+ cp = shell_run(query_cmd)
109
+ if cp.returncode != 0:
110
+ logger.error('Failed to query job status: %s', cp.stderr.decode('utf-8'))
111
+ return jobs
112
+ logger.info('Job status: %s', cp.stdout.decode('utf-8'))
113
+ new_state = parse_csv(cp.stdout.decode('utf-8'))
114
+ else:
115
+ new_state = []
124
116
 
125
117
  for job in jobs:
126
118
  for row in new_state:
@@ -130,8 +122,8 @@ class Slurm(BaseJobManager):
130
122
  logger.warning('Unknown job %s state: %s',row['JobID'], row['State'])
131
123
  break
132
124
  else:
133
- job['state'] = JobState.FAILED
134
- logger.error('Job %s not found in sacct output', job['id'])
125
+ if job['id']:
126
+ logger.error('Job %s not found in sacct output', job['id'])
135
127
 
136
128
  # check if there are jobs to be (re)submitted
137
129
  for job in jobs:
@@ -171,8 +163,8 @@ class Slurm(BaseJobManager):
171
163
 
172
164
 
173
165
  def should_submit(job: dict, max_tries: int):
174
- state: JobState = job['state']
175
- if not state.terminal:
166
+ state: int = job['state']
167
+ if not JobState.is_terminal(state):
176
168
  return False
177
169
  if job['tries'] >= max_tries:
178
170
  return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: oh-my-batch
3
- Version: 0.1.0.dev0
3
+ Version: 0.1.0.dev2
4
4
  Summary:
5
5
  License: GPL
6
6
  Author: weihong.xu
@@ -118,13 +118,13 @@ You can use `omb job` to track the state of the jobs.
118
118
  ```bash
119
119
 
120
120
  omb job slurm \
121
- submit tmp/*.slurm --max_tries 3 --wait --recovery lammps.recovery
121
+ submit tmp/*.slurm --max_tries 3 --wait --recovery lammps-jobs.json
122
122
  ```
123
123
 
124
124
  The above command will submit the batch scripts to the job scheduler,
125
125
  and wait for the jobs to finish. If a job fails, it will be retried at most 3 times.
126
126
 
127
- The `--recovery` option will save the job information to `lammps.recovery` file,
127
+ The `--recovery` option will save the job information to the `lammps-jobs.json` file,
128
128
  if `omb job` is interrupted, you can run the exact same command to recover the job status,
129
129
  so that you don't need to resubmit the jobs that are already submitted.
130
130
 
@@ -5,10 +5,10 @@ oh_my_batch/assets/functions.sh,sha256=eORxFefV-XrWbG-2I6u-c8uf1XxOQ31LaeVHBumwz
5
5
  oh_my_batch/batch.py,sha256=e73N-xwxMvgxnWwFMp33PQD1Dy-T-ATjANlwtPRHPQM,3016
6
6
  oh_my_batch/cli.py,sha256=G_JxqX0Zbx_EbcDxXbYjJ_4O-EOhmkF1lcMWgQ5ZPqo,375
7
7
  oh_my_batch/combo.py,sha256=AHFD5CLoczqtjcfl2Rb4A2ucoQU40-cWtDOYjtP-yY4,7680
8
- oh_my_batch/job.py,sha256=kup6Kwr3HFeCWAYJzJ1BET81_Dvbz1HxuHfmMPOpCnU,6080
8
+ oh_my_batch/job.py,sha256=_fETBYpuSd_hNHKnXSwYcSU3OXtU7PO-P2QMfhE-Wfs,5788
9
9
  oh_my_batch/util.py,sha256=H8B4zVNH5xRp-NG_uypgvtmz2YSpXy_6LK5ROv6SYrc,2116
10
- oh_my_batch-0.1.0.dev0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
11
- oh_my_batch-0.1.0.dev0.dist-info/METADATA,sha256=nUAgfApBgfXFhHD9-VWXAZsyGF1iJcO8bxYGMZKcGLI,4453
12
- oh_my_batch-0.1.0.dev0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
13
- oh_my_batch-0.1.0.dev0.dist-info/entry_points.txt,sha256=ZY2GutSoNjjSyJ4qO2pTeseKUFgoTYdvmgkuZZkwi68,77
14
- oh_my_batch-0.1.0.dev0.dist-info/RECORD,,
10
+ oh_my_batch-0.1.0.dev2.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
11
+ oh_my_batch-0.1.0.dev2.dist-info/METADATA,sha256=Novvp5-MhKR9J0Q6Al833vA6GjT7EVRtDv4ADmnluxk,4456
12
+ oh_my_batch-0.1.0.dev2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
13
+ oh_my_batch-0.1.0.dev2.dist-info/entry_points.txt,sha256=ZY2GutSoNjjSyJ4qO2pTeseKUFgoTYdvmgkuZZkwi68,77
14
+ oh_my_batch-0.1.0.dev2.dist-info/RECORD,,