resubmit 0.0.2__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- resubmit-0.0.4/PKG-INFO +89 -0
- resubmit-0.0.4/README.md +76 -0
- {resubmit-0.0.2 → resubmit-0.0.4}/pyproject.toml +1 -1
- resubmit-0.0.4/src/resubmit/__bookkeeping.py +223 -0
- resubmit-0.0.4/src/resubmit/__init__.py +6 -0
- resubmit-0.0.2/src/resubmit/submit.py → resubmit-0.0.4/src/resubmit/__submit.py +16 -17
- resubmit-0.0.4/src/resubmit.egg-info/PKG-INFO +89 -0
- {resubmit-0.0.2 → resubmit-0.0.4}/src/resubmit.egg-info/SOURCES.txt +4 -3
- resubmit-0.0.4/tests/test_bookkeeping.py +45 -0
- resubmit-0.0.4/tests/test_resubmit.py +117 -0
- resubmit-0.0.2/PKG-INFO +0 -36
- resubmit-0.0.2/README.md +0 -23
- resubmit-0.0.2/src/resubmit/__init__.py +0 -7
- resubmit-0.0.2/src/resubmit/slurm.py +0 -10
- resubmit-0.0.2/src/resubmit.egg-info/PKG-INFO +0 -36
- resubmit-0.0.2/tests/test_resubmit.py +0 -19
- {resubmit-0.0.2 → resubmit-0.0.4}/LICENSE +0 -0
- {resubmit-0.0.2 → resubmit-0.0.4}/setup.cfg +0 -0
- /resubmit-0.0.2/src/resubmit/debug.py → /resubmit-0.0.4/src/resubmit/__debug.py +0 -0
- {resubmit-0.0.2 → resubmit-0.0.4}/src/resubmit.egg-info/dependency_links.txt +0 -0
- {resubmit-0.0.2 → resubmit-0.0.4}/src/resubmit.egg-info/requires.txt +0 -0
- {resubmit-0.0.2 → resubmit-0.0.4}/src/resubmit.egg-info/top_level.txt +0 -0
resubmit-0.0.4/PKG-INFO
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: resubmit
|
|
3
|
+
Version: 0.0.4
|
|
4
|
+
Summary: Small wrapper around submitit to simplify cluster submissions
|
|
5
|
+
Author: Amir Mehrpanah
|
|
6
|
+
License: MIT
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: submitit>=0.8
|
|
10
|
+
Provides-Extra: debug
|
|
11
|
+
Requires-Dist: debugpy; extra == "debug"
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# resubmit
|
|
15
|
+
|
|
16
|
+
Small utility library to simplify job submission with Submitit on SLURM clusters.
|
|
17
|
+
|
|
18
|
+
Quick usage:
|
|
19
|
+
|
|
20
|
+
- Install locally for development:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install -e .[debug]
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
- Use in your project:
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from resubmit import submit_jobs, maybe_attach_debugger
|
|
30
|
+
|
|
31
|
+
# attach remote debugger if requested
|
|
32
|
+
maybe_attach_debugger(args.get("port", None))
|
|
33
|
+
|
|
34
|
+
# submit jobs (list of dicts)
|
|
35
|
+
submit_jobs(jobs_list, my_entrypoint, timeout_min=60, block=True)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## API
|
|
39
|
+
|
|
40
|
+
### submit_jobs(...) 🔧
|
|
41
|
+
|
|
42
|
+
Submit multiple jobs to a Slurm cluster using Submitit.
|
|
43
|
+
|
|
44
|
+
Signature (short):
|
|
45
|
+
|
|
46
|
+
`submit_jobs(jobs_args: Iterable[dict], func: Callable[[List[dict]], Any], *, timeout_min: int, cpus_per_task: int = 16, mem_gb: int = 64, num_gpus: int = 1, account: Optional[str] = None, folder: str = "logs/%j", block: bool = False, prompt: bool = True, local_run: bool = False, slurm_additional_parameters: Optional[Dict] = None, constraint: Optional[str] = None, reservation: Optional[str] = None)`
|
|
47
|
+
|
|
48
|
+
- `jobs_args`: iterable of per-job kwargs (each item is passed to `func`).
|
|
49
|
+
- `func`: entrypoint called for each job (should accept a list or single job dict depending on your usage).
|
|
50
|
+
- `timeout_min`, `cpus_per_task`, `mem_gb`, `num_gpus`: common Slurm resources.
|
|
51
|
+
- `account`: optional Slurm account name.
|
|
52
|
+
- `folder`: logs folder for Submitit files (supports `%j` for job id).
|
|
53
|
+
- `block`: if True, waits for all jobs and returns results.
|
|
54
|
+
- `prompt`: if True, asks for confirmation interactively; set to `False` for CI or tests.
|
|
55
|
+
- `local_run`: run the jobs locally without Submitit (useful for debugging).
|
|
56
|
+
- `slurm_additional_parameters`: pass any extra Slurm key/value pairs to Submitit.
|
|
57
|
+
- `constraint` / `reservation`: cluster-specific options kept out of defaults — provide them explicitly if you need them (they take precedence over values in `slurm_additional_parameters`).
|
|
58
|
+
|
|
59
|
+
Example:
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
submit_jobs(
|
|
63
|
+
jobs_list,
|
|
64
|
+
my_entrypoint,
|
|
65
|
+
timeout_min=60,
|
|
66
|
+
num_gpus=2,
|
|
67
|
+
prompt=False,
|
|
68
|
+
constraint="gpu",
|
|
69
|
+
)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### maybe_attach_debugger(port: Optional[int]) 🐞
|
|
73
|
+
|
|
74
|
+
Attach `debugpy` to the job when `port` is provided (> 0). Safe no-op if `port` is `None` or `<= 0`.
|
|
75
|
+
|
|
76
|
+
- If `debugpy` (and `submitit`) are not available on the node, a `RuntimeError` is raised with an explanatory message.
|
|
77
|
+
|
|
78
|
+
Example:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
# attach remote debugger only when a port is provided (e.g., from CLI args)
|
|
82
|
+
maybe_attach_debugger(args.get("port"))
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
Tips:
|
|
88
|
+
- Use `prompt=False` when calling `submit_jobs` from scripts or CI to avoid interactive prompts.
|
|
89
|
+
- Tests demonstrate non-interactive behavior (`prompt=False`) and optional `constraint`/`reservation` handling.
|
resubmit-0.0.4/README.md
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# resubmit
|
|
2
|
+
|
|
3
|
+
Small utility library to simplify job submission with Submitit on SLURM clusters.
|
|
4
|
+
|
|
5
|
+
Quick usage:
|
|
6
|
+
|
|
7
|
+
- Install locally for development:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install -e ".[debug]"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
- Use in your project:
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from resubmit import submit_jobs, maybe_attach_debugger
|
|
17
|
+
|
|
18
|
+
# attach remote debugger if requested
|
|
19
|
+
maybe_attach_debugger(args.get("port", None))
|
|
20
|
+
|
|
21
|
+
# submit jobs (list of dicts)
|
|
22
|
+
submit_jobs(jobs_list, my_entrypoint, timeout_min=60, block=True)
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## API
|
|
26
|
+
|
|
27
|
+
### submit_jobs(...) 🔧
|
|
28
|
+
|
|
29
|
+
Submit multiple jobs to a Slurm cluster using Submitit.
|
|
30
|
+
|
|
31
|
+
Signature (short):
|
|
32
|
+
|
|
33
|
+
`submit_jobs(jobs_args: Iterable[dict], func: Callable[[List[dict]], Any], *, timeout_min: int, cpus_per_task: int = 16, mem_gb: int = 64, num_gpus: int = 1, account: Optional[str] = None, folder: str = "logs/%j", block: bool = False, prompt: bool = True, local_run: bool = False, slurm_additional_parameters: Optional[Dict] = None, constraint: Optional[str] = None, reservation: Optional[str] = None)`
|
|
34
|
+
|
|
35
|
+
- `jobs_args`: iterable of per-job kwargs (each item is passed to `func`).
|
|
36
|
+
- `func`: entrypoint called for each job (should accept a list or single job dict depending on your usage).
|
|
37
|
+
- `timeout_min`, `cpus_per_task`, `mem_gb`, `num_gpus`: common Slurm resources.
|
|
38
|
+
- `account`: optional Slurm account name.
|
|
39
|
+
- `folder`: logs folder for Submitit files (supports `%j` for job id).
|
|
40
|
+
- `block`: if True, waits for all jobs and returns results.
|
|
41
|
+
- `prompt`: if True, asks for confirmation interactively; set to `False` for CI or tests.
|
|
42
|
+
- `local_run`: run the jobs locally without Submitit (useful for debugging).
|
|
43
|
+
- `slurm_additional_parameters`: pass any extra Slurm key/value pairs to Submitit.
|
|
44
|
+
- `constraint` / `reservation`: cluster-specific options kept out of defaults — provide them explicitly if you need them (they take precedence over values in `slurm_additional_parameters`).
|
|
45
|
+
|
|
46
|
+
Example:
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
submit_jobs(
|
|
50
|
+
jobs_list,
|
|
51
|
+
my_entrypoint,
|
|
52
|
+
timeout_min=60,
|
|
53
|
+
num_gpus=2,
|
|
54
|
+
prompt=False,
|
|
55
|
+
constraint="gpu",
|
|
56
|
+
)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### maybe_attach_debugger(port: Optional[int]) 🐞
|
|
60
|
+
|
|
61
|
+
Attach `debugpy` to the job when `port` is provided (> 0). Safe no-op if `port` is `None` or `<= 0`.
|
|
62
|
+
|
|
63
|
+
- If `debugpy` (and `submitit`) are not available on the node, a `RuntimeError` is raised with an explanatory message.
|
|
64
|
+
|
|
65
|
+
Example:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
# attach remote debugger only when a port is provided (e.g., from CLI args)
|
|
69
|
+
maybe_attach_debugger(args.get("port"))
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
Tips:
|
|
75
|
+
- Use `prompt=False` when calling `submit_jobs` from scripts or CI to avoid interactive prompts.
|
|
76
|
+
- Tests demonstrate non-interactive behavior (`prompt=False`) and optional `constraint`/`reservation` handling.
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Any, Dict, List, Tuple, Union, Optional, Iterable
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from itertools import product
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _is_regex_spec(val: Any) -> bool:
|
|
9
|
+
"""Return True if val looks like a regex specifier.
|
|
10
|
+
|
|
11
|
+
Accepted forms:
|
|
12
|
+
- compiled `re.Pattern`
|
|
13
|
+
- tuple (`re.Pattern`, exclude: bool)
|
|
14
|
+
- dict with keys `pattern` (re.Pattern) and optional `exclude` (bool)
|
|
15
|
+
- string starting with 're:' (e.g. 're:^foo.*') meaning include matches
|
|
16
|
+
- string starting with '!re:' meaning exclude matches
|
|
17
|
+
"""
|
|
18
|
+
if hasattr(val, "search") and callable(val.search):
|
|
19
|
+
return True
|
|
20
|
+
if isinstance(val, tuple) and len(val) >= 1 and hasattr(val[0], "search"):
|
|
21
|
+
return True
|
|
22
|
+
if isinstance(val, dict) and "pattern" in val:
|
|
23
|
+
return True
|
|
24
|
+
if isinstance(val, str) and (val.startswith("re:") or val.startswith("!re:")):
|
|
25
|
+
return True
|
|
26
|
+
return False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _normalize_regex_spec(val: Any) -> Tuple[re.Pattern, bool]:
|
|
30
|
+
"""Return (compiled_pattern, exclude_flag) for a given regex spec.
|
|
31
|
+
|
|
32
|
+
Raises ValueError for unsupported types.
|
|
33
|
+
"""
|
|
34
|
+
if hasattr(val, "search") and callable(val.search):
|
|
35
|
+
return val, False
|
|
36
|
+
if isinstance(val, tuple) and len(val) >= 1:
|
|
37
|
+
pat = val[0]
|
|
38
|
+
exclude = bool(val[1]) if len(val) > 1 else False
|
|
39
|
+
return pat, exclude
|
|
40
|
+
if isinstance(val, dict):
|
|
41
|
+
pat = val["pattern"]
|
|
42
|
+
exclude = bool(val.get("exclude", False))
|
|
43
|
+
return pat, exclude
|
|
44
|
+
if isinstance(val, str):
|
|
45
|
+
if val.startswith("!re:"):
|
|
46
|
+
return re.compile(val[4:]), True
|
|
47
|
+
elif val.startswith("re:"):
|
|
48
|
+
return re.compile(val[3:]), False
|
|
49
|
+
raise ValueError(f"Unsupported regex spec: {val!r}")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def ensure_unique_combinations(
    df: pd.DataFrame, cols: Union[str, List[str]], raise_on_conflict: bool = True
) -> Tuple[bool, Optional[pd.DataFrame]]:
    """Check that combinations of columns `cols` are unique across `df`.

    Values are compared after being converted to strings, so e.g. the integer
    ``1`` and the string ``"1"`` count as the same value.

    Args:
        df: DataFrame to inspect.
        cols: A single column name or a list of column names forming the key.
        raise_on_conflict: When True, raise instead of returning duplicates.

    Returns:
        (is_unique, duplicates_df) where `duplicates_df` is None when unique,
        otherwise every row of `df` that takes part in a duplicated combination.

    Raises:
        ValueError: if duplicates are found and `raise_on_conflict` is True.
    """
    if isinstance(cols, str):
        cols = [cols]
    # Stringify to avoid dtype mismatch effects, then compare whole rows.
    # Comparing row-wise (rather than joining values with a delimiter) means
    # values that themselves contain the delimiter cannot collide.
    dup_mask = df[cols].astype(str).duplicated(keep=False)
    if not dup_mask.any():
        return True, None

    duplicates = df[dup_mask]
    if raise_on_conflict:
        raise ValueError(
            f"Found {len(duplicates)} rows with non-unique combinations for cols={cols}."
        )
    return False, duplicates
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _split_param_suffix(key: str) -> Tuple[str, Optional[str]]:
    """Split ``key`` into (base_name, role); role is 'regex', 'callable', 'unique' or None."""
    for role in ("regex", "callable", "unique"):
        # Try the double-underscore spelling first: such a key also ends with
        # the single-underscore spelling, which would leave a stray "_" behind.
        for suffix in ("__" + role, "_" + role):
            if key.endswith(suffix):
                return key[: -len(suffix)], role
    return key, None


def _as_value_list(value: Any) -> List[Any]:
    """Return the candidate values of a static parameter as a list."""
    # Strings/bytes are iterable but are meant as ONE value, not one job per
    # character.
    if isinstance(value, (str, bytes)):
        return [value]
    try:
        return list(value)
    except TypeError:
        # Non-iterable scalar (int, float, None, ...): a single fixed value.
        return [value]


def create_jobs_dataframe(params: Dict[str, Any]) -> pd.DataFrame:
    """Create a job DataFrame from a parameter map.

    Rules:
    - For parameters whose values are iterable (lists, tuples), we build the Cartesian
      product across all such parameters. Scalars and plain strings are treated as a
      single fixed value.
    - If a parameter value is callable (or its key ends in `__callable` / `_callable`),
      it is evaluated AFTER the initial DataFrame is created; the callable is called as
      `col_values = fn(df)` and the result is used as the column values (must be same
      length as `df`).
    - If a parameter value is a regex spec (see `_is_regex_spec`), or its key ends in
      `__regex` / `_regex`, it is applied LAST as a filter on the generated DataFrame.
      Regex specs can be used to include or exclude rows based on the stringified
      value of that column.
    - A key ending in `__unique` / `_unique` requests a uniqueness check on the base
      column; its value is passed as `raise_on_conflict` (truthy: raise, falsy: only
      log a warning).

    NOTE: the single-underscore suffixes are matched too, so a parameter literally
    named e.g. `use_regex` is interpreted as a regex filter for column `use`.

    Returns a filtered DataFrame with the applied callables and regex filters.

    Raises:
        ValueError: if a callable returns the wrong number of values, a regex spec
            is unsupported, or a raising uniqueness check fails.
        KeyError: if a regex filter or uniqueness check names a column that was
            never produced.
    """
    # Separate static values (used for product), callables, regex specs and
    # uniqueness requests.
    static_items: Dict[str, List[Any]] = {}
    callables: Dict[str, Any] = {}
    regex_specs: Dict[str, Any] = {}
    unique_items: Dict[str, Any] = {}

    for key, value in params.items():
        base, role = _split_param_suffix(key)
        if role == "regex":
            regex_specs[base] = value
        elif role == "callable":
            callables[base] = value
        elif role == "unique":
            unique_items[base] = value
        elif callable(value):
            callables[key] = value
        elif _is_regex_spec(value):
            # treat a regex spec provided under the same key as a filter for that column
            regex_specs[key] = value
        else:
            static_items[key] = _as_value_list(value)

    # If there are no static items, start from single-row DataFrame so callables
    # can still compute columns.
    if not static_items:
        df = pd.DataFrame([{}])
    else:
        df = pd.DataFrame(
            list(product(*static_items.values())), columns=list(static_items)
        )

    # Apply callables (they must accept the dataframe and return a list-like)
    for name, fn in callables.items():
        vals = fn(df)
        if len(vals) != len(df):
            raise ValueError(
                f"Callable for param {name!r} returned length {len(vals)} != {len(df)}"
            )
        df[name] = vals

    # Apply regex specs last as filters; a row survives only if it passes all of them.
    if regex_specs:
        mask = pd.Series([True] * len(df), index=df.index, dtype=bool)
        for name, spec in regex_specs.items():
            if name not in df.columns:
                raise KeyError(f"Regex filter refers to unknown column {name!r}")
            pat, exclude = _normalize_regex_spec(spec)
            matches = pd.Series(
                [bool(pat.search(s)) for s in df[name].astype(str)],
                index=df.index,
                dtype=bool,
            )
            mask &= ~matches if exclude else matches
        df = df[mask].reset_index(drop=True)

    # apply unique constraints
    for name, raise_on_conflict in unique_items.items():
        if name not in df.columns:
            raise KeyError(f"Uniqueness check refers to unknown column {name!r}")
        is_unique, duplicates = ensure_unique_combinations(
            df,
            name,
            raise_on_conflict=raise_on_conflict,
        )
        if not is_unique:
            logging.warning(f"Non-unique values found for column {name!r}:\n{duplicates}")

    return df
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def submit_jobs(
    jobs_args: Dict[str, Any],
    func: Any,
    *,
    timeout_min: int,
    cpus_per_task: int = 16,
    mem_gb: int = 64,
    num_gpus: int = 1,
    folder: str = "logs/%j",
    block: bool = False,
    prompt: bool = True,
    local_run: bool = False,
    slurm_additional_parameters: Optional[Dict] = None,
) -> Any:
    """
    Expand `jobs_args` into one record per job and submit them.

    A dataframe is created from cartesian product of parameter lists, with support for callables and regex filtering
    (see `create_jobs_dataframe`). Each resulting row becomes one dict of job parameters.
    1. use `__unique` postfix in keys to enforce uniqueness.
    2. use `__callable` postfix in keys to define callables for column values.
    3. use `__regex` postfix in keys to define regex filters for columns.

    Args:
        jobs_args: dict mapping parameter names to lists of values (plus the optional postfix keys above).
        func: Function to be submitted for each job.
        timeout_min: Job timeout in minutes.
        cpus_per_task: Number of CPUs per task.
        mem_gb: Memory in GB.
        num_gpus: Number of GPUs.
        folder: Folder for logs.
        block: Whether to block until jobs complete.
        prompt: Whether to prompt for confirmation before submission.
        local_run: If True, runs the function locally instead of submitting.
        slurm_additional_parameters: Additional Slurm parameters as a dict. If not provided, defaults to {"gpus": num_gpus}.
    Returns:
        The result of `submit_jobs` from `.__submit`.
    """

    jobs_df = create_jobs_dataframe(jobs_args)
    # One plain dict per DataFrame row: the per-job records handed to the submitter.
    records = jobs_df.to_dict(orient="records")
    # Imported inside the function, so the submitter module is only loaded when jobs are submitted.
    from .__submit import submit_jobs as _submit_jobs

    return _submit_jobs(
        records,
        func,
        timeout_min=timeout_min,
        cpus_per_task=cpus_per_task,
        mem_gb=mem_gb,
        num_gpus=num_gpus,
        folder=folder,
        block=block,
        prompt=prompt,
        local_run=local_run,
        slurm_additional_parameters=slurm_additional_parameters,
    )
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Core submission utilities wrapping submitit."""
|
|
2
|
+
|
|
2
3
|
from typing import Any, Callable, Iterable, List, Optional, Dict
|
|
3
4
|
|
|
4
5
|
|
|
@@ -7,20 +8,24 @@ def submit_jobs(
|
|
|
7
8
|
func: Callable[[List[dict]], Any],
|
|
8
9
|
*,
|
|
9
10
|
timeout_min: int,
|
|
10
|
-
cpus_per_task: int
|
|
11
|
-
mem_gb: int
|
|
12
|
-
num_gpus: int
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
local_run: bool = False,
|
|
11
|
+
cpus_per_task: int,
|
|
12
|
+
mem_gb: int,
|
|
13
|
+
num_gpus: int,
|
|
14
|
+
folder: str,
|
|
15
|
+
block: bool,
|
|
16
|
+
prompt: bool,
|
|
17
|
+
local_run: bool,
|
|
18
18
|
slurm_additional_parameters: Optional[Dict] = None,
|
|
19
19
|
):
|
|
20
20
|
"""Submit jobs described by `jobs_args` where each entry is a dict of kwargs for `func`.
|
|
21
21
|
|
|
22
22
|
- If `local_run` is True, the function is called directly: `func(jobs_args)`.
|
|
23
23
|
- Otherwise, submits via submitit.AutoExecutor and returns job objects or, if `block` is True, waits and returns results.
|
|
24
|
+
|
|
25
|
+
Optional Slurm settings `constraint` and `reservation` can be provided via explicit
|
|
26
|
+
parameters (they take precedence) or by passing `slurm_additional_parameters`.
|
|
27
|
+
If not provided, they are omitted so the code is not tied to cluster-specific
|
|
28
|
+
defaults.
|
|
24
29
|
"""
|
|
25
30
|
jobs_list = list(jobs_args) if not isinstance(jobs_args, list) else jobs_args
|
|
26
31
|
|
|
@@ -39,23 +44,17 @@ def submit_jobs(
|
|
|
39
44
|
return
|
|
40
45
|
|
|
41
46
|
import submitit
|
|
47
|
+
|
|
42
48
|
print("submitting jobs")
|
|
43
49
|
executor = submitit.AutoExecutor(folder=folder)
|
|
44
50
|
|
|
45
|
-
# default slurm params
|
|
51
|
+
# default slurm params (keep cluster-specific options out unless explicitly set)
|
|
46
52
|
if slurm_additional_parameters is None:
|
|
47
|
-
slurm_additional_parameters = {
|
|
48
|
-
"constraint": "thin",
|
|
49
|
-
"reservation": "safe",
|
|
50
|
-
"gpus": num_gpus,
|
|
51
|
-
}
|
|
53
|
+
slurm_additional_parameters = {"gpus": num_gpus}
|
|
52
54
|
else:
|
|
53
55
|
slurm_additional_parameters = dict(slurm_additional_parameters)
|
|
54
56
|
slurm_additional_parameters.setdefault("gpus", num_gpus)
|
|
55
57
|
|
|
56
|
-
if account is not None:
|
|
57
|
-
slurm_additional_parameters["account"] = account
|
|
58
|
-
|
|
59
58
|
print("Slurm additional parameters:", slurm_additional_parameters)
|
|
60
59
|
|
|
61
60
|
executor.update_parameters(
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: resubmit
|
|
3
|
+
Version: 0.0.4
|
|
4
|
+
Summary: Small wrapper around submitit to simplify cluster submissions
|
|
5
|
+
Author: Amir Mehrpanah
|
|
6
|
+
License: MIT
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: submitit>=0.8
|
|
10
|
+
Provides-Extra: debug
|
|
11
|
+
Requires-Dist: debugpy; extra == "debug"
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# resubmit
|
|
15
|
+
|
|
16
|
+
Small utility library to simplify job submission with Submitit on SLURM clusters.
|
|
17
|
+
|
|
18
|
+
Quick usage:
|
|
19
|
+
|
|
20
|
+
- Install locally for development:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install -e .[debug]
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
- Use in your project:
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from resubmit import submit_jobs, maybe_attach_debugger
|
|
30
|
+
|
|
31
|
+
# attach remote debugger if requested
|
|
32
|
+
maybe_attach_debugger(args.get("port", None))
|
|
33
|
+
|
|
34
|
+
# submit jobs (list of dicts)
|
|
35
|
+
submit_jobs(jobs_list, my_entrypoint, timeout_min=60, block=True)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## API
|
|
39
|
+
|
|
40
|
+
### submit_jobs(...) 🔧
|
|
41
|
+
|
|
42
|
+
Submit multiple jobs to a Slurm cluster using Submitit.
|
|
43
|
+
|
|
44
|
+
Signature (short):
|
|
45
|
+
|
|
46
|
+
`submit_jobs(jobs_args: Iterable[dict], func: Callable[[List[dict]], Any], *, timeout_min: int, cpus_per_task: int = 16, mem_gb: int = 64, num_gpus: int = 1, account: Optional[str] = None, folder: str = "logs/%j", block: bool = False, prompt: bool = True, local_run: bool = False, slurm_additional_parameters: Optional[Dict] = None, constraint: Optional[str] = None, reservation: Optional[str] = None)`
|
|
47
|
+
|
|
48
|
+
- `jobs_args`: iterable of per-job kwargs (each item is passed to `func`).
|
|
49
|
+
- `func`: entrypoint called for each job (should accept a list or single job dict depending on your usage).
|
|
50
|
+
- `timeout_min`, `cpus_per_task`, `mem_gb`, `num_gpus`: common Slurm resources.
|
|
51
|
+
- `account`: optional Slurm account name.
|
|
52
|
+
- `folder`: logs folder for Submitit files (supports `%j` for job id).
|
|
53
|
+
- `block`: if True, waits for all jobs and returns results.
|
|
54
|
+
- `prompt`: if True, asks for confirmation interactively; set to `False` for CI or tests.
|
|
55
|
+
- `local_run`: run the jobs locally without Submitit (useful for debugging).
|
|
56
|
+
- `slurm_additional_parameters`: pass any extra Slurm key/value pairs to Submitit.
|
|
57
|
+
- `constraint` / `reservation`: cluster-specific options kept out of defaults — provide them explicitly if you need them (they take precedence over values in `slurm_additional_parameters`).
|
|
58
|
+
|
|
59
|
+
Example:
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
submit_jobs(
|
|
63
|
+
jobs_list,
|
|
64
|
+
my_entrypoint,
|
|
65
|
+
timeout_min=60,
|
|
66
|
+
num_gpus=2,
|
|
67
|
+
prompt=False,
|
|
68
|
+
constraint="gpu",
|
|
69
|
+
)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### maybe_attach_debugger(port: Optional[int]) 🐞
|
|
73
|
+
|
|
74
|
+
Attach `debugpy` to the job when `port` is provided (> 0). Safe no-op if `port` is `None` or `<= 0`.
|
|
75
|
+
|
|
76
|
+
- If `debugpy` (and `submitit`) are not available on the node, a `RuntimeError` is raised with an explanatory message.
|
|
77
|
+
|
|
78
|
+
Example:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
# attach remote debugger only when a port is provided (e.g., from CLI args)
|
|
82
|
+
maybe_attach_debugger(args.get("port"))
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
Tips:
|
|
88
|
+
- Use `prompt=False` when calling `submit_jobs` from scripts or CI to avoid interactive prompts.
|
|
89
|
+
- Tests demonstrate non-interactive behavior (`prompt=False`) and optional `constraint`/`reservation` handling.
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
LICENSE
|
|
2
2
|
README.md
|
|
3
3
|
pyproject.toml
|
|
4
|
+
src/resubmit/__bookkeeping.py
|
|
5
|
+
src/resubmit/__debug.py
|
|
4
6
|
src/resubmit/__init__.py
|
|
5
|
-
src/resubmit/
|
|
6
|
-
src/resubmit/slurm.py
|
|
7
|
-
src/resubmit/submit.py
|
|
7
|
+
src/resubmit/__submit.py
|
|
8
8
|
src/resubmit.egg-info/PKG-INFO
|
|
9
9
|
src/resubmit.egg-info/SOURCES.txt
|
|
10
10
|
src/resubmit.egg-info/dependency_links.txt
|
|
11
11
|
src/resubmit.egg-info/requires.txt
|
|
12
12
|
src/resubmit.egg-info/top_level.txt
|
|
13
|
+
tests/test_bookkeeping.py
|
|
13
14
|
tests/test_resubmit.py
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from src.resubmit.__bookkeeping import create_jobs_dataframe, ensure_unique_combinations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_create_jobs_basic():
|
|
7
|
+
params = {"a": [1, 2], "b": [10]}
|
|
8
|
+
df = create_jobs_dataframe(params)
|
|
9
|
+
assert len(df) == 2
|
|
10
|
+
assert set(df.columns) == {"a", "b"}
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_create_jobs_callable():
|
|
14
|
+
params = {"a": [1, 2], "b": lambda df: df["a"] * 10}
|
|
15
|
+
df = create_jobs_dataframe(params)
|
|
16
|
+
assert list(df["b"]) == [10, 20]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_create_jobs_regex_include():
|
|
20
|
+
params = {"name": ["apple", "banana", "apricot"], "name__regex": re.compile(r"^a")}
|
|
21
|
+
df = create_jobs_dataframe(params)
|
|
22
|
+
assert set(df["name"]) == {"apple", "apricot"}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_create_jobs_regex_exclude():
|
|
26
|
+
params = {"name": ["apple", "banana", "apricot"], "name_regex": "!re:^a"}
|
|
27
|
+
df = create_jobs_dataframe(params)
|
|
28
|
+
assert set(df["name"]) == {"banana"}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_ensure_unique_combinations_raises():
|
|
32
|
+
df = pd.DataFrame({"a": [1, 1, 2], "b": [3, 3, 4]})
|
|
33
|
+
try:
|
|
34
|
+
ensure_unique_combinations(df, ["a", "b"], raise_on_conflict=True)
|
|
35
|
+
raised = False
|
|
36
|
+
except ValueError:
|
|
37
|
+
raised = True
|
|
38
|
+
assert raised
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_ensure_unique_combinations_ok():
|
|
42
|
+
df = pd.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]})
|
|
43
|
+
ok, dup = ensure_unique_combinations(df, ["a", "b"], raise_on_conflict=False)
|
|
44
|
+
assert ok
|
|
45
|
+
assert dup is None
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from resubmit import maybe_attach_debugger
|
|
3
|
+
from resubmit.__submit import submit_jobs
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def dummy_func(jobs):
|
|
7
|
+
# return a list of strings to show behavior
|
|
8
|
+
return [f"ok-{j['id']}" for j in jobs]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_submit_local_run():
|
|
12
|
+
jobs = [{"id": 1}, {"id": 2}]
|
|
13
|
+
res = submit_jobs(jobs, dummy_func, timeout_min=1, local_run=True)
|
|
14
|
+
assert res == ["ok-1", "ok-2"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_maybe_attach_debugger_noop():
|
|
18
|
+
# should not raise when port is None or 0
|
|
19
|
+
maybe_attach_debugger(None)
|
|
20
|
+
maybe_attach_debugger(0)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_slurm_parameters_optional(monkeypatch):
|
|
24
|
+
events = {}
|
|
25
|
+
|
|
26
|
+
class DummyExecutor:
|
|
27
|
+
def __init__(self, folder):
|
|
28
|
+
events['folder'] = folder
|
|
29
|
+
|
|
30
|
+
def update_parameters(self, **kwargs):
|
|
31
|
+
# capture the parameters passed to the executor
|
|
32
|
+
events['update'] = kwargs
|
|
33
|
+
|
|
34
|
+
def map_array(self, func, jobs_list):
|
|
35
|
+
return []
|
|
36
|
+
|
|
37
|
+
class DummyModule:
|
|
38
|
+
AutoExecutor = DummyExecutor
|
|
39
|
+
|
|
40
|
+
import sys
|
|
41
|
+
monkeypatch.setitem(sys.modules, 'submitit', DummyModule)
|
|
42
|
+
|
|
43
|
+
jobs = [{"id": 1}]
|
|
44
|
+
# default: no constraint/reservation keys
|
|
45
|
+
submit_jobs(jobs, dummy_func, timeout_min=1, local_run=False, num_gpus=2, prompt=False)
|
|
46
|
+
slurm = events['update']['slurm_additional_parameters']
|
|
47
|
+
assert slurm['gpus'] == 2
|
|
48
|
+
assert 'constraint' not in slurm
|
|
49
|
+
assert 'reservation' not in slurm
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_slurm_parameters_settable(monkeypatch):
|
|
53
|
+
events = {}
|
|
54
|
+
|
|
55
|
+
class DummyExecutor:
|
|
56
|
+
def __init__(self, folder):
|
|
57
|
+
events['folder'] = folder
|
|
58
|
+
|
|
59
|
+
def update_parameters(self, **kwargs):
|
|
60
|
+
events['update'] = kwargs
|
|
61
|
+
|
|
62
|
+
def map_array(self, func, jobs_list):
|
|
63
|
+
return []
|
|
64
|
+
|
|
65
|
+
class DummyModule:
|
|
66
|
+
AutoExecutor = DummyExecutor
|
|
67
|
+
|
|
68
|
+
import sys
|
|
69
|
+
monkeypatch.setitem(sys.modules, 'submitit', DummyModule)
|
|
70
|
+
|
|
71
|
+
jobs = [{"id": 1}]
|
|
72
|
+
submit_jobs(
|
|
73
|
+
jobs,
|
|
74
|
+
dummy_func,
|
|
75
|
+
timeout_min=1,
|
|
76
|
+
local_run=False,
|
|
77
|
+
constraint='thin',
|
|
78
|
+
reservation='safe',
|
|
79
|
+
prompt=False,
|
|
80
|
+
)
|
|
81
|
+
slurm = events['update']['slurm_additional_parameters']
|
|
82
|
+
assert slurm['constraint'] == 'thin'
|
|
83
|
+
assert slurm['reservation'] == 'safe'
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_slurm_parameters_arg_precedence(monkeypatch):
|
|
87
|
+
events = {}
|
|
88
|
+
|
|
89
|
+
class DummyExecutor:
|
|
90
|
+
def __init__(self, folder):
|
|
91
|
+
events['folder'] = folder
|
|
92
|
+
|
|
93
|
+
def update_parameters(self, **kwargs):
|
|
94
|
+
events['update'] = kwargs
|
|
95
|
+
|
|
96
|
+
def map_array(self, func, jobs_list):
|
|
97
|
+
return []
|
|
98
|
+
|
|
99
|
+
class DummyModule:
|
|
100
|
+
AutoExecutor = DummyExecutor
|
|
101
|
+
|
|
102
|
+
import sys
|
|
103
|
+
monkeypatch.setitem(sys.modules, 'submitit', DummyModule)
|
|
104
|
+
|
|
105
|
+
jobs = [{"id": 1}]
|
|
106
|
+
# slurm_additional_parameters has constraint='foo' but explicit arg should override
|
|
107
|
+
submit_jobs(
|
|
108
|
+
jobs,
|
|
109
|
+
dummy_func,
|
|
110
|
+
timeout_min=1,
|
|
111
|
+
local_run=False,
|
|
112
|
+
slurm_additional_parameters={'constraint': 'foo'},
|
|
113
|
+
constraint='bar',
|
|
114
|
+
prompt=False,
|
|
115
|
+
)
|
|
116
|
+
slurm = events['update']['slurm_additional_parameters']
|
|
117
|
+
assert slurm['constraint'] == 'bar'
|
resubmit-0.0.2/PKG-INFO
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: resubmit
|
|
3
|
-
Version: 0.0.2
|
|
4
|
-
Summary: Small wrapper around submitit to simplify cluster submissions
|
|
5
|
-
Author: Amir Mehrpanah
|
|
6
|
-
License: MIT
|
|
7
|
-
Description-Content-Type: text/markdown
|
|
8
|
-
License-File: LICENSE
|
|
9
|
-
Requires-Dist: submitit>=0.8
|
|
10
|
-
Provides-Extra: debug
|
|
11
|
-
Requires-Dist: debugpy; extra == "debug"
|
|
12
|
-
Dynamic: license-file
|
|
13
|
-
|
|
14
|
-
# resubmit
|
|
15
|
-
|
|
16
|
-
Small utility library to simplify job submission with Submitit on SLURM clusters.
|
|
17
|
-
|
|
18
|
-
Quick usage:
|
|
19
|
-
|
|
20
|
-
- Install locally for development:
|
|
21
|
-
|
|
22
|
-
```bash
|
|
23
|
-
pip install -e .[debug]
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
- Use in your project:
|
|
27
|
-
|
|
28
|
-
```python
|
|
29
|
-
from resubmit import submit_jobs, maybe_attach_debugger
|
|
30
|
-
|
|
31
|
-
# attach remote debugger if requested
|
|
32
|
-
maybe_attach_debugger(args.get("port", None))
|
|
33
|
-
|
|
34
|
-
# submit jobs (list of dicts)
|
|
35
|
-
submit_jobs(jobs_list, my_entrypoint, timeout_min=60, block=True)
|
|
36
|
-
```
|
resubmit-0.0.2/README.md
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
# resubmit
|
|
2
|
-
|
|
3
|
-
Small utility library to simplify job submission with Submitit on SLURM clusters.
|
|
4
|
-
|
|
5
|
-
Quick usage:
|
|
6
|
-
|
|
7
|
-
- Install locally for development:
|
|
8
|
-
|
|
9
|
-
```bash
|
|
10
|
-
pip install -e .[debug]
|
|
11
|
-
```
|
|
12
|
-
|
|
13
|
-
- Use in your project:
|
|
14
|
-
|
|
15
|
-
```python
|
|
16
|
-
from resubmit import submit_jobs, maybe_attach_debugger
|
|
17
|
-
|
|
18
|
-
# attach remote debugger if requested
|
|
19
|
-
maybe_attach_debugger(args.get("port", None))
|
|
20
|
-
|
|
21
|
-
# submit jobs (list of dicts)
|
|
22
|
-
submit_jobs(jobs_list, my_entrypoint, timeout_min=60, block=True)
|
|
23
|
-
```
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
"""resubmit: small helpers around submitit for reproducible cluster submissions."""
|
|
2
|
-
|
|
3
|
-
from .submit import submit_jobs
|
|
4
|
-
from .debug import maybe_attach_debugger
|
|
5
|
-
from .slurm import make_default_slurm_params
|
|
6
|
-
|
|
7
|
-
__all__ = ["submit_jobs", "maybe_attach_debugger", "make_default_slurm_params"]
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
"""Small helpers for SLURM parameter construction."""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
from typing import Optional
|
|
5
|
-
|
|
6
|
-
def make_default_slurm_params(
    gpus: int = 1,
    constraint: Optional[str] = "thin",
    reservation: Optional[str] = "safe",
    account: Optional[str] = None,
) -> dict:
    """Build a dict of SLURM parameters, leaving out options that are unset.

    Args:
        gpus: Value stored under ``"gpus"``; always included.
        constraint: Value stored under ``"constraint"`` (default ``"thin"``).
            Omitted when ``None``.
        reservation: Value stored under ``"reservation"`` (default ``"safe"``).
            Omitted when ``None``.
        account: Value stored under ``"account"``. Omitted when ``None``.

    Returns:
        A new dict whose keys appear in the order ``constraint``,
        ``reservation``, ``gpus``, ``account`` (minus any omitted ones).
    """
    params = {}
    # Treat every optional string the same way ``account`` is treated: a
    # ``None`` means "do not request this", so no literal ``None`` value ends
    # up in the parameters handed to the scheduler.
    if constraint is not None:
        params["constraint"] = constraint
    if reservation is not None:
        params["reservation"] = reservation
    params["gpus"] = gpus
    if account is not None:
        params["account"] = account
    return params
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: resubmit
|
|
3
|
-
Version: 0.0.2
|
|
4
|
-
Summary: Small wrapper around submitit to simplify cluster submissions
|
|
5
|
-
Author: Amir Mehrpanah
|
|
6
|
-
License: MIT
|
|
7
|
-
Description-Content-Type: text/markdown
|
|
8
|
-
License-File: LICENSE
|
|
9
|
-
Requires-Dist: submitit>=0.8
|
|
10
|
-
Provides-Extra: debug
|
|
11
|
-
Requires-Dist: debugpy; extra == "debug"
|
|
12
|
-
Dynamic: license-file
|
|
13
|
-
|
|
14
|
-
# resubmit
|
|
15
|
-
|
|
16
|
-
Small utility library to simplify job submission with Submitit on SLURM clusters.
|
|
17
|
-
|
|
18
|
-
Quick usage:
|
|
19
|
-
|
|
20
|
-
- Install locally for development:
|
|
21
|
-
|
|
22
|
-
```bash
|
|
23
|
-
pip install -e .[debug]
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
- Use in your project:
|
|
27
|
-
|
|
28
|
-
```python
|
|
29
|
-
from resubmit import submit_jobs, maybe_attach_debugger
|
|
30
|
-
|
|
31
|
-
# attach remote debugger if requested
|
|
32
|
-
maybe_attach_debugger(args.get("port", None))
|
|
33
|
-
|
|
34
|
-
# submit jobs (list of dicts)
|
|
35
|
-
submit_jobs(jobs_list, my_entrypoint, timeout_min=60, block=True)
|
|
36
|
-
```
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
from resubmit import submit_jobs, maybe_attach_debugger
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def dummy_func(jobs):
    """Return one ``"ok-<id>"`` string per job dict, in input order."""
    labels = []
    for job in jobs:
        # Each job is expected to carry an "id" key.
        labels.append(f"ok-{job['id']}")
    return labels
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def test_submit_local_run():
    """With ``local_run=True`` the entry point's result list is returned."""
    job_specs = [{"id": 1}, {"id": 2}]
    expected = ["ok-1", "ok-2"]
    assert submit_jobs(job_specs, dummy_func, timeout_min=1, local_run=True) == expected
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def test_maybe_attach_debugger_noop():
    """Calling with a port of ``None`` or ``0`` must not raise."""
    for port in (None, 0):
        maybe_attach_debugger(port)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|