laco-submitit 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""SLURM/submitit integration for laco.
|
|
2
|
+
|
|
3
|
+
Submits laco tasks to SLURM: ``submit`` launches a single job, and ``sweep`` launches an array job with one job per override list.
|
|
4
|
+
|
|
5
|
+
Examples
|
|
6
|
+
--------
|
|
7
|
+
::
|
|
8
|
+
|
|
9
|
+
import laco
|
|
10
|
+
import laco.integrations.submitit as laco_submitit
|
|
11
|
+
|
|
12
|
+
cfg = laco.load("configs/train.py")
|
|
13
|
+
|
|
14
|
+
executor = laco_submitit.executor(
|
|
15
|
+
folder="logs/slurm/%j",
|
|
16
|
+
slurm_partition="gpu",
|
|
17
|
+
slurm_gres="gpu:1",
|
|
18
|
+
timeout_min=120,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Submit a sweep over lr and epochs:
|
|
22
|
+
jobs = laco_submitit.sweep(
|
|
23
|
+
executor,
|
|
24
|
+
cfg,
|
|
25
|
+
task_fn=train,
|
|
26
|
+
overrides=[
|
|
27
|
+
["lr=1e-3", "epochs=10"],
|
|
28
|
+
["lr=1e-4", "epochs=20"],
|
|
29
|
+
],
|
|
30
|
+
)
|
|
31
|
+
results = [j.result() for j in jobs]
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from laco.integrations.submitit._core import executor as executor
|
|
35
|
+
from laco.integrations.submitit._core import submit as submit
|
|
36
|
+
from laco.integrations.submitit._core import sweep as sweep
|
|
37
|
+
|
|
38
|
+
__all__ = ["executor", "submit", "sweep"]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Implementation for laco-submitit."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import typing
|
|
6
|
+
|
|
7
|
+
if typing.TYPE_CHECKING:
|
|
8
|
+
import submitit
|
|
9
|
+
from omegaconf import DictConfig
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def executor(
    folder: str = "logs/slurm/%j",
    **slurm_kwargs: typing.Any,
) -> submitit.AutoExecutor:
    """Build a submitit ``AutoExecutor`` and apply optional parameters.

    Parameters
    ----------
    folder : str
        Log directory pattern handed to ``submitit.AutoExecutor``.
        ``%j`` is replaced by the job ID.
    **slurm_kwargs
        Keyword arguments passed unchanged to
        ``AutoExecutor.update_parameters()`` (e.g. ``slurm_partition``,
        ``slurm_gres``, ``timeout_min``, ``mem_gb``). When none are given,
        ``update_parameters()`` is not called at all.

    Returns
    -------
    submitit.AutoExecutor
        The executor, ready to be passed to :func:`submit` or :func:`sweep`.
    """
    # Local import: submitit is only needed once an executor is requested.
    import submitit as _submitit

    auto_executor = _submitit.AutoExecutor(folder=folder)
    if not slurm_kwargs:
        # Nothing to configure; hand back the executor with its defaults.
        return auto_executor

    auto_executor.update_parameters(**slurm_kwargs)
    return auto_executor
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def submit(
    ex: submitit.AutoExecutor,
    cfg: DictConfig,
    task_fn: typing.Callable[[DictConfig], typing.Any],
    *,
    overrides: list[str] | None = None,
) -> submitit.Job[typing.Any]:
    """Submit a single laco task as a SLURM job.

    When *overrides* is non-empty, the overrides are applied to a deep copy
    of *cfg*, matching the defensive copy made by :func:`sweep`, so the
    caller's base config can be reused for further submissions. When
    *overrides* is ``None`` or empty, *cfg* itself is submitted unchanged.

    Parameters
    ----------
    ex : submitit.AutoExecutor
        Configured executor from :func:`executor`.
    cfg : DictConfig
        Base laco DictConfig.
    task_fn : Callable
        Function that accepts the resolved DictConfig.
    overrides : list[str] | None
        Hydra-style override strings applied before submission.

    Returns
    -------
    submitit.Job
        Job handle returned by ``ex.submit``.
    """
    if not overrides:
        # None or an empty list: nothing to apply, submit the base config.
        return ex.submit(task_fn, cfg)

    # Imported only on this path so the override machinery is loaded
    # exclusively when overrides are actually requested.
    from copy import deepcopy

    from laco._overrides import apply_overrides

    # NOTE(review): apply_overrides presumably may modify its argument in
    # place (sweep() copies before calling it) - confirm. Copying first
    # keeps the caller's *cfg* untouched either way.
    resolved = apply_overrides(deepcopy(cfg), overrides)
    return ex.submit(task_fn, resolved)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def sweep(
    ex: submitit.AutoExecutor,
    cfg: DictConfig,
    task_fn: typing.Callable[[DictConfig], typing.Any],
    overrides: list[list[str]],
) -> list[submitit.Job[typing.Any]]:
    """Launch one job per override list through ``ex.map_array``.

    For every entry of *overrides*, a deep copy of *cfg* is made and the
    entry's overrides are applied to that copy. The resulting configs are
    then submitted together in a single ``map_array`` call.

    Parameters
    ----------
    ex : submitit.AutoExecutor
        Configured executor from :func:`executor`.
    cfg : DictConfig
        Base config; each sweep point works on its own deep copy.
    task_fn : Callable
        Function called with one resolved DictConfig per job.
    overrides : list[list[str]]
        One list of override strings per sweep point,
        e.g. ``[["lr=1e-3"], ["lr=1e-4"]]``.

    Returns
    -------
    list[submitit.Job]
        Job handles in the same order as *overrides*.
    """
    import copy

    from laco._overrides import apply_overrides

    per_job_cfgs = []
    for point in overrides:
        # Fresh copy per sweep point so the points never share a config.
        base_copy = copy.deepcopy(cfg)
        per_job_cfgs.append(apply_overrides(base_copy, point))

    submitted = ex.map_array(task_fn, per_job_cfgs)
    return list(submitted)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: laco-submitit
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: SLURM/submitit cluster job submission for laco.
|
|
5
|
+
Author-email: Kurt Stolle <kurt@khws.io>
|
|
6
|
+
Requires-Python: >=3.13
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: laco>=1.0.0
|
|
9
|
+
Requires-Dist: submitit>=1.5
|
|
10
|
+
|
|
11
|
+
# Laco-Submitit
|
|
12
|
+
|
|
13
|
+
SLURM/submitit cluster job submission for laco.
|
|
14
|
+
|
|
15
|
+
Part of the [laco](https://github.com/khwstolle/laco) project — see the [root README](../../README.md) for an overview.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install laco-submitit
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Features
|
|
24
|
+
|
|
25
|
+
`executor()`, `submit()`, `sweep()`
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
See [`docs/index.md`](docs/index.md) for the full guide.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
laco/integrations/submitit/__init__.py,sha256=gv9gz980IEhsKPK5ph-ZIzpSsEGoVcDCf54rZOeDFIU,932
|
|
2
|
+
laco/integrations/submitit/_core.py,sha256=Zl1mNsKBqpDpWGNbN-MjWf3aVrdQKCBDMf4HziTcFa4,2779
|
|
3
|
+
laco_submitit-1.0.0.dist-info/METADATA,sha256=9v-AIXgQ715D2WM720T32a4yc1MSuDL0j1WQ3O_uoFI,638
|
|
4
|
+
laco_submitit-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
5
|
+
laco_submitit-1.0.0.dist-info/top_level.txt,sha256=G2kLu09Aje44OkSqu-Tae3mjmTYhyRc2VrTyh3OmxFw,5
|
|
6
|
+
laco_submitit-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
laco
|