furu: furu-0.0.2-py3-none-any.whl → furu-0.0.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- furu/__init__.py +11 -1
- furu/adapters/submitit.py +23 -2
- furu/config.py +21 -3
- furu/core/__init__.py +2 -2
- furu/core/furu.py +708 -188
- furu/core/list.py +1 -1
- furu/dashboard/__init__.py +10 -1
- furu/dashboard/frontend/dist/assets/{index-CbdDfSOZ.css → index-BXAIKNNr.css} +1 -1
- furu/dashboard/frontend/dist/assets/{index-DDv_TYB_.js → index-DS3FsqcY.js} +3 -3
- furu/dashboard/frontend/dist/index.html +2 -2
- furu/dashboard/main.py +10 -3
- furu/errors.py +60 -5
- furu/execution/__init__.py +22 -0
- furu/execution/context.py +30 -0
- furu/execution/local.py +184 -0
- furu/execution/paths.py +20 -0
- furu/execution/plan.py +238 -0
- furu/execution/plan_utils.py +13 -0
- furu/execution/slurm_dag.py +271 -0
- furu/execution/slurm_pool.py +878 -0
- furu/execution/slurm_spec.py +38 -0
- furu/execution/submitit_factory.py +47 -0
- furu/migration.py +8 -4
- furu/runtime/logging.py +10 -10
- furu/serialization/serializer.py +40 -2
- furu/storage/metadata.py +17 -5
- furu/storage/state.py +78 -12
- {furu-0.0.2.dist-info → furu-0.0.4.dist-info}/METADATA +83 -33
- furu-0.0.4.dist-info/RECORD +46 -0
- furu-0.0.2.dist-info/RECORD +0 -36
- {furu-0.0.2.dist-info → furu-0.0.4.dist-info}/WHEEL +0 -0
- {furu-0.0.2.dist-info → furu-0.0.4.dist-info}/entry_points.txt +0 -0
furu/__init__.py
CHANGED
|
@@ -13,12 +13,16 @@ __version__ = version("furu")
|
|
|
13
13
|
|
|
14
14
|
from .config import FURU_CONFIG, FuruConfig, get_furu_root, set_furu_root
|
|
15
15
|
from .adapters import SubmititAdapter
|
|
16
|
-
from .core import Furu, FuruList
|
|
16
|
+
from .core import DependencyChzSpec, DependencySpec, Furu, FuruList
|
|
17
17
|
from .errors import (
|
|
18
18
|
FuruComputeError,
|
|
19
19
|
FuruError,
|
|
20
|
+
FuruExecutionError,
|
|
20
21
|
FuruLockNotAcquired,
|
|
22
|
+
FuruMissingArtifact,
|
|
21
23
|
FuruMigrationRequired,
|
|
24
|
+
FuruSpecMismatch,
|
|
25
|
+
FuruValidationError,
|
|
22
26
|
FuruWaitTimeout,
|
|
23
27
|
MISSING,
|
|
24
28
|
)
|
|
@@ -51,11 +55,17 @@ __all__ = [
|
|
|
51
55
|
"FuruComputeError",
|
|
52
56
|
"FuruConfig",
|
|
53
57
|
"FuruError",
|
|
58
|
+
"FuruExecutionError",
|
|
54
59
|
"FuruList",
|
|
55
60
|
"FuruLockNotAcquired",
|
|
61
|
+
"FuruMissingArtifact",
|
|
56
62
|
"FuruMigrationRequired",
|
|
63
|
+
"FuruSpecMismatch",
|
|
64
|
+
"FuruValidationError",
|
|
57
65
|
"FuruSerializer",
|
|
58
66
|
"FuruWaitTimeout",
|
|
67
|
+
"DependencyChzSpec",
|
|
68
|
+
"DependencySpec",
|
|
59
69
|
"MISSING",
|
|
60
70
|
"migrate",
|
|
61
71
|
"NamespacePair",
|
furu/adapters/submitit.py
CHANGED
|
@@ -6,6 +6,7 @@ from typing import Any, Callable, Protocol
|
|
|
6
6
|
|
|
7
7
|
from ..config import FURU_CONFIG
|
|
8
8
|
from ..storage import StateManager
|
|
9
|
+
from ..runtime.logging import get_logger
|
|
9
10
|
from ..storage.state import _FuruState, ProbeResult
|
|
10
11
|
|
|
11
12
|
|
|
@@ -102,19 +103,39 @@ class SubmititAdapter:
|
|
|
102
103
|
"""Watch for job ID in background thread and update state."""
|
|
103
104
|
|
|
104
105
|
def watcher():
|
|
106
|
+
_ = attempt_id # intentionally unused; queued->running attempt swap is expected
|
|
105
107
|
while True:
|
|
106
108
|
job_id = self.get_job_id(job)
|
|
107
109
|
if job_id:
|
|
108
110
|
|
|
109
111
|
def mutate(state: _FuruState) -> None:
|
|
110
112
|
attempt = state.attempt
|
|
111
|
-
if attempt is None
|
|
113
|
+
if attempt is None:
|
|
114
|
+
return
|
|
115
|
+
if attempt.backend != "submitit":
|
|
116
|
+
return
|
|
117
|
+
if (
|
|
118
|
+
attempt.status not in {"queued", "running"}
|
|
119
|
+
and attempt.status not in StateManager.TERMINAL_STATUSES
|
|
120
|
+
):
|
|
121
|
+
return
|
|
122
|
+
existing = attempt.scheduler.get("job_id")
|
|
123
|
+
if existing == job_id:
|
|
112
124
|
return
|
|
113
125
|
attempt.scheduler["job_id"] = job_id
|
|
114
126
|
|
|
115
127
|
StateManager.update_state(directory, mutate)
|
|
116
128
|
if callback:
|
|
117
|
-
|
|
129
|
+
try:
|
|
130
|
+
callback(job_id)
|
|
131
|
+
except Exception:
|
|
132
|
+
# Avoid killing the watcher thread; state update already happened.
|
|
133
|
+
logger = get_logger()
|
|
134
|
+
logger.exception(
|
|
135
|
+
"submitit watcher: job_id callback failed for %s: %s",
|
|
136
|
+
directory,
|
|
137
|
+
job_id,
|
|
138
|
+
)
|
|
118
139
|
break
|
|
119
140
|
|
|
120
141
|
if self.is_done(job):
|
furu/config.py
CHANGED
|
@@ -18,16 +18,29 @@ class FuruConfig:
|
|
|
18
18
|
return (project_root / self.DEFAULT_ROOT_DIR).resolve()
|
|
19
19
|
|
|
20
20
|
self.base_root = _get_base_root()
|
|
21
|
+
self.submitit_root = (
|
|
22
|
+
Path(os.getenv("FURU_SUBMITIT_PATH", str(self.base_root / "submitit")))
|
|
23
|
+
.expanduser()
|
|
24
|
+
.resolve()
|
|
25
|
+
)
|
|
21
26
|
self.version_controlled_root_override = self._get_version_controlled_override()
|
|
22
27
|
self.poll_interval = float(os.getenv("FURU_POLL_INTERVAL_SECS", "10"))
|
|
23
28
|
self.wait_log_every_sec = float(os.getenv("FURU_WAIT_LOG_EVERY_SECS", "10"))
|
|
24
29
|
self.stale_timeout = float(os.getenv("FURU_STALE_AFTER_SECS", str(30 * 60)))
|
|
30
|
+
max_wait_env = os.getenv("FURU_MAX_WAIT_SECS")
|
|
31
|
+
self.max_wait_time_sec = float(max_wait_env) if max_wait_env else None
|
|
25
32
|
self.lease_duration_sec = float(os.getenv("FURU_LEASE_SECS", "120"))
|
|
26
33
|
hb = os.getenv("FURU_HEARTBEAT_SECS")
|
|
27
34
|
self.heartbeat_interval_sec = (
|
|
28
35
|
float(hb) if hb is not None else max(1.0, self.lease_duration_sec / 3.0)
|
|
29
36
|
)
|
|
30
37
|
self.max_requeues = int(os.getenv("FURU_PREEMPT_MAX", "5"))
|
|
38
|
+
self.max_compute_retries = int(os.getenv("FURU_MAX_COMPUTE_RETRIES", "3"))
|
|
39
|
+
self.retry_failed = os.getenv("FURU_RETRY_FAILED", "1").lower() in {
|
|
40
|
+
"1",
|
|
41
|
+
"true",
|
|
42
|
+
"yes",
|
|
43
|
+
}
|
|
31
44
|
self.ignore_git_diff = os.getenv("FURU_IGNORE_DIFF", "0").lower() in {
|
|
32
45
|
"1",
|
|
33
46
|
"true",
|
|
@@ -102,6 +115,9 @@ class FuruConfig:
|
|
|
102
115
|
return self._resolve_version_controlled_root()
|
|
103
116
|
return self.base_root / "data"
|
|
104
117
|
|
|
118
|
+
def get_submitit_root(self) -> Path:
|
|
119
|
+
return self.submitit_root
|
|
120
|
+
|
|
105
121
|
@classmethod
|
|
106
122
|
def _get_version_controlled_override(cls) -> Path | None:
|
|
107
123
|
env = os.getenv("FURU_VERSION_CONTROLLED_PATH")
|
|
@@ -151,8 +167,7 @@ class FuruConfig:
|
|
|
151
167
|
value = getattr(target, attr, missing_sentinel)
|
|
152
168
|
if value is missing_sentinel:
|
|
153
169
|
raise ValueError(
|
|
154
|
-
"FURU_ALWAYS_RERUN entry does not exist: "
|
|
155
|
-
f"{namespace!r}"
|
|
170
|
+
f"FURU_ALWAYS_RERUN entry does not exist: {namespace!r}"
|
|
156
171
|
)
|
|
157
172
|
target = value
|
|
158
173
|
|
|
@@ -169,4 +184,7 @@ def get_furu_root(*, version_controlled: bool = False) -> Path:
|
|
|
169
184
|
|
|
170
185
|
|
|
171
186
|
def set_furu_root(path: Path) -> None:
|
|
172
|
-
|
|
187
|
+
root = path.resolve()
|
|
188
|
+
FURU_CONFIG.base_root = root
|
|
189
|
+
if os.getenv("FURU_SUBMITIT_PATH") is None:
|
|
190
|
+
FURU_CONFIG.submitit_root = (root / "submitit").resolve()
|
furu/core/__init__.py
CHANGED