furu 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- furu/__init__.py +8 -0
- furu/adapters/submitit.py +23 -2
- furu/config.py +40 -41
- furu/core/furu.py +479 -252
- furu/core/list.py +4 -3
- furu/dashboard/__init__.py +10 -1
- furu/dashboard/frontend/dist/assets/{index-DS3FsqcY.js → index-BjyrY-Zz.js} +1 -1
- furu/dashboard/frontend/dist/index.html +1 -1
- furu/dashboard/main.py +10 -3
- furu/errors.py +17 -4
- furu/execution/__init__.py +22 -0
- furu/execution/context.py +30 -0
- furu/execution/local.py +186 -0
- furu/execution/paths.py +20 -0
- furu/execution/plan.py +330 -0
- furu/execution/plan_utils.py +13 -0
- furu/execution/slurm_dag.py +273 -0
- furu/execution/slurm_pool.py +878 -0
- furu/execution/slurm_spec.py +38 -0
- furu/execution/submitit_factory.py +47 -0
- furu/migration.py +1 -2
- furu/runtime/env.py +1 -1
- furu/runtime/logging.py +40 -14
- furu/storage/metadata.py +25 -29
- furu/storage/migration.py +0 -1
- furu/storage/state.py +120 -98
- {furu-0.0.3.dist-info → furu-0.0.5.dist-info}/METADATA +91 -42
- furu-0.0.5.dist-info/RECORD +46 -0
- {furu-0.0.3.dist-info → furu-0.0.5.dist-info}/WHEEL +1 -1
- furu-0.0.3.dist-info/RECORD +0 -36
- {furu-0.0.3.dist-info → furu-0.0.5.dist-info}/entry_points.txt +0 -0
furu/__init__.py
CHANGED
|
@@ -17,8 +17,12 @@ from .core import DependencyChzSpec, DependencySpec, Furu, FuruList
|
|
|
17
17
|
from .errors import (
|
|
18
18
|
FuruComputeError,
|
|
19
19
|
FuruError,
|
|
20
|
+
FuruExecutionError,
|
|
20
21
|
FuruLockNotAcquired,
|
|
22
|
+
FuruMissingArtifact,
|
|
21
23
|
FuruMigrationRequired,
|
|
24
|
+
FuruSpecMismatch,
|
|
25
|
+
FuruValidationError,
|
|
22
26
|
FuruWaitTimeout,
|
|
23
27
|
MISSING,
|
|
24
28
|
)
|
|
@@ -51,9 +55,13 @@ __all__ = [
|
|
|
51
55
|
"FuruComputeError",
|
|
52
56
|
"FuruConfig",
|
|
53
57
|
"FuruError",
|
|
58
|
+
"FuruExecutionError",
|
|
54
59
|
"FuruList",
|
|
55
60
|
"FuruLockNotAcquired",
|
|
61
|
+
"FuruMissingArtifact",
|
|
56
62
|
"FuruMigrationRequired",
|
|
63
|
+
"FuruSpecMismatch",
|
|
64
|
+
"FuruValidationError",
|
|
57
65
|
"FuruSerializer",
|
|
58
66
|
"FuruWaitTimeout",
|
|
59
67
|
"DependencyChzSpec",
|
furu/adapters/submitit.py
CHANGED
|
@@ -6,6 +6,7 @@ from typing import Any, Callable, Protocol
|
|
|
6
6
|
|
|
7
7
|
from ..config import FURU_CONFIG
|
|
8
8
|
from ..storage import StateManager
|
|
9
|
+
from ..runtime.logging import get_logger
|
|
9
10
|
from ..storage.state import _FuruState, ProbeResult
|
|
10
11
|
|
|
11
12
|
|
|
@@ -102,19 +103,39 @@ class SubmititAdapter:
|
|
|
102
103
|
"""Watch for job ID in background thread and update state."""
|
|
103
104
|
|
|
104
105
|
def watcher():
|
|
106
|
+
_ = attempt_id # intentionally unused; queued->running attempt swap is expected
|
|
105
107
|
while True:
|
|
106
108
|
job_id = self.get_job_id(job)
|
|
107
109
|
if job_id:
|
|
108
110
|
|
|
109
111
|
def mutate(state: _FuruState) -> None:
|
|
110
112
|
attempt = state.attempt
|
|
111
|
-
if attempt is None
|
|
113
|
+
if attempt is None:
|
|
114
|
+
return
|
|
115
|
+
if attempt.backend != "submitit":
|
|
116
|
+
return
|
|
117
|
+
if (
|
|
118
|
+
attempt.status not in {"queued", "running"}
|
|
119
|
+
and attempt.status not in StateManager.TERMINAL_STATUSES
|
|
120
|
+
):
|
|
121
|
+
return
|
|
122
|
+
existing = attempt.scheduler.get("job_id")
|
|
123
|
+
if existing == job_id:
|
|
112
124
|
return
|
|
113
125
|
attempt.scheduler["job_id"] = job_id
|
|
114
126
|
|
|
115
127
|
StateManager.update_state(directory, mutate)
|
|
116
128
|
if callback:
|
|
117
|
-
|
|
129
|
+
try:
|
|
130
|
+
callback(job_id)
|
|
131
|
+
except Exception:
|
|
132
|
+
# Avoid killing the watcher thread; state update already happened.
|
|
133
|
+
logger = get_logger()
|
|
134
|
+
logger.exception(
|
|
135
|
+
"submitit watcher: job_id callback failed for %s: %s",
|
|
136
|
+
directory,
|
|
137
|
+
job_id,
|
|
138
|
+
)
|
|
118
139
|
break
|
|
119
140
|
|
|
120
141
|
if self.is_done(job):
|
furu/config.py
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from importlib import import_module
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from typing import Literal, cast
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
RecordGitMode = Literal["ignore", "cached", "uncached"]
|
|
4
8
|
|
|
5
9
|
|
|
6
10
|
class FuruConfig:
|
|
@@ -18,6 +22,11 @@ class FuruConfig:
|
|
|
18
22
|
return (project_root / self.DEFAULT_ROOT_DIR).resolve()
|
|
19
23
|
|
|
20
24
|
self.base_root = _get_base_root()
|
|
25
|
+
self.submitit_root = (
|
|
26
|
+
Path(os.getenv("FURU_SUBMITIT_PATH", str(self.base_root / "submitit")))
|
|
27
|
+
.expanduser()
|
|
28
|
+
.resolve()
|
|
29
|
+
)
|
|
21
30
|
self.version_controlled_root_override = self._get_version_controlled_override()
|
|
22
31
|
self.poll_interval = float(os.getenv("FURU_POLL_INTERVAL_SECS", "10"))
|
|
23
32
|
self.wait_log_every_sec = float(os.getenv("FURU_WAIT_LOG_EVERY_SECS", "10"))
|
|
@@ -30,26 +39,20 @@ class FuruConfig:
|
|
|
30
39
|
float(hb) if hb is not None else max(1.0, self.lease_duration_sec / 3.0)
|
|
31
40
|
)
|
|
32
41
|
self.max_requeues = int(os.getenv("FURU_PREEMPT_MAX", "5"))
|
|
42
|
+
self.max_compute_retries = int(os.getenv("FURU_MAX_COMPUTE_RETRIES", "3"))
|
|
33
43
|
self.retry_failed = os.getenv("FURU_RETRY_FAILED", "1").lower() in {
|
|
34
44
|
"1",
|
|
35
45
|
"true",
|
|
36
46
|
"yes",
|
|
37
47
|
}
|
|
38
|
-
self.
|
|
39
|
-
|
|
40
|
-
"
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
"yes",
|
|
47
|
-
}
|
|
48
|
-
self.require_git_remote = os.getenv("FURU_REQUIRE_GIT_REMOTE", "1").lower() in {
|
|
49
|
-
"1",
|
|
50
|
-
"true",
|
|
51
|
-
"yes",
|
|
52
|
-
}
|
|
48
|
+
self.record_git = self._parse_record_git(os.getenv("FURU_RECORD_GIT", "cached"))
|
|
49
|
+
self.allow_no_git_origin = self._parse_bool(
|
|
50
|
+
os.getenv("FURU_ALLOW_NO_GIT_ORIGIN", "0")
|
|
51
|
+
)
|
|
52
|
+
if self.allow_no_git_origin and self.record_git == "ignore":
|
|
53
|
+
raise ValueError(
|
|
54
|
+
"FURU_ALLOW_NO_GIT_ORIGIN cannot be enabled when FURU_RECORD_GIT=ignore"
|
|
55
|
+
)
|
|
53
56
|
always_rerun_items = {
|
|
54
57
|
item.strip()
|
|
55
58
|
for item in os.getenv("FURU_ALWAYS_RERUN", "").split(",")
|
|
@@ -71,35 +74,25 @@ class FuruConfig:
|
|
|
71
74
|
"FURU_CANCELLED_IS_PREEMPTED", "false"
|
|
72
75
|
).lower() in {"1", "true", "yes"}
|
|
73
76
|
|
|
74
|
-
# Parse FURU_CACHE_METADATA: "never", "forever", or duration like "5m", "1h"
|
|
75
|
-
# Default: "5m" (5 minutes) - balances performance with freshness
|
|
76
|
-
self.cache_metadata_ttl_sec: float | None = self._parse_cache_duration(
|
|
77
|
-
os.getenv("FURU_CACHE_METADATA", "5m")
|
|
78
|
-
)
|
|
79
|
-
|
|
80
77
|
@staticmethod
|
|
81
|
-
def
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
# Parse duration like "5m", "1h", "30s"
|
|
90
|
-
import re
|
|
91
|
-
|
|
92
|
-
match = re.match(r"^(\d+(?:\.\d+)?)\s*([smh]?)$", value)
|
|
93
|
-
if not match:
|
|
78
|
+
def _parse_bool(value: str) -> bool:
|
|
79
|
+
return value.strip().lower() in {"1", "true", "yes"}
|
|
80
|
+
|
|
81
|
+
@classmethod
|
|
82
|
+
def _parse_record_git(cls, value: str) -> RecordGitMode:
|
|
83
|
+
normalized = value.strip().lower()
|
|
84
|
+
allowed = {"ignore", "cached", "uncached"}
|
|
85
|
+
if normalized not in allowed:
|
|
94
86
|
raise ValueError(
|
|
95
|
-
|
|
96
|
-
"Use 'never', 'forever', or duration like '5m', '1h', '30s'"
|
|
87
|
+
"FURU_RECORD_GIT must be one of 'ignore', 'cached', or 'uncached'"
|
|
97
88
|
)
|
|
89
|
+
return cast(RecordGitMode, normalized)
|
|
98
90
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
91
|
+
@property
|
|
92
|
+
def cache_metadata_ttl_sec(self) -> float | None:
|
|
93
|
+
if self.record_git == "cached":
|
|
94
|
+
return float("inf")
|
|
95
|
+
return None
|
|
103
96
|
|
|
104
97
|
def get_root(self, version_controlled: bool = False) -> Path:
|
|
105
98
|
"""Get root directory for storage (version_controlled uses its own root)."""
|
|
@@ -109,6 +102,9 @@ class FuruConfig:
|
|
|
109
102
|
return self._resolve_version_controlled_root()
|
|
110
103
|
return self.base_root / "data"
|
|
111
104
|
|
|
105
|
+
def get_submitit_root(self) -> Path:
|
|
106
|
+
return self.submitit_root
|
|
107
|
+
|
|
112
108
|
@classmethod
|
|
113
109
|
def _get_version_controlled_override(cls) -> Path | None:
|
|
114
110
|
env = os.getenv("FURU_VERSION_CONTROLLED_PATH")
|
|
@@ -175,4 +171,7 @@ def get_furu_root(*, version_controlled: bool = False) -> Path:
|
|
|
175
171
|
|
|
176
172
|
|
|
177
173
|
def set_furu_root(path: Path) -> None:
|
|
178
|
-
|
|
174
|
+
root = path.resolve()
|
|
175
|
+
FURU_CONFIG.base_root = root
|
|
176
|
+
if os.getenv("FURU_SUBMITIT_PATH") is None:
|
|
177
|
+
FURU_CONFIG.submitit_root = (root / "submitit").resolve()
|