furu 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- furu/__init__.py +82 -0
- furu/adapters/__init__.py +3 -0
- furu/adapters/submitit.py +195 -0
- furu/config.py +98 -0
- furu/core/__init__.py +4 -0
- furu/core/furu.py +999 -0
- furu/core/list.py +123 -0
- furu/dashboard/__init__.py +9 -0
- furu/dashboard/__main__.py +7 -0
- furu/dashboard/api/__init__.py +7 -0
- furu/dashboard/api/models.py +170 -0
- furu/dashboard/api/routes.py +135 -0
- furu/dashboard/frontend/dist/assets/index-CbdDfSOZ.css +1 -0
- furu/dashboard/frontend/dist/assets/index-DDv_TYB_.js +67 -0
- furu/dashboard/frontend/dist/favicon.svg +10 -0
- furu/dashboard/frontend/dist/index.html +22 -0
- furu/dashboard/main.py +134 -0
- furu/dashboard/scanner.py +931 -0
- furu/errors.py +76 -0
- furu/migrate.py +48 -0
- furu/migration.py +926 -0
- furu/runtime/__init__.py +27 -0
- furu/runtime/env.py +8 -0
- furu/runtime/logging.py +301 -0
- furu/runtime/tracebacks.py +64 -0
- furu/serialization/__init__.py +20 -0
- furu/serialization/migrations.py +246 -0
- furu/serialization/serializer.py +233 -0
- furu/storage/__init__.py +32 -0
- furu/storage/metadata.py +282 -0
- furu/storage/migration.py +81 -0
- furu/storage/state.py +1107 -0
- furu-0.0.1.dist-info/METADATA +502 -0
- furu-0.0.1.dist-info/RECORD +36 -0
- furu-0.0.1.dist-info/WHEEL +4 -0
- furu-0.0.1.dist-info/entry_points.txt +2 -0
furu/__init__.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Furu: cacheable, nested pipelines as config objects.
|
|
3
|
+
|
|
4
|
+
This package uses a src-layout. Import the package as `furu`.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from importlib.metadata import version
|
|
8
|
+
|
|
9
|
+
import chz
|
|
10
|
+
import submitit
|
|
11
|
+
|
|
12
|
+
# Resolved from installed package metadata so the version string lives only
# in the packaging config (no hard-coded duplicate here).
__version__ = version("furu")
|
|
13
|
+
|
|
14
|
+
from .config import FURU_CONFIG, FuruConfig, get_furu_root, set_furu_root
|
|
15
|
+
from .adapters import SubmititAdapter
|
|
16
|
+
from .core import Furu, FuruList
|
|
17
|
+
from .errors import (
|
|
18
|
+
FuruComputeError,
|
|
19
|
+
FuruError,
|
|
20
|
+
FuruLockNotAcquired,
|
|
21
|
+
FuruMigrationRequired,
|
|
22
|
+
FuruWaitTimeout,
|
|
23
|
+
MISSING,
|
|
24
|
+
)
|
|
25
|
+
from .runtime import (
|
|
26
|
+
configure_logging,
|
|
27
|
+
current_holder,
|
|
28
|
+
current_log_dir,
|
|
29
|
+
enter_holder,
|
|
30
|
+
get_logger,
|
|
31
|
+
load_env,
|
|
32
|
+
log,
|
|
33
|
+
write_separator,
|
|
34
|
+
)
|
|
35
|
+
from .migrate import migrate
|
|
36
|
+
from .migration import (
|
|
37
|
+
NamespacePair,
|
|
38
|
+
MigrationCandidate,
|
|
39
|
+
MigrationSkip,
|
|
40
|
+
apply_migration,
|
|
41
|
+
find_migration_candidates,
|
|
42
|
+
find_migration_candidates_initialized_target,
|
|
43
|
+
)
|
|
44
|
+
from .serialization import FuruSerializer
|
|
45
|
+
from .storage import MetadataManager, StateManager
|
|
46
|
+
|
|
47
|
+
# Public API surface. Kept ASCII-sorted so additions are easy to review
# and duplicates are obvious at a glance.
__all__ = [
    "FURU_CONFIG",
    "Furu",
    "FuruComputeError",
    "FuruConfig",
    "FuruError",
    "FuruList",
    "FuruLockNotAcquired",
    "FuruMigrationRequired",
    "FuruSerializer",
    "FuruWaitTimeout",
    "MISSING",
    "MetadataManager",
    "MigrationCandidate",
    "MigrationSkip",
    "NamespacePair",
    "StateManager",
    "SubmititAdapter",
    "__version__",
    "apply_migration",
    "chz",
    "configure_logging",
    "current_holder",
    "current_log_dir",
    "enter_holder",
    "find_migration_candidates",
    "find_migration_candidates_initialized_target",
    "get_furu_root",
    "get_logger",
    "load_env",
    "log",
    "migrate",
    "set_furu_root",
    "submitit",
    "write_separator",
]
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
import time
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any, Callable, Protocol
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
from ..config import FURU_CONFIG
|
|
8
|
+
from ..storage import StateManager
|
|
9
|
+
from ..storage.state import _FuruState, ProbeResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Protocol for submitit Job-like objects. We use this instead of Any because
|
|
13
|
+
# submitit is an external library and we want to document the interface we expect.
|
|
14
|
+
class SubmititJobProtocol(Protocol):
|
|
15
|
+
"""Protocol for submitit Job objects."""
|
|
16
|
+
|
|
17
|
+
job_id: str | None
|
|
18
|
+
|
|
19
|
+
def done(self) -> bool: ...
|
|
20
|
+
def state(self) -> str: ...
|
|
21
|
+
def result(self, timeout: float | None = None) -> object: ...
|
|
22
|
+
def wait(self) -> None: ...
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Executor and job handles come from the external `submitit` library.
# Their concrete classes vary by backend and carry complex generics, so
# both are aliased to Any rather than pinned to a specific type.
SubmititExecutor = Any
SubmititJob = Any
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class SubmititAdapter:
    """Adapter for working with submitit executors.

    Wraps an executor from the external `submitit` library and adds:
    persistence of job handles next to the Furu state directory, a
    background watcher that records the scheduler job ID, and best-effort
    reconciliation of raw scheduler states into Furu statuses.
    """

    # Filename used to persist the pickled job handle in the internal dir.
    JOB_PICKLE = "job.pkl"

    def __init__(self, executor: SubmititExecutor):
        self.executor = executor

    def submit(self, fn: Callable[[], None]) -> SubmititJob:
        """Submit `fn` to the wrapped executor and return the job handle."""
        return self.executor.submit(fn)

    def wait(self, job: SubmititJob, timeout: float | None = None) -> None:
        """Wait for job completion.

        `timeout=None` waits indefinitely. A timeout of 0 is a valid
        (immediate) timeout, hence the explicit `is not None` check —
        a truthiness test would silently turn 0 into an infinite wait.
        """
        if timeout is not None:
            # result(timeout=...) raises if the job does not finish in time.
            job.result(timeout=timeout)
        else:
            job.wait()

    def get_job_id(self, job: SubmititJob) -> str | None:
        """Return the scheduler job ID, or None if not assigned yet."""
        job_id = getattr(job, "job_id", None)
        return str(job_id) if job_id else None

    def is_done(self, job: SubmititJob) -> bool:
        """Return True if the job reports itself done; False when unknowable."""
        done_fn = getattr(job, "done", None)
        return done_fn() if callable(done_fn) else False

    def get_state(self, job: SubmititJob) -> str | None:
        """Return the raw scheduler state string, or None if unavailable."""
        state_fn = getattr(job, "state", None)
        return state_fn() if callable(state_fn) else None

    def pickle_job(self, job: SubmititJob, directory: Path) -> None:
        """Pickle the job handle into the internal state directory."""
        import cloudpickle as pickle

        job_path = StateManager.get_internal_dir(directory) / self.JOB_PICKLE
        job_path.parent.mkdir(parents=True, exist_ok=True)
        with job_path.open("wb") as f:
            pickle.dump(job, f)

    def load_job(self, directory: Path) -> SubmititJob | None:
        """Load a previously pickled job handle, or None if absent.

        NOTE(review): unpickling executes arbitrary code; this is only safe
        because the file is written by `pickle_job` into a directory we own.
        """
        job_path = StateManager.get_internal_dir(directory) / self.JOB_PICKLE
        if not job_path.is_file():
            return None

        import cloudpickle as pickle

        with job_path.open("rb") as f:
            return pickle.load(f)

    def watch_job_id(
        self,
        job: SubmititJob,
        directory: Path,
        *,
        attempt_id: str,
        callback: Callable[[str], None] | None = None,
        poll_interval: float = 0.5,
    ) -> None:
        """Record the job ID in persisted state once the scheduler assigns it.

        Spawns a daemon thread that polls every `poll_interval` seconds until
        either the ID appears — it is then written into the state for attempt
        `attempt_id` and `callback` is invoked — or the job finishes without
        ever exposing an ID.
        """

        def watcher() -> None:
            while True:
                job_id = self.get_job_id(job)
                if job_id:

                    def mutate(state: _FuruState) -> None:
                        # Only touch the state if it still belongs to our
                        # attempt; a newer attempt may have replaced it.
                        attempt = state.attempt
                        if attempt is None or attempt.id != attempt_id:
                            return
                        attempt.scheduler["job_id"] = job_id

                    StateManager.update_state(directory, mutate)
                    if callback:
                        callback(job_id)
                    break

                if self.is_done(job):
                    # Job finished without ever exposing an ID; stop polling.
                    break

                time.sleep(poll_interval)

        thread = threading.Thread(target=watcher, daemon=True)
        thread.start()

    def classify_scheduler_state(self, state: str | None) -> str | None:
        """Map a raw scheduler state string to a Furu status.

        Returns one of {"success", "preempted", "failed"}, or None when the
        state is unknown or non-terminal.
        """
        if not state:
            return None

        s = state.upper()

        # "COMPLETE" also covers "COMPLETED" (the original tested both).
        # NOTE(review): this also matches transient states like SLURM's
        # "COMPLETING" — confirm treating those as success is intended.
        if "COMPLETE" in s:
            return "success"

        if s in {
            "PREEMPTED",
            "TIMEOUT",
            "NODE_FAIL",
            "REQUEUED",
            "REQUEUE_HOLD",
        }:
            return "preempted"

        if s == "CANCELLED":
            # Operator-configurable: cancellation may count as preemption.
            return "preempted" if FURU_CONFIG.cancelled_is_preempted else "failed"

        if "FAIL" in s or "ERROR" in s:
            return "failed"

        return None

    def probe(self, directory: Path, state: _FuruState) -> ProbeResult:
        """
        Best-effort scheduler reconciliation.

        Returns a dict for `StateManager.reconcile(..., submitit_probe=...)`:
        - `terminal_status`: one of {failed, cancelled, preempted, crashed}
        - `scheduler_state`: raw scheduler state when available
        - `reason`: best-effort reason string

        Returns empty dict if job status cannot be determined (e.g., job pickle
        doesn't exist yet), allowing reconcile to fall back to lease expiry.
        """
        job = self.load_job(directory)
        if job is None:
            # Job pickle doesn't exist - can't determine status, fall back to lease expiry
            return {}

        scheduler_state = self.get_state(job)
        classified = self.classify_scheduler_state(scheduler_state)
        if classified is None:
            if self.is_done(job):
                # Done, but in a state we do not recognize: treat as crashed.
                return {
                    "terminal_status": "crashed",
                    "scheduler_state": scheduler_state,
                    "reason": "job_done_unknown_state",
                }
            return {}

        # `COMPLETED` doesn't guarantee the worker wrote a success marker/state.
        if classified == "success":
            return {
                "terminal_status": "crashed",
                "scheduler_state": scheduler_state,
                "reason": "scheduler_completed_no_success_marker",
            }

        return {
            "terminal_status": classified,
            "scheduler_state": scheduler_state,
            "reason": f"scheduler:{scheduler_state}",
        }
|
furu/config.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class FuruConfig:
|
|
6
|
+
"""Central configuration for Furu behavior."""
|
|
7
|
+
|
|
8
|
+
def __init__(self):
|
|
9
|
+
def _get_base_root() -> Path:
|
|
10
|
+
env = os.getenv("FURU_PATH")
|
|
11
|
+
if env:
|
|
12
|
+
return Path(env).expanduser().resolve()
|
|
13
|
+
return Path("data-furu").resolve()
|
|
14
|
+
|
|
15
|
+
self.base_root = _get_base_root()
|
|
16
|
+
self.poll_interval = float(os.getenv("FURU_POLL_INTERVAL_SECS", "10"))
|
|
17
|
+
self.wait_log_every_sec = float(os.getenv("FURU_WAIT_LOG_EVERY_SECS", "10"))
|
|
18
|
+
self.stale_timeout = float(os.getenv("FURU_STALE_AFTER_SECS", str(30 * 60)))
|
|
19
|
+
self.lease_duration_sec = float(os.getenv("FURU_LEASE_SECS", "120"))
|
|
20
|
+
hb = os.getenv("FURU_HEARTBEAT_SECS")
|
|
21
|
+
self.heartbeat_interval_sec = (
|
|
22
|
+
float(hb) if hb is not None else max(1.0, self.lease_duration_sec / 3.0)
|
|
23
|
+
)
|
|
24
|
+
self.max_requeues = int(os.getenv("FURU_PREEMPT_MAX", "5"))
|
|
25
|
+
self.ignore_git_diff = os.getenv("FURU_IGNORE_DIFF", "0").lower() in {
|
|
26
|
+
"1",
|
|
27
|
+
"true",
|
|
28
|
+
"yes",
|
|
29
|
+
}
|
|
30
|
+
self.require_git = os.getenv("FURU_REQUIRE_GIT", "1").lower() in {
|
|
31
|
+
"1",
|
|
32
|
+
"true",
|
|
33
|
+
"yes",
|
|
34
|
+
}
|
|
35
|
+
self.require_git_remote = os.getenv("FURU_REQUIRE_GIT_REMOTE", "1").lower() in {
|
|
36
|
+
"1",
|
|
37
|
+
"true",
|
|
38
|
+
"yes",
|
|
39
|
+
}
|
|
40
|
+
self.force_recompute = {
|
|
41
|
+
item.strip()
|
|
42
|
+
for item in os.getenv("FURU_FORCE_RECOMPUTE", "").split(",")
|
|
43
|
+
if item.strip()
|
|
44
|
+
}
|
|
45
|
+
self.cancelled_is_preempted = os.getenv(
|
|
46
|
+
"FURU_CANCELLED_IS_PREEMPTED", "false"
|
|
47
|
+
).lower() in {"1", "true", "yes"}
|
|
48
|
+
|
|
49
|
+
# Parse FURU_CACHE_METADATA: "never", "forever", or duration like "5m", "1h"
|
|
50
|
+
# Default: "5m" (5 minutes) - balances performance with freshness
|
|
51
|
+
self.cache_metadata_ttl_sec: float | None = self._parse_cache_duration(
|
|
52
|
+
os.getenv("FURU_CACHE_METADATA", "5m")
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
@staticmethod
|
|
56
|
+
def _parse_cache_duration(value: str) -> float | None:
|
|
57
|
+
"""Parse cache duration string into seconds. Returns None for 'never', float('inf') for 'forever'."""
|
|
58
|
+
value = value.strip().lower()
|
|
59
|
+
if value in {"never", "0", "false", "no"}:
|
|
60
|
+
return None # No caching
|
|
61
|
+
if value in {"forever", "inf", "true", "yes", "1"}:
|
|
62
|
+
return float("inf") # Cache forever
|
|
63
|
+
|
|
64
|
+
# Parse duration like "5m", "1h", "30s"
|
|
65
|
+
import re
|
|
66
|
+
|
|
67
|
+
match = re.match(r"^(\d+(?:\.\d+)?)\s*([smh]?)$", value)
|
|
68
|
+
if not match:
|
|
69
|
+
raise ValueError(
|
|
70
|
+
f"Invalid FURU_CACHE_METADATA value: {value!r}. "
|
|
71
|
+
"Use 'never', 'forever', or duration like '5m', '1h', '30s'"
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
num = float(match.group(1))
|
|
75
|
+
unit = match.group(2) or "s"
|
|
76
|
+
multipliers = {"s": 1, "m": 60, "h": 3600}
|
|
77
|
+
return num * multipliers[unit]
|
|
78
|
+
|
|
79
|
+
def get_root(self, version_controlled: bool = False) -> Path:
|
|
80
|
+
"""Get root directory for storage (version_controlled determines subdirectory)."""
|
|
81
|
+
if version_controlled:
|
|
82
|
+
return self.base_root / "git"
|
|
83
|
+
return self.base_root / "data"
|
|
84
|
+
|
|
85
|
+
@property
|
|
86
|
+
def raw_dir(self) -> Path:
|
|
87
|
+
return self.base_root / "raw"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# Process-wide singleton: an import-time snapshot of the environment settings.
FURU_CONFIG = FuruConfig()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_furu_root(*, version_controlled: bool = False) -> Path:
    """Return the active storage root.

    Delegates to the module-level FURU_CONFIG singleton; pass
    version_controlled=True for the git-tracked subtree.
    """
    root = FURU_CONFIG.get_root(version_controlled=version_controlled)
    return root
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def set_furu_root(path: Path) -> None:
    """Point Furu's storage at `path`, process-wide (via FURU_CONFIG).

    Expands `~` and resolves to an absolute path, matching how FURU_PATH is
    interpreted at startup (the original only resolved, so `~/...` paths
    were mis-handled).
    """
    FURU_CONFIG.base_root = path.expanduser().resolve()
|
furu/core/__init__.py
ADDED