furu 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- furu/config.py +27 -40
- furu/core/furu.py +194 -126
- furu/core/list.py +3 -2
- furu/dashboard/frontend/dist/assets/{index-DS3FsqcY.js → index-BjyrY-Zz.js} +1 -1
- furu/dashboard/frontend/dist/index.html +1 -1
- furu/execution/local.py +9 -7
- furu/execution/plan.py +117 -25
- furu/execution/slurm_dag.py +16 -14
- furu/execution/slurm_pool.py +5 -5
- furu/execution/slurm_spec.py +2 -2
- furu/migration.py +1 -2
- furu/runtime/env.py +1 -1
- furu/runtime/logging.py +30 -4
- furu/storage/metadata.py +25 -29
- furu/storage/migration.py +0 -1
- furu/storage/state.py +86 -92
- {furu-0.0.4.dist-info → furu-0.0.5.dist-info}/METADATA +18 -6
- {furu-0.0.4.dist-info → furu-0.0.5.dist-info}/RECORD +20 -20
- {furu-0.0.4.dist-info → furu-0.0.5.dist-info}/WHEEL +1 -1
- {furu-0.0.4.dist-info → furu-0.0.5.dist-info}/entry_points.txt +0 -0
furu/storage/state.py
CHANGED
|
@@ -9,13 +9,12 @@ from collections.abc import Generator
|
|
|
9
9
|
from contextlib import contextmanager
|
|
10
10
|
from dataclasses import dataclass
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import Annotated, Any, Callable, Literal, Mapping,
|
|
12
|
+
from typing import Annotated, Any, Callable, Literal, Mapping, TypeAlias, TypedDict
|
|
13
13
|
|
|
14
14
|
from pydantic import BaseModel, ConfigDict, Field, ValidationError, model_validator
|
|
15
15
|
|
|
16
16
|
from ..errors import FuruLockNotAcquired, FuruWaitTimeout
|
|
17
17
|
|
|
18
|
-
|
|
19
18
|
# Type alias for scheduler-specific metadata. Different schedulers (SLURM, LSF, PBS, local)
|
|
20
19
|
# return different fields, so this must remain dynamic.
|
|
21
20
|
SchedulerMetadata = dict[str, Any]
|
|
@@ -167,7 +166,6 @@ class _StateAttemptBase(BaseModel):
|
|
|
167
166
|
number: int = 1
|
|
168
167
|
backend: str
|
|
169
168
|
started_at: str
|
|
170
|
-
heartbeat_at: str
|
|
171
169
|
lease_duration_sec: float
|
|
172
170
|
lease_expires_at: str
|
|
173
171
|
owner: StateOwner
|
|
@@ -228,7 +226,6 @@ class StateAttempt(BaseModel):
|
|
|
228
226
|
backend: str
|
|
229
227
|
status: str
|
|
230
228
|
started_at: str
|
|
231
|
-
heartbeat_at: str
|
|
232
229
|
lease_duration_sec: float
|
|
233
230
|
lease_expires_at: str
|
|
234
231
|
owner: StateOwner
|
|
@@ -246,7 +243,6 @@ class StateAttempt(BaseModel):
|
|
|
246
243
|
backend=attempt.backend,
|
|
247
244
|
status=attempt.status,
|
|
248
245
|
started_at=attempt.started_at,
|
|
249
|
-
heartbeat_at=attempt.heartbeat_at,
|
|
250
246
|
lease_duration_sec=attempt.lease_duration_sec,
|
|
251
247
|
lease_expires_at=attempt.lease_expires_at,
|
|
252
248
|
owner=attempt.owner,
|
|
@@ -286,9 +282,9 @@ class StateManager:
|
|
|
286
282
|
EVENTS_FILE = "events.jsonl"
|
|
287
283
|
SUCCESS_MARKER = "SUCCESS.json"
|
|
288
284
|
|
|
289
|
-
COMPUTE_LOCK = "
|
|
290
|
-
SUBMIT_LOCK = "
|
|
291
|
-
STATE_LOCK = "
|
|
285
|
+
COMPUTE_LOCK = "compute.lock"
|
|
286
|
+
SUBMIT_LOCK = "submit.lock"
|
|
287
|
+
STATE_LOCK = "state.lock"
|
|
292
288
|
|
|
293
289
|
TERMINAL_STATUSES = {
|
|
294
290
|
"success",
|
|
@@ -302,6 +298,12 @@ class StateManager:
|
|
|
302
298
|
def get_internal_dir(cls, directory: Path) -> Path:
|
|
303
299
|
return directory / cls.INTERNAL_DIR
|
|
304
300
|
|
|
301
|
+
@classmethod
|
|
302
|
+
def ensure_internal_dir(cls, directory: Path) -> Path:
|
|
303
|
+
internal_dir = cls.get_internal_dir(directory)
|
|
304
|
+
internal_dir.mkdir(parents=True, exist_ok=True)
|
|
305
|
+
return internal_dir
|
|
306
|
+
|
|
305
307
|
@classmethod
|
|
306
308
|
def get_state_path(cls, directory: Path) -> Path:
|
|
307
309
|
return cls.get_internal_dir(directory) / cls.STATE_FILE
|
|
@@ -366,7 +368,6 @@ class StateManager:
|
|
|
366
368
|
@classmethod
|
|
367
369
|
def _write_state_unlocked(cls, directory: Path, state: _FuruState) -> None:
|
|
368
370
|
state_path = cls.get_state_path(directory)
|
|
369
|
-
state_path.parent.mkdir(parents=True, exist_ok=True)
|
|
370
371
|
tmp_path = state_path.with_suffix(".tmp")
|
|
371
372
|
tmp_path.write_text(json.dumps(state.model_dump(mode="json"), indent=2))
|
|
372
373
|
os.replace(tmp_path, state_path)
|
|
@@ -385,7 +386,6 @@ class StateManager:
|
|
|
385
386
|
@classmethod
|
|
386
387
|
def try_lock(cls, lock_path: Path) -> int | None:
|
|
387
388
|
try:
|
|
388
|
-
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
|
389
389
|
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_RDWR, 0o644)
|
|
390
390
|
payload = {
|
|
391
391
|
"pid": os.getpid(),
|
|
@@ -485,19 +485,23 @@ class StateManager:
|
|
|
485
485
|
|
|
486
486
|
@classmethod
|
|
487
487
|
def update_state(
|
|
488
|
-
cls, directory: Path, mutator: Callable[[_FuruState], None]
|
|
488
|
+
cls, directory: Path, mutator: Callable[[_FuruState], bool | None]
|
|
489
489
|
) -> _FuruState:
|
|
490
490
|
lock_path = cls.get_lock_path(directory, cls.STATE_LOCK)
|
|
491
491
|
fd: int | None = None
|
|
492
492
|
try:
|
|
493
493
|
fd = cls._acquire_lock_blocking(lock_path)
|
|
494
|
+
state_path = cls.get_state_path(directory)
|
|
495
|
+
force_write = not state_path.is_file()
|
|
494
496
|
state = cls.read_state(directory)
|
|
495
|
-
mutator(state)
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
497
|
+
changed = mutator(state)
|
|
498
|
+
if force_write or changed is not False:
|
|
499
|
+
state.schema_version = cls.SCHEMA_VERSION
|
|
500
|
+
state.updated_at = cls._iso_now()
|
|
501
|
+
validated = _FuruState.model_validate(state)
|
|
502
|
+
cls._write_state_unlocked(directory, validated)
|
|
503
|
+
return validated
|
|
504
|
+
return state
|
|
501
505
|
finally:
|
|
502
506
|
cls.release_lock(fd, lock_path)
|
|
503
507
|
|
|
@@ -510,14 +514,12 @@ class StateManager:
|
|
|
510
514
|
"host": socket.gethostname(),
|
|
511
515
|
**event,
|
|
512
516
|
}
|
|
513
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
514
517
|
with path.open("a", encoding="utf-8") as f:
|
|
515
518
|
f.write(json.dumps(enriched) + "\n")
|
|
516
519
|
|
|
517
520
|
@classmethod
|
|
518
521
|
def write_success_marker(cls, directory: Path, *, attempt_id: str) -> None:
|
|
519
522
|
marker = cls.get_success_marker_path(directory)
|
|
520
|
-
marker.parent.mkdir(parents=True, exist_ok=True)
|
|
521
523
|
payload = {"attempt_id": attempt_id, "created_at": cls._iso_now()}
|
|
522
524
|
tmp = marker.with_suffix(".tmp")
|
|
523
525
|
tmp.write_text(json.dumps(payload, indent=2))
|
|
@@ -536,6 +538,26 @@ class StateManager:
|
|
|
536
538
|
return True
|
|
537
539
|
return cls._utcnow() >= expires
|
|
538
540
|
|
|
541
|
+
@classmethod
|
|
542
|
+
def last_heartbeat_mtime(cls, directory: Path) -> float | None:
|
|
543
|
+
lock_path = cls.get_lock_path(directory, cls.COMPUTE_LOCK)
|
|
544
|
+
try:
|
|
545
|
+
return lock_path.stat().st_mtime
|
|
546
|
+
except FileNotFoundError:
|
|
547
|
+
return None
|
|
548
|
+
|
|
549
|
+
@classmethod
|
|
550
|
+
def _running_heartbeat_reason(
|
|
551
|
+
cls, directory: Path, attempt: _StateAttemptRunning
|
|
552
|
+
) -> str | None:
|
|
553
|
+
last_heartbeat = cls.last_heartbeat_mtime(directory)
|
|
554
|
+
if last_heartbeat is None:
|
|
555
|
+
return "missing_heartbeat"
|
|
556
|
+
expires_at = last_heartbeat + float(attempt.lease_duration_sec)
|
|
557
|
+
if time.time() >= expires_at:
|
|
558
|
+
return "lease_expired"
|
|
559
|
+
return None
|
|
560
|
+
|
|
539
561
|
@classmethod
|
|
540
562
|
def start_attempt_queued(
|
|
541
563
|
cls,
|
|
@@ -604,7 +626,6 @@ class StateManager:
|
|
|
604
626
|
|
|
605
627
|
owner_state = StateOwner.model_validate(owner)
|
|
606
628
|
started_at = now.isoformat(timespec="seconds")
|
|
607
|
-
heartbeat_at = started_at
|
|
608
629
|
lease_duration = float(lease_duration_sec)
|
|
609
630
|
lease_expires_at = expires.isoformat(timespec="seconds")
|
|
610
631
|
scheduler_state: SchedulerMetadata = scheduler or {}
|
|
@@ -614,7 +635,6 @@ class StateManager:
|
|
|
614
635
|
number=int(number),
|
|
615
636
|
backend=backend,
|
|
616
637
|
started_at=started_at,
|
|
617
|
-
heartbeat_at=heartbeat_at,
|
|
618
638
|
lease_duration_sec=lease_duration,
|
|
619
639
|
lease_expires_at=lease_expires_at,
|
|
620
640
|
owner=owner_state,
|
|
@@ -661,49 +681,9 @@ class StateManager:
|
|
|
661
681
|
return attempt.id
|
|
662
682
|
|
|
663
683
|
@classmethod
|
|
664
|
-
def heartbeat(
|
|
665
|
-
cls
|
|
666
|
-
|
|
667
|
-
ok = False
|
|
668
|
-
|
|
669
|
-
def mutate(state: _FuruState) -> None:
|
|
670
|
-
nonlocal ok
|
|
671
|
-
attempt = state.attempt
|
|
672
|
-
if not isinstance(attempt, _StateAttemptRunning):
|
|
673
|
-
return
|
|
674
|
-
if attempt.id != attempt_id:
|
|
675
|
-
return
|
|
676
|
-
now = cls._utcnow()
|
|
677
|
-
expires = now + _dt.timedelta(seconds=float(lease_duration_sec))
|
|
678
|
-
attempt.heartbeat_at = now.isoformat(timespec="seconds")
|
|
679
|
-
attempt.lease_duration_sec = float(lease_duration_sec)
|
|
680
|
-
attempt.lease_expires_at = expires.isoformat(timespec="seconds")
|
|
681
|
-
ok = True
|
|
682
|
-
|
|
683
|
-
cls.update_state(directory, mutate)
|
|
684
|
-
return ok
|
|
685
|
-
|
|
686
|
-
@classmethod
|
|
687
|
-
def set_attempt_fields(
|
|
688
|
-
cls, directory: Path, *, attempt_id: str, fields: SchedulerMetadata
|
|
689
|
-
) -> bool:
|
|
690
|
-
ok = False
|
|
691
|
-
|
|
692
|
-
def mutate(state: _FuruState) -> None:
|
|
693
|
-
nonlocal ok
|
|
694
|
-
attempt = state.attempt
|
|
695
|
-
if attempt is None or attempt.id != attempt_id:
|
|
696
|
-
return
|
|
697
|
-
for key, value in fields.items():
|
|
698
|
-
if key == "scheduler" and isinstance(value, dict):
|
|
699
|
-
attempt.scheduler.update(value)
|
|
700
|
-
continue
|
|
701
|
-
if hasattr(attempt, key):
|
|
702
|
-
setattr(attempt, key, value)
|
|
703
|
-
ok = True
|
|
704
|
-
|
|
705
|
-
cls.update_state(directory, mutate)
|
|
706
|
-
return ok
|
|
684
|
+
def heartbeat(cls, directory: Path) -> None:
|
|
685
|
+
lock_path = cls.get_lock_path(directory, cls.COMPUTE_LOCK)
|
|
686
|
+
os.utime(lock_path)
|
|
707
687
|
|
|
708
688
|
@classmethod
|
|
709
689
|
def finish_attempt_success(cls, directory: Path, *, attempt_id: str) -> None:
|
|
@@ -717,7 +697,6 @@ class StateManager:
|
|
|
717
697
|
number=attempt.number,
|
|
718
698
|
backend=attempt.backend,
|
|
719
699
|
started_at=attempt.started_at,
|
|
720
|
-
heartbeat_at=attempt.heartbeat_at,
|
|
721
700
|
lease_duration_sec=attempt.lease_duration_sec,
|
|
722
701
|
lease_expires_at=attempt.lease_expires_at,
|
|
723
702
|
owner=attempt.owner,
|
|
@@ -754,7 +733,6 @@ class StateManager:
|
|
|
754
733
|
number=attempt.number,
|
|
755
734
|
backend=attempt.backend,
|
|
756
735
|
started_at=attempt.started_at,
|
|
757
|
-
heartbeat_at=attempt.heartbeat_at,
|
|
758
736
|
lease_duration_sec=attempt.lease_duration_sec,
|
|
759
737
|
lease_expires_at=attempt.lease_expires_at,
|
|
760
738
|
owner=attempt.owner,
|
|
@@ -792,7 +770,6 @@ class StateManager:
|
|
|
792
770
|
number=attempt.number,
|
|
793
771
|
backend=attempt.backend,
|
|
794
772
|
started_at=attempt.started_at,
|
|
795
|
-
heartbeat_at=attempt.heartbeat_at,
|
|
796
773
|
lease_duration_sec=attempt.lease_duration_sec,
|
|
797
774
|
lease_expires_at=attempt.lease_expires_at,
|
|
798
775
|
owner=attempt.owner,
|
|
@@ -842,10 +819,10 @@ class StateManager:
|
|
|
842
819
|
to lease expiry.
|
|
843
820
|
"""
|
|
844
821
|
|
|
845
|
-
def mutate(state: _FuruState) ->
|
|
822
|
+
def mutate(state: _FuruState) -> bool:
|
|
846
823
|
attempt = state.attempt
|
|
847
824
|
if not isinstance(attempt, (_StateAttemptQueued, _StateAttemptRunning)):
|
|
848
|
-
return
|
|
825
|
+
return False
|
|
849
826
|
|
|
850
827
|
# Fast promotion if we can see a durable success marker.
|
|
851
828
|
if cls.success_marker_exists(directory):
|
|
@@ -855,7 +832,6 @@ class StateManager:
|
|
|
855
832
|
number=attempt.number,
|
|
856
833
|
backend=attempt.backend,
|
|
857
834
|
started_at=attempt.started_at,
|
|
858
|
-
heartbeat_at=attempt.heartbeat_at,
|
|
859
835
|
lease_duration_sec=attempt.lease_duration_sec,
|
|
860
836
|
lease_expires_at=attempt.lease_expires_at,
|
|
861
837
|
owner=attempt.owner,
|
|
@@ -865,7 +841,7 @@ class StateManager:
|
|
|
865
841
|
state.result = _coerce_result(
|
|
866
842
|
state.result, status="success", created_at=ended
|
|
867
843
|
)
|
|
868
|
-
return
|
|
844
|
+
return True
|
|
869
845
|
|
|
870
846
|
backend = attempt.backend
|
|
871
847
|
now = cls._iso_now()
|
|
@@ -878,6 +854,10 @@ class StateManager:
|
|
|
878
854
|
if alive is False:
|
|
879
855
|
terminal_status = "crashed"
|
|
880
856
|
reason = "pid_dead"
|
|
857
|
+
elif isinstance(attempt, _StateAttemptRunning):
|
|
858
|
+
reason = cls._running_heartbeat_reason(directory, attempt)
|
|
859
|
+
if reason is not None:
|
|
860
|
+
terminal_status = "crashed"
|
|
881
861
|
elif cls._lease_expired(attempt):
|
|
882
862
|
terminal_status = "crashed"
|
|
883
863
|
reason = "lease_expired"
|
|
@@ -890,16 +870,25 @@ class StateManager:
|
|
|
890
870
|
attempt.scheduler.update(
|
|
891
871
|
{k: v for k, v in verdict.items() if k != "terminal_status"}
|
|
892
872
|
)
|
|
893
|
-
if terminal_status is None
|
|
894
|
-
|
|
895
|
-
|
|
873
|
+
if terminal_status is None:
|
|
874
|
+
if isinstance(attempt, _StateAttemptRunning):
|
|
875
|
+
reason = cls._running_heartbeat_reason(directory, attempt)
|
|
876
|
+
if reason is not None:
|
|
877
|
+
terminal_status = "crashed"
|
|
878
|
+
elif cls._lease_expired(attempt):
|
|
879
|
+
terminal_status = "crashed"
|
|
880
|
+
reason = "lease_expired"
|
|
896
881
|
else:
|
|
897
|
-
if
|
|
882
|
+
if isinstance(attempt, _StateAttemptRunning):
|
|
883
|
+
reason = cls._running_heartbeat_reason(directory, attempt)
|
|
884
|
+
if reason is not None:
|
|
885
|
+
terminal_status = "crashed"
|
|
886
|
+
elif cls._lease_expired(attempt):
|
|
898
887
|
terminal_status = "crashed"
|
|
899
888
|
reason = "lease_expired"
|
|
900
889
|
|
|
901
890
|
if terminal_status is None:
|
|
902
|
-
return
|
|
891
|
+
return False
|
|
903
892
|
if terminal_status == "success":
|
|
904
893
|
terminal_status = "crashed"
|
|
905
894
|
reason = reason or "scheduler_success_no_success_marker"
|
|
@@ -910,7 +899,6 @@ class StateManager:
|
|
|
910
899
|
number=attempt.number,
|
|
911
900
|
backend=attempt.backend,
|
|
912
901
|
started_at=attempt.started_at,
|
|
913
|
-
heartbeat_at=attempt.heartbeat_at,
|
|
914
902
|
lease_duration_sec=attempt.lease_duration_sec,
|
|
915
903
|
lease_expires_at=attempt.lease_expires_at,
|
|
916
904
|
owner=attempt.owner,
|
|
@@ -927,7 +915,6 @@ class StateManager:
|
|
|
927
915
|
number=attempt.number,
|
|
928
916
|
backend=attempt.backend,
|
|
929
917
|
started_at=attempt.started_at,
|
|
930
|
-
heartbeat_at=attempt.heartbeat_at,
|
|
931
918
|
lease_duration_sec=attempt.lease_duration_sec,
|
|
932
919
|
lease_expires_at=attempt.lease_expires_at,
|
|
933
920
|
owner=attempt.owner,
|
|
@@ -942,7 +929,6 @@ class StateManager:
|
|
|
942
929
|
number=attempt.number,
|
|
943
930
|
backend=attempt.backend,
|
|
944
931
|
started_at=attempt.started_at,
|
|
945
|
-
heartbeat_at=attempt.heartbeat_at,
|
|
946
932
|
lease_duration_sec=attempt.lease_duration_sec,
|
|
947
933
|
lease_expires_at=attempt.lease_expires_at,
|
|
948
934
|
owner=attempt.owner,
|
|
@@ -957,7 +943,6 @@ class StateManager:
|
|
|
957
943
|
number=attempt.number,
|
|
958
944
|
backend=attempt.backend,
|
|
959
945
|
started_at=attempt.started_at,
|
|
960
|
-
heartbeat_at=attempt.heartbeat_at,
|
|
961
946
|
lease_duration_sec=attempt.lease_duration_sec,
|
|
962
947
|
lease_expires_at=attempt.lease_expires_at,
|
|
963
948
|
owner=attempt.owner,
|
|
@@ -970,6 +955,7 @@ class StateManager:
|
|
|
970
955
|
state.result,
|
|
971
956
|
status="failed" if terminal_status == "failed" else "incomplete",
|
|
972
957
|
)
|
|
958
|
+
return True
|
|
973
959
|
|
|
974
960
|
state = cls.update_state(directory, mutate)
|
|
975
961
|
attempt = state.attempt
|
|
@@ -1067,16 +1053,28 @@ def compute_lock(
|
|
|
1067
1053
|
return ", ".join(parts)
|
|
1068
1054
|
|
|
1069
1055
|
def _describe_wait(attempt: _StateAttempt, waited_sec: float) -> str:
|
|
1070
|
-
label = "last heartbeat"
|
|
1071
|
-
timestamp = attempt.heartbeat_at
|
|
1072
1056
|
if attempt.status == "queued":
|
|
1073
1057
|
label = "queued at"
|
|
1074
1058
|
timestamp = attempt.started_at
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1059
|
+
parsed = StateManager._parse_time(timestamp)
|
|
1060
|
+
timestamp_info = timestamp
|
|
1061
|
+
if parsed is not None:
|
|
1062
|
+
age = (StateManager._utcnow() - parsed).total_seconds()
|
|
1063
|
+
timestamp_info = f"{timestamp} ({_format_wait_duration(age)} ago)"
|
|
1064
|
+
else:
|
|
1065
|
+
label = "last heartbeat"
|
|
1066
|
+
last_heartbeat = StateManager.last_heartbeat_mtime(directory)
|
|
1067
|
+
if last_heartbeat is None:
|
|
1068
|
+
timestamp_info = "missing"
|
|
1069
|
+
else:
|
|
1070
|
+
heartbeat_dt = _dt.datetime.fromtimestamp(
|
|
1071
|
+
last_heartbeat, tz=_dt.timezone.utc
|
|
1072
|
+
)
|
|
1073
|
+
age = time.time() - last_heartbeat
|
|
1074
|
+
timestamp_info = (
|
|
1075
|
+
f"{heartbeat_dt.isoformat(timespec='seconds')} "
|
|
1076
|
+
f"({_format_wait_duration(age)} ago)"
|
|
1077
|
+
)
|
|
1080
1078
|
return (
|
|
1081
1079
|
"waited "
|
|
1082
1080
|
f"{_format_wait_duration(waited_sec)}, {label} {timestamp_info}, "
|
|
@@ -1228,11 +1226,7 @@ def compute_lock(
|
|
|
1228
1226
|
# Start heartbeat IMMEDIATELY
|
|
1229
1227
|
def heartbeat() -> None:
|
|
1230
1228
|
while not stop_event.wait(heartbeat_interval_sec):
|
|
1231
|
-
StateManager.heartbeat(
|
|
1232
|
-
directory,
|
|
1233
|
-
attempt_id=attempt_id, # type: ignore[arg-type]
|
|
1234
|
-
lease_duration_sec=lease_duration_sec,
|
|
1235
|
-
)
|
|
1229
|
+
StateManager.heartbeat(directory)
|
|
1236
1230
|
|
|
1237
1231
|
thread = threading.Thread(target=heartbeat, daemon=True)
|
|
1238
1232
|
thread.start()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: furu
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.5
|
|
4
4
|
Summary: Cacheable, nested pipelines for Python. Define computations as configs; furu handles caching, state tracking, and result reuse across runs.
|
|
5
5
|
Author: Herman Brunborg
|
|
6
6
|
Author-email: Herman Brunborg <herman@brunborg.com>
|
|
@@ -459,8 +459,11 @@ The `/api/experiments` endpoint supports:
|
|
|
459
459
|
|----------|---------|-------------|
|
|
460
460
|
| `FURU_PATH` | `<project>/furu-data` | Base storage directory for non-versioned artifacts |
|
|
461
461
|
| `FURU_VERSION_CONTROLLED_PATH` | `<project>/furu-data/artifacts` | Override version-controlled storage root |
|
|
462
|
+
| `FURU_SUBMITIT_PATH` | `<FURU_PATH>/submitit` | Override submitit logs root |
|
|
462
463
|
| `FURU_LOG_LEVEL` | `INFO` | Console verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
|
|
463
|
-
| `
|
|
464
|
+
| `FURU_RICH_UNCAUGHT_TRACEBACKS` | `true` | Use Rich for exception formatting (set `0` to disable) |
|
|
465
|
+
| `FURU_RECORD_GIT` | `cached` | Git provenance capture: `ignore` skips git metadata, `cached` records once per process, `uncached` records every time |
|
|
466
|
+
| `FURU_ALLOW_NO_GIT_ORIGIN` | `false` | Allow missing git `origin` when recording git metadata (invalid with `FURU_RECORD_GIT=ignore`) |
|
|
464
467
|
| `FURU_ALWAYS_RERUN` | `""` | Comma-separated class qualnames to always rerun (use `ALL` to bypass cache globally; cannot combine with other entries; entries must be importable) |
|
|
465
468
|
| `FURU_RETRY_FAILED` | `true` | Retry failed artifacts by default (set to `0` to keep failures sticky) |
|
|
466
469
|
| `FURU_MAX_COMPUTE_RETRIES` | `3` | Maximum compute retries per node after the first failure |
|
|
@@ -469,12 +472,21 @@ The `/api/experiments` endpoint supports:
|
|
|
469
472
|
| `FURU_WAIT_LOG_EVERY_SECS` | `10` | Interval between "waiting" log messages |
|
|
470
473
|
| `FURU_STALE_AFTER_SECS` | `1800` | Consider running jobs stale after this duration |
|
|
471
474
|
| `FURU_LEASE_SECS` | `120` | Compute lock lease duration |
|
|
472
|
-
| `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs |
|
|
475
|
+
| `FURU_HEARTBEAT_SECS` | `lease/3` | Heartbeat interval for running jobs (min 1s) |
|
|
473
476
|
| `FURU_PREEMPT_MAX` | `5` | Maximum submitit requeues on preemption |
|
|
474
477
|
| `FURU_CANCELLED_IS_PREEMPTED` | `false` | Treat SLURM CANCELLED as preempted |
|
|
475
|
-
| `
|
|
478
|
+
| `SLURM_JOB_ID` | unset | Read-only; set by Slurm to record job id and enable submitit context |
|
|
476
479
|
|
|
477
|
-
Local `.env` files are loaded automatically
|
|
480
|
+
Local `.env` files are not loaded automatically. Call `furu.load_env()` when you
|
|
481
|
+
want to load `.env` values (requires `python-dotenv`).
|
|
482
|
+
|
|
483
|
+
### Test and CI Environment Variables
|
|
484
|
+
|
|
485
|
+
| Variable | Default | Description |
|
|
486
|
+
|----------|---------|-------------|
|
|
487
|
+
| `FURU_DASHBOARD_DEV_DATA_DIR` | unset | Override data dir for `make dashboard-dev` (defaults to a temp dir) |
|
|
488
|
+
| `FURU_E2E_DATA_DIR` | unset | Required for Playwright e2e runs; used as the data root and to set `FURU_PATH` |
|
|
489
|
+
| `CI` | unset | Enables CI-friendly Playwright settings (retries, single worker, traces, screenshots, video) |
|
|
478
490
|
|
|
479
491
|
### Programmatic Configuration
|
|
480
492
|
|
|
@@ -487,7 +499,7 @@ furu.set_furu_root(Path("/my/storage"))
|
|
|
487
499
|
root = furu.get_furu_root()
|
|
488
500
|
|
|
489
501
|
# Access config directly
|
|
490
|
-
furu.FURU_CONFIG.
|
|
502
|
+
furu.FURU_CONFIG.record_git = "uncached"
|
|
491
503
|
furu.FURU_CONFIG.poll_interval = 5.0
|
|
492
504
|
```
|
|
493
505
|
|
|
@@ -1,46 +1,46 @@
|
|
|
1
1
|
furu/__init__.py,sha256=Z8VssTuQm2nH7bgB8SQc8pXsNGc-H1QGHFffKzNzqk8,2018
|
|
2
2
|
furu/adapters/__init__.py,sha256=onLzEj9hccPK15g8a8va2T19nqQXoxb9rQlJIjKSKnE,69
|
|
3
3
|
furu/adapters/submitit.py,sha256=FV3XEUSQuS5vIyzkW-Iuqtf8SRL-fsokPG67u7tMF5I,7276
|
|
4
|
-
furu/config.py,sha256=
|
|
4
|
+
furu/config.py,sha256=UGnH8QAKMUgrGMGNkfBgLXideXEpDlozUSsX9iNN8Lw,6844
|
|
5
5
|
furu/core/__init__.py,sha256=6hH7i6r627c0FZn6eQVsSG7LD4QmTta6iQw0AiPQPTM,156
|
|
6
|
-
furu/core/furu.py,sha256=
|
|
7
|
-
furu/core/list.py,sha256=
|
|
6
|
+
furu/core/furu.py,sha256=tGUtHVAgSV_oKeW5hlSH5h6OvZG1h4BDBltpjFyJByQ,61375
|
|
7
|
+
furu/core/list.py,sha256=QaGSh8NFg1K2WFncM8duOYQ6KLZ6EW2pRLArN_e5Juw,3662
|
|
8
8
|
furu/dashboard/__init__.py,sha256=ziAordJfkbbXNIM7iA9O7vR2gsCq34AInYiMYOCfWOc,362
|
|
9
9
|
furu/dashboard/__main__.py,sha256=cNs65IMl4kwZFpxa9xLXmFSy4-M5D1X1ZBfTDxW11vo,144
|
|
10
10
|
furu/dashboard/api/__init__.py,sha256=9-WyWOt-VQJJBIsdW29D-7JvR-BivJd9G_SRaRptCz0,80
|
|
11
11
|
furu/dashboard/api/models.py,sha256=SCu-kLJyW7dwSKswdgQNS3wQuj25ORs0pHkvX9xBbo4,4767
|
|
12
12
|
furu/dashboard/api/routes.py,sha256=iZez0khIUvbgfeSoy1BJvmoEEbgUrdSQA8SN8iAIkM8,4813
|
|
13
13
|
furu/dashboard/frontend/dist/assets/index-BXAIKNNr.css,sha256=qhsN0Td3mM-GAR8mZ0CtocynABLKa1ncl9ioDrTKOIQ,34768
|
|
14
|
-
furu/dashboard/frontend/dist/assets/index-
|
|
14
|
+
furu/dashboard/frontend/dist/assets/index-BjyrY-Zz.js,sha256=fItsQ--Dzobq5KdUcuqDi4txM2-NNqx8JET5Lwkwf7U,544515
|
|
15
15
|
furu/dashboard/frontend/dist/favicon.svg,sha256=3TSLHNZITFe3JTPoYHZnDgiGsJxIzf39v97l2A1Hodo,369
|
|
16
|
-
furu/dashboard/frontend/dist/index.html,sha256=
|
|
16
|
+
furu/dashboard/frontend/dist/index.html,sha256=Ig-j0qgTXBSge0GN7PaM7mcLnuRhRMQmkTZjU1wmTXY,810
|
|
17
17
|
furu/dashboard/main.py,sha256=gj9Cdj2qyaSCEkmfNHUMQXlXv6GpWTQ9IZEi7WzlCSo,4463
|
|
18
18
|
furu/dashboard/scanner.py,sha256=qXCvkvFByBc09TUdth5Js67rS8zpRBlRkVQ9dJ7YbdE,34696
|
|
19
19
|
furu/errors.py,sha256=FFbV4M0-ipVGizv5ee80L-NZFVjaRjy8i19mClr6R0g,3959
|
|
20
20
|
furu/execution/__init__.py,sha256=ixVw1Shvg2ulS597OYYeGgSSTwv25j_McuQdDXIiEL8,625
|
|
21
21
|
furu/execution/context.py,sha256=0tAbM0azqEus8hknf_A9-Zs9Sq99bnUkFyV4RO4ZMRU,666
|
|
22
|
-
furu/execution/local.py,sha256=
|
|
22
|
+
furu/execution/local.py,sha256=SXUH9PfcCAeHSZYrTP1YNjl2fV7vqzpZXZQzmtIcVMg,7137
|
|
23
23
|
furu/execution/paths.py,sha256=0MfQk5Kh7bxvJiWvG40TJe7RF5Q5Na6uvi6qV0OT3Vc,460
|
|
24
|
-
furu/execution/plan.py,sha256=
|
|
24
|
+
furu/execution/plan.py,sha256=bEnzFlBVN3vGKb_0a03sOEtJYRkLSJkIaNzGnxjZTo4,9928
|
|
25
25
|
furu/execution/plan_utils.py,sha256=TAQqlPeJfOdH2MT-X7g3j1Se_0e4oKvG0tJaWC1kM40,381
|
|
26
|
-
furu/execution/slurm_dag.py,sha256=
|
|
27
|
-
furu/execution/slurm_pool.py,sha256=
|
|
28
|
-
furu/execution/slurm_spec.py,sha256=
|
|
26
|
+
furu/execution/slurm_dag.py,sha256=xh9EUGdPZaAH3UfcRqo6MsKYBIV-UW3_7owY8kLOwz4,9392
|
|
27
|
+
furu/execution/slurm_pool.py,sha256=ft76Gp-HgFWWjGvDclUChLOjY1rvhhfkP5mxhK3ViQk,30395
|
|
28
|
+
furu/execution/slurm_spec.py,sha256=DG8BF4FCga2ZXsqGUvfNibk6II40JcShVZ4jTwxTdec,977
|
|
29
29
|
furu/execution/submitit_factory.py,sha256=B2vkDtmscuAX0sBaj9V5pNlgOtkkV35yJ1fZ7A-DSvU,1119
|
|
30
30
|
furu/migrate.py,sha256=x_Uh7oXAv40L5ZAHJhdnw-o7ct56rWUSZLbHHfRObeY,1313
|
|
31
|
-
furu/migration.py,sha256=
|
|
31
|
+
furu/migration.py,sha256=EYWULuH8lEVvESthO2qEF95WJTo1Uj6d4L6VU2zmWpw,31350
|
|
32
32
|
furu/runtime/__init__.py,sha256=fQqE7wUuWunLD73Vm3lss7BFSij3UVxXOKQXBAOS8zw,504
|
|
33
|
-
furu/runtime/env.py,sha256=
|
|
34
|
-
furu/runtime/logging.py,sha256=
|
|
33
|
+
furu/runtime/env.py,sha256=lb-LWl-1EM_CP8sy0z3HAY20NXQ-v3QdOgte1i0HYVA,214
|
|
34
|
+
furu/runtime/logging.py,sha256=Xni1hWyH21bKc6D2owBZzThsj6q8yQOBD9zUrDS4jtI,10760
|
|
35
35
|
furu/runtime/tracebacks.py,sha256=PGCuOq8QkWSoun791gjUXM8frOP2wWV8IBlqaA4nuGE,1631
|
|
36
36
|
furu/serialization/__init__.py,sha256=L7oHuIbxdSh7GCY3thMQnDwlt_ERH-TMy0YKEAZLrPs,341
|
|
37
37
|
furu/serialization/migrations.py,sha256=HD5g8JCBdH3Y0rHJYc4Ug1IXBVcUDxLE7nfiXZnXcUE,7772
|
|
38
38
|
furu/serialization/serializer.py,sha256=_nfUaAOy_KHegvfXlpPh4rCuvkzalJva75OvDg5nXiI,10114
|
|
39
39
|
furu/storage/__init__.py,sha256=cLLL-GPpSu9C72Mdk5S6TGu3g-SnBfEuxzfpx5ZJPtw,616
|
|
40
|
-
furu/storage/metadata.py,sha256=
|
|
41
|
-
furu/storage/migration.py,sha256=
|
|
42
|
-
furu/storage/state.py,sha256=
|
|
43
|
-
furu-0.0.
|
|
44
|
-
furu-0.0.
|
|
45
|
-
furu-0.0.
|
|
46
|
-
furu-0.0.
|
|
40
|
+
furu/storage/metadata.py,sha256=fJ_0G0vWRl9vNb7IigjXd__aokTok2ZHowmttoXjTsM,9581
|
|
41
|
+
furu/storage/migration.py,sha256=FNExLdPu1ekKZR2XJkAgags9U8pV2FfkKAECSXkSra8,2585
|
|
42
|
+
furu/storage/state.py,sha256=kcIfAwdKWT8Q2ElbC5qofQC6noS_k6eNSPkNAdYXoaY,43707
|
|
43
|
+
furu-0.0.5.dist-info/WHEEL,sha256=e_m4S054HL0hyR3CpOk-b7Q7fDX6BuFkgL5OjAExXas,80
|
|
44
|
+
furu-0.0.5.dist-info/entry_points.txt,sha256=hZkjtFzNlb33Zk-aUfLMRj-XgVDxdT82-JXG9d4bu2E,60
|
|
45
|
+
furu-0.0.5.dist-info/METADATA,sha256=1ugdjmF6ECLzzO21v123_GdyMmSb6tni6avp6YJRbG4,17101
|
|
46
|
+
furu-0.0.5.dist-info/RECORD,,
|
|
File without changes
|