furu 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- furu/__init__.py +3 -1
- furu/config.py +8 -2
- furu/core/__init__.py +2 -2
- furu/core/furu.py +427 -66
- furu/dashboard/frontend/dist/assets/{index-CbdDfSOZ.css → index-BXAIKNNr.css} +1 -1
- furu/dashboard/frontend/dist/assets/{index-DDv_TYB_.js → index-DS3FsqcY.js} +3 -3
- furu/dashboard/frontend/dist/index.html +2 -2
- furu/errors.py +47 -5
- furu/migration.py +8 -4
- furu/serialization/serializer.py +40 -2
- furu/storage/metadata.py +17 -5
- furu/storage/state.py +44 -6
- {furu-0.0.2.dist-info → furu-0.0.3.dist-info}/METADATA +14 -1
- {furu-0.0.2.dist-info → furu-0.0.3.dist-info}/RECORD +16 -16
- {furu-0.0.2.dist-info → furu-0.0.3.dist-info}/WHEEL +0 -0
- {furu-0.0.2.dist-info → furu-0.0.3.dist-info}/entry_points.txt +0 -0
furu/core/furu.py
CHANGED
|
@@ -10,12 +10,29 @@ import traceback
|
|
|
10
10
|
from abc import ABC, abstractmethod
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
from types import FrameType
|
|
13
|
-
from typing import
|
|
13
|
+
from typing import (
|
|
14
|
+
AbstractSet,
|
|
15
|
+
Any,
|
|
16
|
+
Callable,
|
|
17
|
+
ClassVar,
|
|
18
|
+
Hashable,
|
|
19
|
+
Mapping,
|
|
20
|
+
Protocol,
|
|
21
|
+
Self,
|
|
22
|
+
Sequence,
|
|
23
|
+
TypedDict,
|
|
24
|
+
TypeAlias,
|
|
25
|
+
TypeVar,
|
|
26
|
+
cast,
|
|
27
|
+
overload,
|
|
28
|
+
)
|
|
14
29
|
|
|
15
30
|
import chz
|
|
16
31
|
import submitit
|
|
17
32
|
from typing_extensions import dataclass_transform
|
|
18
33
|
|
|
34
|
+
from chz.field import Field as ChzField
|
|
35
|
+
|
|
19
36
|
from ..adapters import SubmititAdapter
|
|
20
37
|
from ..adapters.submitit import SubmititJob
|
|
21
38
|
from ..config import FURU_CONFIG
|
|
@@ -41,7 +58,6 @@ from ..storage import (
|
|
|
41
58
|
from ..storage.state import (
|
|
42
59
|
_FuruState,
|
|
43
60
|
_OwnerDict,
|
|
44
|
-
_StateAttemptFailed,
|
|
45
61
|
_StateAttemptQueued,
|
|
46
62
|
_StateAttemptRunning,
|
|
47
63
|
_StateResultAbsent,
|
|
@@ -177,6 +193,29 @@ class Furu[T](ABC):
|
|
|
177
193
|
"""Validate that result is complete and correct (override if needed)."""
|
|
178
194
|
return True
|
|
179
195
|
|
|
196
|
+
def _dependencies(self: Self) -> "DependencySpec | None":
|
|
197
|
+
"""Return extra dependencies not captured by fields."""
|
|
198
|
+
return None
|
|
199
|
+
|
|
200
|
+
def _get_dependencies(self: Self, *, recursive: bool = True) -> list["Furu"]:
|
|
201
|
+
"""Collect Furu dependencies from fields and `_dependencies()`."""
|
|
202
|
+
seen = {self._furu_hash}
|
|
203
|
+
dependencies: list[Furu] = []
|
|
204
|
+
_collect_dependencies(self, dependencies, seen, recursive=recursive)
|
|
205
|
+
return dependencies
|
|
206
|
+
|
|
207
|
+
def _dependency_hashes(self: Self) -> list[str]:
|
|
208
|
+
dependencies = _direct_dependencies(self)
|
|
209
|
+
if not dependencies:
|
|
210
|
+
return []
|
|
211
|
+
|
|
212
|
+
digests: set[str] = set()
|
|
213
|
+
for dependency in dependencies:
|
|
214
|
+
if dependency is self:
|
|
215
|
+
raise ValueError("Furu dependencies cannot include self")
|
|
216
|
+
digests.add(dependency._furu_hash)
|
|
217
|
+
return sorted(digests)
|
|
218
|
+
|
|
180
219
|
def _invalidate_cached_success(self: Self, directory: Path, *, reason: str) -> None:
|
|
181
220
|
logger = get_logger()
|
|
182
221
|
logger.warning(
|
|
@@ -275,20 +314,33 @@ class Furu[T](ABC):
|
|
|
275
314
|
return MigrationManager.read_migration(self._base_furu_dir())
|
|
276
315
|
|
|
277
316
|
@overload
|
|
278
|
-
def load_or_create(
|
|
317
|
+
def load_or_create(
|
|
318
|
+
self,
|
|
319
|
+
executor: submitit.Executor,
|
|
320
|
+
*,
|
|
321
|
+
retry_failed: bool | None = None,
|
|
322
|
+
) -> T | submitit.Job[T]: ...
|
|
279
323
|
|
|
280
324
|
@overload
|
|
281
|
-
def load_or_create(
|
|
325
|
+
def load_or_create(
|
|
326
|
+
self,
|
|
327
|
+
executor: None = None,
|
|
328
|
+
*,
|
|
329
|
+
retry_failed: bool | None = None,
|
|
330
|
+
) -> T: ...
|
|
282
331
|
|
|
283
332
|
def load_or_create(
|
|
284
333
|
self: Self,
|
|
285
334
|
executor: submitit.Executor | None = None,
|
|
335
|
+
*,
|
|
336
|
+
retry_failed: bool | None = None,
|
|
286
337
|
) -> T | submitit.Job[T]:
|
|
287
338
|
"""
|
|
288
339
|
Load result if it exists, computing if necessary.
|
|
289
340
|
|
|
290
341
|
Args:
|
|
291
342
|
executor: Optional executor for batch submission (e.g., submitit.Executor)
|
|
343
|
+
retry_failed: Whether to retry failed results (default uses FURU_RETRY_FAILED)
|
|
292
344
|
|
|
293
345
|
Returns:
|
|
294
346
|
Result if wait=True, job handle if wait=False, or None if already exists
|
|
@@ -299,6 +351,9 @@ class Furu[T](ABC):
|
|
|
299
351
|
logger = get_logger()
|
|
300
352
|
parent_holder = current_holder()
|
|
301
353
|
has_parent = parent_holder is not None and parent_holder is not self
|
|
354
|
+
retry_failed_effective = (
|
|
355
|
+
retry_failed if retry_failed is not None else FURU_CONFIG.retry_failed
|
|
356
|
+
)
|
|
302
357
|
if has_parent:
|
|
303
358
|
logger.debug(
|
|
304
359
|
"dep: begin %s %s %s",
|
|
@@ -380,6 +435,16 @@ class Furu[T](ABC):
|
|
|
380
435
|
|
|
381
436
|
state0 = StateManager.read_state(directory)
|
|
382
437
|
|
|
438
|
+
if (
|
|
439
|
+
isinstance(state0.result, _StateResultFailed)
|
|
440
|
+
and not retry_failed_effective
|
|
441
|
+
):
|
|
442
|
+
raise self._build_failed_state_error(
|
|
443
|
+
directory,
|
|
444
|
+
state0,
|
|
445
|
+
message="Computation previously failed",
|
|
446
|
+
)
|
|
447
|
+
|
|
383
448
|
needs_reconcile = True
|
|
384
449
|
if isinstance(state0.result, _StateResultSuccess):
|
|
385
450
|
# Double check logic if we fell through to here (e.g. race condition or invalidation above)
|
|
@@ -462,7 +527,8 @@ class Furu[T](ABC):
|
|
|
462
527
|
# Synchronous execution
|
|
463
528
|
if executor is None:
|
|
464
529
|
status, created_here, result = self._run_locally(
|
|
465
|
-
start_time=start_time
|
|
530
|
+
start_time=start_time,
|
|
531
|
+
allow_failed=retry_failed_effective,
|
|
466
532
|
)
|
|
467
533
|
if status == "success":
|
|
468
534
|
ok = True
|
|
@@ -478,19 +544,10 @@ class Furu[T](ABC):
|
|
|
478
544
|
)
|
|
479
545
|
return self._load()
|
|
480
546
|
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
if isinstance(attempt, _StateAttemptFailed)
|
|
486
|
-
else None
|
|
487
|
-
)
|
|
488
|
-
suffix = (
|
|
489
|
-
f": {message}" if isinstance(message, str) and message else ""
|
|
490
|
-
)
|
|
491
|
-
raise FuruComputeError(
|
|
492
|
-
f"Computation {status}{suffix}",
|
|
493
|
-
StateManager.get_state_path(directory),
|
|
547
|
+
raise self._build_failed_state_error(
|
|
548
|
+
directory,
|
|
549
|
+
None,
|
|
550
|
+
message="Computation previously failed",
|
|
494
551
|
)
|
|
495
552
|
|
|
496
553
|
# Asynchronous execution with submitit
|
|
@@ -504,7 +561,12 @@ class Furu[T](ABC):
|
|
|
504
561
|
"load_or_create: %s -> submitit submit_once()",
|
|
505
562
|
self.__class__.__name__,
|
|
506
563
|
)
|
|
507
|
-
job = self._submit_once(
|
|
564
|
+
job = self._submit_once(
|
|
565
|
+
adapter,
|
|
566
|
+
directory,
|
|
567
|
+
None,
|
|
568
|
+
allow_failed=retry_failed_effective,
|
|
569
|
+
)
|
|
508
570
|
ok = True
|
|
509
571
|
return cast(submitit.Job[T], job)
|
|
510
572
|
finally:
|
|
@@ -547,12 +609,56 @@ class Furu[T](ABC):
|
|
|
547
609
|
},
|
|
548
610
|
)
|
|
549
611
|
|
|
612
|
+
def _add_exception_breadcrumbs(self, exc: BaseException, directory: Path) -> None:
|
|
613
|
+
if not hasattr(exc, "add_note"):
|
|
614
|
+
return
|
|
615
|
+
state_path = StateManager.get_state_path(directory)
|
|
616
|
+
log_path = StateManager.get_internal_dir(directory) / "furu.log"
|
|
617
|
+
note = (
|
|
618
|
+
f"Furu directory: {directory}\n"
|
|
619
|
+
f"State file: {state_path}\n"
|
|
620
|
+
f"Log file: {log_path}"
|
|
621
|
+
)
|
|
622
|
+
exc.add_note(note)
|
|
623
|
+
|
|
624
|
+
@staticmethod
|
|
625
|
+
def _failed_state_hints() -> list[str]:
|
|
626
|
+
return [
|
|
627
|
+
"To retry this failed artifact: set FURU_RETRY_FAILED=1 or call load_or_create(retry_failed=True).",
|
|
628
|
+
"To inspect details: open the state file and furu.log shown above.",
|
|
629
|
+
]
|
|
630
|
+
|
|
631
|
+
def _build_failed_state_error(
|
|
632
|
+
self,
|
|
633
|
+
directory: Path,
|
|
634
|
+
state: _FuruState | None,
|
|
635
|
+
*,
|
|
636
|
+
message: str,
|
|
637
|
+
) -> FuruComputeError:
|
|
638
|
+
current_state = state or StateManager.read_state(directory)
|
|
639
|
+
attempt = current_state.attempt
|
|
640
|
+
error = getattr(attempt, "error", None) if attempt is not None else None
|
|
641
|
+
return FuruComputeError(
|
|
642
|
+
message,
|
|
643
|
+
StateManager.get_state_path(directory),
|
|
644
|
+
recorded_error_type=getattr(error, "type", None),
|
|
645
|
+
recorded_error_message=getattr(error, "message", None),
|
|
646
|
+
recorded_traceback=getattr(error, "traceback", None),
|
|
647
|
+
hints=self._failed_state_hints(),
|
|
648
|
+
)
|
|
649
|
+
|
|
650
|
+
def _effective_max_wait_time_sec(self) -> float | None:
|
|
651
|
+
if FURU_CONFIG.max_wait_time_sec is not None:
|
|
652
|
+
return FURU_CONFIG.max_wait_time_sec
|
|
653
|
+
return self._max_wait_time_sec
|
|
654
|
+
|
|
550
655
|
def _check_timeout(self, start_time: float) -> None:
|
|
551
656
|
"""Check if operation has timed out."""
|
|
552
|
-
|
|
553
|
-
|
|
657
|
+
max_wait_time = self._effective_max_wait_time_sec()
|
|
658
|
+
if max_wait_time is not None:
|
|
659
|
+
if time.time() - start_time > max_wait_time:
|
|
554
660
|
raise FuruWaitTimeout(
|
|
555
|
-
f"Furu operation timed out after {
|
|
661
|
+
f"Furu operation timed out after {max_wait_time} seconds."
|
|
556
662
|
)
|
|
557
663
|
|
|
558
664
|
def _is_migrated_state(self, directory: Path) -> bool:
|
|
@@ -613,8 +719,10 @@ class Furu[T](ABC):
|
|
|
613
719
|
event: dict[str, str | int] = {
|
|
614
720
|
"type": "migration_overwrite",
|
|
615
721
|
"policy": record.policy,
|
|
616
|
-
"
|
|
617
|
-
"
|
|
722
|
+
"from_namespace": record.from_namespace,
|
|
723
|
+
"from_hash": record.from_hash,
|
|
724
|
+
"to_namespace": record.to_namespace,
|
|
725
|
+
"to_hash": record.to_hash,
|
|
618
726
|
"reason": reason,
|
|
619
727
|
}
|
|
620
728
|
StateManager.append_event(directory, event.copy())
|
|
@@ -625,6 +733,8 @@ class Furu[T](ABC):
|
|
|
625
733
|
adapter: SubmititAdapter,
|
|
626
734
|
directory: Path,
|
|
627
735
|
on_job_id: Callable[[str], None] | None,
|
|
736
|
+
*,
|
|
737
|
+
allow_failed: bool,
|
|
628
738
|
) -> SubmititJob | None:
|
|
629
739
|
"""Submit job once without waiting (fire-and-forget mode)."""
|
|
630
740
|
logger = get_logger()
|
|
@@ -693,7 +803,7 @@ class Furu[T](ABC):
|
|
|
693
803
|
scheduler={},
|
|
694
804
|
)
|
|
695
805
|
|
|
696
|
-
job = adapter.submit(lambda: self._worker_entry())
|
|
806
|
+
job = adapter.submit(lambda: self._worker_entry(allow_failed=allow_failed))
|
|
697
807
|
|
|
698
808
|
# Save job handle and watch for job ID
|
|
699
809
|
adapter.pickle_job(job, directory)
|
|
@@ -729,7 +839,7 @@ class Furu[T](ABC):
|
|
|
729
839
|
finally:
|
|
730
840
|
StateManager.release_lock(lock_fd, lock_path)
|
|
731
841
|
|
|
732
|
-
def _worker_entry(self: Self) -> None:
|
|
842
|
+
def _worker_entry(self: Self, *, allow_failed: bool | None = None) -> None:
|
|
733
843
|
"""Entry point for worker process (called by submitit or locally)."""
|
|
734
844
|
with enter_holder(self):
|
|
735
845
|
logger = get_logger()
|
|
@@ -737,6 +847,9 @@ class Furu[T](ABC):
|
|
|
737
847
|
directory.mkdir(parents=True, exist_ok=True)
|
|
738
848
|
|
|
739
849
|
env_info = self._collect_submitit_env()
|
|
850
|
+
allow_failed_effective = (
|
|
851
|
+
allow_failed if allow_failed is not None else FURU_CONFIG.retry_failed
|
|
852
|
+
)
|
|
740
853
|
|
|
741
854
|
try:
|
|
742
855
|
with compute_lock(
|
|
@@ -758,19 +871,23 @@ class Furu[T](ABC):
|
|
|
758
871
|
poll_interval_sec=FURU_CONFIG.poll_interval,
|
|
759
872
|
wait_log_every_sec=FURU_CONFIG.wait_log_every_sec,
|
|
760
873
|
reconcile_fn=lambda d: self._reconcile(d),
|
|
874
|
+
allow_failed=allow_failed_effective,
|
|
761
875
|
) as ctx:
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
876
|
+
stage = "metadata"
|
|
877
|
+
try:
|
|
878
|
+
# Refresh metadata (now safe - attempt is already recorded)
|
|
879
|
+
metadata = MetadataManager.create_metadata(
|
|
880
|
+
self, directory, ignore_diff=FURU_CONFIG.ignore_git_diff
|
|
881
|
+
)
|
|
882
|
+
MetadataManager.write_metadata(metadata, directory)
|
|
767
883
|
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
884
|
+
# Set up signal handlers
|
|
885
|
+
stage = "signal handler setup"
|
|
886
|
+
self._setup_signal_handlers(
|
|
887
|
+
directory, ctx.stop_heartbeat, attempt_id=ctx.attempt_id
|
|
888
|
+
)
|
|
772
889
|
|
|
773
|
-
|
|
890
|
+
stage = "_create"
|
|
774
891
|
# Run computation
|
|
775
892
|
logger.debug(
|
|
776
893
|
"_create: begin %s %s %s",
|
|
@@ -798,13 +915,23 @@ class Furu[T](ABC):
|
|
|
798
915
|
extra={"furu_console_only": True},
|
|
799
916
|
)
|
|
800
917
|
except Exception as e:
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
918
|
+
if stage == "_create":
|
|
919
|
+
logger.error(
|
|
920
|
+
"_create failed %s %s %s",
|
|
921
|
+
self.__class__.__name__,
|
|
922
|
+
self._furu_hash,
|
|
923
|
+
directory,
|
|
924
|
+
extra={"furu_file_only": True},
|
|
925
|
+
)
|
|
926
|
+
else:
|
|
927
|
+
logger.error(
|
|
928
|
+
"attempt failed (%s) %s %s %s",
|
|
929
|
+
stage,
|
|
930
|
+
self.__class__.__name__,
|
|
931
|
+
self._furu_hash,
|
|
932
|
+
directory,
|
|
933
|
+
extra={"furu_file_only": True},
|
|
934
|
+
)
|
|
808
935
|
logger.error(
|
|
809
936
|
"%s", format_traceback(e), extra={"furu_file_only": True}
|
|
810
937
|
)
|
|
@@ -821,6 +948,18 @@ class Furu[T](ABC):
|
|
|
821
948
|
"traceback": tb,
|
|
822
949
|
},
|
|
823
950
|
)
|
|
951
|
+
self._add_exception_breadcrumbs(e, directory)
|
|
952
|
+
if stage != "_create":
|
|
953
|
+
message = (
|
|
954
|
+
"Failed to create metadata"
|
|
955
|
+
if stage == "metadata"
|
|
956
|
+
else "Failed to set up signal handlers"
|
|
957
|
+
)
|
|
958
|
+
raise FuruComputeError(
|
|
959
|
+
message,
|
|
960
|
+
StateManager.get_state_path(directory),
|
|
961
|
+
e,
|
|
962
|
+
) from e
|
|
824
963
|
raise
|
|
825
964
|
except FuruLockNotAcquired:
|
|
826
965
|
# Experiment already completed (success or failed), nothing to do
|
|
@@ -850,16 +989,22 @@ class Furu[T](ABC):
|
|
|
850
989
|
|
|
851
990
|
return info
|
|
852
991
|
|
|
853
|
-
def _run_locally(
|
|
992
|
+
def _run_locally(
|
|
993
|
+
self: Self,
|
|
994
|
+
start_time: float,
|
|
995
|
+
*,
|
|
996
|
+
allow_failed: bool,
|
|
997
|
+
) -> tuple[str, bool, T | None]:
|
|
854
998
|
"""Run computation locally, returning (status, created_here, result)."""
|
|
855
999
|
logger = get_logger()
|
|
856
1000
|
directory = self._base_furu_dir()
|
|
857
1001
|
|
|
858
1002
|
# Calculate remaining time for the lock wait
|
|
859
1003
|
max_wait: float | None = None
|
|
860
|
-
|
|
1004
|
+
max_wait_time = self._effective_max_wait_time_sec()
|
|
1005
|
+
if max_wait_time is not None:
|
|
861
1006
|
elapsed = time.time() - start_time
|
|
862
|
-
max_wait = max(0.0,
|
|
1007
|
+
max_wait = max(0.0, max_wait_time - elapsed)
|
|
863
1008
|
|
|
864
1009
|
try:
|
|
865
1010
|
with compute_lock(
|
|
@@ -878,26 +1023,23 @@ class Furu[T](ABC):
|
|
|
878
1023
|
poll_interval_sec=FURU_CONFIG.poll_interval,
|
|
879
1024
|
wait_log_every_sec=FURU_CONFIG.wait_log_every_sec,
|
|
880
1025
|
reconcile_fn=lambda d: self._reconcile(d),
|
|
1026
|
+
allow_failed=allow_failed,
|
|
881
1027
|
) as ctx:
|
|
882
|
-
|
|
1028
|
+
stage = "metadata"
|
|
883
1029
|
try:
|
|
1030
|
+
# Create metadata (now safe - attempt is already recorded)
|
|
884
1031
|
metadata = MetadataManager.create_metadata(
|
|
885
1032
|
self, directory, ignore_diff=FURU_CONFIG.ignore_git_diff
|
|
886
1033
|
)
|
|
887
1034
|
MetadataManager.write_metadata(metadata, directory)
|
|
888
|
-
except Exception as e:
|
|
889
|
-
raise FuruComputeError(
|
|
890
|
-
"Failed to create metadata",
|
|
891
|
-
StateManager.get_state_path(directory),
|
|
892
|
-
e,
|
|
893
|
-
) from e
|
|
894
|
-
|
|
895
|
-
# Set up preemption handler
|
|
896
|
-
self._setup_signal_handlers(
|
|
897
|
-
directory, ctx.stop_heartbeat, attempt_id=ctx.attempt_id
|
|
898
|
-
)
|
|
899
1035
|
|
|
900
|
-
|
|
1036
|
+
# Set up preemption handler
|
|
1037
|
+
stage = "signal handler setup"
|
|
1038
|
+
self._setup_signal_handlers(
|
|
1039
|
+
directory, ctx.stop_heartbeat, attempt_id=ctx.attempt_id
|
|
1040
|
+
)
|
|
1041
|
+
|
|
1042
|
+
stage = "_create"
|
|
901
1043
|
# Run the computation
|
|
902
1044
|
logger.debug(
|
|
903
1045
|
"_create: begin %s %s %s",
|
|
@@ -926,13 +1068,23 @@ class Furu[T](ABC):
|
|
|
926
1068
|
)
|
|
927
1069
|
return "success", True, result
|
|
928
1070
|
except Exception as e:
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
1071
|
+
if stage == "_create":
|
|
1072
|
+
logger.error(
|
|
1073
|
+
"_create failed %s %s %s",
|
|
1074
|
+
self.__class__.__name__,
|
|
1075
|
+
self._furu_hash,
|
|
1076
|
+
directory,
|
|
1077
|
+
extra={"furu_file_only": True},
|
|
1078
|
+
)
|
|
1079
|
+
else:
|
|
1080
|
+
logger.error(
|
|
1081
|
+
"attempt failed (%s) %s %s %s",
|
|
1082
|
+
stage,
|
|
1083
|
+
self.__class__.__name__,
|
|
1084
|
+
self._furu_hash,
|
|
1085
|
+
directory,
|
|
1086
|
+
extra={"furu_file_only": True},
|
|
1087
|
+
)
|
|
936
1088
|
logger.error(
|
|
937
1089
|
"%s", format_traceback(e), extra={"furu_file_only": True}
|
|
938
1090
|
)
|
|
@@ -950,6 +1102,18 @@ class Furu[T](ABC):
|
|
|
950
1102
|
"traceback": tb,
|
|
951
1103
|
},
|
|
952
1104
|
)
|
|
1105
|
+
self._add_exception_breadcrumbs(e, directory)
|
|
1106
|
+
if stage != "_create":
|
|
1107
|
+
message = (
|
|
1108
|
+
"Failed to create metadata"
|
|
1109
|
+
if stage == "metadata"
|
|
1110
|
+
else "Failed to set up signal handlers"
|
|
1111
|
+
)
|
|
1112
|
+
raise FuruComputeError(
|
|
1113
|
+
message,
|
|
1114
|
+
StateManager.get_state_path(directory),
|
|
1115
|
+
e,
|
|
1116
|
+
) from e
|
|
953
1117
|
raise
|
|
954
1118
|
except FuruLockNotAcquired:
|
|
955
1119
|
# Lock couldn't be acquired because experiment already completed
|
|
@@ -998,4 +1162,201 @@ class Furu[T](ABC):
|
|
|
998
1162
|
signal.signal(sig, handle_signal)
|
|
999
1163
|
|
|
1000
1164
|
|
|
1165
|
+
class DependencyChzSpec(Protocol):
|
|
1166
|
+
__chz_fields__: dict[str, ChzField]
|
|
1167
|
+
|
|
1168
|
+
|
|
1169
|
+
DependencySequence: TypeAlias = Sequence[Furu]
|
|
1170
|
+
DependencySet: TypeAlias = AbstractSet[Furu]
|
|
1171
|
+
DependencyMapping: TypeAlias = Mapping[str, Furu]
|
|
1172
|
+
DependencyCollection: TypeAlias = DependencySequence | DependencySet | DependencyMapping
|
|
1173
|
+
DependencyValue: TypeAlias = Furu | DependencyCollection
|
|
1174
|
+
DependencySpec: TypeAlias = DependencyValue | DependencyChzSpec
|
|
1175
|
+
DependencyLeaf: TypeAlias = str | int | float | bool | None | Path | bytes
|
|
1176
|
+
DependencyScanValue: TypeAlias = (
|
|
1177
|
+
DependencyLeaf
|
|
1178
|
+
| Furu
|
|
1179
|
+
| Mapping[Hashable, "DependencyScanValue"]
|
|
1180
|
+
| Sequence["DependencyScanValue"]
|
|
1181
|
+
| AbstractSet["DependencyScanValue"]
|
|
1182
|
+
| DependencyChzSpec
|
|
1183
|
+
)
|
|
1184
|
+
|
|
1185
|
+
|
|
1186
|
+
def _collect_dependencies(
|
|
1187
|
+
obj: Furu,
|
|
1188
|
+
dependencies: list[Furu],
|
|
1189
|
+
seen: set[str],
|
|
1190
|
+
*,
|
|
1191
|
+
recursive: bool,
|
|
1192
|
+
) -> None:
|
|
1193
|
+
for dependency in _direct_dependencies(obj):
|
|
1194
|
+
digest = dependency._furu_hash
|
|
1195
|
+
if digest in seen:
|
|
1196
|
+
continue
|
|
1197
|
+
seen.add(digest)
|
|
1198
|
+
dependencies.append(dependency)
|
|
1199
|
+
if recursive:
|
|
1200
|
+
_collect_dependencies(
|
|
1201
|
+
dependency,
|
|
1202
|
+
dependencies,
|
|
1203
|
+
seen,
|
|
1204
|
+
recursive=recursive,
|
|
1205
|
+
)
|
|
1206
|
+
|
|
1207
|
+
|
|
1208
|
+
def _direct_dependencies(obj: Furu) -> list[Furu]:
|
|
1209
|
+
dependencies: list[Furu] = []
|
|
1210
|
+
for field in chz.chz_fields(obj).values():
|
|
1211
|
+
value = cast(DependencyScanValue, getattr(obj, field.logical_name))
|
|
1212
|
+
dependencies.extend(_collect_dependencies_from_value(value))
|
|
1213
|
+
extra = obj._dependencies()
|
|
1214
|
+
if extra is not None:
|
|
1215
|
+
dependencies.extend(_collect_dependencies_from_spec(extra, path="dependencies"))
|
|
1216
|
+
return dependencies
|
|
1217
|
+
|
|
1218
|
+
|
|
1219
|
+
def _collect_dependencies_from_value(value: DependencyScanValue) -> list[Furu]:
|
|
1220
|
+
dependencies: list[Furu] = []
|
|
1221
|
+
if isinstance(value, Furu):
|
|
1222
|
+
dependencies.append(value)
|
|
1223
|
+
return dependencies
|
|
1224
|
+
if isinstance(value, dict):
|
|
1225
|
+
mapping = cast(Mapping[Hashable, DependencyScanValue], value)
|
|
1226
|
+
for item in mapping.values():
|
|
1227
|
+
dependencies.extend(_collect_dependencies_from_value(item))
|
|
1228
|
+
return dependencies
|
|
1229
|
+
if isinstance(value, (list, tuple)):
|
|
1230
|
+
sequence = cast(Sequence[DependencyScanValue], value)
|
|
1231
|
+
for item in sequence:
|
|
1232
|
+
dependencies.extend(_collect_dependencies_from_value(item))
|
|
1233
|
+
return dependencies
|
|
1234
|
+
if isinstance(value, (set, frozenset)):
|
|
1235
|
+
items = _sorted_dependency_set(cast(AbstractSet[DependencyScanValue], value))
|
|
1236
|
+
for item in items:
|
|
1237
|
+
dependencies.extend(_collect_dependencies_from_value(item))
|
|
1238
|
+
return dependencies
|
|
1239
|
+
if chz.is_chz(value):
|
|
1240
|
+
for field in chz.chz_fields(value).values():
|
|
1241
|
+
field_value = cast(DependencyScanValue, getattr(value, field.logical_name))
|
|
1242
|
+
dependencies.extend(_collect_dependencies_from_value(field_value))
|
|
1243
|
+
return dependencies
|
|
1244
|
+
|
|
1245
|
+
|
|
1246
|
+
def _collect_dependencies_from_spec(value: DependencySpec, path: str) -> list[Furu]:
|
|
1247
|
+
if isinstance(value, Furu):
|
|
1248
|
+
return [value]
|
|
1249
|
+
if isinstance(value, dict):
|
|
1250
|
+
return _collect_dependencies_from_mapping(
|
|
1251
|
+
cast(Mapping[Hashable, DependencyValue], value),
|
|
1252
|
+
path,
|
|
1253
|
+
)
|
|
1254
|
+
if isinstance(value, (list, tuple)):
|
|
1255
|
+
return _collect_dependencies_from_sequence(
|
|
1256
|
+
cast(Sequence[DependencyValue], value),
|
|
1257
|
+
path,
|
|
1258
|
+
)
|
|
1259
|
+
if isinstance(value, (set, frozenset)):
|
|
1260
|
+
return _collect_dependencies_from_set(
|
|
1261
|
+
cast(AbstractSet[DependencyValue], value),
|
|
1262
|
+
path,
|
|
1263
|
+
)
|
|
1264
|
+
if chz.is_chz(value):
|
|
1265
|
+
dependencies: list[Furu] = []
|
|
1266
|
+
for field in chz.chz_fields(value).values():
|
|
1267
|
+
field_value = getattr(value, field.logical_name)
|
|
1268
|
+
field_path = f"{path}.{field.logical_name}"
|
|
1269
|
+
dependencies.extend(
|
|
1270
|
+
_collect_dependencies_from_value_spec(field_value, field_path)
|
|
1271
|
+
)
|
|
1272
|
+
return dependencies
|
|
1273
|
+
raise _dependency_type_error(path, value)
|
|
1274
|
+
|
|
1275
|
+
|
|
1276
|
+
def _collect_dependencies_from_value_spec(
|
|
1277
|
+
value: DependencyValue,
|
|
1278
|
+
path: str,
|
|
1279
|
+
) -> list[Furu]:
|
|
1280
|
+
if isinstance(value, Furu):
|
|
1281
|
+
return [value]
|
|
1282
|
+
if isinstance(value, dict):
|
|
1283
|
+
return _collect_dependencies_from_mapping(
|
|
1284
|
+
cast(Mapping[Hashable, DependencyValue], value),
|
|
1285
|
+
path,
|
|
1286
|
+
)
|
|
1287
|
+
if isinstance(value, (list, tuple)):
|
|
1288
|
+
return _collect_dependencies_from_sequence(
|
|
1289
|
+
cast(Sequence[DependencyValue], value),
|
|
1290
|
+
path,
|
|
1291
|
+
)
|
|
1292
|
+
if isinstance(value, (set, frozenset)):
|
|
1293
|
+
return _collect_dependencies_from_set(
|
|
1294
|
+
cast(AbstractSet[DependencyValue], value),
|
|
1295
|
+
path,
|
|
1296
|
+
)
|
|
1297
|
+
raise _dependency_type_error(path, value)
|
|
1298
|
+
|
|
1299
|
+
|
|
1300
|
+
def _collect_dependencies_from_mapping(
|
|
1301
|
+
mapping: Mapping[Hashable, DependencyValue],
|
|
1302
|
+
path: str,
|
|
1303
|
+
) -> list[Furu]:
|
|
1304
|
+
dependencies: list[Furu] = []
|
|
1305
|
+
for key, item in mapping.items():
|
|
1306
|
+
if not isinstance(item, Furu):
|
|
1307
|
+
raise _dependency_type_error(f"{path}[{key!r}]", item)
|
|
1308
|
+
dependencies.append(item)
|
|
1309
|
+
return dependencies
|
|
1310
|
+
|
|
1311
|
+
|
|
1312
|
+
def _collect_dependencies_from_sequence(
|
|
1313
|
+
sequence: Sequence[DependencyValue],
|
|
1314
|
+
path: str,
|
|
1315
|
+
) -> list[Furu]:
|
|
1316
|
+
dependencies: list[Furu] = []
|
|
1317
|
+
for index, item in enumerate(sequence):
|
|
1318
|
+
if not isinstance(item, Furu):
|
|
1319
|
+
raise _dependency_type_error(f"{path}[{index}]", item)
|
|
1320
|
+
dependencies.append(item)
|
|
1321
|
+
return dependencies
|
|
1322
|
+
|
|
1323
|
+
|
|
1324
|
+
def _collect_dependencies_from_set(
|
|
1325
|
+
values: AbstractSet[DependencyValue],
|
|
1326
|
+
path: str,
|
|
1327
|
+
) -> list[Furu]:
|
|
1328
|
+
dependencies: list[Furu] = []
|
|
1329
|
+
ordered = sorted(
|
|
1330
|
+
list(cast(AbstractSet[DependencyScanValue], values)),
|
|
1331
|
+
key=_dependency_sort_key,
|
|
1332
|
+
)
|
|
1333
|
+
for index, item in enumerate(ordered):
|
|
1334
|
+
if not isinstance(item, Furu):
|
|
1335
|
+
raise _dependency_type_error(f"{path}[{index}]", item)
|
|
1336
|
+
dependencies.append(item)
|
|
1337
|
+
return dependencies
|
|
1338
|
+
|
|
1339
|
+
|
|
1340
|
+
def _sorted_dependency_set(
|
|
1341
|
+
values: AbstractSet[DependencyScanValue],
|
|
1342
|
+
) -> list[DependencyScanValue]:
|
|
1343
|
+
return sorted(list(values), key=_dependency_sort_key)
|
|
1344
|
+
|
|
1345
|
+
|
|
1346
|
+
def _dependency_sort_key(value: DependencyScanValue) -> tuple[int, str]:
|
|
1347
|
+
if isinstance(value, Furu):
|
|
1348
|
+
return (0, value._furu_hash)
|
|
1349
|
+
return (1, f"{type(value).__name__}:{value!r}")
|
|
1350
|
+
|
|
1351
|
+
|
|
1352
|
+
def _dependency_type_error(
|
|
1353
|
+
path: str,
|
|
1354
|
+
value: DependencySpec | DependencyValue | DependencyScanValue,
|
|
1355
|
+
) -> TypeError:
|
|
1356
|
+
return TypeError(
|
|
1357
|
+
f"{path} must be a Furu instance or a collection of Furu instances; "
|
|
1358
|
+
f"got {type(value).__name__}"
|
|
1359
|
+
)
|
|
1360
|
+
|
|
1361
|
+
|
|
1001
1362
|
_H = TypeVar("_H", bound=Furu, covariant=True)
|