furu 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- furu/__init__.py +3 -1
- furu/config.py +85 -5
- furu/core/__init__.py +2 -2
- furu/core/furu.py +438 -75
- furu/dashboard/frontend/dist/assets/{index-CbdDfSOZ.css → index-BXAIKNNr.css} +1 -1
- furu/dashboard/frontend/dist/assets/{index-DDv_TYB_.js → index-DS3FsqcY.js} +3 -3
- furu/dashboard/frontend/dist/index.html +2 -2
- furu/errors.py +47 -5
- furu/migration.py +8 -4
- furu/serialization/serializer.py +40 -2
- furu/storage/metadata.py +17 -5
- furu/storage/state.py +115 -3
- {furu-0.0.1.dist-info → furu-0.0.3.dist-info}/METADATA +48 -20
- {furu-0.0.1.dist-info → furu-0.0.3.dist-info}/RECORD +19 -19
- {furu-0.0.1.dist-info → furu-0.0.3.dist-info}/WHEEL +1 -1
- {furu-0.0.1.dist-info → furu-0.0.3.dist-info}/entry_points.txt +1 -0
|
@@ -11,8 +11,8 @@
|
|
|
11
11
|
href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,600;0,700;1,400&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
12
12
|
rel="stylesheet"
|
|
13
13
|
/>
|
|
14
|
-
<script type="module" crossorigin src="/assets/index-
|
|
15
|
-
<link rel="stylesheet" crossorigin href="/assets/index-
|
|
14
|
+
<script type="module" crossorigin src="/assets/index-DS3FsqcY.js"></script>
|
|
15
|
+
<link rel="stylesheet" crossorigin href="/assets/index-BXAIKNNr.css">
|
|
16
16
|
</head>
|
|
17
17
|
<body>
|
|
18
18
|
<div id="root"></div>
|
furu/errors.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import traceback
|
|
2
|
+
from collections.abc import Sequence
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
|
|
4
5
|
|
|
@@ -17,13 +18,25 @@ MISSING = _FuruMissing()
|
|
|
17
18
|
class FuruError(Exception):
|
|
18
19
|
"""Base exception for Furu errors."""
|
|
19
20
|
|
|
20
|
-
|
|
21
|
+
def __init__(self, message: str, *, hints: Sequence[str] | None = None):
|
|
22
|
+
super().__init__(message)
|
|
23
|
+
self.hints = list(hints or [])
|
|
24
|
+
|
|
25
|
+
def _format_hints(self) -> str:
|
|
26
|
+
if not self.hints:
|
|
27
|
+
return ""
|
|
28
|
+
lines = ["", "Hints:"]
|
|
29
|
+
lines.extend([f" - {hint}" for hint in self.hints])
|
|
30
|
+
return "\n".join(lines)
|
|
21
31
|
|
|
22
32
|
|
|
23
33
|
class FuruWaitTimeout(FuruError):
|
|
24
34
|
"""Raised when waiting for a result exceeds _max_wait_time_sec."""
|
|
25
35
|
|
|
26
|
-
|
|
36
|
+
def __str__(self) -> str:
|
|
37
|
+
msg = super().__str__()
|
|
38
|
+
msg += self._format_hints()
|
|
39
|
+
return msg
|
|
27
40
|
|
|
28
41
|
|
|
29
42
|
class FuruLockNotAcquired(FuruError):
|
|
@@ -40,16 +53,45 @@ class FuruComputeError(FuruError):
|
|
|
40
53
|
message: str,
|
|
41
54
|
state_path: Path,
|
|
42
55
|
original_error: Exception | None = None,
|
|
56
|
+
*,
|
|
57
|
+
recorded_error_type: str | None = None,
|
|
58
|
+
recorded_error_message: str | None = None,
|
|
59
|
+
recorded_traceback: str | None = None,
|
|
60
|
+
hints: Sequence[str] | None = None,
|
|
43
61
|
):
|
|
62
|
+
super().__init__(message, hints=hints)
|
|
44
63
|
self.state_path = state_path
|
|
45
64
|
self.original_error = original_error
|
|
46
|
-
|
|
65
|
+
self.recorded_error_type = recorded_error_type
|
|
66
|
+
self.recorded_error_message = recorded_error_message
|
|
67
|
+
self.recorded_traceback = recorded_traceback
|
|
47
68
|
|
|
48
69
|
def __str__(self) -> str:
|
|
49
70
|
msg = super().__str__() # ty: ignore[invalid-super-argument]
|
|
71
|
+
internal_dir = self.state_path.parent
|
|
72
|
+
furu_dir = internal_dir.parent
|
|
73
|
+
log_path = internal_dir / "furu.log"
|
|
74
|
+
|
|
75
|
+
msg += f"\n\nDirectory: {furu_dir}"
|
|
76
|
+
msg += f"\nState file: {self.state_path}"
|
|
77
|
+
msg += f"\nLog file: {log_path}"
|
|
78
|
+
|
|
79
|
+
if self.recorded_error_type or self.recorded_error_message:
|
|
80
|
+
msg += "\n\nRecorded error (from state.json):"
|
|
81
|
+
if self.recorded_error_type:
|
|
82
|
+
msg += f"\n Type: {self.recorded_error_type}"
|
|
83
|
+
if self.recorded_error_message:
|
|
84
|
+
msg += f"\n Message: {self.recorded_error_message}"
|
|
85
|
+
|
|
86
|
+
if self.recorded_traceback:
|
|
87
|
+
msg += f"\n\nRecorded traceback:\n{self.recorded_traceback}"
|
|
88
|
+
|
|
50
89
|
if self.original_error:
|
|
51
90
|
msg += f"\n\nOriginal error: {self.original_error}"
|
|
52
|
-
if
|
|
91
|
+
if (
|
|
92
|
+
hasattr(self.original_error, "__traceback__")
|
|
93
|
+
and self.original_error.__traceback__ is not None
|
|
94
|
+
):
|
|
53
95
|
tb = "".join(
|
|
54
96
|
traceback.format_exception(
|
|
55
97
|
type(self.original_error),
|
|
@@ -58,7 +100,7 @@ class FuruComputeError(FuruError):
|
|
|
58
100
|
)
|
|
59
101
|
)
|
|
60
102
|
msg += f"\n\nTraceback:\n{tb}"
|
|
61
|
-
msg +=
|
|
103
|
+
msg += self._format_hints()
|
|
62
104
|
return msg
|
|
63
105
|
|
|
64
106
|
|
furu/migration.py
CHANGED
|
@@ -507,8 +507,10 @@ def _apply_single_migration(
|
|
|
507
507
|
event: dict[str, str | int] = {
|
|
508
508
|
"type": "migrated",
|
|
509
509
|
"policy": policy,
|
|
510
|
-
"
|
|
511
|
-
"
|
|
510
|
+
"from_namespace": candidate.from_ref.namespace,
|
|
511
|
+
"from_hash": candidate.from_ref.furu_hash,
|
|
512
|
+
"to_namespace": candidate.to_ref.namespace,
|
|
513
|
+
"to_hash": candidate.to_ref.furu_hash,
|
|
512
514
|
}
|
|
513
515
|
if default_values is not None:
|
|
514
516
|
event["default_values"] = json.dumps(default_values, sort_keys=True)
|
|
@@ -519,8 +521,10 @@ def _apply_single_migration(
|
|
|
519
521
|
overwrite_event = {
|
|
520
522
|
"type": "migration_overwrite",
|
|
521
523
|
"policy": policy,
|
|
522
|
-
"
|
|
523
|
-
"
|
|
524
|
+
"from_namespace": candidate.from_ref.namespace,
|
|
525
|
+
"from_hash": candidate.from_ref.furu_hash,
|
|
526
|
+
"to_namespace": candidate.to_ref.namespace,
|
|
527
|
+
"to_hash": candidate.to_ref.furu_hash,
|
|
524
528
|
"reason": "force_overwrite",
|
|
525
529
|
}
|
|
526
530
|
StateManager.append_event(to_dir, overwrite_event)
|
furu/serialization/serializer.py
CHANGED
|
@@ -6,9 +6,10 @@ import json
|
|
|
6
6
|
import pathlib
|
|
7
7
|
import textwrap
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
9
|
+
from typing import Any, Protocol, Sequence, cast, runtime_checkable
|
|
10
10
|
|
|
11
11
|
import chz
|
|
12
|
+
from chz.util import MISSING as CHZ_MISSING, MISSING_TYPE
|
|
12
13
|
|
|
13
14
|
from ..errors import _FuruMissing
|
|
14
15
|
from pydantic import BaseModel as PydanticBaseModel
|
|
@@ -91,13 +92,34 @@ class FuruSerializer:
|
|
|
91
92
|
def compute_hash(cls, obj: object, verbose: bool = False) -> str:
|
|
92
93
|
"""Compute deterministic hash of object."""
|
|
93
94
|
|
|
95
|
+
@runtime_checkable
|
|
96
|
+
class _DependencyHashProvider(Protocol):
|
|
97
|
+
def _dependency_hashes(self) -> Sequence[str]: ...
|
|
98
|
+
|
|
99
|
+
def _has_required_fields(
|
|
100
|
+
data_class: type[object],
|
|
101
|
+
data: dict[str, JsonValue],
|
|
102
|
+
) -> bool:
|
|
103
|
+
if not chz.is_chz(data_class):
|
|
104
|
+
return False
|
|
105
|
+
for field in chz.chz_fields(data_class).values():
|
|
106
|
+
name = field.logical_name
|
|
107
|
+
if name in data:
|
|
108
|
+
continue
|
|
109
|
+
if field._default is not CHZ_MISSING:
|
|
110
|
+
continue
|
|
111
|
+
if not isinstance(field._default_factory, MISSING_TYPE):
|
|
112
|
+
continue
|
|
113
|
+
return False
|
|
114
|
+
return True
|
|
115
|
+
|
|
94
116
|
def canonicalize(item: object) -> JsonValue:
|
|
95
117
|
if isinstance(item, _FuruMissing):
|
|
96
118
|
raise ValueError("Cannot hash Furu.MISSING")
|
|
97
119
|
|
|
98
120
|
if chz.is_chz(item):
|
|
99
121
|
fields = chz.chz_fields(item)
|
|
100
|
-
|
|
122
|
+
result = {
|
|
101
123
|
"__class__": cls.get_classname(item),
|
|
102
124
|
**{
|
|
103
125
|
name: canonicalize(getattr(item, name))
|
|
@@ -105,8 +127,24 @@ class FuruSerializer:
|
|
|
105
127
|
if not name.startswith("_")
|
|
106
128
|
},
|
|
107
129
|
}
|
|
130
|
+
if isinstance(item, _DependencyHashProvider):
|
|
131
|
+
dependency_hashes = list(item._dependency_hashes())
|
|
132
|
+
if dependency_hashes:
|
|
133
|
+
result["__dependencies__"] = dependency_hashes
|
|
134
|
+
return result
|
|
108
135
|
|
|
109
136
|
if isinstance(item, dict):
|
|
137
|
+
if cls.CLASS_MARKER in item:
|
|
138
|
+
config = cast(dict[str, JsonValue], item)
|
|
139
|
+
module_path, _, class_name = item[cls.CLASS_MARKER].rpartition(".")
|
|
140
|
+
module = importlib.import_module(module_path)
|
|
141
|
+
data_class = getattr(module, class_name, None)
|
|
142
|
+
if (
|
|
143
|
+
data_class is not None
|
|
144
|
+
and hasattr(data_class, "_dependency_hashes")
|
|
145
|
+
and _has_required_fields(data_class, config)
|
|
146
|
+
):
|
|
147
|
+
return canonicalize(cls.from_dict(config))
|
|
110
148
|
filtered = item
|
|
111
149
|
if cls.CLASS_MARKER in item:
|
|
112
150
|
filtered = {
|
furu/storage/metadata.py
CHANGED
|
@@ -124,7 +124,7 @@ class MetadataManager:
|
|
|
124
124
|
try:
|
|
125
125
|
head = cls.run_git_command(["rev-parse", "HEAD"])
|
|
126
126
|
branch = cls.run_git_command(["rev-parse", "--abbrev-ref", "HEAD"])
|
|
127
|
-
except subprocess.CalledProcessError:
|
|
127
|
+
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
128
128
|
return GitInfo(
|
|
129
129
|
git_commit="<no-git>",
|
|
130
130
|
git_branch="<no-git>",
|
|
@@ -133,15 +133,27 @@ class MetadataManager:
|
|
|
133
133
|
git_submodules={},
|
|
134
134
|
)
|
|
135
135
|
else:
|
|
136
|
-
|
|
137
|
-
|
|
136
|
+
try:
|
|
137
|
+
head = cls.run_git_command(["rev-parse", "HEAD"])
|
|
138
|
+
branch = cls.run_git_command(["rev-parse", "--abbrev-ref", "HEAD"])
|
|
139
|
+
except (subprocess.CalledProcessError, FileNotFoundError) as e:
|
|
140
|
+
raise RuntimeError(
|
|
141
|
+
"Failed to read git commit/branch for provenance. "
|
|
142
|
+
"If this is expected, set FURU_REQUIRE_GIT=0."
|
|
143
|
+
) from e
|
|
138
144
|
|
|
139
145
|
if FURU_CONFIG.require_git_remote:
|
|
140
|
-
|
|
146
|
+
try:
|
|
147
|
+
remote = cls.run_git_command(["remote", "get-url", "origin"])
|
|
148
|
+
except (subprocess.CalledProcessError, FileNotFoundError) as e:
|
|
149
|
+
raise RuntimeError(
|
|
150
|
+
"Git remote 'origin' is required for provenance but was not found. "
|
|
151
|
+
"Set FURU_REQUIRE_GIT_REMOTE=0 to allow missing origin."
|
|
152
|
+
) from e
|
|
141
153
|
else:
|
|
142
154
|
try:
|
|
143
155
|
remote = cls.run_git_command(["remote", "get-url", "origin"])
|
|
144
|
-
except subprocess.CalledProcessError:
|
|
156
|
+
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
145
157
|
remote = None
|
|
146
158
|
|
|
147
159
|
if ignore_diff:
|
furu/storage/state.py
CHANGED
|
@@ -977,6 +977,7 @@ def compute_lock(
|
|
|
977
977
|
poll_interval_sec: float = 10.0,
|
|
978
978
|
wait_log_every_sec: float = 10.0,
|
|
979
979
|
reconcile_fn: Callable[[Path], None] | None = None,
|
|
980
|
+
allow_failed: bool = False,
|
|
980
981
|
) -> Generator[ComputeLockContext, None, None]:
|
|
981
982
|
"""
|
|
982
983
|
Context manager that atomically acquires lock + records attempt + starts heartbeat.
|
|
@@ -1000,6 +1001,7 @@ def compute_lock(
|
|
|
1000
1001
|
poll_interval_sec: Interval between lock acquisition attempts
|
|
1001
1002
|
wait_log_every_sec: Interval between "waiting for lock" log messages
|
|
1002
1003
|
reconcile_fn: Optional function to call to reconcile stale attempts
|
|
1004
|
+
allow_failed: Allow recomputation even if state is failed
|
|
1003
1005
|
|
|
1004
1006
|
Yields:
|
|
1005
1007
|
ComputeLockContext with attempt_id and stop_heartbeat callable
|
|
@@ -1008,6 +1010,51 @@ def compute_lock(
|
|
|
1008
1010
|
FuruLockNotAcquired: If lock cannot be acquired (after waiting)
|
|
1009
1011
|
FuruWaitTimeout: If max_wait_time_sec is exceeded
|
|
1010
1012
|
"""
|
|
1013
|
+
|
|
1014
|
+
def _format_wait_duration(seconds: float) -> str:
|
|
1015
|
+
if seconds < 60.0:
|
|
1016
|
+
return f"{seconds:.1f}s"
|
|
1017
|
+
minutes = seconds / 60.0
|
|
1018
|
+
if minutes < 60.0:
|
|
1019
|
+
return f"{minutes:.1f}m"
|
|
1020
|
+
hours = minutes / 60.0
|
|
1021
|
+
if hours < 24.0:
|
|
1022
|
+
return f"{hours:.1f}h"
|
|
1023
|
+
days = hours / 24.0
|
|
1024
|
+
return f"{days:.1f}d"
|
|
1025
|
+
|
|
1026
|
+
def _format_owner(attempt: _StateAttempt) -> str:
|
|
1027
|
+
owner = attempt.owner
|
|
1028
|
+
parts: list[str] = []
|
|
1029
|
+
if attempt.id:
|
|
1030
|
+
parts.append(f"attempt {attempt.id}")
|
|
1031
|
+
if owner.host:
|
|
1032
|
+
parts.append(f"host {owner.host}")
|
|
1033
|
+
if owner.pid is not None:
|
|
1034
|
+
parts.append(f"pid {owner.pid}")
|
|
1035
|
+
if owner.user:
|
|
1036
|
+
parts.append(f"user {owner.user}")
|
|
1037
|
+
if not parts:
|
|
1038
|
+
return "owner unknown"
|
|
1039
|
+
return ", ".join(parts)
|
|
1040
|
+
|
|
1041
|
+
def _describe_wait(attempt: _StateAttempt, waited_sec: float) -> str:
|
|
1042
|
+
label = "last heartbeat"
|
|
1043
|
+
timestamp = attempt.heartbeat_at
|
|
1044
|
+
if attempt.status == "queued":
|
|
1045
|
+
label = "queued at"
|
|
1046
|
+
timestamp = attempt.started_at
|
|
1047
|
+
parsed = StateManager._parse_time(timestamp)
|
|
1048
|
+
timestamp_info = timestamp
|
|
1049
|
+
if parsed is not None:
|
|
1050
|
+
age = (StateManager._utcnow() - parsed).total_seconds()
|
|
1051
|
+
timestamp_info = f"{timestamp} ({_format_wait_duration(age)} ago)"
|
|
1052
|
+
return (
|
|
1053
|
+
"waited "
|
|
1054
|
+
f"{_format_wait_duration(waited_sec)}, {label} {timestamp_info}, "
|
|
1055
|
+
f"status {attempt.status}, backend {attempt.backend}, {_format_owner(attempt)}"
|
|
1056
|
+
)
|
|
1057
|
+
|
|
1011
1058
|
lock_path = StateManager.get_lock_path(directory, StateManager.COMPUTE_LOCK)
|
|
1012
1059
|
|
|
1013
1060
|
lock_fd: int | None = None
|
|
@@ -1025,12 +1072,75 @@ def compute_lock(
|
|
|
1025
1072
|
if max_wait_time_sec is not None:
|
|
1026
1073
|
elapsed = time.time() - start_time
|
|
1027
1074
|
if elapsed > max_wait_time_sec:
|
|
1075
|
+
state = StateManager.read_state(directory)
|
|
1076
|
+
attempt = state.attempt
|
|
1077
|
+
attempt_info = "no active attempt"
|
|
1078
|
+
if isinstance(attempt, (_StateAttemptQueued, _StateAttemptRunning)):
|
|
1079
|
+
attempt_info = _describe_wait(attempt, elapsed)
|
|
1080
|
+
message = (
|
|
1081
|
+
f"Timed out waiting for compute lock after {elapsed:.1f}s."
|
|
1082
|
+
f"\nDirectory: {directory}"
|
|
1083
|
+
f"\nLock file: {lock_path}"
|
|
1084
|
+
f"\nDetails: {attempt_info}"
|
|
1085
|
+
)
|
|
1028
1086
|
raise FuruWaitTimeout(
|
|
1029
|
-
|
|
1087
|
+
message,
|
|
1088
|
+
hints=[
|
|
1089
|
+
"Increase max wait: set FURU_MAX_WAIT_SECS (or override Furu._max_wait_time_sec).",
|
|
1090
|
+
"Change poll cadence: set FURU_POLL_INTERVAL_SECS.",
|
|
1091
|
+
"Change wait logging cadence: set FURU_WAIT_LOG_EVERY_SECS.",
|
|
1092
|
+
"If locks look stale too quickly/slowly: tune FURU_LEASE_SECS and FURU_HEARTBEAT_SECS.",
|
|
1093
|
+
"For more logs: set FURU_LOG_LEVEL=DEBUG.",
|
|
1094
|
+
],
|
|
1030
1095
|
)
|
|
1031
1096
|
|
|
1032
1097
|
lock_fd = StateManager.try_lock(lock_path)
|
|
1033
1098
|
if lock_fd is not None:
|
|
1099
|
+
state = StateManager.read_state(directory)
|
|
1100
|
+
if isinstance(state.result, _StateResultSuccess):
|
|
1101
|
+
StateManager.release_lock(lock_fd, lock_path)
|
|
1102
|
+
raise FuruLockNotAcquired(
|
|
1103
|
+
"Cannot acquire lock: experiment already succeeded"
|
|
1104
|
+
)
|
|
1105
|
+
if isinstance(state.result, _StateResultFailed) and not allow_failed:
|
|
1106
|
+
StateManager.release_lock(lock_fd, lock_path)
|
|
1107
|
+
raise FuruLockNotAcquired(
|
|
1108
|
+
"Cannot acquire lock: experiment already failed"
|
|
1109
|
+
)
|
|
1110
|
+
attempt = state.attempt
|
|
1111
|
+
if (
|
|
1112
|
+
isinstance(attempt, (_StateAttemptQueued, _StateAttemptRunning))
|
|
1113
|
+
and attempt.backend != backend
|
|
1114
|
+
):
|
|
1115
|
+
StateManager.release_lock(lock_fd, lock_path)
|
|
1116
|
+
lock_fd = None
|
|
1117
|
+
if reconcile_fn is not None:
|
|
1118
|
+
reconcile_fn(directory)
|
|
1119
|
+
state = StateManager.read_state(directory)
|
|
1120
|
+
if isinstance(state.result, _StateResultSuccess):
|
|
1121
|
+
raise FuruLockNotAcquired(
|
|
1122
|
+
"Cannot acquire lock: experiment already succeeded"
|
|
1123
|
+
)
|
|
1124
|
+
if isinstance(state.result, _StateResultFailed) and not allow_failed:
|
|
1125
|
+
raise FuruLockNotAcquired(
|
|
1126
|
+
"Cannot acquire lock: experiment already failed"
|
|
1127
|
+
)
|
|
1128
|
+
attempt = state.attempt
|
|
1129
|
+
if not isinstance(attempt, (_StateAttemptQueued, _StateAttemptRunning)):
|
|
1130
|
+
continue
|
|
1131
|
+
if attempt.backend == backend:
|
|
1132
|
+
continue
|
|
1133
|
+
now = time.time()
|
|
1134
|
+
if now >= next_wait_log_at:
|
|
1135
|
+
waited_sec = now - start_time
|
|
1136
|
+
logger.info(
|
|
1137
|
+
"compute_lock: waiting for lock creation %s (%s)",
|
|
1138
|
+
directory,
|
|
1139
|
+
_describe_wait(attempt, waited_sec),
|
|
1140
|
+
)
|
|
1141
|
+
next_wait_log_at = now + wait_log_every_sec
|
|
1142
|
+
time.sleep(poll_interval_sec)
|
|
1143
|
+
continue
|
|
1034
1144
|
break
|
|
1035
1145
|
|
|
1036
1146
|
# Lock held by someone else - reconcile and check state
|
|
@@ -1045,7 +1155,7 @@ def compute_lock(
|
|
|
1045
1155
|
raise FuruLockNotAcquired(
|
|
1046
1156
|
"Cannot acquire lock: experiment already succeeded"
|
|
1047
1157
|
)
|
|
1048
|
-
if isinstance(state.result, _StateResultFailed):
|
|
1158
|
+
if isinstance(state.result, _StateResultFailed) and not allow_failed:
|
|
1049
1159
|
raise FuruLockNotAcquired("Cannot acquire lock: experiment already failed")
|
|
1050
1160
|
|
|
1051
1161
|
# If no active attempt but lock exists, it's orphaned - clean it up
|
|
@@ -1064,9 +1174,11 @@ def compute_lock(
|
|
|
1064
1174
|
# Active attempt exists - wait for it
|
|
1065
1175
|
now = time.time()
|
|
1066
1176
|
if now >= next_wait_log_at:
|
|
1177
|
+
waited_sec = now - start_time
|
|
1067
1178
|
logger.info(
|
|
1068
|
-
"compute_lock: waiting for lock %s",
|
|
1179
|
+
"compute_lock: waiting for lock %s (%s)",
|
|
1069
1180
|
directory,
|
|
1181
|
+
_describe_wait(attempt, waited_sec),
|
|
1070
1182
|
)
|
|
1071
1183
|
next_wait_log_at = now + wait_log_every_sec
|
|
1072
1184
|
time.sleep(poll_interval_sec)
|
|
@@ -1,19 +1,20 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: furu
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.3
|
|
4
4
|
Summary: Cacheable, nested pipelines for Python. Define computations as configs; furu handles caching, state tracking, and result reuse across runs.
|
|
5
|
+
Author: Herman Brunborg
|
|
5
6
|
Author-email: Herman Brunborg <herman@brunborg.com>
|
|
6
|
-
Requires-Python: >=3.12
|
|
7
7
|
Requires-Dist: chz>=0.4.0
|
|
8
8
|
Requires-Dist: cloudpickle>=3.1.1
|
|
9
9
|
Requires-Dist: pydantic>=2.12.5
|
|
10
10
|
Requires-Dist: python-dotenv>=1.0.0
|
|
11
11
|
Requires-Dist: rich>=14.2.0
|
|
12
12
|
Requires-Dist: submitit>=1.5.3
|
|
13
|
+
Requires-Dist: fastapi>=0.109.0 ; extra == 'dashboard'
|
|
14
|
+
Requires-Dist: uvicorn[standard]>=0.27.0 ; extra == 'dashboard'
|
|
15
|
+
Requires-Dist: typer>=0.9.0 ; extra == 'dashboard'
|
|
16
|
+
Requires-Python: >=3.12
|
|
13
17
|
Provides-Extra: dashboard
|
|
14
|
-
Requires-Dist: fastapi>=0.109.0; extra == 'dashboard'
|
|
15
|
-
Requires-Dist: typer>=0.9.0; extra == 'dashboard'
|
|
16
|
-
Requires-Dist: uvicorn[standard]>=0.27.0; extra == 'dashboard'
|
|
17
18
|
Description-Content-Type: text/markdown
|
|
18
19
|
|
|
19
20
|
# furu
|
|
@@ -132,20 +133,25 @@ class TrainTextModel(furu.Furu[str]):
|
|
|
132
133
|
|
|
133
134
|
### Storage Structure
|
|
134
135
|
|
|
136
|
+
Furu uses two roots: `FURU_PATH` for `data/` + `raw/`, and
|
|
137
|
+
`FURU_VERSION_CONTROLLED_PATH` for `artifacts/`. Defaults:
|
|
138
|
+
|
|
139
|
+
```
|
|
140
|
+
FURU_PATH=<project>/furu-data
|
|
141
|
+
FURU_VERSION_CONTROLLED_PATH=<project>/furu-data/artifacts
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
`<project>` is the nearest directory containing `pyproject.toml` (falling back to
|
|
145
|
+
the git root). This means you can move `FURU_PATH` without relocating artifacts.
|
|
146
|
+
|
|
135
147
|
```
|
|
136
148
|
$FURU_PATH/
|
|
137
|
-
├── data/ #
|
|
138
|
-
│ └── <module>/<Class>/
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
│ │ ├── furu.log # Captured logs
|
|
144
|
-
│ │ └── SUCCESS.json # Marker file
|
|
145
|
-
│ └── <your outputs> # Files from _create()
|
|
146
|
-
├── git/ # For version_controlled=True
|
|
147
|
-
│ └── <same structure>
|
|
148
|
-
└── raw/ # Shared directory for large files
|
|
149
|
+
├── data/ # version_controlled=False
|
|
150
|
+
│ └── <module>/<Class>/<hash>/
|
|
151
|
+
└── raw/
|
|
152
|
+
|
|
153
|
+
$FURU_VERSION_CONTROLLED_PATH/ # version_controlled=True
|
|
154
|
+
└── <module>/<Class>/<hash>/
|
|
149
155
|
```
|
|
150
156
|
|
|
151
157
|
## Features
|
|
@@ -259,10 +265,17 @@ For artifacts that should be stored separately (e.g., checked into git):
|
|
|
259
265
|
|
|
260
266
|
```python
|
|
261
267
|
class VersionedConfig(furu.Furu[dict], version_controlled=True):
|
|
262
|
-
# Stored under $
|
|
268
|
+
# Stored under $FURU_VERSION_CONTROLLED_PATH
|
|
269
|
+
# Default: <project>/furu-data/artifacts
|
|
263
270
|
...
|
|
264
271
|
```
|
|
265
272
|
|
|
273
|
+
`<project>` is the nearest directory containing `pyproject.toml`, or the git root
|
|
274
|
+
if `pyproject.toml` is missing.
|
|
275
|
+
|
|
276
|
+
It is typical to keep `furu-data/data/` and `furu-data/raw/` in `.gitignore` while
|
|
277
|
+
committing `furu-data/artifacts/`.
|
|
278
|
+
|
|
266
279
|
## Logging
|
|
267
280
|
|
|
268
281
|
Furu installs stdlib `logging` handlers that capture logs to per-artifact files.
|
|
@@ -323,6 +336,17 @@ except FuruLockNotAcquired:
|
|
|
323
336
|
print("Could not acquire lock")
|
|
324
337
|
```
|
|
325
338
|
|
|
339
|
+
By default, failed artifacts are retried on the next `load_or_create()` call. Set
|
|
340
|
+
`FURU_RETRY_FAILED=0` or pass `retry_failed=False` to keep failures sticky.
|
|
341
|
+
|
|
342
|
+
`FURU_MAX_WAIT_SECS` overrides the per-class `_max_wait_time_sec` (default 600s)
|
|
343
|
+
timeout used when waiting for compute locks before raising `FuruWaitTimeout`.
|
|
344
|
+
|
|
345
|
+
Failures during metadata collection or signal handler setup (before `_create()`
|
|
346
|
+
runs) raise `FuruComputeError` with the original exception attached. These
|
|
347
|
+
failures still mark the attempt as failed and record details in `state.json`
|
|
348
|
+
and `furu.log`.
|
|
349
|
+
|
|
326
350
|
## Submitit Integration
|
|
327
351
|
|
|
328
352
|
Run computations on SLURM clusters via [submitit](https://github.com/facebookincubator/submitit):
|
|
@@ -397,10 +421,14 @@ The `/api/experiments` endpoint supports:
|
|
|
397
421
|
|
|
398
422
|
| Variable | Default | Description |
|
|
399
423
|
|----------|---------|-------------|
|
|
400
|
-
| `FURU_PATH` |
|
|
424
|
+
| `FURU_PATH` | `<project>/furu-data` | Base storage directory for non-versioned artifacts |
|
|
425
|
+
| `FURU_VERSION_CONTROLLED_PATH` | `<project>/furu-data/artifacts` | Override version-controlled storage root |
|
|
401
426
|
| `FURU_LOG_LEVEL` | `INFO` | Console verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
|
|
402
427
|
| `FURU_IGNORE_DIFF` | `false` | Skip embedding git diff in metadata |
|
|
428
|
+
| `FURU_ALWAYS_RERUN` | `""` | Comma-separated class qualnames to always rerun (use `ALL` to bypass cache globally; cannot combine with other entries; entries must be importable) |
|
|
429
|
+
| `FURU_RETRY_FAILED` | `true` | Retry failed artifacts by default (set to `0` to keep failures sticky) |
|
|
403
430
|
| `FURU_POLL_INTERVAL_SECS` | `10` | Polling interval for queued/running jobs |
|
|
431
|
+
| `FURU_MAX_WAIT_SECS` | unset | Override wait timeout (falls back to `_max_wait_time_sec`, default 600s) |
|
|
404
432
|
| `FURU_WAIT_LOG_EVERY_SECS` | `10` | Interval between "waiting" log messages |
|
|
405
433
|
| `FURU_STALE_AFTER_SECS` | `1800` | Consider running jobs stale after this duration |
|
|
406
434
|
| `FURU_LEASE_SECS` | `120` | Compute lock lease duration |
|
|
@@ -1,36 +1,36 @@
|
|
|
1
|
-
furu/__init__.py,sha256=
|
|
2
|
-
furu/config.py,sha256=F_Bh9vs0Dq5-3fXMylEBbm7F9-Q2n9aLt1iTb-RAl-4,3538
|
|
3
|
-
furu/errors.py,sha256=d1Kp5O9cVoQwXmQeZC-35u7xldw_c3ryYXrbVfv-Lws,2001
|
|
4
|
-
furu/migrate.py,sha256=x_Uh7oXAv40L5ZAHJhdnw-o7ct56rWUSZLbHHfRObeY,1313
|
|
5
|
-
furu/migration.py,sha256=A91dng1XRn1N_xJrmBhh-OvU22GlseqOh6PmVhNZh3w,31307
|
|
1
|
+
furu/__init__.py,sha256=c0rtDRCWRafo0gB4x7qOMVL8ZXtxHOrPnJIs_CwrWlY,1818
|
|
6
2
|
furu/adapters/__init__.py,sha256=onLzEj9hccPK15g8a8va2T19nqQXoxb9rQlJIjKSKnE,69
|
|
7
3
|
furu/adapters/submitit.py,sha256=OuCP0pEkO1kI4WLcSUvMqXwVCCy-8uwUE7v1qvkLZnU,6214
|
|
8
|
-
furu/
|
|
9
|
-
furu/core/
|
|
4
|
+
furu/config.py,sha256=UvSkUDNh0iuMKyl0OelKO5i7FAdkHnqnfbTFXaIaXvY,6886
|
|
5
|
+
furu/core/__init__.py,sha256=6hH7i6r627c0FZn6eQVsSG7LD4QmTta6iQw0AiPQPTM,156
|
|
6
|
+
furu/core/furu.py,sha256=Uz5vVo161Duvl94hwn7u2WH9MaDFQFqlxowzHGigkkY,51592
|
|
10
7
|
furu/core/list.py,sha256=hwwlvqaKB1grPBGKXc15scF1RCqDvWc0AoDbhKlN4W0,3625
|
|
11
8
|
furu/dashboard/__init__.py,sha256=zNVddterfpjQtcpihIl3TRJdgdjOHYR0uO0cOSaGABg,172
|
|
12
9
|
furu/dashboard/__main__.py,sha256=cNs65IMl4kwZFpxa9xLXmFSy4-M5D1X1ZBfTDxW11vo,144
|
|
13
|
-
furu/dashboard/main.py,sha256=8JYc79gbJ9MjvIRdGDuAcR2Mme9kyY4ryZb11ZZ4uVA,4069
|
|
14
|
-
furu/dashboard/scanner.py,sha256=qXCvkvFByBc09TUdth5Js67rS8zpRBlRkVQ9dJ7YbdE,34696
|
|
15
10
|
furu/dashboard/api/__init__.py,sha256=9-WyWOt-VQJJBIsdW29D-7JvR-BivJd9G_SRaRptCz0,80
|
|
16
11
|
furu/dashboard/api/models.py,sha256=SCu-kLJyW7dwSKswdgQNS3wQuj25ORs0pHkvX9xBbo4,4767
|
|
17
12
|
furu/dashboard/api/routes.py,sha256=iZez0khIUvbgfeSoy1BJvmoEEbgUrdSQA8SN8iAIkM8,4813
|
|
13
|
+
furu/dashboard/frontend/dist/assets/index-BXAIKNNr.css,sha256=qhsN0Td3mM-GAR8mZ0CtocynABLKa1ncl9ioDrTKOIQ,34768
|
|
14
|
+
furu/dashboard/frontend/dist/assets/index-DS3FsqcY.js,sha256=nfrKjhWThPtL8n5iTd9_1W-bsyMGwg2O8Iq2jkjj9Lg,544699
|
|
18
15
|
furu/dashboard/frontend/dist/favicon.svg,sha256=3TSLHNZITFe3JTPoYHZnDgiGsJxIzf39v97l2A1Hodo,369
|
|
19
|
-
furu/dashboard/frontend/dist/index.html,sha256=
|
|
20
|
-
furu/dashboard/
|
|
21
|
-
furu/dashboard/
|
|
16
|
+
furu/dashboard/frontend/dist/index.html,sha256=d9a8ZFKZ5uDtN3urqVNmS8LWMBhOC0eW7X0noT0RcYQ,810
|
|
17
|
+
furu/dashboard/main.py,sha256=8JYc79gbJ9MjvIRdGDuAcR2Mme9kyY4ryZb11ZZ4uVA,4069
|
|
18
|
+
furu/dashboard/scanner.py,sha256=qXCvkvFByBc09TUdth5Js67rS8zpRBlRkVQ9dJ7YbdE,34696
|
|
19
|
+
furu/errors.py,sha256=tWKLOtkP5uYDuqozeImCN7WzjFforPj1WImW0AWc4Vk,3684
|
|
20
|
+
furu/migrate.py,sha256=x_Uh7oXAv40L5ZAHJhdnw-o7ct56rWUSZLbHHfRObeY,1313
|
|
21
|
+
furu/migration.py,sha256=R2-tARMx4VKryiqJ7WHia_dPVxRbTqofPpCFVE9zQ8U,31411
|
|
22
22
|
furu/runtime/__init__.py,sha256=fQqE7wUuWunLD73Vm3lss7BFSij3UVxXOKQXBAOS8zw,504
|
|
23
23
|
furu/runtime/env.py,sha256=o1phhoTDhOnhALr3Ozf1ldrdvk2ClyEvBWbebHM6BXg,160
|
|
24
24
|
furu/runtime/logging.py,sha256=JkuTFtbv6dYk088P6_Bga46bnKSDt-ElAqmiY86hMys,9773
|
|
25
25
|
furu/runtime/tracebacks.py,sha256=PGCuOq8QkWSoun791gjUXM8frOP2wWV8IBlqaA4nuGE,1631
|
|
26
26
|
furu/serialization/__init__.py,sha256=L7oHuIbxdSh7GCY3thMQnDwlt_ERH-TMy0YKEAZLrPs,341
|
|
27
27
|
furu/serialization/migrations.py,sha256=HD5g8JCBdH3Y0rHJYc4Ug1IXBVcUDxLE7nfiXZnXcUE,7772
|
|
28
|
-
furu/serialization/serializer.py,sha256=
|
|
28
|
+
furu/serialization/serializer.py,sha256=_nfUaAOy_KHegvfXlpPh4rCuvkzalJva75OvDg5nXiI,10114
|
|
29
29
|
furu/storage/__init__.py,sha256=cLLL-GPpSu9C72Mdk5S6TGu3g-SnBfEuxzfpx5ZJPtw,616
|
|
30
|
-
furu/storage/metadata.py,sha256=
|
|
30
|
+
furu/storage/metadata.py,sha256=MH6w5hs-2rwHD6G9erMPM5pE3hm0h5Pk_G3Z6eyyGB0,9899
|
|
31
31
|
furu/storage/migration.py,sha256=Ars9aYwvhXpIBDf6L9ojGjp_l656-RfdtEAFKN0sZZY,2640
|
|
32
|
-
furu/storage/state.py,sha256=
|
|
33
|
-
furu-0.0.
|
|
34
|
-
furu-0.0.
|
|
35
|
-
furu-0.0.
|
|
36
|
-
furu-0.0.
|
|
32
|
+
furu/storage/state.py,sha256=rAzR0XJS3OvwGMATlppxNQwX1FrSIffUTkptSwOjBcs,42627
|
|
33
|
+
furu-0.0.3.dist-info/WHEEL,sha256=XV0cjMrO7zXhVAIyyc8aFf1VjZ33Fen4IiJk5zFlC3g,80
|
|
34
|
+
furu-0.0.3.dist-info/entry_points.txt,sha256=hZkjtFzNlb33Zk-aUfLMRj-XgVDxdT82-JXG9d4bu2E,60
|
|
35
|
+
furu-0.0.3.dist-info/METADATA,sha256=NY6H_CMvm2-wc21GdRpMWxa5cK4HMxMwylTDVaZy2aY,14615
|
|
36
|
+
furu-0.0.3.dist-info/RECORD,,
|