furu-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
furu/__init__.py ADDED
@@ -0,0 +1,82 @@
1
"""
Furu: cacheable, nested pipelines as config objects.

This package uses a src-layout. Import the package as `furu`.
"""

from importlib.metadata import PackageNotFoundError, version

import chz
import submitit

# Resolve the installed distribution's version. Fall back instead of raising
# so that importing `furu` from an uninstalled source checkout (e.g. during
# development) does not crash at import time.
try:
    __version__ = version("furu")
except PackageNotFoundError:
    __version__ = "0.0.0"

from .config import FURU_CONFIG, FuruConfig, get_furu_root, set_furu_root
from .adapters import SubmititAdapter
from .core import Furu, FuruList
from .errors import (
    FuruComputeError,
    FuruError,
    FuruLockNotAcquired,
    FuruMigrationRequired,
    FuruWaitTimeout,
    MISSING,
)
from .runtime import (
    configure_logging,
    current_holder,
    current_log_dir,
    enter_holder,
    get_logger,
    load_env,
    log,
    write_separator,
)
from .migrate import migrate
from .migration import (
    NamespacePair,
    MigrationCandidate,
    MigrationSkip,
    apply_migration,
    find_migration_candidates,
    find_migration_candidates_initialized_target,
)
from .serialization import FuruSerializer
from .storage import MetadataManager, StateManager

__all__ = [
    "__version__",
    "FURU_CONFIG",
    "Furu",
    "FuruComputeError",
    "FuruConfig",
    "FuruError",
    "FuruList",
    "FuruLockNotAcquired",
    "FuruMigrationRequired",
    "FuruSerializer",
    "FuruWaitTimeout",
    "MISSING",
    "migrate",
    "NamespacePair",
    "MigrationCandidate",
    "MigrationSkip",
    "apply_migration",
    "find_migration_candidates",
    "find_migration_candidates_initialized_target",
    "MetadataManager",
    "StateManager",
    "SubmititAdapter",
    "chz",
    "configure_logging",
    "current_holder",
    "current_log_dir",
    "enter_holder",
    "get_furu_root",
    "get_logger",
    "load_env",
    "log",
    "write_separator",
    "set_furu_root",
    "submitit",
]
@@ -0,0 +1,3 @@
1
"""Re-export the public submitit adapter."""

from .submitit import SubmititAdapter

__all__ = ["SubmititAdapter"]
@@ -0,0 +1,195 @@
1
+ import threading
2
+ import time
3
+ from pathlib import Path
4
+ from typing import Any, Callable, Protocol
5
+
6
+
7
+ from ..config import FURU_CONFIG
8
+ from ..storage import StateManager
9
+ from ..storage.state import _FuruState, ProbeResult
10
+
11
+
12
+ # Protocol for submitit Job-like objects. We use this instead of Any because
13
+ # submitit is an external library and we want to document the interface we expect.
14
+ class SubmititJobProtocol(Protocol):
15
+ """Protocol for submitit Job objects."""
16
+
17
+ job_id: str | None
18
+
19
+ def done(self) -> bool: ...
20
+ def state(self) -> str: ...
21
+ def result(self, timeout: float | None = None) -> object: ...
22
+ def wait(self) -> None: ...
23
+
24
+
25
+ # Type alias for submitit Executor. The executor is from an external library
26
+ # with a complex generic type, so we use Any here.
27
+ SubmititExecutor = Any
28
+
29
+ # Type alias for submitit Job. Jobs come from external library and can be
30
+ # various types depending on the executor backend.
31
+ SubmititJob = Any
32
+
33
+
34
+ class SubmititAdapter:
35
+ """Adapter for working with submitit executors."""
36
+
37
+ JOB_PICKLE = "job.pkl"
38
+
39
+ def __init__(self, executor: SubmititExecutor):
40
+ self.executor = executor
41
+
42
+ def submit(self, fn: Callable[[], None]) -> SubmititJob:
43
+ """Submit a job to the executor."""
44
+ return self.executor.submit(fn)
45
+
46
+ def wait(self, job: SubmititJob, timeout: float | None = None) -> None:
47
+ """Wait for job completion."""
48
+ if timeout:
49
+ job.result(timeout=timeout)
50
+ else:
51
+ job.wait()
52
+
53
+ def get_job_id(self, job: SubmititJob) -> str | None:
54
+ """Get job ID if available."""
55
+ job_id = getattr(job, "job_id", None)
56
+ if job_id:
57
+ return str(job_id)
58
+ return None
59
+
60
+ def is_done(self, job: SubmititJob) -> bool:
61
+ """Check if job is done."""
62
+ done_fn = getattr(job, "done", None)
63
+ if done_fn and callable(done_fn):
64
+ return done_fn()
65
+ return False
66
+
67
+ def get_state(self, job: SubmititJob) -> str | None:
68
+ """Get job state from scheduler."""
69
+ state_fn = getattr(job, "state", None)
70
+ if state_fn and callable(state_fn):
71
+ return state_fn()
72
+ return None
73
+
74
+ def pickle_job(self, job: SubmititJob, directory: Path) -> None:
75
+ """Pickle job handle to file."""
76
+ import cloudpickle as pickle
77
+
78
+ job_path = StateManager.get_internal_dir(directory) / self.JOB_PICKLE
79
+ job_path.parent.mkdir(parents=True, exist_ok=True)
80
+ with job_path.open("wb") as f:
81
+ pickle.dump(job, f)
82
+
83
+ def load_job(self, directory: Path) -> SubmititJob | None:
84
+ """Load job handle from pickle file."""
85
+ job_path = StateManager.get_internal_dir(directory) / self.JOB_PICKLE
86
+ if not job_path.is_file():
87
+ return None
88
+
89
+ import cloudpickle as pickle
90
+
91
+ with job_path.open("rb") as f:
92
+ return pickle.load(f)
93
+
94
+ def watch_job_id(
95
+ self,
96
+ job: SubmititJob,
97
+ directory: Path,
98
+ *,
99
+ attempt_id: str,
100
+ callback: Callable[[str], None] | None = None,
101
+ ) -> None:
102
+ """Watch for job ID in background thread and update state."""
103
+
104
+ def watcher():
105
+ while True:
106
+ job_id = self.get_job_id(job)
107
+ if job_id:
108
+
109
+ def mutate(state: _FuruState) -> None:
110
+ attempt = state.attempt
111
+ if attempt is None or attempt.id != attempt_id:
112
+ return
113
+ attempt.scheduler["job_id"] = job_id
114
+
115
+ StateManager.update_state(directory, mutate)
116
+ if callback:
117
+ callback(job_id)
118
+ break
119
+
120
+ if self.is_done(job):
121
+ break
122
+
123
+ time.sleep(0.5)
124
+
125
+ thread = threading.Thread(target=watcher, daemon=True)
126
+ thread.start()
127
+
128
+ def classify_scheduler_state(self, state: str | None) -> str | None:
129
+ """Map scheduler state to Furu status."""
130
+ if not state:
131
+ return None
132
+
133
+ s = state.upper()
134
+
135
+ if "COMPLETE" in s or "COMPLETED" in s:
136
+ return "success"
137
+
138
+ if s in {
139
+ "PREEMPTED",
140
+ "TIMEOUT",
141
+ "NODE_FAIL",
142
+ "REQUEUED",
143
+ "REQUEUE_HOLD",
144
+ }:
145
+ return "preempted"
146
+
147
+ if s == "CANCELLED":
148
+ return "preempted" if FURU_CONFIG.cancelled_is_preempted else "failed"
149
+
150
+ if "FAIL" in s or "ERROR" in s:
151
+ return "failed"
152
+
153
+ return None
154
+
155
+ def probe(self, directory: Path, state: _FuruState) -> ProbeResult:
156
+ """
157
+ Best-effort scheduler reconciliation.
158
+
159
+ Returns a dict for `StateManager.reconcile(..., submitit_probe=...)`:
160
+ - `terminal_status`: one of {failed, cancelled, preempted, crashed}
161
+ - `scheduler_state`: raw scheduler state when available
162
+ - `reason`: best-effort reason string
163
+
164
+ Returns empty dict if job status cannot be determined (e.g., job pickle
165
+ doesn't exist yet), allowing reconcile to fall back to lease expiry.
166
+ """
167
+ job = self.load_job(directory)
168
+ if job is None:
169
+ # Job pickle doesn't exist - can't determine status, fall back to lease expiry
170
+ return {}
171
+
172
+ scheduler_state = self.get_state(job)
173
+ classified = self.classify_scheduler_state(scheduler_state)
174
+ if classified is None:
175
+ if self.is_done(job):
176
+ return {
177
+ "terminal_status": "crashed",
178
+ "scheduler_state": scheduler_state,
179
+ "reason": "job_done_unknown_state",
180
+ }
181
+ return {}
182
+
183
+ # `COMPLETED` doesn't guarantee the worker wrote a success marker/state.
184
+ if classified == "success":
185
+ return {
186
+ "terminal_status": "crashed",
187
+ "scheduler_state": scheduler_state,
188
+ "reason": "scheduler_completed_no_success_marker",
189
+ }
190
+
191
+ return {
192
+ "terminal_status": classified,
193
+ "scheduler_state": scheduler_state,
194
+ "reason": f"scheduler:{scheduler_state}",
195
+ }
furu/config.py ADDED
@@ -0,0 +1,98 @@
1
+ import os
2
+ from pathlib import Path
3
+
4
+
5
+ class FuruConfig:
6
+ """Central configuration for Furu behavior."""
7
+
8
+ def __init__(self):
9
+ def _get_base_root() -> Path:
10
+ env = os.getenv("FURU_PATH")
11
+ if env:
12
+ return Path(env).expanduser().resolve()
13
+ return Path("data-furu").resolve()
14
+
15
+ self.base_root = _get_base_root()
16
+ self.poll_interval = float(os.getenv("FURU_POLL_INTERVAL_SECS", "10"))
17
+ self.wait_log_every_sec = float(os.getenv("FURU_WAIT_LOG_EVERY_SECS", "10"))
18
+ self.stale_timeout = float(os.getenv("FURU_STALE_AFTER_SECS", str(30 * 60)))
19
+ self.lease_duration_sec = float(os.getenv("FURU_LEASE_SECS", "120"))
20
+ hb = os.getenv("FURU_HEARTBEAT_SECS")
21
+ self.heartbeat_interval_sec = (
22
+ float(hb) if hb is not None else max(1.0, self.lease_duration_sec / 3.0)
23
+ )
24
+ self.max_requeues = int(os.getenv("FURU_PREEMPT_MAX", "5"))
25
+ self.ignore_git_diff = os.getenv("FURU_IGNORE_DIFF", "0").lower() in {
26
+ "1",
27
+ "true",
28
+ "yes",
29
+ }
30
+ self.require_git = os.getenv("FURU_REQUIRE_GIT", "1").lower() in {
31
+ "1",
32
+ "true",
33
+ "yes",
34
+ }
35
+ self.require_git_remote = os.getenv("FURU_REQUIRE_GIT_REMOTE", "1").lower() in {
36
+ "1",
37
+ "true",
38
+ "yes",
39
+ }
40
+ self.force_recompute = {
41
+ item.strip()
42
+ for item in os.getenv("FURU_FORCE_RECOMPUTE", "").split(",")
43
+ if item.strip()
44
+ }
45
+ self.cancelled_is_preempted = os.getenv(
46
+ "FURU_CANCELLED_IS_PREEMPTED", "false"
47
+ ).lower() in {"1", "true", "yes"}
48
+
49
+ # Parse FURU_CACHE_METADATA: "never", "forever", or duration like "5m", "1h"
50
+ # Default: "5m" (5 minutes) - balances performance with freshness
51
+ self.cache_metadata_ttl_sec: float | None = self._parse_cache_duration(
52
+ os.getenv("FURU_CACHE_METADATA", "5m")
53
+ )
54
+
55
+ @staticmethod
56
+ def _parse_cache_duration(value: str) -> float | None:
57
+ """Parse cache duration string into seconds. Returns None for 'never', float('inf') for 'forever'."""
58
+ value = value.strip().lower()
59
+ if value in {"never", "0", "false", "no"}:
60
+ return None # No caching
61
+ if value in {"forever", "inf", "true", "yes", "1"}:
62
+ return float("inf") # Cache forever
63
+
64
+ # Parse duration like "5m", "1h", "30s"
65
+ import re
66
+
67
+ match = re.match(r"^(\d+(?:\.\d+)?)\s*([smh]?)$", value)
68
+ if not match:
69
+ raise ValueError(
70
+ f"Invalid FURU_CACHE_METADATA value: {value!r}. "
71
+ "Use 'never', 'forever', or duration like '5m', '1h', '30s'"
72
+ )
73
+
74
+ num = float(match.group(1))
75
+ unit = match.group(2) or "s"
76
+ multipliers = {"s": 1, "m": 60, "h": 3600}
77
+ return num * multipliers[unit]
78
+
79
+ def get_root(self, version_controlled: bool = False) -> Path:
80
+ """Get root directory for storage (version_controlled determines subdirectory)."""
81
+ if version_controlled:
82
+ return self.base_root / "git"
83
+ return self.base_root / "data"
84
+
85
+ @property
86
+ def raw_dir(self) -> Path:
87
+ return self.base_root / "raw"
88
+
89
+
90
# Process-wide singleton read throughout the package; `set_furu_root`
# mutates it in place rather than rebinding it.
FURU_CONFIG = FuruConfig()
91
+
92
+
93
def get_furu_root(*, version_controlled: bool = False) -> Path:
    """Return the active storage root from the shared FURU_CONFIG singleton."""
    config = FURU_CONFIG
    return config.get_root(version_controlled=version_controlled)
95
+
96
+
97
def set_furu_root(path: Path) -> None:
    """Repoint the shared FURU_CONFIG at *path* (stored as an absolute path)."""
    resolved = path.resolve()
    FURU_CONFIG.base_root = resolved
furu/core/__init__.py ADDED
@@ -0,0 +1,4 @@
1
"""Re-export the core `Furu` and `FuruList` types."""

from .furu import Furu
from .list import FuruList

__all__ = ["Furu", "FuruList"]