furu 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
furu/__init__.py CHANGED
@@ -17,8 +17,12 @@ from .core import DependencyChzSpec, DependencySpec, Furu, FuruList
17
17
  from .errors import (
18
18
  FuruComputeError,
19
19
  FuruError,
20
+ FuruExecutionError,
20
21
  FuruLockNotAcquired,
22
+ FuruMissingArtifact,
21
23
  FuruMigrationRequired,
24
+ FuruSpecMismatch,
25
+ FuruValidationError,
22
26
  FuruWaitTimeout,
23
27
  MISSING,
24
28
  )
@@ -51,9 +55,13 @@ __all__ = [
51
55
  "FuruComputeError",
52
56
  "FuruConfig",
53
57
  "FuruError",
58
+ "FuruExecutionError",
54
59
  "FuruList",
55
60
  "FuruLockNotAcquired",
61
+ "FuruMissingArtifact",
56
62
  "FuruMigrationRequired",
63
+ "FuruSpecMismatch",
64
+ "FuruValidationError",
57
65
  "FuruSerializer",
58
66
  "FuruWaitTimeout",
59
67
  "DependencyChzSpec",
furu/adapters/submitit.py CHANGED
@@ -6,6 +6,7 @@ from typing import Any, Callable, Protocol
6
6
 
7
7
  from ..config import FURU_CONFIG
8
8
  from ..storage import StateManager
9
+ from ..runtime.logging import get_logger
9
10
  from ..storage.state import _FuruState, ProbeResult
10
11
 
11
12
 
@@ -102,19 +103,39 @@ class SubmititAdapter:
102
103
  """Watch for job ID in background thread and update state."""
103
104
 
104
105
  def watcher():
106
+ _ = attempt_id # intentionally unused; queued->running attempt swap is expected
105
107
  while True:
106
108
  job_id = self.get_job_id(job)
107
109
  if job_id:
108
110
 
109
111
  def mutate(state: _FuruState) -> None:
110
112
  attempt = state.attempt
111
- if attempt is None or attempt.id != attempt_id:
113
+ if attempt is None:
114
+ return
115
+ if attempt.backend != "submitit":
116
+ return
117
+ if (
118
+ attempt.status not in {"queued", "running"}
119
+ and attempt.status not in StateManager.TERMINAL_STATUSES
120
+ ):
121
+ return
122
+ existing = attempt.scheduler.get("job_id")
123
+ if existing == job_id:
112
124
  return
113
125
  attempt.scheduler["job_id"] = job_id
114
126
 
115
127
  StateManager.update_state(directory, mutate)
116
128
  if callback:
117
- callback(job_id)
129
+ try:
130
+ callback(job_id)
131
+ except Exception:
132
+ # Avoid killing the watcher thread; state update already happened.
133
+ logger = get_logger()
134
+ logger.exception(
135
+ "submitit watcher: job_id callback failed for %s: %s",
136
+ directory,
137
+ job_id,
138
+ )
118
139
  break
119
140
 
120
141
  if self.is_done(job):
furu/config.py CHANGED
@@ -18,6 +18,11 @@ class FuruConfig:
18
18
  return (project_root / self.DEFAULT_ROOT_DIR).resolve()
19
19
 
20
20
  self.base_root = _get_base_root()
21
+ self.submitit_root = (
22
+ Path(os.getenv("FURU_SUBMITIT_PATH", str(self.base_root / "submitit")))
23
+ .expanduser()
24
+ .resolve()
25
+ )
21
26
  self.version_controlled_root_override = self._get_version_controlled_override()
22
27
  self.poll_interval = float(os.getenv("FURU_POLL_INTERVAL_SECS", "10"))
23
28
  self.wait_log_every_sec = float(os.getenv("FURU_WAIT_LOG_EVERY_SECS", "10"))
@@ -30,6 +35,7 @@ class FuruConfig:
30
35
  float(hb) if hb is not None else max(1.0, self.lease_duration_sec / 3.0)
31
36
  )
32
37
  self.max_requeues = int(os.getenv("FURU_PREEMPT_MAX", "5"))
38
+ self.max_compute_retries = int(os.getenv("FURU_MAX_COMPUTE_RETRIES", "3"))
33
39
  self.retry_failed = os.getenv("FURU_RETRY_FAILED", "1").lower() in {
34
40
  "1",
35
41
  "true",
@@ -109,6 +115,9 @@ class FuruConfig:
109
115
  return self._resolve_version_controlled_root()
110
116
  return self.base_root / "data"
111
117
 
118
+ def get_submitit_root(self) -> Path:
119
+ return self.submitit_root
120
+
112
121
  @classmethod
113
122
  def _get_version_controlled_override(cls) -> Path | None:
114
123
  env = os.getenv("FURU_VERSION_CONTROLLED_PATH")
@@ -175,4 +184,7 @@ def get_furu_root(*, version_controlled: bool = False) -> Path:
175
184
 
176
185
 
177
186
  def set_furu_root(path: Path) -> None:
178
- FURU_CONFIG.base_root = path.resolve()
187
+ root = path.resolve()
188
+ FURU_CONFIG.base_root = root
189
+ if os.getenv("FURU_SUBMITIT_PATH") is None:
190
+ FURU_CONFIG.submitit_root = (root / "submitit").resolve()