furu 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
furu/__init__.py CHANGED
@@ -13,12 +13,16 @@ __version__ = version("furu")
13
13
 
14
14
  from .config import FURU_CONFIG, FuruConfig, get_furu_root, set_furu_root
15
15
  from .adapters import SubmititAdapter
16
- from .core import Furu, FuruList
16
+ from .core import DependencyChzSpec, DependencySpec, Furu, FuruList
17
17
  from .errors import (
18
18
  FuruComputeError,
19
19
  FuruError,
20
+ FuruExecutionError,
20
21
  FuruLockNotAcquired,
22
+ FuruMissingArtifact,
21
23
  FuruMigrationRequired,
24
+ FuruSpecMismatch,
25
+ FuruValidationError,
22
26
  FuruWaitTimeout,
23
27
  MISSING,
24
28
  )
@@ -51,11 +55,17 @@ __all__ = [
51
55
  "FuruComputeError",
52
56
  "FuruConfig",
53
57
  "FuruError",
58
+ "FuruExecutionError",
54
59
  "FuruList",
55
60
  "FuruLockNotAcquired",
61
+ "FuruMissingArtifact",
56
62
  "FuruMigrationRequired",
63
+ "FuruSpecMismatch",
64
+ "FuruValidationError",
57
65
  "FuruSerializer",
58
66
  "FuruWaitTimeout",
67
+ "DependencyChzSpec",
68
+ "DependencySpec",
59
69
  "MISSING",
60
70
  "migrate",
61
71
  "NamespacePair",
furu/adapters/submitit.py CHANGED
@@ -6,6 +6,7 @@ from typing import Any, Callable, Protocol
6
6
 
7
7
  from ..config import FURU_CONFIG
8
8
  from ..storage import StateManager
9
+ from ..runtime.logging import get_logger
9
10
  from ..storage.state import _FuruState, ProbeResult
10
11
 
11
12
 
@@ -102,19 +103,39 @@ class SubmititAdapter:
102
103
  """Watch for job ID in background thread and update state."""
103
104
 
104
105
  def watcher():
106
+ _ = attempt_id # intentionally unused; queued->running attempt swap is expected
105
107
  while True:
106
108
  job_id = self.get_job_id(job)
107
109
  if job_id:
108
110
 
109
111
  def mutate(state: _FuruState) -> None:
110
112
  attempt = state.attempt
111
- if attempt is None or attempt.id != attempt_id:
113
+ if attempt is None:
114
+ return
115
+ if attempt.backend != "submitit":
116
+ return
117
+ if (
118
+ attempt.status not in {"queued", "running"}
119
+ and attempt.status not in StateManager.TERMINAL_STATUSES
120
+ ):
121
+ return
122
+ existing = attempt.scheduler.get("job_id")
123
+ if existing == job_id:
112
124
  return
113
125
  attempt.scheduler["job_id"] = job_id
114
126
 
115
127
  StateManager.update_state(directory, mutate)
116
128
  if callback:
117
- callback(job_id)
129
+ try:
130
+ callback(job_id)
131
+ except Exception:
132
+ # Avoid killing the watcher thread; state update already happened.
133
+ logger = get_logger()
134
+ logger.exception(
135
+ "submitit watcher: job_id callback failed for %s: %s",
136
+ directory,
137
+ job_id,
138
+ )
118
139
  break
119
140
 
120
141
  if self.is_done(job):
furu/config.py CHANGED
@@ -18,16 +18,29 @@ class FuruConfig:
18
18
  return (project_root / self.DEFAULT_ROOT_DIR).resolve()
19
19
 
20
20
  self.base_root = _get_base_root()
21
+ self.submitit_root = (
22
+ Path(os.getenv("FURU_SUBMITIT_PATH", str(self.base_root / "submitit")))
23
+ .expanduser()
24
+ .resolve()
25
+ )
21
26
  self.version_controlled_root_override = self._get_version_controlled_override()
22
27
  self.poll_interval = float(os.getenv("FURU_POLL_INTERVAL_SECS", "10"))
23
28
  self.wait_log_every_sec = float(os.getenv("FURU_WAIT_LOG_EVERY_SECS", "10"))
24
29
  self.stale_timeout = float(os.getenv("FURU_STALE_AFTER_SECS", str(30 * 60)))
30
+ max_wait_env = os.getenv("FURU_MAX_WAIT_SECS")
31
+ self.max_wait_time_sec = float(max_wait_env) if max_wait_env else None
25
32
  self.lease_duration_sec = float(os.getenv("FURU_LEASE_SECS", "120"))
26
33
  hb = os.getenv("FURU_HEARTBEAT_SECS")
27
34
  self.heartbeat_interval_sec = (
28
35
  float(hb) if hb is not None else max(1.0, self.lease_duration_sec / 3.0)
29
36
  )
30
37
  self.max_requeues = int(os.getenv("FURU_PREEMPT_MAX", "5"))
38
+ self.max_compute_retries = int(os.getenv("FURU_MAX_COMPUTE_RETRIES", "3"))
39
+ self.retry_failed = os.getenv("FURU_RETRY_FAILED", "1").lower() in {
40
+ "1",
41
+ "true",
42
+ "yes",
43
+ }
31
44
  self.ignore_git_diff = os.getenv("FURU_IGNORE_DIFF", "0").lower() in {
32
45
  "1",
33
46
  "true",
@@ -102,6 +115,9 @@ class FuruConfig:
102
115
  return self._resolve_version_controlled_root()
103
116
  return self.base_root / "data"
104
117
 
118
+ def get_submitit_root(self) -> Path:
119
+ return self.submitit_root
120
+
105
121
  @classmethod
106
122
  def _get_version_controlled_override(cls) -> Path | None:
107
123
  env = os.getenv("FURU_VERSION_CONTROLLED_PATH")
@@ -151,8 +167,7 @@ class FuruConfig:
151
167
  value = getattr(target, attr, missing_sentinel)
152
168
  if value is missing_sentinel:
153
169
  raise ValueError(
154
- "FURU_ALWAYS_RERUN entry does not exist: "
155
- f"{namespace!r}"
170
+ f"FURU_ALWAYS_RERUN entry does not exist: {namespace!r}"
156
171
  )
157
172
  target = value
158
173
 
@@ -169,4 +184,7 @@ def get_furu_root(*, version_controlled: bool = False) -> Path:
169
184
 
170
185
 
171
186
  def set_furu_root(path: Path) -> None:
172
- FURU_CONFIG.base_root = path.resolve()
187
+ root = path.resolve()
188
+ FURU_CONFIG.base_root = root
189
+ if os.getenv("FURU_SUBMITIT_PATH") is None:
190
+ FURU_CONFIG.submitit_root = (root / "submitit").resolve()
furu/core/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .furu import Furu
1
+ from .furu import DependencyChzSpec, DependencySpec, Furu
2
2
  from .list import FuruList
3
3
 
4
- __all__ = ["Furu", "FuruList"]
4
+ __all__ = ["DependencyChzSpec", "DependencySpec", "Furu", "FuruList"]