furu 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
furu/core/furu.py CHANGED
@@ -5,6 +5,7 @@ import os
5
5
  import signal
6
6
  import socket
7
7
  import sys
8
+ import threading
8
9
  import time
9
10
  import traceback
10
11
  from abc import ABC, abstractmethod
@@ -24,7 +25,6 @@ from typing import (
24
25
  TypeAlias,
25
26
  TypeVar,
26
27
  cast,
27
- overload,
28
28
  )
29
29
 
30
30
  import chz
@@ -40,6 +40,7 @@ from ..errors import (
40
40
  MISSING,
41
41
  FuruComputeError,
42
42
  FuruLockNotAcquired,
43
+ FuruValidationError,
43
44
  FuruWaitTimeout,
44
45
  )
45
46
  from ..runtime import current_holder
@@ -190,13 +191,20 @@ class Furu[T](ABC):
190
191
  raise NotImplementedError(f"{self.__class__.__name__}._load() not implemented")
191
192
 
192
193
  def _validate(self: Self) -> bool:
193
- """Validate that result is complete and correct (override if needed)."""
194
+ """
195
+ Validate that result is complete and correct (override if needed).
196
+
197
+ Return False or raise FuruValidationError to mark artifacts as invalid.
198
+ """
194
199
  return True
195
200
 
196
201
  def _dependencies(self: Self) -> "DependencySpec | None":
197
202
  """Return extra dependencies not captured by fields."""
198
203
  return None
199
204
 
205
+ def _executor_spec_key(self: Self) -> str:
206
+ return "default"
207
+
200
208
  def _get_dependencies(self: Self, *, recursive: bool = True) -> list["Furu"]:
201
209
  """Collect Furu dependencies from fields and `_dependencies()`."""
202
210
  seen = {self._furu_hash}
@@ -238,6 +246,27 @@ class Furu[T](ABC):
238
246
  directory, {"type": "result_invalidated", "reason": reason, "at": now}
239
247
  )
240
248
 
249
+ def _prepare_executor_rerun(self: Self, directory: Path) -> None:
250
+ if not self._always_rerun():
251
+ return
252
+ if not directory.exists():
253
+ return
254
+ migration = self._alias_record(directory)
255
+ if migration is not None and self._alias_is_active(directory, migration):
256
+ self._maybe_detach_alias(
257
+ directory=directory,
258
+ record=migration,
259
+ reason="always_rerun",
260
+ )
261
+ state = StateManager.read_state(directory)
262
+ if isinstance(state.result, _StateResultSuccess):
263
+ self._invalidate_cached_success(directory, reason="always_rerun enabled")
264
+
265
+ @property
266
+ def furu_hash(self: Self) -> str:
267
+ """Return the stable content hash for this Furu object."""
268
+ return self._furu_hash
269
+
241
270
  @property
242
271
  def _furu_hash(self: Self) -> str:
243
272
  """Compute hash of this object's content for storage identification."""
@@ -290,6 +319,33 @@ class Furu[T](ABC):
290
319
  """Log a message to the current holder's `furu.log`."""
291
320
  return log(message, level=level)
292
321
 
322
+ def _exists_quiet(self: Self) -> bool:
323
+ directory = self._base_furu_dir()
324
+ state = self.get_state(directory)
325
+
326
+ if not isinstance(state.result, _StateResultSuccess):
327
+ return False
328
+ try:
329
+ return self._validate()
330
+ except FuruValidationError as exc:
331
+ logger = get_logger()
332
+ logger.warning(
333
+ "exists %s -> false (validate invalid for %s: %s)",
334
+ directory,
335
+ f"{self.__class__.__name__}({self._furu_hash})",
336
+ exc,
337
+ )
338
+ return False
339
+ except Exception as exc:
340
+ logger = get_logger()
341
+ logger.exception(
342
+ "exists %s -> false (validate crashed for %s: %s)",
343
+ directory,
344
+ f"{self.__class__.__name__}({self._furu_hash})",
345
+ exc,
346
+ )
347
+ return False
348
+
293
349
  def exists(self: Self) -> bool:
294
350
  """Check if result exists and is valid."""
295
351
  logger = get_logger()
@@ -313,47 +369,93 @@ class Furu[T](ABC):
313
369
  """Get migration record for this object."""
314
370
  return MigrationManager.read_migration(self._base_furu_dir())
315
371
 
316
- @overload
317
- def load_or_create(
318
- self,
319
- executor: submitit.Executor,
320
- *,
321
- retry_failed: bool | None = None,
322
- ) -> T | submitit.Job[T]: ...
323
-
324
- @overload
325
- def load_or_create(
326
- self,
327
- executor: None = None,
328
- *,
329
- retry_failed: bool | None = None,
330
- ) -> T: ...
331
-
332
- def load_or_create(
333
- self: Self,
334
- executor: submitit.Executor | None = None,
335
- *,
336
- retry_failed: bool | None = None,
337
- ) -> T | submitit.Job[T]:
372
+ def get(self: Self, *, force: bool = False) -> T:
338
373
  """
339
374
  Load result if it exists, computing if necessary.
340
375
 
341
376
  Args:
342
- executor: Optional executor for batch submission (e.g., submitit.Executor)
343
- retry_failed: Whether to retry failed results (default uses FURU_RETRY_FAILED)
377
+ force: Allow computation inside executor contexts if the spec matches.
344
378
 
345
379
  Returns:
346
- Result if wait=True, job handle if wait=False, or None if already exists
380
+ Loaded or computed result.
347
381
 
348
382
  Raises:
349
383
  FuruComputeError: If computation fails with detailed error information
350
384
  """
385
+ from furu.execution.context import EXEC_CONTEXT
386
+ from furu.errors import (
387
+ FuruExecutionError,
388
+ FuruMissingArtifact,
389
+ FuruSpecMismatch,
390
+ )
391
+
392
+ ctx = EXEC_CONTEXT.get()
393
+ if ctx.mode == "executor":
394
+ directory = self._base_furu_dir()
395
+ if force:
396
+ if (
397
+ ctx.current_node_hash is None
398
+ or self._furu_hash != ctx.current_node_hash
399
+ ):
400
+ raise FuruExecutionError(
401
+ "force=True not allowed: only the current node may compute in executor mode. "
402
+ f"current_node_hash={ctx.current_node_hash!r} "
403
+ f"obj={self.__class__.__name__}({self._furu_hash})",
404
+ hints=[
405
+ "Declare this object as a dependency instead of calling dep.get(force=True).",
406
+ "Inside executor mode, use get(force=True) only on the node being executed.",
407
+ ],
408
+ )
409
+ self._prepare_executor_rerun(directory)
410
+
411
+ exists_ok = self._exists_quiet()
412
+ if exists_ok and not (force and self._always_rerun()):
413
+ return self._load()
414
+
415
+ if force and not exists_ok:
416
+ state = self.get_state(directory)
417
+ if isinstance(state.result, _StateResultSuccess):
418
+ self._invalidate_cached_success(
419
+ directory, reason="_validate returned false (executor)"
420
+ )
421
+
422
+ if not force:
423
+ raise FuruMissingArtifact(
424
+ "Missing artifact "
425
+ f"{self.__class__.__name__}({self._furu_hash}) in executor mode. "
426
+ f"Requested by {ctx.current_node_hash}. Declare it as a dependency."
427
+ )
428
+
429
+ required = self._executor_spec_key()
430
+ if ctx.spec_key is None or required != ctx.spec_key:
431
+ raise FuruSpecMismatch(
432
+ "force=True not allowed: "
433
+ f"required={required!r} != worker={ctx.spec_key!r} (v1 exact match)"
434
+ )
435
+
436
+ status, created_here, result = self._run_locally(
437
+ start_time=time.time(),
438
+ allow_failed=FURU_CONFIG.retry_failed,
439
+ executor_mode=True,
440
+ )
441
+ if status == "success":
442
+ if created_here:
443
+ return cast(T, result)
444
+ return self._load()
445
+
446
+ raise self._build_failed_state_error(
447
+ self._base_furu_dir(),
448
+ None,
449
+ message="Computation previously failed",
450
+ )
451
+
452
+ return self._get_impl_interactive(force=force)
453
+
454
+ def _get_impl_interactive(self: Self, *, force: bool) -> T:
351
455
  logger = get_logger()
352
456
  parent_holder = current_holder()
353
457
  has_parent = parent_holder is not None and parent_holder is not self
354
- retry_failed_effective = (
355
- retry_failed if retry_failed is not None else FURU_CONFIG.retry_failed
356
- )
458
+ retry_failed_effective = FURU_CONFIG.retry_failed
357
459
  if has_parent:
358
460
  logger.debug(
359
461
  "dep: begin %s %s %s",
@@ -445,7 +547,6 @@ class Furu[T](ABC):
445
547
  message="Computation previously failed",
446
548
  )
447
549
 
448
- needs_reconcile = True
449
550
  if isinstance(state0.result, _StateResultSuccess):
450
551
  # Double check logic if we fell through to here (e.g. race condition or invalidation above)
451
552
  if self._always_rerun():
@@ -460,9 +561,6 @@ class Furu[T](ABC):
460
561
  directory, reason="_validate returned false"
461
562
  )
462
563
  state0 = StateManager.read_state(directory)
463
- else:
464
- # Valid success found, skip reconcile
465
- needs_reconcile = False
466
564
  except Exception as e:
467
565
  self._invalidate_cached_success(
468
566
  directory,
@@ -470,11 +568,6 @@ class Furu[T](ABC):
470
568
  )
471
569
  state0 = StateManager.read_state(directory)
472
570
 
473
- if needs_reconcile and executor is not None:
474
- adapter0 = SubmititAdapter(executor)
475
- self._reconcile(directory, adapter=adapter0)
476
- state0 = StateManager.read_state(directory)
477
-
478
571
  attempt0 = state0.attempt
479
572
  if isinstance(state0.result, _StateResultSuccess):
480
573
  decision = "success->load"
@@ -494,7 +587,7 @@ class Furu[T](ABC):
494
587
  if decision != "success->load":
495
588
  write_separator()
496
589
  logger.debug(
497
- "load_or_create %s %s %s (%s)",
590
+ "get %s %s %s (%s)",
498
591
  self.__class__.__name__,
499
592
  self._furu_hash,
500
593
  directory,
@@ -514,7 +607,7 @@ class Furu[T](ABC):
514
607
  # failures even when we suppressed the cache-hit header line.
515
608
  write_separator()
516
609
  logger.error(
517
- "load_or_create %s %s (load failed)",
610
+ "get %s %s (load failed)",
518
611
  self.__class__.__name__,
519
612
  self._furu_hash,
520
613
  )
@@ -524,51 +617,30 @@ class Furu[T](ABC):
524
617
  e,
525
618
  ) from e
526
619
 
527
- # Synchronous execution
528
- if executor is None:
529
- status, created_here, result = self._run_locally(
530
- start_time=start_time,
531
- allow_failed=retry_failed_effective,
532
- )
533
- if status == "success":
534
- ok = True
535
- if created_here:
536
- logger.debug(
537
- "load_or_create: %s created -> return",
538
- self.__class__.__name__,
539
- )
540
- return cast(T, result)
620
+ status, created_here, result = self._run_locally(
621
+ start_time=start_time,
622
+ allow_failed=retry_failed_effective,
623
+ executor_mode=False,
624
+ )
625
+ if status == "success":
626
+ ok = True
627
+ if created_here:
541
628
  logger.debug(
542
- "load_or_create: %s success -> _load()",
629
+ "get: %s created -> return",
543
630
  self.__class__.__name__,
544
631
  )
545
- return self._load()
546
-
547
- raise self._build_failed_state_error(
548
- directory,
549
- None,
550
- message="Computation previously failed",
632
+ return cast(T, result)
633
+ logger.debug(
634
+ "get: %s success -> _load()",
635
+ self.__class__.__name__,
551
636
  )
637
+ return self._load()
552
638
 
553
- # Asynchronous execution with submitit
554
- (submitit_folder := self._base_furu_dir() / "submitit").mkdir(
555
- exist_ok=True, parents=True
556
- )
557
- executor.folder = submitit_folder
558
- adapter = SubmititAdapter(executor)
559
-
560
- logger.debug(
561
- "load_or_create: %s -> submitit submit_once()",
562
- self.__class__.__name__,
563
- )
564
- job = self._submit_once(
565
- adapter,
639
+ raise self._build_failed_state_error(
566
640
  directory,
567
641
  None,
568
- allow_failed=retry_failed_effective,
642
+ message="Computation previously failed",
569
643
  )
570
- ok = True
571
- return cast(submitit.Job[T], job)
572
644
  finally:
573
645
  if has_parent:
574
646
  logger.debug(
@@ -579,7 +651,7 @@ class Furu[T](ABC):
579
651
  )
580
652
 
581
653
  def _log_console_start(self, action_color: str) -> None:
582
- """Log the start of load_or_create to console with caller info."""
654
+ """Log the start of get to console with caller info."""
583
655
  logger = get_logger()
584
656
  frame = sys._getframe(1)
585
657
 
@@ -599,7 +671,7 @@ class Furu[T](ABC):
599
671
  frame = frame.f_back
600
672
 
601
673
  logger.info(
602
- "load_or_create %s %s",
674
+ "get %s %s",
603
675
  self.__class__.__name__,
604
676
  self._furu_hash,
605
677
  extra={
@@ -612,20 +684,14 @@ class Furu[T](ABC):
612
684
  def _add_exception_breadcrumbs(self, exc: BaseException, directory: Path) -> None:
613
685
  if not hasattr(exc, "add_note"):
614
686
  return
615
- state_path = StateManager.get_state_path(directory)
616
- log_path = StateManager.get_internal_dir(directory) / "furu.log"
617
- note = (
618
- f"Furu directory: {directory}\n"
619
- f"State file: {state_path}\n"
620
- f"Log file: {log_path}"
621
- )
687
+ note = f"Furu dir: {directory}"
622
688
  exc.add_note(note)
623
689
 
624
690
  @staticmethod
625
691
  def _failed_state_hints() -> list[str]:
626
692
  return [
627
- "To retry this failed artifact: set FURU_RETRY_FAILED=1 or call load_or_create(retry_failed=True).",
628
- "To inspect details: open the state file and furu.log shown above.",
693
+ "To retry this failed artifact: set FURU_RETRY_FAILED=1 or call get() again.",
694
+ "To inspect details: open the furu dir shown above.",
629
695
  ]
630
696
 
631
697
  def _build_failed_state_error(
@@ -843,127 +909,201 @@ class Furu[T](ABC):
843
909
  """Entry point for worker process (called by submitit or locally)."""
844
910
  with enter_holder(self):
845
911
  logger = get_logger()
846
- directory = self._base_furu_dir()
847
- directory.mkdir(parents=True, exist_ok=True)
848
-
849
- env_info = self._collect_submitit_env()
850
- allow_failed_effective = (
851
- allow_failed if allow_failed is not None else FURU_CONFIG.retry_failed
912
+ # Ensure executor semantics apply to *all* work in the worker, not
913
+ # just `_create()`. This prevents accidental dependency computation
914
+ # (e.g., from within `_validate()` or metadata hooks).
915
+ from furu.execution.context import EXEC_CONTEXT, ExecContext
916
+
917
+ exec_token = EXEC_CONTEXT.set(
918
+ ExecContext(
919
+ mode="executor",
920
+ spec_key=self._executor_spec_key(),
921
+ backend="submitit",
922
+ current_node_hash=self._furu_hash,
923
+ )
852
924
  )
853
-
854
925
  try:
855
- with compute_lock(
856
- directory,
857
- backend="submitit",
858
- lease_duration_sec=FURU_CONFIG.lease_duration_sec,
859
- heartbeat_interval_sec=FURU_CONFIG.heartbeat_interval_sec,
860
- owner={
861
- "pid": os.getpid(),
862
- "host": socket.gethostname(),
863
- "user": getpass.getuser(),
864
- "command": " ".join(sys.argv) if sys.argv else "<unknown>",
865
- },
866
- scheduler={
867
- "backend": env_info.get("backend"),
868
- "job_id": env_info.get("slurm_job_id"),
869
- },
870
- max_wait_time_sec=None, # Workers wait indefinitely
871
- poll_interval_sec=FURU_CONFIG.poll_interval,
872
- wait_log_every_sec=FURU_CONFIG.wait_log_every_sec,
873
- reconcile_fn=lambda d: self._reconcile(d),
874
- allow_failed=allow_failed_effective,
875
- ) as ctx:
876
- stage = "metadata"
877
- try:
878
- # Refresh metadata (now safe - attempt is already recorded)
879
- metadata = MetadataManager.create_metadata(
880
- self, directory, ignore_diff=FURU_CONFIG.ignore_git_diff
881
- )
882
- MetadataManager.write_metadata(metadata, directory)
926
+ directory = self._base_furu_dir()
927
+ directory.mkdir(parents=True, exist_ok=True)
928
+ always_rerun = self._always_rerun()
929
+ needs_success_invalidation = False
930
+ if not always_rerun:
931
+ exists_ok = self._exists_quiet()
932
+ if not exists_ok:
933
+ state = self.get_state(directory)
934
+ if isinstance(state.result, _StateResultSuccess):
935
+ needs_success_invalidation = True
936
+
937
+ env_info = self._collect_submitit_env()
938
+ allow_failed_effective = (
939
+ allow_failed
940
+ if allow_failed is not None
941
+ else FURU_CONFIG.retry_failed
942
+ )
943
+ allow_success = always_rerun or needs_success_invalidation
883
944
 
884
- # Set up signal handlers
885
- stage = "signal handler setup"
886
- self._setup_signal_handlers(
887
- directory, ctx.stop_heartbeat, attempt_id=ctx.attempt_id
888
- )
945
+ try:
946
+ with compute_lock(
947
+ directory,
948
+ backend="submitit",
949
+ lease_duration_sec=FURU_CONFIG.lease_duration_sec,
950
+ heartbeat_interval_sec=FURU_CONFIG.heartbeat_interval_sec,
951
+ owner={
952
+ "pid": os.getpid(),
953
+ "host": socket.gethostname(),
954
+ "user": getpass.getuser(),
955
+ "command": " ".join(sys.argv) if sys.argv else "<unknown>",
956
+ },
957
+ scheduler={
958
+ "backend": env_info.get("backend"),
959
+ "job_id": env_info.get("slurm_job_id"),
960
+ },
961
+ max_wait_time_sec=None, # Workers wait indefinitely
962
+ poll_interval_sec=FURU_CONFIG.poll_interval,
963
+ wait_log_every_sec=FURU_CONFIG.wait_log_every_sec,
964
+ reconcile_fn=lambda d: self._reconcile(d),
965
+ allow_failed=allow_failed_effective,
966
+ allow_success=allow_success,
967
+ ) as ctx:
968
+ self._prepare_executor_rerun(directory)
969
+ if not always_rerun:
970
+ exists_ok = self._exists_quiet()
971
+ if not exists_ok:
972
+ state = self.get_state(directory)
973
+ if isinstance(state.result, _StateResultSuccess):
974
+ self._invalidate_cached_success(
975
+ directory,
976
+ reason="_validate returned false (worker)",
977
+ )
978
+
979
+ stage = "metadata"
980
+ try:
981
+ # Refresh metadata (now safe - attempt is already recorded)
982
+ metadata = MetadataManager.create_metadata(
983
+ self,
984
+ directory,
985
+ ignore_diff=FURU_CONFIG.ignore_git_diff,
986
+ )
987
+ MetadataManager.write_metadata(metadata, directory)
889
988
 
890
- stage = "_create"
891
- # Run computation
892
- logger.debug(
893
- "_create: begin %s %s %s",
894
- self.__class__.__name__,
895
- self._furu_hash,
896
- directory,
897
- )
898
- self._create()
899
- logger.debug(
900
- "_create: ok %s %s %s",
901
- self.__class__.__name__,
902
- self._furu_hash,
903
- directory,
904
- )
905
- StateManager.write_success_marker(
906
- directory, attempt_id=ctx.attempt_id
907
- )
908
- StateManager.finish_attempt_success(
909
- directory, attempt_id=ctx.attempt_id
910
- )
911
- logger.info(
912
- "_create ok %s %s",
913
- self.__class__.__name__,
914
- self._furu_hash,
915
- extra={"furu_console_only": True},
916
- )
917
- except Exception as e:
918
- if stage == "_create":
919
- logger.error(
920
- "_create failed %s %s %s",
989
+ # Set up signal handlers
990
+ stage = "signal handler setup"
991
+ self._setup_signal_handlers(
992
+ directory,
993
+ ctx.stop_heartbeat,
994
+ attempt_id=ctx.attempt_id,
995
+ )
996
+
997
+ stage = "_create"
998
+ # Run computation
999
+ logger.debug(
1000
+ "_create: begin %s %s %s",
921
1001
  self.__class__.__name__,
922
1002
  self._furu_hash,
923
1003
  directory,
924
- extra={"furu_file_only": True},
925
1004
  )
926
- else:
927
- logger.error(
928
- "attempt failed (%s) %s %s %s",
929
- stage,
1005
+ self._create()
1006
+ logger.debug(
1007
+ "_create: ok %s %s %s",
930
1008
  self.__class__.__name__,
931
1009
  self._furu_hash,
932
1010
  directory,
1011
+ )
1012
+ StateManager.write_success_marker(
1013
+ directory, attempt_id=ctx.attempt_id
1014
+ )
1015
+ StateManager.finish_attempt_success(
1016
+ directory, attempt_id=ctx.attempt_id
1017
+ )
1018
+ logger.info(
1019
+ "_create ok %s %s",
1020
+ self.__class__.__name__,
1021
+ self._furu_hash,
1022
+ extra={"furu_console_only": True},
1023
+ )
1024
+ except Exception as e:
1025
+ if stage == "_create":
1026
+ logger.error(
1027
+ "_create failed %s %s %s",
1028
+ self.__class__.__name__,
1029
+ self._furu_hash,
1030
+ directory,
1031
+ extra={"furu_file_only": True},
1032
+ )
1033
+ else:
1034
+ logger.error(
1035
+ "attempt failed (%s) %s %s %s",
1036
+ stage,
1037
+ self.__class__.__name__,
1038
+ self._furu_hash,
1039
+ directory,
1040
+ extra={"furu_file_only": True},
1041
+ )
1042
+ logger.error(
1043
+ "%s",
1044
+ format_traceback(e),
933
1045
  extra={"furu_file_only": True},
934
1046
  )
935
- logger.error(
936
- "%s", format_traceback(e), extra={"furu_file_only": True}
937
- )
938
1047
 
939
- tb = "".join(
940
- traceback.format_exception(type(e), e, e.__traceback__)
941
- )
942
- StateManager.finish_attempt_failed(
943
- directory,
944
- attempt_id=ctx.attempt_id,
945
- error={
946
- "type": type(e).__name__,
947
- "message": str(e),
948
- "traceback": tb,
949
- },
950
- )
951
- self._add_exception_breadcrumbs(e, directory)
952
- if stage != "_create":
953
- message = (
954
- "Failed to create metadata"
955
- if stage == "metadata"
956
- else "Failed to set up signal handlers"
1048
+ tb = "".join(
1049
+ traceback.format_exception(type(e), e, e.__traceback__)
1050
+ )
1051
+ StateManager.finish_attempt_failed(
1052
+ directory,
1053
+ attempt_id=ctx.attempt_id,
1054
+ error={
1055
+ "type": type(e).__name__,
1056
+ "message": str(e),
1057
+ "traceback": tb,
1058
+ },
957
1059
  )
958
- raise FuruComputeError(
959
- message,
960
- StateManager.get_state_path(directory),
961
- e,
962
- ) from e
963
- raise
964
- except FuruLockNotAcquired:
965
- # Experiment already completed (success or failed), nothing to do
966
- return
1060
+ self._add_exception_breadcrumbs(e, directory)
1061
+ if stage != "_create":
1062
+ message = (
1063
+ "Failed to create metadata"
1064
+ if stage == "metadata"
1065
+ else "Failed to set up signal handlers"
1066
+ )
1067
+ raise FuruComputeError(
1068
+ message,
1069
+ StateManager.get_state_path(directory),
1070
+ e,
1071
+ ) from e
1072
+ raise
1073
+ except FuruLockNotAcquired as exc:
1074
+ # Experiment already completed; succeed if success, fail if failed.
1075
+ state = StateManager.read_state(directory)
1076
+ state_path = StateManager.get_state_path(directory)
1077
+ attempt = state.attempt
1078
+ attempt_info = "no active attempt"
1079
+ if attempt is not None:
1080
+ attempt_info = (
1081
+ f"attempt {attempt.id} status {attempt.status} "
1082
+ f"backend {attempt.backend}"
1083
+ )
1084
+ hints = [
1085
+ f"Furu hash: {self._furu_hash}",
1086
+ f"Directory: {directory}",
1087
+ f"State file: {state_path}",
1088
+ f"Attempt: {attempt_info}",
1089
+ ]
1090
+ if isinstance(state.result, _StateResultSuccess):
1091
+ return
1092
+ if isinstance(state.result, _StateResultFailed):
1093
+ if allow_failed_effective:
1094
+ return
1095
+ raise FuruComputeError(
1096
+ "Worker refused to run: experiment already failed",
1097
+ state_path,
1098
+ exc,
1099
+ hints=hints,
1100
+ ) from exc
1101
+ raise FuruLockNotAcquired(
1102
+ "Worker refused to run: experiment already running elsewhere",
1103
+ hints=hints,
1104
+ ) from exc
1105
+ finally:
1106
+ EXEC_CONTEXT.reset(exec_token)
967
1107
 
968
1108
  def _collect_submitit_env(self: Self) -> _SubmititEnvInfo:
969
1109
  """Collect submitit/slurm environment information."""
@@ -994,6 +1134,7 @@ class Furu[T](ABC):
994
1134
  start_time: float,
995
1135
  *,
996
1136
  allow_failed: bool,
1137
+ executor_mode: bool = False,
997
1138
  ) -> tuple[str, bool, T | None]:
998
1139
  """Run computation locally, returning (status, created_here, result)."""
999
1140
  logger = get_logger()
@@ -1047,7 +1188,23 @@ class Furu[T](ABC):
1047
1188
  self._furu_hash,
1048
1189
  directory,
1049
1190
  )
1050
- result = self._create()
1191
+ token = None
1192
+ if executor_mode:
1193
+ from furu.execution.context import EXEC_CONTEXT, ExecContext
1194
+
1195
+ token = EXEC_CONTEXT.set(
1196
+ ExecContext(
1197
+ mode="executor",
1198
+ spec_key=self._executor_spec_key(),
1199
+ backend="local",
1200
+ current_node_hash=self._furu_hash,
1201
+ )
1202
+ )
1203
+ try:
1204
+ result = self._create()
1205
+ finally:
1206
+ if token is not None:
1207
+ EXEC_CONTEXT.reset(token)
1051
1208
  logger.debug(
1052
1209
  "_create: ok %s %s %s",
1053
1210
  self.__class__.__name__,
@@ -1145,6 +1302,8 @@ class Furu[T](ABC):
1145
1302
  attempt_id: str,
1146
1303
  ) -> None:
1147
1304
  """Set up signal handlers for graceful preemption."""
1305
+ if threading.current_thread() is not threading.main_thread():
1306
+ return
1148
1307
 
1149
1308
  def handle_signal(signum: int, frame: FrameType | None) -> None:
1150
1309
  try: