furu 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- furu/config.py +27 -40
- furu/core/furu.py +203 -126
- furu/core/list.py +3 -2
- furu/dashboard/frontend/dist/assets/{index-DS3FsqcY.js → index-BjyrY-Zz.js} +1 -1
- furu/dashboard/frontend/dist/index.html +1 -1
- furu/execution/local.py +9 -7
- furu/execution/plan.py +117 -25
- furu/execution/slurm_dag.py +16 -14
- furu/execution/slurm_pool.py +5 -5
- furu/execution/slurm_spec.py +2 -2
- furu/migration.py +1 -2
- furu/runtime/env.py +1 -1
- furu/runtime/logging.py +30 -4
- furu/runtime/overrides.py +37 -0
- furu/storage/metadata.py +26 -29
- furu/storage/migration.py +0 -1
- furu/storage/state.py +86 -92
- furu/testing.py +232 -0
- {furu-0.0.4.dist-info → furu-0.0.6.dist-info}/METADATA +101 -6
- {furu-0.0.4.dist-info → furu-0.0.6.dist-info}/RECORD +22 -20
- {furu-0.0.4.dist-info → furu-0.0.6.dist-info}/WHEEL +1 -1
- {furu-0.0.4.dist-info → furu-0.0.6.dist-info}/entry_points.txt +0 -0
furu/core/furu.py
CHANGED
|
@@ -9,6 +9,7 @@ import threading
|
|
|
9
9
|
import time
|
|
10
10
|
import traceback
|
|
11
11
|
from abc import ABC, abstractmethod
|
|
12
|
+
from functools import cached_property
|
|
12
13
|
from pathlib import Path
|
|
13
14
|
from types import FrameType
|
|
14
15
|
from typing import (
|
|
@@ -21,17 +22,16 @@ from typing import (
|
|
|
21
22
|
Protocol,
|
|
22
23
|
Self,
|
|
23
24
|
Sequence,
|
|
24
|
-
TypedDict,
|
|
25
25
|
TypeAlias,
|
|
26
|
+
TypedDict,
|
|
26
27
|
TypeVar,
|
|
27
28
|
cast,
|
|
28
29
|
)
|
|
29
30
|
|
|
30
31
|
import chz
|
|
31
32
|
import submitit
|
|
32
|
-
from typing_extensions import dataclass_transform
|
|
33
|
-
|
|
34
33
|
from chz.field import Field as ChzField
|
|
34
|
+
from typing_extensions import dataclass_transform
|
|
35
35
|
|
|
36
36
|
from ..adapters import SubmititAdapter
|
|
37
37
|
from ..adapters.submitit import SubmititJob
|
|
@@ -46,6 +46,7 @@ from ..errors import (
|
|
|
46
46
|
from ..runtime import current_holder
|
|
47
47
|
from ..runtime.logging import enter_holder, get_logger, log, write_separator
|
|
48
48
|
from ..runtime.tracebacks import format_traceback
|
|
49
|
+
from ..runtime.overrides import has_override, lookup_override
|
|
49
50
|
from ..serialization import FuruSerializer
|
|
50
51
|
from ..serialization.serializer import JsonValue
|
|
51
52
|
from ..storage import (
|
|
@@ -63,7 +64,6 @@ from ..storage.state import (
|
|
|
63
64
|
_StateAttemptRunning,
|
|
64
65
|
_StateResultAbsent,
|
|
65
66
|
_StateResultFailed,
|
|
66
|
-
_StateResultMigrated,
|
|
67
67
|
_StateResultSuccess,
|
|
68
68
|
compute_lock,
|
|
69
69
|
)
|
|
@@ -207,7 +207,7 @@ class Furu[T](ABC):
|
|
|
207
207
|
|
|
208
208
|
def _get_dependencies(self: Self, *, recursive: bool = True) -> list["Furu"]:
|
|
209
209
|
"""Collect Furu dependencies from fields and `_dependencies()`."""
|
|
210
|
-
seen = {self.
|
|
210
|
+
seen = {self.furu_hash}
|
|
211
211
|
dependencies: list[Furu] = []
|
|
212
212
|
_collect_dependencies(self, dependencies, seen, recursive=recursive)
|
|
213
213
|
return dependencies
|
|
@@ -221,7 +221,7 @@ class Furu[T](ABC):
|
|
|
221
221
|
for dependency in dependencies:
|
|
222
222
|
if dependency is self:
|
|
223
223
|
raise ValueError("Furu dependencies cannot include self")
|
|
224
|
-
digests.add(dependency.
|
|
224
|
+
digests.add(dependency.furu_hash)
|
|
225
225
|
return sorted(digests)
|
|
226
226
|
|
|
227
227
|
def _invalidate_cached_success(self: Self, directory: Path, *, reason: str) -> None:
|
|
@@ -229,7 +229,7 @@ class Furu[T](ABC):
|
|
|
229
229
|
logger.warning(
|
|
230
230
|
"invalidate %s %s %s (%s)",
|
|
231
231
|
self.__class__.__name__,
|
|
232
|
-
self.
|
|
232
|
+
self.furu_hash,
|
|
233
233
|
directory,
|
|
234
234
|
reason,
|
|
235
235
|
)
|
|
@@ -262,14 +262,9 @@ class Furu[T](ABC):
|
|
|
262
262
|
if isinstance(state.result, _StateResultSuccess):
|
|
263
263
|
self._invalidate_cached_success(directory, reason="always_rerun enabled")
|
|
264
264
|
|
|
265
|
-
@
|
|
265
|
+
@cached_property
|
|
266
266
|
def furu_hash(self: Self) -> str:
|
|
267
267
|
"""Return the stable content hash for this Furu object."""
|
|
268
|
-
return self._furu_hash
|
|
269
|
-
|
|
270
|
-
@property
|
|
271
|
-
def _furu_hash(self: Self) -> str:
|
|
272
|
-
"""Compute hash of this object's content for storage identification."""
|
|
273
268
|
return FuruSerializer.compute_hash(self)
|
|
274
269
|
|
|
275
270
|
def _always_rerun(self: Self) -> bool:
|
|
@@ -282,15 +277,17 @@ class Furu[T](ABC):
|
|
|
282
277
|
|
|
283
278
|
def _base_furu_dir(self: Self) -> Path:
|
|
284
279
|
root = FURU_CONFIG.get_root(self.version_controlled)
|
|
285
|
-
return root / self.__class__._namespace() / self.
|
|
280
|
+
return root / self.__class__._namespace() / self.furu_hash
|
|
286
281
|
|
|
287
|
-
@
|
|
282
|
+
@cached_property
|
|
288
283
|
def furu_dir(self: Self) -> Path:
|
|
289
284
|
"""Get the directory for this Furu object."""
|
|
290
285
|
directory = self._base_furu_dir()
|
|
291
286
|
migration = self._alias_record(directory)
|
|
292
|
-
if migration is not None
|
|
293
|
-
|
|
287
|
+
if migration is not None:
|
|
288
|
+
target_dir = self._alias_target_dir(directory, migration)
|
|
289
|
+
if target_dir is not None:
|
|
290
|
+
return target_dir
|
|
294
291
|
return directory
|
|
295
292
|
|
|
296
293
|
@property
|
|
@@ -320,10 +317,11 @@ class Furu[T](ABC):
|
|
|
320
317
|
return log(message, level=level)
|
|
321
318
|
|
|
322
319
|
def _exists_quiet(self: Self) -> bool:
|
|
320
|
+
if has_override(self.furu_hash):
|
|
321
|
+
return True
|
|
323
322
|
directory = self._base_furu_dir()
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
if not isinstance(state.result, _StateResultSuccess):
|
|
323
|
+
success_dir = self._success_marker_dir(directory)
|
|
324
|
+
if success_dir is None:
|
|
327
325
|
return False
|
|
328
326
|
try:
|
|
329
327
|
return self._validate()
|
|
@@ -332,7 +330,7 @@ class Furu[T](ABC):
|
|
|
332
330
|
logger.warning(
|
|
333
331
|
"exists %s -> false (validate invalid for %s: %s)",
|
|
334
332
|
directory,
|
|
335
|
-
f"{self.__class__.__name__}({self.
|
|
333
|
+
f"{self.__class__.__name__}({self.furu_hash})",
|
|
336
334
|
exc,
|
|
337
335
|
)
|
|
338
336
|
return False
|
|
@@ -341,7 +339,7 @@ class Furu[T](ABC):
|
|
|
341
339
|
logger.exception(
|
|
342
340
|
"exists %s -> false (validate crashed for %s: %s)",
|
|
343
341
|
directory,
|
|
344
|
-
f"{self.__class__.__name__}({self.
|
|
342
|
+
f"{self.__class__.__name__}({self.furu_hash})",
|
|
345
343
|
exc,
|
|
346
344
|
)
|
|
347
345
|
return False
|
|
@@ -350,9 +348,11 @@ class Furu[T](ABC):
|
|
|
350
348
|
"""Check if result exists and is valid."""
|
|
351
349
|
logger = get_logger()
|
|
352
350
|
directory = self._base_furu_dir()
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
351
|
+
if has_override(self.furu_hash):
|
|
352
|
+
logger.info("exists %s -> true (override)", directory)
|
|
353
|
+
return True
|
|
354
|
+
success_dir = self._success_marker_dir(directory)
|
|
355
|
+
if success_dir is None:
|
|
356
356
|
logger.info("exists %s -> false", directory)
|
|
357
357
|
return False
|
|
358
358
|
|
|
@@ -382,72 +382,113 @@ class Furu[T](ABC):
|
|
|
382
382
|
Raises:
|
|
383
383
|
FuruComputeError: If computation fails with detailed error information
|
|
384
384
|
"""
|
|
385
|
-
|
|
385
|
+
has_override_value, override_value = lookup_override(self.furu_hash)
|
|
386
|
+
if has_override_value:
|
|
387
|
+
return cast(T, override_value)
|
|
386
388
|
from furu.errors import (
|
|
387
389
|
FuruExecutionError,
|
|
388
390
|
FuruMissingArtifact,
|
|
389
391
|
FuruSpecMismatch,
|
|
390
392
|
)
|
|
393
|
+
from furu.execution.context import EXEC_CONTEXT
|
|
391
394
|
|
|
392
395
|
ctx = EXEC_CONTEXT.get()
|
|
393
396
|
if ctx.mode == "executor":
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
"force=True not allowed: only the current node may compute in executor mode. "
|
|
402
|
-
f"current_node_hash={ctx.current_node_hash!r} "
|
|
403
|
-
f"obj={self.__class__.__name__}({self._furu_hash})",
|
|
404
|
-
hints=[
|
|
405
|
-
"Declare this object as a dependency instead of calling dep.get(force=True).",
|
|
406
|
-
"Inside executor mode, use get(force=True) only on the node being executed.",
|
|
407
|
-
],
|
|
408
|
-
)
|
|
409
|
-
self._prepare_executor_rerun(directory)
|
|
397
|
+
logger = get_logger()
|
|
398
|
+
parent_holder = current_holder()
|
|
399
|
+
has_parent = parent_holder is not None and parent_holder is not self
|
|
400
|
+
needs_holder = parent_holder is None or has_parent
|
|
401
|
+
caller_info: _CallerInfo = {}
|
|
402
|
+
if has_parent:
|
|
403
|
+
caller_info = self._get_caller_info()
|
|
410
404
|
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
405
|
+
def _executor_get() -> T:
|
|
406
|
+
directory = self._base_furu_dir()
|
|
407
|
+
if force:
|
|
408
|
+
if (
|
|
409
|
+
ctx.current_node_hash is None
|
|
410
|
+
or self.furu_hash != ctx.current_node_hash
|
|
411
|
+
):
|
|
412
|
+
raise FuruExecutionError(
|
|
413
|
+
"force=True not allowed: only the current node may compute in executor mode. "
|
|
414
|
+
f"current_node_hash={ctx.current_node_hash!r} "
|
|
415
|
+
f"obj={self.__class__.__name__}({self.furu_hash})",
|
|
416
|
+
hints=[
|
|
417
|
+
"Declare this object as a dependency instead of calling dep.get(force=True).",
|
|
418
|
+
"Inside executor mode, use get(force=True) only on the node being executed.",
|
|
419
|
+
],
|
|
420
|
+
)
|
|
421
|
+
self._prepare_executor_rerun(directory)
|
|
422
|
+
|
|
423
|
+
exists_ok = self._exists_quiet()
|
|
424
|
+
if exists_ok and not (force and self._always_rerun()):
|
|
425
|
+
return self._load()
|
|
414
426
|
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
427
|
+
if force and not exists_ok:
|
|
428
|
+
state = self.get_state(directory)
|
|
429
|
+
if isinstance(state.result, _StateResultSuccess):
|
|
430
|
+
self._invalidate_cached_success(
|
|
431
|
+
directory, reason="_validate returned false (executor)"
|
|
432
|
+
)
|
|
433
|
+
|
|
434
|
+
if not force:
|
|
435
|
+
raise FuruMissingArtifact(
|
|
436
|
+
"Missing artifact "
|
|
437
|
+
f"{self.__class__.__name__}({self.furu_hash}) in executor mode. "
|
|
438
|
+
f"Requested by {ctx.current_node_hash}. Declare it as a dependency."
|
|
439
|
+
)
|
|
440
|
+
|
|
441
|
+
required = self._executor_spec_key()
|
|
442
|
+
if ctx.spec_key is None or required != ctx.spec_key:
|
|
443
|
+
raise FuruSpecMismatch(
|
|
444
|
+
"force=True not allowed: "
|
|
445
|
+
f"required={required!r} != worker={ctx.spec_key!r} (v1 exact match)"
|
|
420
446
|
)
|
|
421
447
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
448
|
+
StateManager.ensure_internal_dir(directory)
|
|
449
|
+
status, created_here, result = self._run_locally(
|
|
450
|
+
start_time=time.time(),
|
|
451
|
+
allow_failed=FURU_CONFIG.retry_failed,
|
|
452
|
+
executor_mode=True,
|
|
427
453
|
)
|
|
454
|
+
if status == "success":
|
|
455
|
+
if created_here:
|
|
456
|
+
return cast(T, result)
|
|
457
|
+
return self._load()
|
|
428
458
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
"
|
|
433
|
-
f"required={required!r} != worker={ctx.spec_key!r} (v1 exact match)"
|
|
459
|
+
raise self._build_failed_state_error(
|
|
460
|
+
self._base_furu_dir(),
|
|
461
|
+
None,
|
|
462
|
+
message="Computation previously failed",
|
|
434
463
|
)
|
|
435
464
|
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
return self._load()
|
|
465
|
+
if has_parent:
|
|
466
|
+
logger.debug(
|
|
467
|
+
"dep: begin %s %s %s",
|
|
468
|
+
self.__class__.__name__,
|
|
469
|
+
self.furu_hash,
|
|
470
|
+
self._base_furu_dir(),
|
|
471
|
+
extra=caller_info,
|
|
472
|
+
)
|
|
445
473
|
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
474
|
+
ok = False
|
|
475
|
+
try:
|
|
476
|
+
if needs_holder:
|
|
477
|
+
with enter_holder(self):
|
|
478
|
+
result = _executor_get()
|
|
479
|
+
else:
|
|
480
|
+
result = _executor_get()
|
|
481
|
+
ok = True
|
|
482
|
+
return result
|
|
483
|
+
finally:
|
|
484
|
+
if has_parent:
|
|
485
|
+
logger.debug(
|
|
486
|
+
"dep: end %s %s (%s)",
|
|
487
|
+
self.__class__.__name__,
|
|
488
|
+
self.furu_hash,
|
|
489
|
+
"ok" if ok else "error",
|
|
490
|
+
extra=caller_info,
|
|
491
|
+
)
|
|
451
492
|
|
|
452
493
|
return self._get_impl_interactive(force=force)
|
|
453
494
|
|
|
@@ -455,13 +496,15 @@ class Furu[T](ABC):
|
|
|
455
496
|
logger = get_logger()
|
|
456
497
|
parent_holder = current_holder()
|
|
457
498
|
has_parent = parent_holder is not None and parent_holder is not self
|
|
499
|
+
caller_info = self._get_caller_info()
|
|
458
500
|
retry_failed_effective = FURU_CONFIG.retry_failed
|
|
459
501
|
if has_parent:
|
|
460
502
|
logger.debug(
|
|
461
503
|
"dep: begin %s %s %s",
|
|
462
504
|
self.__class__.__name__,
|
|
463
|
-
self.
|
|
505
|
+
self.furu_hash,
|
|
464
506
|
self._base_furu_dir(),
|
|
507
|
+
extra=caller_info,
|
|
465
508
|
)
|
|
466
509
|
|
|
467
510
|
ok = False
|
|
@@ -469,19 +512,21 @@ class Furu[T](ABC):
|
|
|
469
512
|
with enter_holder(self):
|
|
470
513
|
start_time = time.time()
|
|
471
514
|
base_dir = self._base_furu_dir()
|
|
472
|
-
base_dir.mkdir(parents=True, exist_ok=True)
|
|
473
515
|
directory = base_dir
|
|
474
516
|
migration = self._alias_record(base_dir)
|
|
475
517
|
alias_active = False
|
|
518
|
+
base_marker = StateManager.success_marker_exists(base_dir)
|
|
476
519
|
|
|
477
520
|
if (
|
|
478
521
|
migration is not None
|
|
479
522
|
and migration.kind == "alias"
|
|
480
523
|
and migration.overwritten_at is None
|
|
524
|
+
and not base_marker
|
|
481
525
|
):
|
|
482
|
-
target_dir =
|
|
483
|
-
|
|
484
|
-
|
|
526
|
+
target_dir = self._alias_target_dir(
|
|
527
|
+
base_dir, migration, base_marker=base_marker
|
|
528
|
+
)
|
|
529
|
+
if target_dir is not None:
|
|
485
530
|
alias_active = True
|
|
486
531
|
directory = target_dir
|
|
487
532
|
else:
|
|
@@ -582,17 +627,25 @@ class Furu[T](ABC):
|
|
|
582
627
|
# Cache hits can be extremely noisy in pipelines; keep logs for state
|
|
583
628
|
# transitions (create/wait) and error cases, but suppress repeated
|
|
584
629
|
# "success->load" lines and the raw separator on successful loads.
|
|
585
|
-
self._log_console_start(
|
|
630
|
+
self._log_console_start(
|
|
631
|
+
action_color=action_color,
|
|
632
|
+
caller_info=caller_info,
|
|
633
|
+
)
|
|
586
634
|
|
|
587
635
|
if decision != "success->load":
|
|
636
|
+
if decision == "create":
|
|
637
|
+
StateManager.ensure_internal_dir(directory)
|
|
588
638
|
write_separator()
|
|
589
639
|
logger.debug(
|
|
590
640
|
"get %s %s %s (%s)",
|
|
591
641
|
self.__class__.__name__,
|
|
592
|
-
self.
|
|
642
|
+
self.furu_hash,
|
|
593
643
|
directory,
|
|
594
644
|
decision,
|
|
595
|
-
extra={
|
|
645
|
+
extra={
|
|
646
|
+
"furu_action_color": action_color,
|
|
647
|
+
**caller_info,
|
|
648
|
+
},
|
|
596
649
|
)
|
|
597
650
|
|
|
598
651
|
# Fast path: already successful
|
|
@@ -609,7 +662,7 @@ class Furu[T](ABC):
|
|
|
609
662
|
logger.error(
|
|
610
663
|
"get %s %s (load failed)",
|
|
611
664
|
self.__class__.__name__,
|
|
612
|
-
self.
|
|
665
|
+
self.furu_hash,
|
|
613
666
|
)
|
|
614
667
|
raise FuruComputeError(
|
|
615
668
|
f"Failed to load result from {directory}",
|
|
@@ -646,15 +699,14 @@ class Furu[T](ABC):
|
|
|
646
699
|
logger.debug(
|
|
647
700
|
"dep: end %s %s (%s)",
|
|
648
701
|
self.__class__.__name__,
|
|
649
|
-
self.
|
|
702
|
+
self.furu_hash,
|
|
650
703
|
"ok" if ok else "error",
|
|
704
|
+
extra=caller_info,
|
|
651
705
|
)
|
|
652
706
|
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
logger = get_logger()
|
|
707
|
+
@staticmethod
|
|
708
|
+
def _get_caller_info() -> _CallerInfo:
|
|
656
709
|
frame = sys._getframe(1)
|
|
657
|
-
|
|
658
710
|
caller_info: _CallerInfo = {}
|
|
659
711
|
if frame is not None:
|
|
660
712
|
# Walk up the stack to find the caller outside of furu package
|
|
@@ -669,11 +721,20 @@ class Furu[T](ABC):
|
|
|
669
721
|
}
|
|
670
722
|
break
|
|
671
723
|
frame = frame.f_back
|
|
724
|
+
return caller_info
|
|
725
|
+
|
|
726
|
+
def _log_console_start(
|
|
727
|
+
self, action_color: str, caller_info: _CallerInfo | None = None
|
|
728
|
+
) -> None:
|
|
729
|
+
"""Log the start of get to console with caller info."""
|
|
730
|
+
logger = get_logger()
|
|
731
|
+
if caller_info is None:
|
|
732
|
+
caller_info = self._get_caller_info()
|
|
672
733
|
|
|
673
734
|
logger.info(
|
|
674
735
|
"get %s %s",
|
|
675
736
|
self.__class__.__name__,
|
|
676
|
-
self.
|
|
737
|
+
self.furu_hash,
|
|
677
738
|
extra={
|
|
678
739
|
"furu_console_only": True,
|
|
679
740
|
"furu_action_color": action_color,
|
|
@@ -744,9 +805,11 @@ class Furu[T](ABC):
|
|
|
744
805
|
"""Return the alias-aware state for this Furu directory."""
|
|
745
806
|
base_dir = directory or self._base_furu_dir()
|
|
746
807
|
record = self._alias_record(base_dir)
|
|
747
|
-
if record is None
|
|
808
|
+
if record is None:
|
|
809
|
+
return StateManager.read_state(base_dir)
|
|
810
|
+
target_dir = self._alias_target_dir(base_dir, record)
|
|
811
|
+
if target_dir is None:
|
|
748
812
|
return StateManager.read_state(base_dir)
|
|
749
|
-
target_dir = MigrationManager.resolve_dir(record, target="from")
|
|
750
813
|
return StateManager.read_state(target_dir)
|
|
751
814
|
|
|
752
815
|
def _alias_record(self, directory: Path) -> MigrationRecord | None:
|
|
@@ -755,15 +818,36 @@ class Furu[T](ABC):
|
|
|
755
818
|
return None
|
|
756
819
|
return record
|
|
757
820
|
|
|
758
|
-
def
|
|
821
|
+
def _alias_target_dir(
|
|
822
|
+
self,
|
|
823
|
+
directory: Path,
|
|
824
|
+
record: MigrationRecord,
|
|
825
|
+
*,
|
|
826
|
+
base_marker: bool | None = None,
|
|
827
|
+
) -> Path | None:
|
|
759
828
|
if record.overwritten_at is not None:
|
|
760
|
-
return
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
829
|
+
return None
|
|
830
|
+
if base_marker is None:
|
|
831
|
+
base_marker = StateManager.success_marker_exists(directory)
|
|
832
|
+
if base_marker:
|
|
833
|
+
return None
|
|
764
834
|
target = MigrationManager.resolve_dir(record, target="from")
|
|
765
|
-
|
|
766
|
-
|
|
835
|
+
if StateManager.success_marker_exists(target):
|
|
836
|
+
return target
|
|
837
|
+
return None
|
|
838
|
+
|
|
839
|
+
def _success_marker_dir(self, directory: Path) -> Path | None:
|
|
840
|
+
base_marker = StateManager.success_marker_exists(directory)
|
|
841
|
+
record = self._alias_record(directory)
|
|
842
|
+
if record is None:
|
|
843
|
+
return directory if base_marker else None
|
|
844
|
+
target_dir = self._alias_target_dir(directory, record, base_marker=base_marker)
|
|
845
|
+
if target_dir is not None:
|
|
846
|
+
return target_dir
|
|
847
|
+
return directory if base_marker else None
|
|
848
|
+
|
|
849
|
+
def _alias_is_active(self, directory: Path, record: MigrationRecord) -> bool:
|
|
850
|
+
return self._alias_target_dir(directory, record) is not None
|
|
767
851
|
|
|
768
852
|
def _maybe_detach_alias(
|
|
769
853
|
self: Self,
|
|
@@ -804,6 +888,7 @@ class Furu[T](ABC):
|
|
|
804
888
|
) -> SubmititJob | None:
|
|
805
889
|
"""Submit job once without waiting (fire-and-forget mode)."""
|
|
806
890
|
logger = get_logger()
|
|
891
|
+
StateManager.ensure_internal_dir(directory)
|
|
807
892
|
self._reconcile(directory, adapter=adapter)
|
|
808
893
|
state = StateManager.read_state(directory)
|
|
809
894
|
attempt = state.attempt
|
|
@@ -824,7 +909,7 @@ class Furu[T](ABC):
|
|
|
824
909
|
logger.debug(
|
|
825
910
|
"submit: waiting for submit lock %s %s %s",
|
|
826
911
|
self.__class__.__name__,
|
|
827
|
-
self.
|
|
912
|
+
self.furu_hash,
|
|
828
913
|
directory,
|
|
829
914
|
)
|
|
830
915
|
time.sleep(0.5)
|
|
@@ -833,9 +918,7 @@ class Furu[T](ABC):
|
|
|
833
918
|
attempt_id: str | None = None
|
|
834
919
|
try:
|
|
835
920
|
# Create metadata
|
|
836
|
-
metadata = MetadataManager.create_metadata(
|
|
837
|
-
self, directory, ignore_diff=FURU_CONFIG.ignore_git_diff
|
|
838
|
-
)
|
|
921
|
+
metadata = MetadataManager.create_metadata(self, directory)
|
|
839
922
|
MetadataManager.write_metadata(metadata, directory)
|
|
840
923
|
|
|
841
924
|
env_info = MetadataManager.collect_environment_info()
|
|
@@ -919,12 +1002,12 @@ class Furu[T](ABC):
|
|
|
919
1002
|
mode="executor",
|
|
920
1003
|
spec_key=self._executor_spec_key(),
|
|
921
1004
|
backend="submitit",
|
|
922
|
-
current_node_hash=self.
|
|
1005
|
+
current_node_hash=self.furu_hash,
|
|
923
1006
|
)
|
|
924
1007
|
)
|
|
925
1008
|
try:
|
|
926
1009
|
directory = self._base_furu_dir()
|
|
927
|
-
|
|
1010
|
+
StateManager.ensure_internal_dir(directory)
|
|
928
1011
|
always_rerun = self._always_rerun()
|
|
929
1012
|
needs_success_invalidation = False
|
|
930
1013
|
if not always_rerun:
|
|
@@ -979,11 +1062,7 @@ class Furu[T](ABC):
|
|
|
979
1062
|
stage = "metadata"
|
|
980
1063
|
try:
|
|
981
1064
|
# Refresh metadata (now safe - attempt is already recorded)
|
|
982
|
-
metadata = MetadataManager.create_metadata(
|
|
983
|
-
self,
|
|
984
|
-
directory,
|
|
985
|
-
ignore_diff=FURU_CONFIG.ignore_git_diff,
|
|
986
|
-
)
|
|
1065
|
+
metadata = MetadataManager.create_metadata(self, directory)
|
|
987
1066
|
MetadataManager.write_metadata(metadata, directory)
|
|
988
1067
|
|
|
989
1068
|
# Set up signal handlers
|
|
@@ -999,14 +1078,14 @@ class Furu[T](ABC):
|
|
|
999
1078
|
logger.debug(
|
|
1000
1079
|
"_create: begin %s %s %s",
|
|
1001
1080
|
self.__class__.__name__,
|
|
1002
|
-
self.
|
|
1081
|
+
self.furu_hash,
|
|
1003
1082
|
directory,
|
|
1004
1083
|
)
|
|
1005
1084
|
self._create()
|
|
1006
1085
|
logger.debug(
|
|
1007
1086
|
"_create: ok %s %s %s",
|
|
1008
1087
|
self.__class__.__name__,
|
|
1009
|
-
self.
|
|
1088
|
+
self.furu_hash,
|
|
1010
1089
|
directory,
|
|
1011
1090
|
)
|
|
1012
1091
|
StateManager.write_success_marker(
|
|
@@ -1018,7 +1097,7 @@ class Furu[T](ABC):
|
|
|
1018
1097
|
logger.info(
|
|
1019
1098
|
"_create ok %s %s",
|
|
1020
1099
|
self.__class__.__name__,
|
|
1021
|
-
self.
|
|
1100
|
+
self.furu_hash,
|
|
1022
1101
|
extra={"furu_console_only": True},
|
|
1023
1102
|
)
|
|
1024
1103
|
except Exception as e:
|
|
@@ -1026,7 +1105,7 @@ class Furu[T](ABC):
|
|
|
1026
1105
|
logger.error(
|
|
1027
1106
|
"_create failed %s %s %s",
|
|
1028
1107
|
self.__class__.__name__,
|
|
1029
|
-
self.
|
|
1108
|
+
self.furu_hash,
|
|
1030
1109
|
directory,
|
|
1031
1110
|
extra={"furu_file_only": True},
|
|
1032
1111
|
)
|
|
@@ -1035,7 +1114,7 @@ class Furu[T](ABC):
|
|
|
1035
1114
|
"attempt failed (%s) %s %s %s",
|
|
1036
1115
|
stage,
|
|
1037
1116
|
self.__class__.__name__,
|
|
1038
|
-
self.
|
|
1117
|
+
self.furu_hash,
|
|
1039
1118
|
directory,
|
|
1040
1119
|
extra={"furu_file_only": True},
|
|
1041
1120
|
)
|
|
@@ -1082,7 +1161,7 @@ class Furu[T](ABC):
|
|
|
1082
1161
|
f"backend {attempt.backend}"
|
|
1083
1162
|
)
|
|
1084
1163
|
hints = [
|
|
1085
|
-
f"Furu hash: {self.
|
|
1164
|
+
f"Furu hash: {self.furu_hash}",
|
|
1086
1165
|
f"Directory: {directory}",
|
|
1087
1166
|
f"State file: {state_path}",
|
|
1088
1167
|
f"Attempt: {attempt_info}",
|
|
@@ -1169,9 +1248,7 @@ class Furu[T](ABC):
|
|
|
1169
1248
|
stage = "metadata"
|
|
1170
1249
|
try:
|
|
1171
1250
|
# Create metadata (now safe - attempt is already recorded)
|
|
1172
|
-
metadata = MetadataManager.create_metadata(
|
|
1173
|
-
self, directory, ignore_diff=FURU_CONFIG.ignore_git_diff
|
|
1174
|
-
)
|
|
1251
|
+
metadata = MetadataManager.create_metadata(self, directory)
|
|
1175
1252
|
MetadataManager.write_metadata(metadata, directory)
|
|
1176
1253
|
|
|
1177
1254
|
# Set up preemption handler
|
|
@@ -1185,7 +1262,7 @@ class Furu[T](ABC):
|
|
|
1185
1262
|
logger.debug(
|
|
1186
1263
|
"_create: begin %s %s %s",
|
|
1187
1264
|
self.__class__.__name__,
|
|
1188
|
-
self.
|
|
1265
|
+
self.furu_hash,
|
|
1189
1266
|
directory,
|
|
1190
1267
|
)
|
|
1191
1268
|
token = None
|
|
@@ -1197,7 +1274,7 @@ class Furu[T](ABC):
|
|
|
1197
1274
|
mode="executor",
|
|
1198
1275
|
spec_key=self._executor_spec_key(),
|
|
1199
1276
|
backend="local",
|
|
1200
|
-
current_node_hash=self.
|
|
1277
|
+
current_node_hash=self.furu_hash,
|
|
1201
1278
|
)
|
|
1202
1279
|
)
|
|
1203
1280
|
try:
|
|
@@ -1208,7 +1285,7 @@ class Furu[T](ABC):
|
|
|
1208
1285
|
logger.debug(
|
|
1209
1286
|
"_create: ok %s %s %s",
|
|
1210
1287
|
self.__class__.__name__,
|
|
1211
|
-
self.
|
|
1288
|
+
self.furu_hash,
|
|
1212
1289
|
directory,
|
|
1213
1290
|
)
|
|
1214
1291
|
StateManager.write_success_marker(
|
|
@@ -1220,7 +1297,7 @@ class Furu[T](ABC):
|
|
|
1220
1297
|
logger.info(
|
|
1221
1298
|
"_create ok %s %s",
|
|
1222
1299
|
self.__class__.__name__,
|
|
1223
|
-
self.
|
|
1300
|
+
self.furu_hash,
|
|
1224
1301
|
extra={"furu_console_only": True},
|
|
1225
1302
|
)
|
|
1226
1303
|
return "success", True, result
|
|
@@ -1229,7 +1306,7 @@ class Furu[T](ABC):
|
|
|
1229
1306
|
logger.error(
|
|
1230
1307
|
"_create failed %s %s %s",
|
|
1231
1308
|
self.__class__.__name__,
|
|
1232
|
-
self.
|
|
1309
|
+
self.furu_hash,
|
|
1233
1310
|
directory,
|
|
1234
1311
|
extra={"furu_file_only": True},
|
|
1235
1312
|
)
|
|
@@ -1238,7 +1315,7 @@ class Furu[T](ABC):
|
|
|
1238
1315
|
"attempt failed (%s) %s %s %s",
|
|
1239
1316
|
stage,
|
|
1240
1317
|
self.__class__.__name__,
|
|
1241
|
-
self.
|
|
1318
|
+
self.furu_hash,
|
|
1242
1319
|
directory,
|
|
1243
1320
|
extra={"furu_file_only": True},
|
|
1244
1321
|
)
|
|
@@ -1350,7 +1427,7 @@ def _collect_dependencies(
|
|
|
1350
1427
|
recursive: bool,
|
|
1351
1428
|
) -> None:
|
|
1352
1429
|
for dependency in _direct_dependencies(obj):
|
|
1353
|
-
digest = dependency.
|
|
1430
|
+
digest = dependency.furu_hash
|
|
1354
1431
|
if digest in seen:
|
|
1355
1432
|
continue
|
|
1356
1433
|
seen.add(digest)
|
|
@@ -1504,7 +1581,7 @@ def _sorted_dependency_set(
|
|
|
1504
1581
|
|
|
1505
1582
|
def _dependency_sort_key(value: DependencyScanValue) -> tuple[int, str]:
|
|
1506
1583
|
if isinstance(value, Furu):
|
|
1507
|
-
return (0, value.
|
|
1584
|
+
return (0, cast(str, value.furu_hash))
|
|
1508
1585
|
return (1, f"{type(value).__name__}:{value!r}")
|
|
1509
1586
|
|
|
1510
1587
|
|