nshtrainer 0.8.7__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
nshtrainer/model/config.py
CHANGED
|
@@ -805,6 +805,36 @@ class CheckpointLoadingConfig(C.Config):
|
|
|
805
805
|
"""
|
|
806
806
|
|
|
807
807
|
|
|
808
|
+
def _create_symlink_to_nshrunner(base_dir: Path):
|
|
809
|
+
# Resolve the current nshrunner session directory
|
|
810
|
+
if not (session_dir := os.environ.get("NSHRUNNER_SESSION_DIR")):
|
|
811
|
+
log.warning("NSHRUNNER_SESSION_DIR is not set. Skipping symlink creation.")
|
|
812
|
+
return
|
|
813
|
+
session_dir = Path(session_dir)
|
|
814
|
+
if not session_dir.exists() or not session_dir.is_dir():
|
|
815
|
+
log.warning(
|
|
816
|
+
f"NSHRUNNER_SESSION_DIR is not a valid directory: {session_dir}. "
|
|
817
|
+
"Skipping symlink creation."
|
|
818
|
+
)
|
|
819
|
+
return
|
|
820
|
+
|
|
821
|
+
# Create the symlink
|
|
822
|
+
symlink_path = base_dir / "nshrunner"
|
|
823
|
+
if symlink_path.exists():
|
|
824
|
+
# If it already points to the correct directory, we're done
|
|
825
|
+
if symlink_path.resolve() == session_dir.resolve():
|
|
826
|
+
return
|
|
827
|
+
|
|
828
|
+
# Otherwise, we should log a warning and remove the existing symlink
|
|
829
|
+
log.warning(
|
|
830
|
+
f"A symlink pointing to {symlink_path.resolve()} already exists at {symlink_path}. "
|
|
831
|
+
"Removing the existing symlink."
|
|
832
|
+
)
|
|
833
|
+
symlink_path.unlink()
|
|
834
|
+
|
|
835
|
+
symlink_path.symlink_to(session_dir)
|
|
836
|
+
|
|
837
|
+
|
|
808
838
|
class DirectoryConfig(C.Config):
|
|
809
839
|
project_root: Path | None = None
|
|
810
840
|
"""
|
|
@@ -813,30 +843,33 @@ class DirectoryConfig(C.Config):
|
|
|
813
843
|
This isn't specific to the run; it is the parent directory of all runs.
|
|
814
844
|
"""
|
|
815
845
|
|
|
846
|
+
create_symlink_to_nshrunner_root: bool = True
|
|
847
|
+
"""Should we create a symlink to the root folder for the Runner (if we're in one)?"""
|
|
848
|
+
|
|
816
849
|
log: Path | None = None
|
|
817
|
-
"""Base directory for all experiment tracking (e.g., WandB, Tensorboard, etc.) files. If None, will use
|
|
850
|
+
"""Base directory for all experiment tracking (e.g., WandB, Tensorboard, etc.) files. If None, will use nshtrainer/{id}/log/."""
|
|
818
851
|
|
|
819
852
|
stdio: Path | None = None
|
|
820
|
-
"""stdout/stderr log directory to use for the trainer. If None, will use
|
|
853
|
+
"""stdout/stderr log directory to use for the trainer. If None, will use nshtrainer/{id}/stdio/."""
|
|
821
854
|
|
|
822
855
|
checkpoint: Path | None = None
|
|
823
|
-
"""Checkpoint directory to use for the trainer. If None, will use
|
|
856
|
+
"""Checkpoint directory to use for the trainer. If None, will use nshtrainer/{id}/checkpoint/."""
|
|
824
857
|
|
|
825
858
|
activation: Path | None = None
|
|
826
|
-
"""Activation directory to use for the trainer. If None, will use
|
|
859
|
+
"""Activation directory to use for the trainer. If None, will use nshtrainer/{id}/activation/."""
|
|
827
860
|
|
|
828
861
|
profile: Path | None = None
|
|
829
|
-
"""Directory to save profiling information to. If None, will use
|
|
862
|
+
"""Directory to save profiling information to. If None, will use nshtrainer/{id}/profile/."""
|
|
830
863
|
|
|
831
864
|
def resolve_run_root_directory(self, run_id: str) -> Path:
|
|
832
865
|
if (project_root_dir := self.project_root) is None:
|
|
833
866
|
project_root_dir = Path.cwd()
|
|
834
867
|
|
|
835
|
-
# The default base dir is $CWD/
|
|
836
|
-
base_dir = project_root_dir / "
|
|
868
|
+
# The default base dir is $CWD/nshtrainer/{id}/
|
|
869
|
+
base_dir = project_root_dir / "nshtrainer"
|
|
837
870
|
base_dir.mkdir(exist_ok=True)
|
|
838
871
|
|
|
839
|
-
# Add a .gitignore file to the
|
|
872
|
+
# Add a .gitignore file to the nshtrainer directory
|
|
840
873
|
# which will ignore all files except for the .gitignore file itself
|
|
841
874
|
gitignore_path = base_dir / ".gitignore"
|
|
842
875
|
if not gitignore_path.exists():
|
|
@@ -846,6 +879,10 @@ class DirectoryConfig(C.Config):
|
|
|
846
879
|
base_dir = base_dir / run_id
|
|
847
880
|
base_dir.mkdir(exist_ok=True)
|
|
848
881
|
|
|
882
|
+
# Create a symlink to the root folder for the Runner
|
|
883
|
+
if self.create_symlink_to_nshrunner_root:
|
|
884
|
+
_create_symlink_to_nshrunner(base_dir)
|
|
885
|
+
|
|
849
886
|
return base_dir
|
|
850
887
|
|
|
851
888
|
def resolve_subdirectory(
|
|
@@ -854,7 +891,7 @@ class DirectoryConfig(C.Config):
|
|
|
854
891
|
# subdirectory: Literal["log", "stdio", "checkpoint", "activation", "profile"],
|
|
855
892
|
subdirectory: str,
|
|
856
893
|
) -> Path:
|
|
857
|
-
# The subdir will be $CWD/
|
|
894
|
+
# The subdir will be $CWD/nshtrainer/{id}/{log, stdio, checkpoint, activation}/
|
|
858
895
|
if (subdir := getattr(self, subdirectory, None)) is not None:
|
|
859
896
|
assert isinstance(
|
|
860
897
|
subdir, Path
|
|
@@ -874,7 +911,7 @@ class DirectoryConfig(C.Config):
|
|
|
874
911
|
if (log_dir := logger.log_dir) is not None:
|
|
875
912
|
return log_dir
|
|
876
913
|
|
|
877
|
-
# Save to
|
|
914
|
+
# Save to nshtrainer/{id}/log/{logger kind}/{id}/
|
|
878
915
|
log_dir = self.resolve_subdirectory(run_id, "log")
|
|
879
916
|
log_dir = log_dir / logger.kind
|
|
880
917
|
|
|
@@ -47,7 +47,7 @@ nshtrainer/lr_scheduler/linear_warmup_cosine.py,sha256=mn6cyizyI_stkXtg6zxIEGF9b
|
|
|
47
47
|
nshtrainer/lr_scheduler/reduce_lr_on_plateau.py,sha256=h76oTHYpMxauV_l6lviya5DW-WKArwxxf7ZQizhmbCw,2782
|
|
48
48
|
nshtrainer/model/__init__.py,sha256=y32Hla-5whpzLL2BtCJpBakSp8o-1nQbpO0j_-xq_Po,1864
|
|
49
49
|
nshtrainer/model/base.py,sha256=YtqnjiMf0cLVjFEQuOLm5WwCkVnZftiHlIdCrxdax3s,21297
|
|
50
|
-
nshtrainer/model/config.py,sha256
|
|
50
|
+
nshtrainer/model/config.py,sha256=-I_HLTTwqWimnnoKJ64oBEq3x31CZj9rwrg9MnFzs38,68215
|
|
51
51
|
nshtrainer/model/modules/callback.py,sha256=JF59U9-CjJsAIspEhTJbVaGN0wGctZG7UquE3IS7R8A,6408
|
|
52
52
|
nshtrainer/model/modules/debug.py,sha256=DTVty8cKnzj1GCULRyGx_sWTTsq9NLi30dzqjRTnuCU,1127
|
|
53
53
|
nshtrainer/model/modules/distributed.py,sha256=ABpR9d-3uBS_fivfy_WYW-dExW6vp5BPaoPQnOudHng,1725
|
|
@@ -72,6 +72,6 @@ nshtrainer/util/seed.py,sha256=HEXgVs-wldByahOysKwq7506OHxdYTEgmP-tDQVAEkQ,287
|
|
|
72
72
|
nshtrainer/util/slurm.py,sha256=rofIU26z3SdL79SF45tNez6juou1cyDLz07oXEZb9Hg,1566
|
|
73
73
|
nshtrainer/util/typed.py,sha256=NGuDkDzFlc1fAoaXjOFZVbmj0mRFjsQi1E_hPa7Bn5U,128
|
|
74
74
|
nshtrainer/util/typing_utils.py,sha256=8ptjSSLZxlmy4FY6lzzkoGoF5fGNClo8-B_c0XHQaNU,385
|
|
75
|
-
nshtrainer-0.
|
|
76
|
-
nshtrainer-0.
|
|
77
|
-
nshtrainer-0.
|
|
75
|
+
nshtrainer-0.9.1.dist-info/METADATA,sha256=3s9luSztUNVhu3t_sSmOw3HhwuVVUoiLhQwlxBiaaSg,647
|
|
76
|
+
nshtrainer-0.9.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
77
|
+
nshtrainer-0.9.1.dist-info/RECORD,,
|
|
File without changes
|