nshtrainer 1.0.0b45__py3-none-any.whl → 1.0.0b46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nshtrainer/_checkpoint/metadata.py +20 -5
- nshtrainer/_checkpoint/saver.py +6 -2
- nshtrainer/callbacks/checkpoint/_base.py +1 -1
- nshtrainer/util/path.py +2 -1
- {nshtrainer-1.0.0b45.dist-info → nshtrainer-1.0.0b46.dist-info}/METADATA +1 -1
- {nshtrainer-1.0.0b45.dist-info → nshtrainer-1.0.0b46.dist-info}/RECORD +7 -7
- {nshtrainer-1.0.0b45.dist-info → nshtrainer-1.0.0b46.dist-info}/WHEEL +0 -0
@@ -139,15 +139,30 @@ def remove_checkpoint_metadata(checkpoint_path: Path):
|
|
139
139
|
log.debug(f"Removed {path}")
|
140
140
|
|
141
141
|
|
142
|
+
def remove_checkpoint_metadata_link(ckpt_link_path: Path):
|
143
|
+
path = _metadata_path(ckpt_link_path)
|
144
|
+
# If the metadata does not exist, we can safely ignore this
|
145
|
+
if not path.exists(follow_symlinks=False):
|
146
|
+
# This is EXTREMELY important here
|
147
|
+
# Otherwise, we've already deleted the file that the symlink
|
148
|
+
# used to point to, so this always returns False
|
149
|
+
log.debug(f"Metadata file does not exist: {path}")
|
150
|
+
return
|
151
|
+
|
152
|
+
# If the metadata exists, we can remove it
|
153
|
+
try:
|
154
|
+
path.unlink(missing_ok=True)
|
155
|
+
except Exception:
|
156
|
+
log.warning(f"Failed to remove {path}", exc_info=True)
|
157
|
+
else:
|
158
|
+
log.debug(f"Removed {path}")
|
159
|
+
|
160
|
+
|
142
161
|
def link_checkpoint_metadata(checkpoint_path: Path, linked_checkpoint_path: Path):
|
143
162
|
# First, remove any existing metadata files
|
144
|
-
|
163
|
+
remove_checkpoint_metadata_link(linked_checkpoint_path)
|
145
164
|
|
146
165
|
# Link the metadata files to the new checkpoint
|
147
166
|
path = _metadata_path(checkpoint_path)
|
148
167
|
linked_path = _metadata_path(linked_checkpoint_path)
|
149
|
-
|
150
|
-
if not path.exists():
|
151
|
-
raise FileNotFoundError(f"Checkpoint path does not exist: {checkpoint_path}")
|
152
|
-
|
153
168
|
try_symlink_or_copy(path, linked_path)
|
nshtrainer/_checkpoint/saver.py
CHANGED
@@ -8,7 +8,11 @@ from pathlib import Path
|
|
8
8
|
from lightning.pytorch import Trainer
|
9
9
|
|
10
10
|
from ..util.path import try_symlink_or_copy
|
11
|
-
from .metadata import
|
11
|
+
from .metadata import (
|
12
|
+
link_checkpoint_metadata,
|
13
|
+
remove_checkpoint_metadata,
|
14
|
+
remove_checkpoint_metadata_link,
|
15
|
+
)
|
12
16
|
|
13
17
|
log = logging.getLogger(__name__)
|
14
18
|
|
@@ -39,7 +43,7 @@ def link_checkpoint(
|
|
39
43
|
log.debug(f"Removed {linkpath=}")
|
40
44
|
|
41
45
|
if metadata:
|
42
|
-
|
46
|
+
remove_checkpoint_metadata_link(linkpath)
|
43
47
|
|
44
48
|
try_symlink_or_copy(filepath, linkpath)
|
45
49
|
if metadata:
|
@@ -160,7 +160,7 @@ class CheckpointBase(Checkpoint, ABC, Generic[TConfig]):
|
|
160
160
|
filepath = self.resolve_checkpoint_path(self.current_metrics(trainer))
|
161
161
|
trainer.save_checkpoint(filepath, self.config.save_weights_only)
|
162
162
|
|
163
|
-
if trainer.is_global_zero:
|
163
|
+
if trainer.hparams.save_checkpoint_metadata and trainer.is_global_zero:
|
164
164
|
# Remove old checkpoints
|
165
165
|
self.remove_old_checkpoints(trainer)
|
166
166
|
|
nshtrainer/util/path.py
CHANGED
@@ -120,7 +120,8 @@ def try_symlink_or_copy(
|
|
120
120
|
shutil.copy(file_path, link_path)
|
121
121
|
else:
|
122
122
|
link_path.symlink_to(
|
123
|
-
symlink_target,
|
123
|
+
symlink_target,
|
124
|
+
target_is_directory=target_is_directory,
|
124
125
|
)
|
125
126
|
except Exception:
|
126
127
|
log.warning(
|
@@ -1,8 +1,8 @@
|
|
1
1
|
nshtrainer/.nshconfig.generated.json,sha256=yZd6cn1RhvNNJUgiUTRYut8ofZYvbulnpPG-rZIRhi4,106
|
2
2
|
nshtrainer/__init__.py,sha256=g_moPnfQxSxFZX5NB9ILQQOJrt4RTRuiFt9N0STIpxM,874
|
3
3
|
nshtrainer/_callback.py,sha256=tXQCDzS6CvMTuTY5lQSH5qZs1pXUi-gt9bQdpXMVdEs,12715
|
4
|
-
nshtrainer/_checkpoint/metadata.py,sha256=
|
5
|
-
nshtrainer/_checkpoint/saver.py,sha256=
|
4
|
+
nshtrainer/_checkpoint/metadata.py,sha256=XoKqY3eR95CYuc_Kk9ck-p4iM2Q1OXU3vSXNrzohHz0,5332
|
5
|
+
nshtrainer/_checkpoint/saver.py,sha256=65UDrz3KuhkgVfco-RkWuoa1wzTZoXxunlC769yJaMc,1639
|
6
6
|
nshtrainer/_directory.py,sha256=TJR9ccyuzRlAVfVjGyeQ3E2AFAcz-XbBCxWfiXo2SlY,3191
|
7
7
|
nshtrainer/_experimental/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
8
8
|
nshtrainer/_hf_hub.py,sha256=4OsCbIITnZk_YLyoMrVyZ0SIN04FBxlC0ig2Et8UAdo,14287
|
@@ -10,7 +10,7 @@ nshtrainer/callbacks/__init__.py,sha256=w80d6PGNu3wjUj9NiRGMqCX9NnXD5ZlvbY-DIK4z
|
|
10
10
|
nshtrainer/callbacks/actsave.py,sha256=NSXIIu62MNYe5gz479SMW33bdoKYoYtWtd_iTWFpKpc,3881
|
11
11
|
nshtrainer/callbacks/base.py,sha256=Alaou1IHAIlMEM7g58d_02ozY2xWlshBN7fsw5Ee21s,3683
|
12
12
|
nshtrainer/callbacks/checkpoint/__init__.py,sha256=l8tkHc83_mLiU0-wT09SWdRzwpm2ulbkLzcuCmuTwzE,620
|
13
|
-
nshtrainer/callbacks/checkpoint/_base.py,sha256=
|
13
|
+
nshtrainer/callbacks/checkpoint/_base.py,sha256=f7lpk8W4xqxk3PolBEU3AWt9VTIpoLW7wMUhC5DNm3c,6345
|
14
14
|
nshtrainer/callbacks/checkpoint/best_checkpoint.py,sha256=2CQuhPJ3Fi7lDw7z-J8kXXXuDU8-4HcU48oZxR49apk,2667
|
15
15
|
nshtrainer/callbacks/checkpoint/last_checkpoint.py,sha256=vn-as3ex7kaTRcKsIurVtM6kUSHYNwHJeYG82j2dMcc,3554
|
16
16
|
nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py,sha256=nljzETqkHwA-4g8mxaeFK5HxA8My0dlIPzIUscSMWyk,3525
|
@@ -149,11 +149,11 @@ nshtrainer/util/config/__init__.py,sha256=Z39JJufSb61Lhn2GfVcv3eFW_eorOrN9-9llDW
|
|
149
149
|
nshtrainer/util/config/dtype.py,sha256=Fn_MhhQoHPyFAnFPSwvcvLiGR3yWFIszMba02CJiC4g,2213
|
150
150
|
nshtrainer/util/config/duration.py,sha256=mM-UfU_HvhXwW33TYEDg0x58n80tnle2e6VaWtxZTjk,764
|
151
151
|
nshtrainer/util/environment.py,sha256=s-B5nY0cKYXdFMdNYumvC_xxacMATiI4DvV2gUDu20k,4195
|
152
|
-
nshtrainer/util/path.py,sha256=
|
152
|
+
nshtrainer/util/path.py,sha256=wADa_qFiekiX0PENf9jI-9_abICB0vJFXav8pNLR7lw,3976
|
153
153
|
nshtrainer/util/seed.py,sha256=diMV8iwBKN7Xxt5pELmui-gyqyT80_CZzomrWhNss0k,316
|
154
154
|
nshtrainer/util/slurm.py,sha256=HflkP5iI_r4UHMyPjw9R4dD5AHsJUpcfJw5PLvGYBRM,1603
|
155
155
|
nshtrainer/util/typed.py,sha256=Xt5fUU6zwLKSTLUdenovnKK0N8qUq89Kddz2_XeykVQ,164
|
156
156
|
nshtrainer/util/typing_utils.py,sha256=MjY-CUX9R5Tzat-BlFnQjwl1PQ_W2yZQoXhkYHlJ_VA,442
|
157
|
-
nshtrainer-1.0.
|
158
|
-
nshtrainer-1.0.
|
159
|
-
nshtrainer-1.0.
|
157
|
+
nshtrainer-1.0.0b46.dist-info/METADATA,sha256=L6-5RyLlIcoFyURkoCuHsAgItT0gSl6Ip0l4iDKvs4o,988
|
158
|
+
nshtrainer-1.0.0b46.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
159
|
+
nshtrainer-1.0.0b46.dist-info/RECORD,,
|
File without changes
|