nshtrainer 1.0.0b45__py3-none-any.whl → 1.0.0b46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -139,15 +139,30 @@ def remove_checkpoint_metadata(checkpoint_path: Path):
139
139
  log.debug(f"Removed {path}")
140
140
 
141
141
 
142
def remove_checkpoint_metadata_link(ckpt_link_path: Path):
    """Remove the metadata file associated with a checkpoint link.

    The metadata location is derived from ``ckpt_link_path`` via
    ``_metadata_path``. This is best-effort: a missing metadata file is
    ignored, and a failure to delete it is logged as a warning rather than
    raised.

    Args:
        ckpt_link_path: Path of the checkpoint link whose metadata file
            should be removed.
    """
    path = _metadata_path(ckpt_link_path)

    # The existence check MUST look at the directory entry itself and not
    # follow symlinks: the file the link used to point to may already have
    # been deleted, in which case a symlink-following check reports False
    # for a dangling link and it would never be cleaned up.
    #
    # ``is_symlink() or exists()`` is equivalent to
    # ``exists(follow_symlinks=False)`` but also works on Python < 3.12,
    # where ``Path.exists`` does not accept that keyword.
    if not (path.is_symlink() or path.exists()):
        log.debug(f"Metadata file does not exist: {path}")
        return

    # The entry exists (possibly as a dangling link), so remove it.
    try:
        path.unlink(missing_ok=True)
    except Exception:
        log.warning(f"Failed to remove {path}", exc_info=True)
    else:
        log.debug(f"Removed {path}")
159
+
160
+
142
161
  def link_checkpoint_metadata(checkpoint_path: Path, linked_checkpoint_path: Path):
143
162
  # First, remove any existing metadata files
144
- remove_checkpoint_metadata(linked_checkpoint_path)
163
+ remove_checkpoint_metadata_link(linked_checkpoint_path)
145
164
 
146
165
  # Link the metadata files to the new checkpoint
147
166
  path = _metadata_path(checkpoint_path)
148
167
  linked_path = _metadata_path(linked_checkpoint_path)
149
-
150
- if not path.exists():
151
- raise FileNotFoundError(f"Checkpoint path does not exist: {checkpoint_path}")
152
-
153
168
  try_symlink_or_copy(path, linked_path)
@@ -8,7 +8,11 @@ from pathlib import Path
8
8
  from lightning.pytorch import Trainer
9
9
 
10
10
  from ..util.path import try_symlink_or_copy
11
- from .metadata import link_checkpoint_metadata, remove_checkpoint_metadata
11
+ from .metadata import (
12
+ link_checkpoint_metadata,
13
+ remove_checkpoint_metadata,
14
+ remove_checkpoint_metadata_link,
15
+ )
12
16
 
13
17
  log = logging.getLogger(__name__)
14
18
 
@@ -39,7 +43,7 @@ def link_checkpoint(
39
43
  log.debug(f"Removed {linkpath=}")
40
44
 
41
45
  if metadata:
42
- remove_checkpoint_metadata(linkpath)
46
+ remove_checkpoint_metadata_link(linkpath)
43
47
 
44
48
  try_symlink_or_copy(filepath, linkpath)
45
49
  if metadata:
@@ -160,7 +160,7 @@ class CheckpointBase(Checkpoint, ABC, Generic[TConfig]):
160
160
  filepath = self.resolve_checkpoint_path(self.current_metrics(trainer))
161
161
  trainer.save_checkpoint(filepath, self.config.save_weights_only)
162
162
 
163
- if trainer.is_global_zero:
163
+ if trainer.hparams.save_checkpoint_metadata and trainer.is_global_zero:
164
164
  # Remove old checkpoints
165
165
  self.remove_old_checkpoints(trainer)
166
166
 
nshtrainer/util/path.py CHANGED
@@ -120,7 +120,8 @@ def try_symlink_or_copy(
120
120
  shutil.copy(file_path, link_path)
121
121
  else:
122
122
  link_path.symlink_to(
123
- symlink_target, target_is_directory=target_is_directory
123
+ symlink_target,
124
+ target_is_directory=target_is_directory,
124
125
  )
125
126
  except Exception:
126
127
  log.warning(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: nshtrainer
3
- Version: 1.0.0b45
3
+ Version: 1.0.0b46
4
4
  Summary:
5
5
  Author: Nima Shoghi
6
6
  Author-email: nimashoghi@gmail.com
@@ -1,8 +1,8 @@
1
1
  nshtrainer/.nshconfig.generated.json,sha256=yZd6cn1RhvNNJUgiUTRYut8ofZYvbulnpPG-rZIRhi4,106
2
2
  nshtrainer/__init__.py,sha256=g_moPnfQxSxFZX5NB9ILQQOJrt4RTRuiFt9N0STIpxM,874
3
3
  nshtrainer/_callback.py,sha256=tXQCDzS6CvMTuTY5lQSH5qZs1pXUi-gt9bQdpXMVdEs,12715
4
- nshtrainer/_checkpoint/metadata.py,sha256=LQZ8g50rKxQQx-FqiW3n8EWmal9qSWRouOpIIn6NJJY,4758
5
- nshtrainer/_checkpoint/saver.py,sha256=rWl4d2lCTMU4_wt8yZFL2pFQaP9hj5sPgqHMPQ4zuyI,1584
4
+ nshtrainer/_checkpoint/metadata.py,sha256=XoKqY3eR95CYuc_Kk9ck-p4iM2Q1OXU3vSXNrzohHz0,5332
5
+ nshtrainer/_checkpoint/saver.py,sha256=65UDrz3KuhkgVfco-RkWuoa1wzTZoXxunlC769yJaMc,1639
6
6
  nshtrainer/_directory.py,sha256=TJR9ccyuzRlAVfVjGyeQ3E2AFAcz-XbBCxWfiXo2SlY,3191
7
7
  nshtrainer/_experimental/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
8
8
  nshtrainer/_hf_hub.py,sha256=4OsCbIITnZk_YLyoMrVyZ0SIN04FBxlC0ig2Et8UAdo,14287
@@ -10,7 +10,7 @@ nshtrainer/callbacks/__init__.py,sha256=w80d6PGNu3wjUj9NiRGMqCX9NnXD5ZlvbY-DIK4z
10
10
  nshtrainer/callbacks/actsave.py,sha256=NSXIIu62MNYe5gz479SMW33bdoKYoYtWtd_iTWFpKpc,3881
11
11
  nshtrainer/callbacks/base.py,sha256=Alaou1IHAIlMEM7g58d_02ozY2xWlshBN7fsw5Ee21s,3683
12
12
  nshtrainer/callbacks/checkpoint/__init__.py,sha256=l8tkHc83_mLiU0-wT09SWdRzwpm2ulbkLzcuCmuTwzE,620
13
- nshtrainer/callbacks/checkpoint/_base.py,sha256=wCJBRI0pQYZc3GBu0b-aUBlBDhd39AdL82VvFgKmv3k,6300
13
+ nshtrainer/callbacks/checkpoint/_base.py,sha256=f7lpk8W4xqxk3PolBEU3AWt9VTIpoLW7wMUhC5DNm3c,6345
14
14
  nshtrainer/callbacks/checkpoint/best_checkpoint.py,sha256=2CQuhPJ3Fi7lDw7z-J8kXXXuDU8-4HcU48oZxR49apk,2667
15
15
  nshtrainer/callbacks/checkpoint/last_checkpoint.py,sha256=vn-as3ex7kaTRcKsIurVtM6kUSHYNwHJeYG82j2dMcc,3554
16
16
  nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py,sha256=nljzETqkHwA-4g8mxaeFK5HxA8My0dlIPzIUscSMWyk,3525
@@ -149,11 +149,11 @@ nshtrainer/util/config/__init__.py,sha256=Z39JJufSb61Lhn2GfVcv3eFW_eorOrN9-9llDW
149
149
  nshtrainer/util/config/dtype.py,sha256=Fn_MhhQoHPyFAnFPSwvcvLiGR3yWFIszMba02CJiC4g,2213
150
150
  nshtrainer/util/config/duration.py,sha256=mM-UfU_HvhXwW33TYEDg0x58n80tnle2e6VaWtxZTjk,764
151
151
  nshtrainer/util/environment.py,sha256=s-B5nY0cKYXdFMdNYumvC_xxacMATiI4DvV2gUDu20k,4195
152
- nshtrainer/util/path.py,sha256=9fIjE3S78pPL6wjAgEJUYfIJQAPdKOQqIYvTS9lWTUk,3959
152
+ nshtrainer/util/path.py,sha256=wADa_qFiekiX0PENf9jI-9_abICB0vJFXav8pNLR7lw,3976
153
153
  nshtrainer/util/seed.py,sha256=diMV8iwBKN7Xxt5pELmui-gyqyT80_CZzomrWhNss0k,316
154
154
  nshtrainer/util/slurm.py,sha256=HflkP5iI_r4UHMyPjw9R4dD5AHsJUpcfJw5PLvGYBRM,1603
155
155
  nshtrainer/util/typed.py,sha256=Xt5fUU6zwLKSTLUdenovnKK0N8qUq89Kddz2_XeykVQ,164
156
156
  nshtrainer/util/typing_utils.py,sha256=MjY-CUX9R5Tzat-BlFnQjwl1PQ_W2yZQoXhkYHlJ_VA,442
157
- nshtrainer-1.0.0b45.dist-info/METADATA,sha256=_RPpe6F7DXpsQSmBF1GTc-E5VUfaC69fIYfoFhsip2s,988
158
- nshtrainer-1.0.0b45.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
159
- nshtrainer-1.0.0b45.dist-info/RECORD,,
157
+ nshtrainer-1.0.0b46.dist-info/METADATA,sha256=L6-5RyLlIcoFyURkoCuHsAgItT0gSl6Ip0l4iDKvs4o,988
158
+ nshtrainer-1.0.0b46.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
159
+ nshtrainer-1.0.0b46.dist-info/RECORD,,