nshtrainer 0.11.3__py3-none-any.whl → 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,6 +106,26 @@ class BestCheckpoint(Checkpoint):
106
106
  reverse=(self.metric.mode == "min"),
107
107
  )
108
108
 
109
+ def _create_symlink(self, trainer: Trainer, best_ckpt_path: Path):
110
+ # Resolve the symlink filename
111
+ if (symlink_filename := self._best_symlink_filename()) is None:
112
+ return
113
+
114
+ # If the symlink already exists and points to the best checkpoint,
115
+ # then we don't need to create a new symlink.
116
+ symlink_path = self.dirpath / symlink_filename
117
+ if symlink_path.exists() and symlink_path.resolve() == best_ckpt_path:
118
+ return
119
+
120
+ _link_checkpoint(
121
+ trainer,
122
+ best_ckpt_path,
123
+ symlink_path,
124
+ metadata=True,
125
+ barrier=False,
126
+ )
127
+ log.debug(f"Created best symlink: {symlink_path}")
128
+
109
129
  def _save_best_checkpoint(self, trainer: Trainer):
110
130
  if (current := self._get_metric_value(trainer.callback_metrics)) is None:
111
131
  log.warning(
@@ -130,6 +150,7 @@ class BestCheckpoint(Checkpoint):
130
150
  # Save the current model
131
151
  filepath = self._ckpt_path(trainer)
132
152
  trainer.save_checkpoint(filepath, self.config.save_weights_only)
153
+ log.debug(f"Saved best checkpoint: {filepath}")
133
154
 
134
155
  # Remove worst checkpoint if we've reached save_top_k
135
156
  # NOTE: We add 1 to save_top_k here because we have just saved a new checkpoint
@@ -143,13 +164,9 @@ class BestCheckpoint(Checkpoint):
143
164
  )
144
165
 
145
166
  # Create symlink to best model
146
- if (symlink_filename := self._best_symlink_filename()) is not None:
147
- symlink_path = self.dirpath / symlink_filename
148
- _link_checkpoint(
149
- trainer,
150
- filepath,
151
- symlink_path,
152
- barrier=True,
153
- metadata=True,
154
- )
155
- log.debug(f"Created best symlink: {symlink_path}")
167
+ if sorted_ckpts:
168
+ _, best_ckpt_path = sorted_ckpts[0]
169
+ self._create_symlink(trainer, best_ckpt_path)
170
+
171
+ # Barrier to ensure all processes have saved the checkpoint before continuing
172
+ trainer.strategy.barrier()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nshtrainer
3
- Version: 0.11.3
3
+ Version: 0.11.5
4
4
  Summary:
5
5
  Author: Nima Shoghi
6
6
  Author-email: nimashoghi@gmail.com
@@ -11,7 +11,7 @@ nshtrainer/callbacks/_throughput_monitor_callback.py,sha256=aJo_11rc4lo0IYOd-kHm
11
11
  nshtrainer/callbacks/actsave.py,sha256=qbnaKts4_dvjPeAaPtv7Ds12_vEWzaHUfg_--49NB9I,4041
12
12
  nshtrainer/callbacks/base.py,sha256=UnlYZAqSb8UwBJR-N5-XunxFx2yZjZ4lyGqUfhbCRlI,3555
13
13
  nshtrainer/callbacks/checkpoint/__init__.py,sha256=zrEVCGFikfkt0iOMceOFzXsZG2-6QrqY79RKBCS7bu4,738
14
- nshtrainer/callbacks/checkpoint/best_checkpoint.py,sha256=9qp6z4AmriakkiMbay2_QR97RxNLejHAdvJPU__8Y_c,5519
14
+ nshtrainer/callbacks/checkpoint/best_checkpoint.py,sha256=w99O5GWRcV89XBe4j__v2TvNEHys0x_r3tSTr-6Lhec,6154
15
15
  nshtrainer/callbacks/checkpoint/latest_epoch_checkpoint.py,sha256=NES-acaslPBiZQIMAdk_YwtnBrkm_y_BJQ8Ian0UKP0,4294
16
16
  nshtrainer/callbacks/checkpoint/model_checkpoint.py,sha256=mLFMbNzeMiBer3BCb7o3ucswKpOCQlYyN3wdB92N-LY,6884
17
17
  nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py,sha256=s8tOHrnb_uVqLVeV2K38ZszXrXPTEGdDVfXuXgo_KDQ,3277
@@ -82,6 +82,6 @@ nshtrainer/util/seed.py,sha256=Or2wMPsnQxfnZ2xfBiyMcHFIUt3tGTNeMMyOEanCkqs,280
82
82
  nshtrainer/util/slurm.py,sha256=rofIU26z3SdL79SF45tNez6juou1cyDLz07oXEZb9Hg,1566
83
83
  nshtrainer/util/typed.py,sha256=NGuDkDzFlc1fAoaXjOFZVbmj0mRFjsQi1E_hPa7Bn5U,128
84
84
  nshtrainer/util/typing_utils.py,sha256=8ptjSSLZxlmy4FY6lzzkoGoF5fGNClo8-B_c0XHQaNU,385
85
- nshtrainer-0.11.3.dist-info/METADATA,sha256=lJqbTAjie2SQ9HT_yPyHIfrwTJQ_ognnuqD9YJ_QNPQ,860
86
- nshtrainer-0.11.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
87
- nshtrainer-0.11.3.dist-info/RECORD,,
85
+ nshtrainer-0.11.5.dist-info/METADATA,sha256=KHgvYOhQXbc37awWeLbpbdVQbSEU4J7KoC7Lr5286KE,860
86
+ nshtrainer-0.11.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
87
+ nshtrainer-0.11.5.dist-info/RECORD,,