nshtrainer 0.11.3__tar.gz → 0.11.5__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/PKG-INFO +1 -1
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/pyproject.toml +1 -1
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/best_checkpoint.py +27 -10
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/README.md +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_checkpoint/loader.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_checkpoint/metadata.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_checkpoint/saver.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_experimental/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_experimental/flops/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_experimental/flops/flop_counter.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_experimental/flops/module_tracker.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/_throughput_monitor_callback.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/actsave.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/base.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/latest_epoch_checkpoint.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/model_checkpoint.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/early_stopping.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/ema.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/finite_checks.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/interval.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/log_epoch.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/norm_logging.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/print_table.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/throughput_monitor.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/timer.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/data/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/data/transform.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/_experimental.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/actsave.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/callbacks.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/config.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/data.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/log.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/lr_scheduler.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/model.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/nn.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/optimizer.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/runner.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/snapshot.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/snoop.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/trainer.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/typecheck.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/util.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/lr_scheduler/_base.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/metrics/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/metrics/_config.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/base.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/config.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/callback.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/debug.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/distributed.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/logger.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/profiler.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/rlp_sanity_checks.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/shared_parameters.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/nn/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/nn/mlp.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/nn/module_dict.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/nn/module_list.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/nn/nonlinearity.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/optimizer.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/runner.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/scripts/find_packages.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/trainer/__init__.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/trainer/checkpoint_connector.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/trainer/signal_connector.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/trainer/trainer.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/_environment_info.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/_useful_types.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/environment.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/seed.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/slurm.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/typed.py +0 -0
- {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/typing_utils.py +0 -0
{nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/best_checkpoint.py
RENAMED
|
@@ -106,6 +106,26 @@ class BestCheckpoint(Checkpoint):
|
|
|
106
106
|
reverse=(self.metric.mode == "min"),
|
|
107
107
|
)
|
|
108
108
|
|
|
109
|
+
def _create_symlink(self, trainer: Trainer, best_ckpt_path: Path):
|
|
110
|
+
# Resolve the symlink filename
|
|
111
|
+
if (symlink_filename := self._best_symlink_filename()) is None:
|
|
112
|
+
return
|
|
113
|
+
|
|
114
|
+
# If the symlink already exists and points to the best checkpoint,
|
|
115
|
+
# then we don't need to create a new symlink.
|
|
116
|
+
symlink_path = self.dirpath / symlink_filename
|
|
117
|
+
if symlink_path.exists() and symlink_path.resolve() == best_ckpt_path:
|
|
118
|
+
return
|
|
119
|
+
|
|
120
|
+
_link_checkpoint(
|
|
121
|
+
trainer,
|
|
122
|
+
best_ckpt_path,
|
|
123
|
+
symlink_path,
|
|
124
|
+
metadata=True,
|
|
125
|
+
barrier=False,
|
|
126
|
+
)
|
|
127
|
+
log.debug(f"Created best symlink: {symlink_path}")
|
|
128
|
+
|
|
109
129
|
def _save_best_checkpoint(self, trainer: Trainer):
|
|
110
130
|
if (current := self._get_metric_value(trainer.callback_metrics)) is None:
|
|
111
131
|
log.warning(
|
|
@@ -130,6 +150,7 @@ class BestCheckpoint(Checkpoint):
|
|
|
130
150
|
# Save the current model
|
|
131
151
|
filepath = self._ckpt_path(trainer)
|
|
132
152
|
trainer.save_checkpoint(filepath, self.config.save_weights_only)
|
|
153
|
+
log.debug(f"Saved best checkpoint: {filepath}")
|
|
133
154
|
|
|
134
155
|
# Remove worst checkpoint if we've reached save_top_k
|
|
135
156
|
# NOTE: We add 1 to save_top_k here because we have just saved a new checkpoint
|
|
@@ -143,13 +164,9 @@ class BestCheckpoint(Checkpoint):
|
|
|
143
164
|
)
|
|
144
165
|
|
|
145
166
|
# Create symlink to best model
|
|
146
|
-
if
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
barrier=True,
|
|
153
|
-
metadata=True,
|
|
154
|
-
)
|
|
155
|
-
log.debug(f"Created best symlink: {symlink_path}")
|
|
167
|
+
if sorted_ckpts:
|
|
168
|
+
_, best_ckpt_path = sorted_ckpts[0]
|
|
169
|
+
self._create_symlink(trainer, best_ckpt_path)
|
|
170
|
+
|
|
171
|
+
# Barrier to ensure all processes have saved the checkpoint before continuing
|
|
172
|
+
trainer.strategy.barrier()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_experimental/flops/module_tracker.py
RENAMED
|
File without changes
|
|
File without changes
|
{nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/_throughput_monitor_callback.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/model_checkpoint.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|