nshtrainer 0.10.12__tar.gz → 0.10.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/PKG-INFO +1 -1
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/pyproject.toml +1 -1
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_checkpoint/saver.py +13 -4
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/latest_epoch_checkpoint.py +1 -1
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/model_checkpoint.py +1 -1
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/README.md +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_checkpoint/loader.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_checkpoint/metadata.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_experimental/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_experimental/flops/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_experimental/flops/flop_counter.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_experimental/flops/module_tracker.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/_throughput_monitor_callback.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/actsave.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/base.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/early_stopping.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/ema.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/finite_checks.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/interval.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/log_epoch.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/norm_logging.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/on_exception_checkpoint.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/print_table.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/throughput_monitor.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/timer.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/data/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/data/transform.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/_experimental.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/actsave.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/callbacks.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/config.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/data.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/log.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/lr_scheduler.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/model.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/nn.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/optimizer.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/runner.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/snapshot.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/snoop.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/trainer.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/typecheck.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/util.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/lr_scheduler/_base.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/metrics/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/metrics/_config.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/base.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/config.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/callback.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/debug.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/distributed.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/logger.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/profiler.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/rlp_sanity_checks.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/shared_parameters.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/nn/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/nn/mlp.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/nn/module_dict.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/nn/module_list.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/nn/nonlinearity.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/optimizer.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/runner.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/scripts/find_packages.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/trainer/__init__.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/trainer/checkpoint_connector.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/trainer/signal_connector.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/trainer/trainer.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/_environment_info.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/environment.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/seed.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/slurm.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/typed.py +0 -0
- {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/typing_utils.py +0 -0
|
@@ -36,7 +36,8 @@ def _link_checkpoint(
|
|
|
36
36
|
# fall back to copying the file
|
|
37
37
|
shutil.copy(filepath, linkpath)
|
|
38
38
|
|
|
39
|
-
|
|
39
|
+
if metadata:
|
|
40
|
+
_link_checkpoint_metadata(filepath, linkpath)
|
|
40
41
|
if barrier:
|
|
41
42
|
trainer.strategy.barrier()
|
|
42
43
|
|
|
@@ -44,9 +45,17 @@ def _link_checkpoint(
|
|
|
44
45
|
def _remove_checkpoint(
|
|
45
46
|
trainer: Trainer,
|
|
46
47
|
filepath: str | Path | os.PathLike,
|
|
47
|
-
|
|
48
|
+
*,
|
|
49
|
+
metadata: bool,
|
|
50
|
+
barrier: bool,
|
|
48
51
|
):
|
|
49
52
|
if not isinstance(filepath, Path):
|
|
50
53
|
filepath = Path(filepath)
|
|
51
|
-
|
|
52
|
-
|
|
54
|
+
|
|
55
|
+
if trainer.is_global_zero:
|
|
56
|
+
trainer.strategy.remove_checkpoint(filepath)
|
|
57
|
+
if metadata:
|
|
58
|
+
_remove_checkpoint_metadata(filepath)
|
|
59
|
+
|
|
60
|
+
if barrier:
|
|
61
|
+
trainer.strategy.barrier()
|
{nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/latest_epoch_checkpoint.py
RENAMED
|
@@ -69,7 +69,7 @@ class LatestEpochCheckpoint(Checkpoint):
|
|
|
69
69
|
|
|
70
70
|
def _remove_checkpoints(self, trainer: Trainer, ckpt_paths: list[Path]):
|
|
71
71
|
for ckpt_path in ckpt_paths:
|
|
72
|
-
_remove_checkpoint(trainer, ckpt_path,
|
|
72
|
+
_remove_checkpoint(trainer, ckpt_path, metadata=True, barrier=False)
|
|
73
73
|
|
|
74
74
|
def _remove_old_checkpoints(self, trainer: Trainer):
|
|
75
75
|
if (latest_k := self.config.latest_k) == "all":
|
|
@@ -202,4 +202,4 @@ class ModelCheckpoint(_ModelCheckpoint):
|
|
|
202
202
|
|
|
203
203
|
@override
|
|
204
204
|
def _remove_checkpoint(self, trainer: Trainer, filepath: str):
|
|
205
|
-
return _remove_checkpoint(trainer, filepath,
|
|
205
|
+
return _remove_checkpoint(trainer, filepath, metadata=True, barrier=False)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_experimental/flops/flop_counter.py
RENAMED
|
File without changes
|
{nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_experimental/flops/module_tracker.py
RENAMED
|
File without changes
|
|
File without changes
|
{nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/_throughput_monitor_callback.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/on_exception_checkpoint.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py
RENAMED
|
File without changes
|
{nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|