nshtrainer 0.10.15__py3-none-any.whl → 0.10.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nshtrainer/_checkpoint/metadata.py +5 -9
- nshtrainer/callbacks/latest_epoch_checkpoint.py +4 -2
- nshtrainer/model/config.py +1 -1
- {nshtrainer-0.10.15.dist-info → nshtrainer-0.10.17.dist-info}/METADATA +1 -1
- {nshtrainer-0.10.15.dist-info → nshtrainer-0.10.17.dist-info}/RECORD +6 -6
- {nshtrainer-0.10.15.dist-info → nshtrainer-0.10.17.dist-info}/WHEEL +0 -0
|
@@ -105,10 +105,8 @@ def _write_checkpoint_metadata(
|
|
|
105
105
|
|
|
106
106
|
|
|
107
107
|
def _remove_checkpoint_metadata(checkpoint_path: Path):
|
|
108
|
-
for path in (
|
|
109
|
-
checkpoint_path.with_suffix(METADATA_PATH_SUFFIX),
|
|
110
|
-
checkpoint_path.with_suffix(HPARAMS_PATH_SUFFIX),
|
|
111
|
-
):
|
|
108
|
+
for suffix in (METADATA_PATH_SUFFIX, HPARAMS_PATH_SUFFIX):
|
|
109
|
+
path = checkpoint_path.with_suffix(suffix)
|
|
112
110
|
try:
|
|
113
111
|
path.unlink(missing_ok=True)
|
|
114
112
|
except Exception as e:
|
|
@@ -122,11 +120,9 @@ def _link_checkpoint_metadata(checkpoint_path: Path, linked_checkpoint_path: Pat
|
|
|
122
120
|
_remove_checkpoint_metadata(linked_checkpoint_path)
|
|
123
121
|
|
|
124
122
|
# Link the metadata files to the new checkpoint
|
|
125
|
-
for path in (
|
|
126
|
-
checkpoint_path.with_suffix(METADATA_PATH_SUFFIX),
|
|
127
|
-
checkpoint_path.with_suffix(HPARAMS_PATH_SUFFIX),
|
|
128
|
-
):
|
|
129
|
-
linked_path = linked_checkpoint_path.with_suffix(path.suffix)
|
|
123
|
+
for suffix in (METADATA_PATH_SUFFIX, HPARAMS_PATH_SUFFIX):
|
|
124
|
+
path = checkpoint_path.with_suffix(suffix)
|
|
125
|
+
linked_path = linked_checkpoint_path.with_suffix(suffix)
|
|
130
126
|
try:
|
|
131
127
|
try:
|
|
132
128
|
linked_path.symlink_to(path)
|
|
@@ -64,7 +64,7 @@ class LatestEpochCheckpoint(Checkpoint):
|
|
|
64
64
|
filename = self.config.filename.format(
|
|
65
65
|
epoch=trainer.current_epoch, step=trainer.global_step
|
|
66
66
|
)
|
|
67
|
-
filename = f"{self.PREFIX}{filename}
|
|
67
|
+
filename = f"{self.PREFIX}{filename}{self.EXTENSION}"
|
|
68
68
|
return self.dirpath / filename
|
|
69
69
|
|
|
70
70
|
def _remove_checkpoints(self, trainer: Trainer, ckpt_paths: list[Path]):
|
|
@@ -95,7 +95,9 @@ class LatestEpochCheckpoint(Checkpoint):
|
|
|
95
95
|
|
|
96
96
|
def _save_new_checkpoint(self, trainer: Trainer):
|
|
97
97
|
# Remove old checkpoints
|
|
98
|
-
|
|
98
|
+
if trainer.is_global_zero:
|
|
99
|
+
self._remove_old_checkpoints(trainer)
|
|
100
|
+
trainer.strategy.barrier()
|
|
99
101
|
|
|
100
102
|
# Save the new checkpoint
|
|
101
103
|
filepath = self._ckpt_path(trainer)
|
nshtrainer/model/config.py
CHANGED
|
@@ -288,7 +288,7 @@ class WandbLoggerConfig(CallbackConfigBase, BaseLoggerConfig):
|
|
|
288
288
|
offline: bool = False
|
|
289
289
|
"""Whether to run WandB in offline mode."""
|
|
290
290
|
|
|
291
|
-
use_wandb_core: bool = False
|
|
291
|
+
use_wandb_core: bool = True
|
|
292
292
|
"""Whether to use the new `wandb-core` backend for WandB.
|
|
293
293
|
`wandb-core` is a new backend for WandB that is faster and more efficient than the old backend.
|
|
294
294
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
nshtrainer/__init__.py,sha256=39loiLLXbaGiozEsAn8mPHopxaPsek8JsgR9DD2gxtY,583
|
|
2
2
|
nshtrainer/_checkpoint/loader.py,sha256=48flPr1XgQHOgIPaCrRqOEvRuG0SZuV3cQ1vgHLqFqI,11025
|
|
3
|
-
nshtrainer/_checkpoint/metadata.py,sha256=
|
|
3
|
+
nshtrainer/_checkpoint/metadata.py,sha256=GlhlAyJh5gcp3R8l2Y3eAUQtQzBnitFlB0xdx-khEUQ,5579
|
|
4
4
|
nshtrainer/_checkpoint/saver.py,sha256=z_c7a91O4Bh4lZZjqJgxT3w25qFlJsOopV3cpJtkHk8,1655
|
|
5
5
|
nshtrainer/_experimental/__init__.py,sha256=2tQIcrWT8U8no_AeBTYnozaTmxN40kuAJdGQ4b-PoWM,120
|
|
6
6
|
nshtrainer/_experimental/flops/__init__.py,sha256=edo9Ez3LlrnxkNRX9W6YBhPkRPKYGLpkpnl5gx7sEX8,1550
|
|
@@ -15,7 +15,7 @@ nshtrainer/callbacks/ema.py,sha256=8-WHmKFP3VfnzMviJaIFmVD9xHPqIPmq9NRF5xdu3c8,1
|
|
|
15
15
|
nshtrainer/callbacks/finite_checks.py,sha256=gJC_RUr3ais3FJI0uB6wUZnDdE3WRwCix3ppA3PwQXA,2077
|
|
16
16
|
nshtrainer/callbacks/gradient_skipping.py,sha256=pqu5AELx4ctJxR2Y7YSSiGd5oGauVCTZFCEIIS6s88w,3665
|
|
17
17
|
nshtrainer/callbacks/interval.py,sha256=smz5Zl8cN6X6yHKVsMRS2e3SEkzRCP3LvwE1ONvLfaw,8080
|
|
18
|
-
nshtrainer/callbacks/latest_epoch_checkpoint.py,sha256=
|
|
18
|
+
nshtrainer/callbacks/latest_epoch_checkpoint.py,sha256=5JC-JCdgWNnunl0jv4Q9LhkEspLAn0x8VpCMJZi7-ow,4219
|
|
19
19
|
nshtrainer/callbacks/log_epoch.py,sha256=fTa_K_Y8A7g09630cG4YkDE6AzSMPkjb9bpPm4gtqos,1120
|
|
20
20
|
nshtrainer/callbacks/model_checkpoint.py,sha256=8D0wWLhr_KiksAA1fjfIuby42Mq6XokCvAnVUhjADd8,6564
|
|
21
21
|
nshtrainer/callbacks/norm_logging.py,sha256=T2psu8mYsw9iahPKT6aUPjkGrZ4TIzm6_UUUmE09GJs,6274
|
|
@@ -52,7 +52,7 @@ nshtrainer/metrics/__init__.py,sha256=ObLIELGguIEcUpRsUkqh1ltrvZii6vglTpJGrPvoy0
|
|
|
52
52
|
nshtrainer/metrics/_config.py,sha256=hWWS4IXENRyH3RmJ7z1Wx1n3Lt1sNMlGOrcU6PW15o0,1104
|
|
53
53
|
nshtrainer/model/__init__.py,sha256=NpvyQHmGaHB8xdraHmm8l7kDHLmvJSgBNQKkfYqtgyI,1454
|
|
54
54
|
nshtrainer/model/base.py,sha256=AXRfEsFAT0Ln7zjYVPU5NgtHS_c8FZM-M4pyLamO7OA,17516
|
|
55
|
-
nshtrainer/model/config.py,sha256=
|
|
55
|
+
nshtrainer/model/config.py,sha256=fcRhk5LYNvD0kptxT_qSA6OO1WRF5ko4NVTeaZvTuKk,54779
|
|
56
56
|
nshtrainer/model/modules/callback.py,sha256=K0-cyEtBcQhI7Q2e-AGTE8T-GghUPY9DYmneU6ULV6g,6401
|
|
57
57
|
nshtrainer/model/modules/debug.py,sha256=Yy7XEdPou9BkCsD5hJchwJGmCVGrfUru5g9VjPM4uAw,1120
|
|
58
58
|
nshtrainer/model/modules/distributed.py,sha256=ABpR9d-3uBS_fivfy_WYW-dExW6vp5BPaoPQnOudHng,1725
|
|
@@ -79,6 +79,6 @@ nshtrainer/util/seed.py,sha256=Or2wMPsnQxfnZ2xfBiyMcHFIUt3tGTNeMMyOEanCkqs,280
|
|
|
79
79
|
nshtrainer/util/slurm.py,sha256=rofIU26z3SdL79SF45tNez6juou1cyDLz07oXEZb9Hg,1566
|
|
80
80
|
nshtrainer/util/typed.py,sha256=NGuDkDzFlc1fAoaXjOFZVbmj0mRFjsQi1E_hPa7Bn5U,128
|
|
81
81
|
nshtrainer/util/typing_utils.py,sha256=8ptjSSLZxlmy4FY6lzzkoGoF5fGNClo8-B_c0XHQaNU,385
|
|
82
|
-
nshtrainer-0.10.
|
|
83
|
-
nshtrainer-0.10.
|
|
84
|
-
nshtrainer-0.10.
|
|
82
|
+
nshtrainer-0.10.17.dist-info/METADATA,sha256=yIpZNES4PCMfXUmDIpfa_1xOa3J3PNgdMzu1_OQXq94,696
|
|
83
|
+
nshtrainer-0.10.17.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
84
|
+
nshtrainer-0.10.17.dist-info/RECORD,,
|
|
File without changes
|