nshtrainer 1.0.0b52__py3-none-any.whl → 1.0.0b54__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- nshtrainer/_checkpoint/metadata.py +2 -2
- nshtrainer/_checkpoint/saver.py +2 -2
- nshtrainer/trainer/trainer.py +15 -1
- nshtrainer/util/path.py +42 -1
- {nshtrainer-1.0.0b52.dist-info → nshtrainer-1.0.0b54.dist-info}/METADATA +1 -1
- {nshtrainer-1.0.0b52.dist-info → nshtrainer-1.0.0b54.dist-info}/RECORD +7 -7
- {nshtrainer-1.0.0b52.dist-info → nshtrainer-1.0.0b54.dist-info}/WHEEL +0 -0
nshtrainer/_checkpoint/metadata.py
CHANGED
@@ -11,7 +11,7 @@ import numpy as np
|
|
11
11
|
import torch
|
12
12
|
|
13
13
|
from ..util._environment_info import EnvironmentConfig
|
14
|
-
from ..util.path import compute_file_checksum, try_symlink_or_copy
|
14
|
+
from ..util.path import compute_file_checksum, path_exists, try_symlink_or_copy
|
15
15
|
|
16
16
|
if TYPE_CHECKING:
|
17
17
|
from ..trainer.trainer import Trainer
|
@@ -154,7 +154,7 @@ def remove_checkpoint_metadata(checkpoint_path: Path):
|
|
154
154
|
def remove_checkpoint_metadata_link(ckpt_link_path: Path):
|
155
155
|
path = _metadata_path(ckpt_link_path)
|
156
156
|
# If the metadata does not exist, we can safely ignore this
|
157
|
-
if not path.exists(follow_symlinks=False):
|
157
|
+
if not path_exists(path, follow_symlinks=False):
|
158
158
|
# This is EXTREMELY important here
|
159
159
|
# Otherwise, we've already deleted the file that the symlink
|
160
160
|
# used to point to, so this always returns False
|
nshtrainer/_checkpoint/saver.py
CHANGED
@@ -7,7 +7,7 @@ from pathlib import Path
|
|
7
7
|
|
8
8
|
from lightning.pytorch import Trainer
|
9
9
|
|
10
|
-
from ..util.path import try_symlink_or_copy
|
10
|
+
from ..util.path import path_exists, try_symlink_or_copy
|
11
11
|
from .metadata import (
|
12
12
|
link_checkpoint_metadata,
|
13
13
|
remove_checkpoint_metadata,
|
@@ -29,7 +29,7 @@ def link_checkpoint(
|
|
29
29
|
|
30
30
|
if remove_existing:
|
31
31
|
try:
|
32
|
-
if linkpath.exists(follow_symlinks=False):
|
32
|
+
if path_exists(linkpath, follow_symlinks=False):
|
33
33
|
# follow_symlinks=False is EXTREMELY important here
|
34
34
|
# Otherwise, we've already deleted the file that the symlink
|
35
35
|
# used to point to, so this always returns False
|
nshtrainer/trainer/trainer.py
CHANGED
@@ -457,7 +457,21 @@ class Trainer(LightningTrainer):
|
|
457
457
|
):
|
458
458
|
filepath = Path(filepath)
|
459
459
|
|
460
|
-
|
460
|
+
if self.model is None:
|
461
|
+
raise AttributeError(
|
462
|
+
"Saving a checkpoint is only possible if a model is attached to the Trainer. Did you call"
|
463
|
+
" `Trainer.save_checkpoint()` before calling `Trainer.{fit,validate,test,predict}`?"
|
464
|
+
)
|
465
|
+
with self.profiler.profile("save_checkpoint"): # type: ignore
|
466
|
+
checkpoint = self._checkpoint_connector.dump_checkpoint(weights_only)
|
467
|
+
# Update the checkpoint for the trainer hyperparameters
|
468
|
+
checkpoint[self.CHECKPOINT_HYPER_PARAMS_KEY] = self.hparams.model_dump(
|
469
|
+
mode="json"
|
470
|
+
)
|
471
|
+
self.strategy.save_checkpoint(
|
472
|
+
checkpoint, filepath, storage_options=storage_options
|
473
|
+
)
|
474
|
+
self.strategy.barrier("Trainer.save_checkpoint")
|
461
475
|
|
462
476
|
# Save the checkpoint metadata
|
463
477
|
metadata_path = None
|
nshtrainer/util/path.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import errno
|
3
4
|
import hashlib
|
4
5
|
import logging
|
5
6
|
import os
|
@@ -98,7 +99,7 @@ def try_symlink_or_copy(
|
|
98
99
|
# If the link already exists, remove it
|
99
100
|
if remove_existing:
|
100
101
|
try:
|
101
|
-
if link_path.exists(follow_symlinks=False):
|
102
|
+
if path_exists(link_path, follow_symlinks=False):
|
102
103
|
# follow_symlinks=False is EXTREMELY important here
|
103
104
|
# Otherwise, we've already deleted the file that the symlink
|
104
105
|
# used to point to, so this always returns False
|
@@ -132,3 +133,43 @@ def try_symlink_or_copy(
|
|
132
133
|
else:
|
133
134
|
log.debug(f"Created symlink or copied {file_path} to {link_path}")
|
134
135
|
return True
|
136
|
+
|
137
|
+
|
138
|
+
_WINERROR_NOT_READY = 21 # drive exists but is not accessible
|
139
|
+
_WINERROR_INVALID_NAME = 123 # fix for bpo-35306
|
140
|
+
_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself
|
141
|
+
|
142
|
+
# EBADF - guard against macOS `stat` throwing EBADF
|
143
|
+
_IGNORED_ERRNOS = (errno.ENOENT, errno.ENOTDIR, errno.EBADF, errno.ELOOP)
|
144
|
+
|
145
|
+
_IGNORED_WINERRORS = (
|
146
|
+
_WINERROR_NOT_READY,
|
147
|
+
_WINERROR_INVALID_NAME,
|
148
|
+
_WINERROR_CANT_RESOLVE_FILENAME,
|
149
|
+
)
|
150
|
+
|
151
|
+
|
152
|
+
def _ignore_error(exception):
|
153
|
+
return (
|
154
|
+
getattr(exception, "errno", None) in _IGNORED_ERRNOS
|
155
|
+
or getattr(exception, "winerror", None) in _IGNORED_WINERRORS
|
156
|
+
)
|
157
|
+
|
158
|
+
|
159
|
+
def path_exists(path: Path, follow_symlinks: bool = True):
|
160
|
+
"""
|
161
|
+
Whether this path exists.
|
162
|
+
|
163
|
+
This method normally follows symlinks; to check whether a symlink exists,
|
164
|
+
add the argument follow_symlinks=False.
|
165
|
+
"""
|
166
|
+
try:
|
167
|
+
path.stat(follow_symlinks=follow_symlinks)
|
168
|
+
except OSError as e:
|
169
|
+
if not _ignore_error(e):
|
170
|
+
raise
|
171
|
+
return False
|
172
|
+
except ValueError:
|
173
|
+
# Non-encodable path
|
174
|
+
return False
|
175
|
+
return True
|
{nshtrainer-1.0.0b52.dist-info → nshtrainer-1.0.0b54.dist-info}/RECORD
@@ -1,8 +1,8 @@
|
|
1
1
|
nshtrainer/.nshconfig.generated.json,sha256=yZd6cn1RhvNNJUgiUTRYut8ofZYvbulnpPG-rZIRhi4,106
|
2
2
|
nshtrainer/__init__.py,sha256=g_moPnfQxSxFZX5NB9ILQQOJrt4RTRuiFt9N0STIpxM,874
|
3
3
|
nshtrainer/_callback.py,sha256=ZDppiJ4d65tRXTEWYPZLH_F1xFizdz1pkWJe_sQ5uII,12564
|
4
|
-
nshtrainer/_checkpoint/metadata.py,sha256=
|
5
|
-
nshtrainer/_checkpoint/saver.py,sha256=
|
4
|
+
nshtrainer/_checkpoint/metadata.py,sha256=Hh5a7OkdknUEbkEwX6vS88-XLEeuVDoR6a3en2uLzQE,5597
|
5
|
+
nshtrainer/_checkpoint/saver.py,sha256=utcrYKSosd04N9m2GIylufO5DO05D90qVU3mvadfApU,1658
|
6
6
|
nshtrainer/_directory.py,sha256=TJR9ccyuzRlAVfVjGyeQ3E2AFAcz-XbBCxWfiXo2SlY,3191
|
7
7
|
nshtrainer/_experimental/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
8
8
|
nshtrainer/_hf_hub.py,sha256=4OsCbIITnZk_YLyoMrVyZ0SIN04FBxlC0ig2Et8UAdo,14287
|
@@ -142,18 +142,18 @@ nshtrainer/trainer/plugin/layer_sync.py,sha256=h-ydZwXepnsw5-paLgiDatqPyQ_8C0QEv
|
|
142
142
|
nshtrainer/trainer/plugin/precision.py,sha256=I0QsB1bVxmsFmBOkgrAfGONsuYae_lD9Bz0PfJEQvH4,5598
|
143
143
|
nshtrainer/trainer/signal_connector.py,sha256=GhfGcSzfaTNhnj2QFkBDq5aT7FqbLMA7eC8SYQs8_8w,10828
|
144
144
|
nshtrainer/trainer/strategy.py,sha256=VPTn5z3zvXTydY8IJchjhjcOfpvtoejnvUkq5E4WTus,1368
|
145
|
-
nshtrainer/trainer/trainer.py,sha256=
|
145
|
+
nshtrainer/trainer/trainer.py,sha256=Lo3vUo3ooTAjaX2fUYPFSMv5FP7sWfVov0QbA-T5hZ8,21113
|
146
146
|
nshtrainer/util/_environment_info.py,sha256=MT8mBe6ZolRfKiwU-les1P-lPNPqXpHQcfADrh_A3uY,24629
|
147
147
|
nshtrainer/util/bf16.py,sha256=9QhHZCkYSfYpIcxwAMoXyuh2yTSHBzT-EdLQB297jEs,762
|
148
148
|
nshtrainer/util/config/__init__.py,sha256=Z39JJufSb61Lhn2GfVcv3eFW_eorOrN9-9llDWlnZZM,272
|
149
149
|
nshtrainer/util/config/dtype.py,sha256=Fn_MhhQoHPyFAnFPSwvcvLiGR3yWFIszMba02CJiC4g,2213
|
150
150
|
nshtrainer/util/config/duration.py,sha256=mM-UfU_HvhXwW33TYEDg0x58n80tnle2e6VaWtxZTjk,764
|
151
151
|
nshtrainer/util/environment.py,sha256=s-B5nY0cKYXdFMdNYumvC_xxacMATiI4DvV2gUDu20k,4195
|
152
|
-
nshtrainer/util/path.py,sha256=
|
152
|
+
nshtrainer/util/path.py,sha256=PuJzYfY-2jnkTg7t8ucF-neq2ouBOVKsyQkCkrndDfE,5087
|
153
153
|
nshtrainer/util/seed.py,sha256=diMV8iwBKN7Xxt5pELmui-gyqyT80_CZzomrWhNss0k,316
|
154
154
|
nshtrainer/util/slurm.py,sha256=HflkP5iI_r4UHMyPjw9R4dD5AHsJUpcfJw5PLvGYBRM,1603
|
155
155
|
nshtrainer/util/typed.py,sha256=Xt5fUU6zwLKSTLUdenovnKK0N8qUq89Kddz2_XeykVQ,164
|
156
156
|
nshtrainer/util/typing_utils.py,sha256=MjY-CUX9R5Tzat-BlFnQjwl1PQ_W2yZQoXhkYHlJ_VA,442
|
157
|
-
nshtrainer-1.0.
|
158
|
-
nshtrainer-1.0.
|
159
|
-
nshtrainer-1.0.
|
157
|
+
nshtrainer-1.0.0b54.dist-info/METADATA,sha256=oSfrN2tgKgkZJwGbZVNbLULQcVMxh_wb02u7Hrujfn4,988
|
158
|
+
nshtrainer-1.0.0b54.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
159
|
+
nshtrainer-1.0.0b54.dist-info/RECORD,,
|
{nshtrainer-1.0.0b52.dist-info → nshtrainer-1.0.0b54.dist-info}/WHEEL
File without changes
|