PyPI - nshtrainer - Versions diffs - 1.0.0b41__py3-none-any.whl → 1.0.0b43__py3-none-any.whl - Mend

nshtrainer 1.0.0b41__py3-none-any.whl → 1.0.0b43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

nshtrainer/_checkpoint/metadata.py +7 -16
nshtrainer/_checkpoint/saver.py +10 -7
nshtrainer/callbacks/checkpoint/_base.py +3 -3
nshtrainer/trainer/trainer.py +2 -2
nshtrainer/util/path.py +10 -1
{nshtrainer-1.0.0b41.dist-info → nshtrainer-1.0.0b43.dist-info}/METADATA +1 -1
{nshtrainer-1.0.0b41.dist-info → nshtrainer-1.0.0b43.dist-info}/RECORD +8 -9
nshtrainer/nn/tests/test_mlp.py +0 -55
{nshtrainer-1.0.0b41.dist-info → nshtrainer-1.0.0b43.dist-info}/WHEEL +0 -0

nshtrainer/_checkpoint/metadata.py CHANGED Viewed

@@ -3,7 +3,6 @@ from __future__ import annotations
 import copy
 import datetime
 import logging
-from collections.abc import Callable
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, ClassVar
@@ -115,7 +114,7 @@ def _metadata_path(checkpoint_path: Path):
     return checkpoint_path.with_suffix(CheckpointMetadata.PATH_SUFFIX)
-def _write_checkpoint_metadata(trainer: Trainer, checkpoint_path: Path):
+def write_checkpoint_metadata(trainer: Trainer, checkpoint_path: Path):
     metadata_path = _metadata_path(checkpoint_path)
     metadata = _generate_checkpoint_metadata(trainer, checkpoint_path, metadata_path)
@@ -130,7 +129,7 @@ def _write_checkpoint_metadata(trainer: Trainer, checkpoint_path: Path):
     return metadata_path
-def _remove_checkpoint_metadata(checkpoint_path: Path):
+def remove_checkpoint_metadata(checkpoint_path: Path):
     path = _metadata_path(checkpoint_path)
     try:
         path.unlink(missing_ok=True)
@@ -140,23 +139,15 @@ def _remove_checkpoint_metadata(checkpoint_path: Path):
         log.debug(f"Removed {path}")
-def _link_checkpoint_metadata(checkpoint_path: Path, linked_checkpoint_path: Path):
+def link_checkpoint_metadata(checkpoint_path: Path, linked_checkpoint_path: Path):
     # First, remove any existing metadata files
-    _remove_checkpoint_metadata(linked_checkpoint_path)
+    remove_checkpoint_metadata(linked_checkpoint_path)
     # Link the metadata files to the new checkpoint
     path = _metadata_path(checkpoint_path)
     linked_path = _metadata_path(linked_checkpoint_path)
-    try_symlink_or_copy(path, linked_path)
+    if not path.exists():
+        raise FileNotFoundError(f"Checkpoint path does not exist: {checkpoint_path}")
-def _sort_ckpts_by_metadata(
-    checkpoint_paths: list[Path],
-    key: Callable[[CheckpointMetadata, Path], Any],
-    reverse: bool = False,
-):
-    return sorted(
-        [(CheckpointMetadata.from_ckpt_path(path), path) for path in checkpoint_paths],
-        key=lambda args_tuple: key(*args_tuple),
-        reverse=reverse,
-    )
+    try_symlink_or_copy(path, linked_path)

nshtrainer/_checkpoint/saver.py CHANGED Viewed

@@ -8,12 +8,12 @@ from pathlib import Path
 from lightning.pytorch import Trainer
 from ..util.path import try_symlink_or_copy
-from .metadata import _link_checkpoint_metadata, _remove_checkpoint_metadata
+from .metadata import link_checkpoint_metadata, remove_checkpoint_metadata
 log = logging.getLogger(__name__)
-def _link_checkpoint(
+def link_checkpoint(
     filepath: str | Path | os.PathLike,
     linkpath: str | Path | os.PathLike,
     *,
@@ -25,7 +25,10 @@ def _link_checkpoint(
     if remove_existing:
         try:
-            if linkpath.exists():
+            if linkpath.exists(follow_symlinks=False):
+                # follow_symlinks=False is EXTREMELY important here
+                # Otherwise, we've already deleted the file that the symlink
+                # used to point to, so this always returns False
                 if linkpath.is_dir():
                     shutil.rmtree(linkpath)
                 else:
@@ -36,14 +39,14 @@ def _link_checkpoint(
             log.debug(f"Removed {linkpath=}")
         if metadata:
-            _remove_checkpoint_metadata(linkpath)
+            remove_checkpoint_metadata(linkpath)
     try_symlink_or_copy(filepath, linkpath)
     if metadata:
-        _link_checkpoint_metadata(filepath, linkpath)
+        link_checkpoint_metadata(filepath, linkpath)
-def _remove_checkpoint(
+def remove_checkpoint(
     trainer: Trainer,
     filepath: str | Path | os.PathLike,
     *,
@@ -54,4 +57,4 @@ def _remove_checkpoint(
     trainer.strategy.remove_checkpoint(filepath)
     if metadata:
-        _remove_checkpoint_metadata(filepath)
+        remove_checkpoint_metadata(filepath)

nshtrainer/callbacks/checkpoint/_base.py CHANGED Viewed

@@ -12,7 +12,7 @@ from lightning.pytorch.callbacks import Checkpoint
 from typing_extensions import TypeVar, override
 from ..._checkpoint.metadata import CheckpointMetadata
-from ..._checkpoint.saver import _link_checkpoint, _remove_checkpoint
+from ..._checkpoint.saver import link_checkpoint, remove_checkpoint
 from ..base import CallbackConfigBase
 if TYPE_CHECKING:
@@ -122,7 +122,7 @@ class CheckpointBase(Checkpoint, ABC, Generic[TConfig]):
                 )
                 continue
-            _remove_checkpoint(trainer, old_ckpt_path, metadata=True)
+            remove_checkpoint(trainer, old_ckpt_path, metadata=True)
             log.debug(f"Removed old checkpoint: {old_ckpt_path}")
     def current_metrics(self, trainer: Trainer) -> dict[str, Any]:
@@ -167,7 +167,7 @@ class CheckpointBase(Checkpoint, ABC, Generic[TConfig]):
             # Create the latest symlink
             if (symlink_filename := self.symlink_path()) is not None:
                 symlink_path = self.dirpath / symlink_filename
-                _link_checkpoint(filepath, symlink_path, metadata=True)
+                link_checkpoint(filepath, symlink_path, metadata=True)
                 log.debug(f"Created latest symlink: {symlink_path}")
         # Barrier to ensure all processes have saved the checkpoint,

nshtrainer/trainer/trainer.py CHANGED Viewed

@@ -18,7 +18,7 @@ from lightning.pytorch.trainer.states import TrainerFn
 from lightning.pytorch.utilities.types import _EVALUATE_OUTPUT, _PREDICT_OUTPUT
 from typing_extensions import Never, Unpack, assert_never, deprecated, override
-from .._checkpoint.metadata import _write_checkpoint_metadata
+from .._checkpoint.metadata import write_checkpoint_metadata
 from ..callbacks.base import resolve_all_callbacks
 from ..util._environment_info import EnvironmentConfig
 from ..util.bf16 import is_bf16_supported_no_emulation
@@ -478,7 +478,7 @@ class Trainer(LightningTrainer):
         metadata_path = None
         if self.hparams.save_checkpoint_metadata and self.is_global_zero:
             # Generate the metadata and write to disk
-            metadata_path = _write_checkpoint_metadata(self, filepath)
+            metadata_path = write_checkpoint_metadata(self, filepath)
         # Call the `on_checkpoint_saved` method on all callbacks
         from .. import _callback

nshtrainer/util/path.py CHANGED Viewed

@@ -81,18 +81,27 @@ def compute_file_checksum(file_path: Path) -> str:
 def try_symlink_or_copy(
     file_path: Path,
     link_path: Path,
+    *,
     target_is_directory: bool = False,
     relative: bool = True,
     remove_existing: bool = True,
+    throw_on_invalid_target: bool = False,
 ):
     """
     Symlinks on Unix, copies on Windows.
     """
+    # Check if the target file exists
+    if throw_on_invalid_target and not file_path.exists():
+        raise FileNotFoundError(f"File not found: {file_path}")
     # If the link already exists, remove it
     if remove_existing:
         try:
-            if link_path.exists():
+            if link_path.exists(follow_symlinks=False):
+                # follow_symlinks=False is EXTREMELY important here
+                # Otherwise, we've already deleted the file that the symlink
+                # used to point to, so this always returns False
                 if link_path.is_dir():
                     shutil.rmtree(link_path)
                 else:

{nshtrainer-1.0.0b41.dist-info → nshtrainer-1.0.0b43.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: nshtrainer
-Version: 1.0.0b41
+Version: 1.0.0b43
 Summary:
 Author: Nima Shoghi
 Author-email: nimashoghi@gmail.com

{nshtrainer-1.0.0b41.dist-info → nshtrainer-1.0.0b43.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 nshtrainer/.nshconfig.generated.json,sha256=yZd6cn1RhvNNJUgiUTRYut8ofZYvbulnpPG-rZIRhi4,106
 nshtrainer/__init__.py,sha256=g_moPnfQxSxFZX5NB9ILQQOJrt4RTRuiFt9N0STIpxM,874
 nshtrainer/_callback.py,sha256=tXQCDzS6CvMTuTY5lQSH5qZs1pXUi-gt9bQdpXMVdEs,12715
-nshtrainer/_checkpoint/metadata.py,sha256=PHy-54Cg-o3OtCffAqrVv6ZVMU7zhRo_-sZiSEEno1Y,5019
-nshtrainer/_checkpoint/saver.py,sha256=LOP8jjKF0Dw9x9H-BKrLMWlEp1XTan2DUK0zQUCWw5U,1360
+nshtrainer/_checkpoint/metadata.py,sha256=LQZ8g50rKxQQx-FqiW3n8EWmal9qSWRouOpIIn6NJJY,4758
+nshtrainer/_checkpoint/saver.py,sha256=rWl4d2lCTMU4_wt8yZFL2pFQaP9hj5sPgqHMPQ4zuyI,1584
 nshtrainer/_directory.py,sha256=TJR9ccyuzRlAVfVjGyeQ3E2AFAcz-XbBCxWfiXo2SlY,3191
 nshtrainer/_experimental/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
 nshtrainer/_hf_hub.py,sha256=4OsCbIITnZk_YLyoMrVyZ0SIN04FBxlC0ig2Et8UAdo,14287
@@ -10,7 +10,7 @@ nshtrainer/callbacks/__init__.py,sha256=4giOYT8A709UOLRtQEt16QbOAFUHCjJ_aLB7ITTw
 nshtrainer/callbacks/actsave.py,sha256=NSXIIu62MNYe5gz479SMW33bdoKYoYtWtd_iTWFpKpc,3881
 nshtrainer/callbacks/base.py,sha256=Alaou1IHAIlMEM7g58d_02ozY2xWlshBN7fsw5Ee21s,3683
 nshtrainer/callbacks/checkpoint/__init__.py,sha256=l8tkHc83_mLiU0-wT09SWdRzwpm2ulbkLzcuCmuTwzE,620
-nshtrainer/callbacks/checkpoint/_base.py,sha256=ZVEUVl5kjCSSe69Q0rMUbKBNNUog0pxBwWkeyuxG2w0,6304
+nshtrainer/callbacks/checkpoint/_base.py,sha256=wCJBRI0pQYZc3GBu0b-aUBlBDhd39AdL82VvFgKmv3k,6300
 nshtrainer/callbacks/checkpoint/best_checkpoint.py,sha256=2CQuhPJ3Fi7lDw7z-J8kXXXuDU8-4HcU48oZxR49apk,2667
 nshtrainer/callbacks/checkpoint/last_checkpoint.py,sha256=vn-as3ex7kaTRcKsIurVtM6kUSHYNwHJeYG82j2dMcc,3554
 nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py,sha256=nljzETqkHwA-4g8mxaeFK5HxA8My0dlIPzIUscSMWyk,3525
@@ -122,7 +122,6 @@ nshtrainer/nn/mlp.py,sha256=_a8rJJniSCvM08gyQGO-5MUoO18U9_FSGGn3tZL2_U4,7101
 nshtrainer/nn/module_dict.py,sha256=9plb8aQUx5TUEPhX5jI9u8LrpTeKe7jZAHi8iIqcN8w,2365
 nshtrainer/nn/module_list.py,sha256=UB43pcwD_3nUke_DyLQt-iXKhWdKM6Zjm84lRC1hPYA,1755
 nshtrainer/nn/nonlinearity.py,sha256=xmaL4QCRvCxqmaGIOwetJeKK-6IK4m2OV7D3SjxSwJQ,6322
-nshtrainer/nn/tests/test_mlp.py,sha256=xBPiHlBvOCn67EbpzzKL-2FU7ikGxHT3i6CMSp1wk7M,1840
 nshtrainer/optimizer.py,sha256=u968GRNPUNn3f_9BEY2RBNuJq5O3wJWams3NG0dkrOA,1738
 nshtrainer/profiler/__init__.py,sha256=RjaNBoVcTFu8lF0dNlFp-2LaPYdonoIbDy2_KhgF0Ek,594
 nshtrainer/profiler/_base.py,sha256=kFcSVn9gJuMwgDxbfyHh46CmEAIPZjxw3yjPbKgzvwA,950
@@ -141,18 +140,18 @@ nshtrainer/trainer/plugin/layer_sync.py,sha256=h-ydZwXepnsw5-paLgiDatqPyQ_8C0QEv
 nshtrainer/trainer/plugin/precision.py,sha256=I0QsB1bVxmsFmBOkgrAfGONsuYae_lD9Bz0PfJEQvH4,5598
 nshtrainer/trainer/signal_connector.py,sha256=GhfGcSzfaTNhnj2QFkBDq5aT7FqbLMA7eC8SYQs8_8w,10828
 nshtrainer/trainer/strategy.py,sha256=VPTn5z3zvXTydY8IJchjhjcOfpvtoejnvUkq5E4WTus,1368
-nshtrainer/trainer/trainer.py,sha256=l2kJs27v4IHZnzxExr0zX0sVex0wukgiD2Wn_0wiGJg,20836
+nshtrainer/trainer/trainer.py,sha256=ed_Pn-yQCb9BqaHXo2wVhkt2CSfGNEzMAM6RsDoTo-I,20834
 nshtrainer/util/_environment_info.py,sha256=MT8mBe6ZolRfKiwU-les1P-lPNPqXpHQcfADrh_A3uY,24629
 nshtrainer/util/bf16.py,sha256=9QhHZCkYSfYpIcxwAMoXyuh2yTSHBzT-EdLQB297jEs,762
 nshtrainer/util/config/__init__.py,sha256=Z39JJufSb61Lhn2GfVcv3eFW_eorOrN9-9llDWlnZZM,272
 nshtrainer/util/config/dtype.py,sha256=Fn_MhhQoHPyFAnFPSwvcvLiGR3yWFIszMba02CJiC4g,2213
 nshtrainer/util/config/duration.py,sha256=mM-UfU_HvhXwW33TYEDg0x58n80tnle2e6VaWtxZTjk,764
 nshtrainer/util/environment.py,sha256=s-B5nY0cKYXdFMdNYumvC_xxacMATiI4DvV2gUDu20k,4195
-nshtrainer/util/path.py,sha256=L-Nh9tlXSUfoP19TFbQq8I0AfS5ugCfGYTYFeddDHcs,3516
+nshtrainer/util/path.py,sha256=9fIjE3S78pPL6wjAgEJUYfIJQAPdKOQqIYvTS9lWTUk,3959
 nshtrainer/util/seed.py,sha256=diMV8iwBKN7Xxt5pELmui-gyqyT80_CZzomrWhNss0k,316
 nshtrainer/util/slurm.py,sha256=HflkP5iI_r4UHMyPjw9R4dD5AHsJUpcfJw5PLvGYBRM,1603
 nshtrainer/util/typed.py,sha256=Xt5fUU6zwLKSTLUdenovnKK0N8qUq89Kddz2_XeykVQ,164
 nshtrainer/util/typing_utils.py,sha256=MjY-CUX9R5Tzat-BlFnQjwl1PQ_W2yZQoXhkYHlJ_VA,442
-nshtrainer-1.0.0b41.dist-info/METADATA,sha256=DL9HgN6RP8X8v0sCdTr2IjRSwIBY96NZXe15m5V4y4c,988
-nshtrainer-1.0.0b41.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
-nshtrainer-1.0.0b41.dist-info/RECORD,,
+nshtrainer-1.0.0b43.dist-info/METADATA,sha256=ZE3l6CN34ptFgx3SDPfKIgjdV2s3J8qdP729eb58vzo,988
+nshtrainer-1.0.0b43.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+nshtrainer-1.0.0b43.dist-info/RECORD,,

nshtrainer/nn/tests/test_mlp.py DELETED Viewed

@@ -1,55 +0,0 @@
-from __future__ import annotations
-from typing import cast
-import pytest
-import torch
-from nshtrainer.nn.mlp import MLP
-def test_mlp_seed_reproducibility():
-    """Test that the seed parameter in MLP ensures reproducible weights."""
-    # Test dimensions
-    dims = [10, 20, 5]
-    # Create two MLPs with the same seed
-    seed1 = 42
-    mlp1 = MLP(dims, activation=torch.nn.ReLU(), seed=seed1)
-    mlp2 = MLP(dims, activation=torch.nn.ReLU(), seed=seed1)
-    # Create an MLP with a different seed
-    seed2 = 123
-    mlp3 = MLP(dims, activation=torch.nn.ReLU(), seed=seed2)
-    # Check first layer weights
-    layer1_weights1 = cast(torch.Tensor, mlp1[0].weight)
-    layer1_weights2 = cast(torch.Tensor, mlp2[0].weight)
-    layer1_weights3 = cast(torch.Tensor, mlp3[0].weight)
-    # Same seed should produce identical weights
-    assert torch.allclose(layer1_weights1, layer1_weights2)
-    # Different seeds should produce different weights
-    assert not torch.allclose(layer1_weights1, layer1_weights3)
-    # Check second layer weights
-    layer2_weights1 = cast(torch.Tensor, mlp1[2].weight)
-    layer2_weights2 = cast(torch.Tensor, mlp2[2].weight)
-    layer2_weights3 = cast(torch.Tensor, mlp3[2].weight)
-    # Same seed should produce identical weights for all layers
-    assert torch.allclose(layer2_weights1, layer2_weights2)
-    # Different seeds should produce different weights for all layers
-    assert not torch.allclose(layer2_weights1, layer2_weights3)
-    # Test that not providing a seed gives different results each time
-    mlp4 = MLP(dims, activation=torch.nn.ReLU(), seed=None)
-    mlp5 = MLP(dims, activation=torch.nn.ReLU(), seed=None)
-    # Without seeds, weights should be different
-    assert not torch.allclose(
-        cast(torch.Tensor, mlp4[0].weight), cast(torch.Tensor, mlp5[0].weight)
-    )

{nshtrainer-1.0.0b41.dist-info → nshtrainer-1.0.0b43.dist-info}/WHEEL RENAMED Viewed

File without changes

nshtrainer 1.0.0b41__py3-none-any.whl → 1.0.0b43__py3-none-any.whl

nshtrainer 1.0.0b41__py3-none-any.whl → 1.0.0b43__py3-none-any.whl