nshtrainer 0.10.12__tar.gz → 0.10.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/PKG-INFO +1 -1
  2. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/pyproject.toml +1 -1
  3. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_checkpoint/saver.py +13 -4
  4. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/latest_epoch_checkpoint.py +1 -1
  5. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/model_checkpoint.py +1 -1
  6. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/README.md +0 -0
  7. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/__init__.py +0 -0
  8. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_checkpoint/loader.py +0 -0
  9. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_checkpoint/metadata.py +0 -0
  10. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_experimental/__init__.py +0 -0
  11. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_experimental/flops/__init__.py +0 -0
  12. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_experimental/flops/flop_counter.py +0 -0
  13. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/_experimental/flops/module_tracker.py +0 -0
  14. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/__init__.py +0 -0
  15. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/_throughput_monitor_callback.py +0 -0
  16. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/actsave.py +0 -0
  17. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/base.py +0 -0
  18. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/early_stopping.py +0 -0
  19. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/ema.py +0 -0
  20. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/finite_checks.py +0 -0
  21. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
  22. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/interval.py +0 -0
  23. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/log_epoch.py +0 -0
  24. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/norm_logging.py +0 -0
  25. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/on_exception_checkpoint.py +0 -0
  26. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/print_table.py +0 -0
  27. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/throughput_monitor.py +0 -0
  28. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/timer.py +0 -0
  29. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
  30. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/data/__init__.py +0 -0
  31. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
  32. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/data/transform.py +0 -0
  33. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/__init__.py +0 -0
  34. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/_experimental.py +0 -0
  35. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/actsave.py +0 -0
  36. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/callbacks.py +0 -0
  37. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/config.py +0 -0
  38. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/data.py +0 -0
  39. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/log.py +0 -0
  40. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/lr_scheduler.py +0 -0
  41. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/model.py +0 -0
  42. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/nn.py +0 -0
  43. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/optimizer.py +0 -0
  44. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/runner.py +0 -0
  45. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/snapshot.py +0 -0
  46. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/snoop.py +0 -0
  47. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/trainer.py +0 -0
  48. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/typecheck.py +0 -0
  49. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/ll/util.py +0 -0
  50. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
  51. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/lr_scheduler/_base.py +0 -0
  52. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
  53. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
  54. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/metrics/__init__.py +0 -0
  55. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/metrics/_config.py +0 -0
  56. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/__init__.py +0 -0
  57. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/base.py +0 -0
  58. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/config.py +0 -0
  59. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/callback.py +0 -0
  60. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/debug.py +0 -0
  61. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/distributed.py +0 -0
  62. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/logger.py +0 -0
  63. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/profiler.py +0 -0
  64. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/rlp_sanity_checks.py +0 -0
  65. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/model/modules/shared_parameters.py +0 -0
  66. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/nn/__init__.py +0 -0
  67. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/nn/mlp.py +0 -0
  68. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/nn/module_dict.py +0 -0
  69. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/nn/module_list.py +0 -0
  70. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/nn/nonlinearity.py +0 -0
  71. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/optimizer.py +0 -0
  72. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/runner.py +0 -0
  73. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/scripts/find_packages.py +0 -0
  74. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/trainer/__init__.py +0 -0
  75. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
  76. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/trainer/checkpoint_connector.py +0 -0
  77. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/trainer/signal_connector.py +0 -0
  78. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/trainer/trainer.py +0 -0
  79. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/_environment_info.py +0 -0
  80. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/environment.py +0 -0
  81. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/seed.py +0 -0
  82. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/slurm.py +0 -0
  83. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/typed.py +0 -0
  84. {nshtrainer-0.10.12 → nshtrainer-0.10.13}/src/nshtrainer/util/typing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nshtrainer
3
- Version: 0.10.12
3
+ Version: 0.10.13
4
4
  Summary:
5
5
  Author: Nima Shoghi
6
6
  Author-email: nimashoghi@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "nshtrainer"
3
- version = "0.10.12"
3
+ version = "0.10.13"
4
4
  description = ""
5
5
  authors = ["Nima Shoghi <nimashoghi@gmail.com>"]
6
6
  readme = "README.md"
@@ -36,7 +36,8 @@ def _link_checkpoint(
36
36
  # fall back to copying the file
37
37
  shutil.copy(filepath, linkpath)
38
38
 
39
- _link_checkpoint_metadata(filepath, linkpath)
39
+ if metadata:
40
+ _link_checkpoint_metadata(filepath, linkpath)
40
41
  if barrier:
41
42
  trainer.strategy.barrier()
42
43
 
@@ -44,9 +45,17 @@ def _link_checkpoint(
44
45
  def _remove_checkpoint(
45
46
  trainer: Trainer,
46
47
  filepath: str | Path | os.PathLike,
47
- remove_metadata: bool = True,
48
+ *,
49
+ metadata: bool,
50
+ barrier: bool,
48
51
  ):
49
52
  if not isinstance(filepath, Path):
50
53
  filepath = Path(filepath)
51
- trainer.strategy.remove_checkpoint(filepath)
52
- _remove_checkpoint_metadata(filepath)
54
+
55
+ if trainer.is_global_zero:
56
+ trainer.strategy.remove_checkpoint(filepath)
57
+ if metadata:
58
+ _remove_checkpoint_metadata(filepath)
59
+
60
+ if barrier:
61
+ trainer.strategy.barrier()
@@ -69,7 +69,7 @@ class LatestEpochCheckpoint(Checkpoint):
69
69
 
70
70
  def _remove_checkpoints(self, trainer: Trainer, ckpt_paths: list[Path]):
71
71
  for ckpt_path in ckpt_paths:
72
- _remove_checkpoint(trainer, ckpt_path, remove_metadata=True)
72
+ _remove_checkpoint(trainer, ckpt_path, metadata=True, barrier=False)
73
73
 
74
74
  def _remove_old_checkpoints(self, trainer: Trainer):
75
75
  if (latest_k := self.config.latest_k) == "all":
@@ -202,4 +202,4 @@ class ModelCheckpoint(_ModelCheckpoint):
202
202
 
203
203
  @override
204
204
  def _remove_checkpoint(self, trainer: Trainer, filepath: str):
205
- return _remove_checkpoint(trainer, filepath, remove_metadata=True)
205
+ return _remove_checkpoint(trainer, filepath, metadata=True, barrier=False)
File without changes