nshtrainer 0.10.15__tar.gz → 0.10.16__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (84)
  1. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/PKG-INFO +1 -1
  2. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/pyproject.toml +1 -1
  3. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/_checkpoint/metadata.py +5 -9
  4. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/latest_epoch_checkpoint.py +4 -2
  5. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/README.md +0 -0
  6. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/__init__.py +0 -0
  7. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/_checkpoint/loader.py +0 -0
  8. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/_checkpoint/saver.py +0 -0
  9. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/_experimental/__init__.py +0 -0
  10. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/_experimental/flops/__init__.py +0 -0
  11. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/_experimental/flops/flop_counter.py +0 -0
  12. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/_experimental/flops/module_tracker.py +0 -0
  13. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/__init__.py +0 -0
  14. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/_throughput_monitor_callback.py +0 -0
  15. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/actsave.py +0 -0
  16. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/base.py +0 -0
  17. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/early_stopping.py +0 -0
  18. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/ema.py +0 -0
  19. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/finite_checks.py +0 -0
  20. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
  21. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/interval.py +0 -0
  22. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/log_epoch.py +0 -0
  23. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/model_checkpoint.py +0 -0
  24. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/norm_logging.py +0 -0
  25. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/on_exception_checkpoint.py +0 -0
  26. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/print_table.py +0 -0
  27. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/throughput_monitor.py +0 -0
  28. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/timer.py +0 -0
  29. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
  30. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/data/__init__.py +0 -0
  31. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
  32. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/data/transform.py +0 -0
  33. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/__init__.py +0 -0
  34. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/_experimental.py +0 -0
  35. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/actsave.py +0 -0
  36. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/callbacks.py +0 -0
  37. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/config.py +0 -0
  38. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/data.py +0 -0
  39. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/log.py +0 -0
  40. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/lr_scheduler.py +0 -0
  41. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/model.py +0 -0
  42. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/nn.py +0 -0
  43. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/optimizer.py +0 -0
  44. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/runner.py +0 -0
  45. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/snapshot.py +0 -0
  46. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/snoop.py +0 -0
  47. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/trainer.py +0 -0
  48. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/typecheck.py +0 -0
  49. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/ll/util.py +0 -0
  50. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
  51. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/lr_scheduler/_base.py +0 -0
  52. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
  53. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
  54. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/metrics/__init__.py +0 -0
  55. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/metrics/_config.py +0 -0
  56. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/model/__init__.py +0 -0
  57. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/model/base.py +0 -0
  58. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/model/config.py +0 -0
  59. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/callback.py +0 -0
  60. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/debug.py +0 -0
  61. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/distributed.py +0 -0
  62. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/logger.py +0 -0
  63. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/profiler.py +0 -0
  64. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/rlp_sanity_checks.py +0 -0
  65. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/shared_parameters.py +0 -0
  66. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/nn/__init__.py +0 -0
  67. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/nn/mlp.py +0 -0
  68. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/nn/module_dict.py +0 -0
  69. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/nn/module_list.py +0 -0
  70. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/nn/nonlinearity.py +0 -0
  71. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/optimizer.py +0 -0
  72. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/runner.py +0 -0
  73. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/scripts/find_packages.py +0 -0
  74. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/trainer/__init__.py +0 -0
  75. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
  76. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/trainer/checkpoint_connector.py +0 -0
  77. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/trainer/signal_connector.py +0 -0
  78. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/trainer/trainer.py +0 -0
  79. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/util/_environment_info.py +0 -0
  80. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/util/environment.py +0 -0
  81. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/util/seed.py +0 -0
  82. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/util/slurm.py +0 -0
  83. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/util/typed.py +0 -0
  84. {nshtrainer-0.10.15 → nshtrainer-0.10.16}/src/nshtrainer/util/typing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nshtrainer
3
- Version: 0.10.15
3
+ Version: 0.10.16
4
4
  Summary:
5
5
  Author: Nima Shoghi
6
6
  Author-email: nimashoghi@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "nshtrainer"
3
- version = "0.10.15"
3
+ version = "0.10.16"
4
4
  description = ""
5
5
  authors = ["Nima Shoghi <nimashoghi@gmail.com>"]
6
6
  readme = "README.md"
@@ -105,10 +105,8 @@ def _write_checkpoint_metadata(
105
105
 
106
106
 
107
107
  def _remove_checkpoint_metadata(checkpoint_path: Path):
108
- for path in (
109
- checkpoint_path.with_suffix(METADATA_PATH_SUFFIX),
110
- checkpoint_path.with_suffix(HPARAMS_PATH_SUFFIX),
111
- ):
108
+ for suffix in (METADATA_PATH_SUFFIX, HPARAMS_PATH_SUFFIX):
109
+ path = checkpoint_path.with_suffix(suffix)
112
110
  try:
113
111
  path.unlink(missing_ok=True)
114
112
  except Exception as e:
@@ -122,11 +120,9 @@ def _link_checkpoint_metadata(checkpoint_path: Path, linked_checkpoint_path: Pat
122
120
  _remove_checkpoint_metadata(linked_checkpoint_path)
123
121
 
124
122
  # Link the metadata files to the new checkpoint
125
- for path in (
126
- checkpoint_path.with_suffix(METADATA_PATH_SUFFIX),
127
- checkpoint_path.with_suffix(HPARAMS_PATH_SUFFIX),
128
- ):
129
- linked_path = linked_checkpoint_path.with_suffix(path.suffix)
123
+ for suffix in (METADATA_PATH_SUFFIX, HPARAMS_PATH_SUFFIX):
124
+ path = checkpoint_path.with_suffix(suffix)
125
+ linked_path = linked_checkpoint_path.with_suffix(suffix)
130
126
  try:
131
127
  try:
132
128
  linked_path.symlink_to(path)
@@ -64,7 +64,7 @@ class LatestEpochCheckpoint(Checkpoint):
64
64
  filename = self.config.filename.format(
65
65
  epoch=trainer.current_epoch, step=trainer.global_step
66
66
  )
67
- filename = f"{self.PREFIX}{filename}.{self.EXTENSION}"
67
+ filename = f"{self.PREFIX}{filename}{self.EXTENSION}"
68
68
  return self.dirpath / filename
69
69
 
70
70
  def _remove_checkpoints(self, trainer: Trainer, ckpt_paths: list[Path]):
@@ -95,7 +95,9 @@ class LatestEpochCheckpoint(Checkpoint):
95
95
 
96
96
  def _save_new_checkpoint(self, trainer: Trainer):
97
97
  # Remove old checkpoints
98
- self._remove_old_checkpoints(trainer)
98
+ if trainer.is_global_zero:
99
+ self._remove_old_checkpoints(trainer)
100
+ trainer.strategy.barrier()
99
101
 
100
102
  # Save the new checkpoint
101
103
  filepath = self._ckpt_path(trainer)
File without changes