nshtrainer 0.10.15__tar.gz → 0.10.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/PKG-INFO +1 -1
  2. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/pyproject.toml +1 -1
  3. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/_checkpoint/metadata.py +5 -9
  4. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/latest_epoch_checkpoint.py +4 -2
  5. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/model/config.py +1 -1
  6. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/README.md +0 -0
  7. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/__init__.py +0 -0
  8. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/_checkpoint/loader.py +0 -0
  9. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/_checkpoint/saver.py +0 -0
  10. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/_experimental/__init__.py +0 -0
  11. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/_experimental/flops/__init__.py +0 -0
  12. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/_experimental/flops/flop_counter.py +0 -0
  13. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/_experimental/flops/module_tracker.py +0 -0
  14. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/__init__.py +0 -0
  15. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/_throughput_monitor_callback.py +0 -0
  16. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/actsave.py +0 -0
  17. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/base.py +0 -0
  18. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/early_stopping.py +0 -0
  19. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/ema.py +0 -0
  20. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/finite_checks.py +0 -0
  21. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
  22. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/interval.py +0 -0
  23. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/log_epoch.py +0 -0
  24. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/model_checkpoint.py +0 -0
  25. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/norm_logging.py +0 -0
  26. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/on_exception_checkpoint.py +0 -0
  27. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/print_table.py +0 -0
  28. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/throughput_monitor.py +0 -0
  29. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/timer.py +0 -0
  30. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
  31. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/data/__init__.py +0 -0
  32. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
  33. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/data/transform.py +0 -0
  34. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/__init__.py +0 -0
  35. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/_experimental.py +0 -0
  36. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/actsave.py +0 -0
  37. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/callbacks.py +0 -0
  38. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/config.py +0 -0
  39. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/data.py +0 -0
  40. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/log.py +0 -0
  41. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/lr_scheduler.py +0 -0
  42. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/model.py +0 -0
  43. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/nn.py +0 -0
  44. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/optimizer.py +0 -0
  45. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/runner.py +0 -0
  46. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/snapshot.py +0 -0
  47. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/snoop.py +0 -0
  48. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/trainer.py +0 -0
  49. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/typecheck.py +0 -0
  50. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/ll/util.py +0 -0
  51. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
  52. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/lr_scheduler/_base.py +0 -0
  53. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
  54. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
  55. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/metrics/__init__.py +0 -0
  56. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/metrics/_config.py +0 -0
  57. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/model/__init__.py +0 -0
  58. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/model/base.py +0 -0
  59. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/model/modules/callback.py +0 -0
  60. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/model/modules/debug.py +0 -0
  61. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/model/modules/distributed.py +0 -0
  62. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/model/modules/logger.py +0 -0
  63. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/model/modules/profiler.py +0 -0
  64. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/model/modules/rlp_sanity_checks.py +0 -0
  65. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/model/modules/shared_parameters.py +0 -0
  66. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/nn/__init__.py +0 -0
  67. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/nn/mlp.py +0 -0
  68. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/nn/module_dict.py +0 -0
  69. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/nn/module_list.py +0 -0
  70. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/nn/nonlinearity.py +0 -0
  71. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/optimizer.py +0 -0
  72. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/runner.py +0 -0
  73. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/scripts/find_packages.py +0 -0
  74. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/trainer/__init__.py +0 -0
  75. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
  76. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/trainer/checkpoint_connector.py +0 -0
  77. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/trainer/signal_connector.py +0 -0
  78. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/trainer/trainer.py +0 -0
  79. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/util/_environment_info.py +0 -0
  80. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/util/environment.py +0 -0
  81. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/util/seed.py +0 -0
  82. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/util/slurm.py +0 -0
  83. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/util/typed.py +0 -0
  84. {nshtrainer-0.10.15 → nshtrainer-0.10.17}/src/nshtrainer/util/typing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nshtrainer
3
- Version: 0.10.15
3
+ Version: 0.10.17
4
4
  Summary:
5
5
  Author: Nima Shoghi
6
6
  Author-email: nimashoghi@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "nshtrainer"
3
- version = "0.10.15"
3
+ version = "0.10.17"
4
4
  description = ""
5
5
  authors = ["Nima Shoghi <nimashoghi@gmail.com>"]
6
6
  readme = "README.md"
@@ -105,10 +105,8 @@ def _write_checkpoint_metadata(
105
105
 
106
106
 
107
107
  def _remove_checkpoint_metadata(checkpoint_path: Path):
108
- for path in (
109
- checkpoint_path.with_suffix(METADATA_PATH_SUFFIX),
110
- checkpoint_path.with_suffix(HPARAMS_PATH_SUFFIX),
111
- ):
108
+ for suffix in (METADATA_PATH_SUFFIX, HPARAMS_PATH_SUFFIX):
109
+ path = checkpoint_path.with_suffix(suffix)
112
110
  try:
113
111
  path.unlink(missing_ok=True)
114
112
  except Exception as e:
@@ -122,11 +120,9 @@ def _link_checkpoint_metadata(checkpoint_path: Path, linked_checkpoint_path: Pat
122
120
  _remove_checkpoint_metadata(linked_checkpoint_path)
123
121
 
124
122
  # Link the metadata files to the new checkpoint
125
- for path in (
126
- checkpoint_path.with_suffix(METADATA_PATH_SUFFIX),
127
- checkpoint_path.with_suffix(HPARAMS_PATH_SUFFIX),
128
- ):
129
- linked_path = linked_checkpoint_path.with_suffix(path.suffix)
123
+ for suffix in (METADATA_PATH_SUFFIX, HPARAMS_PATH_SUFFIX):
124
+ path = checkpoint_path.with_suffix(suffix)
125
+ linked_path = linked_checkpoint_path.with_suffix(suffix)
130
126
  try:
131
127
  try:
132
128
  linked_path.symlink_to(path)
@@ -64,7 +64,7 @@ class LatestEpochCheckpoint(Checkpoint):
64
64
  filename = self.config.filename.format(
65
65
  epoch=trainer.current_epoch, step=trainer.global_step
66
66
  )
67
- filename = f"{self.PREFIX}{filename}.{self.EXTENSION}"
67
+ filename = f"{self.PREFIX}{filename}{self.EXTENSION}"
68
68
  return self.dirpath / filename
69
69
 
70
70
  def _remove_checkpoints(self, trainer: Trainer, ckpt_paths: list[Path]):
@@ -95,7 +95,9 @@ class LatestEpochCheckpoint(Checkpoint):
95
95
 
96
96
  def _save_new_checkpoint(self, trainer: Trainer):
97
97
  # Remove old checkpoints
98
- self._remove_old_checkpoints(trainer)
98
+ if trainer.is_global_zero:
99
+ self._remove_old_checkpoints(trainer)
100
+ trainer.strategy.barrier()
99
101
 
100
102
  # Save the new checkpoint
101
103
  filepath = self._ckpt_path(trainer)
@@ -288,7 +288,7 @@ class WandbLoggerConfig(CallbackConfigBase, BaseLoggerConfig):
288
288
  offline: bool = False
289
289
  """Whether to run WandB in offline mode."""
290
290
 
291
- use_wandb_core: bool = False
291
+ use_wandb_core: bool = True
292
292
  """Whether to use the new `wandb-core` backend for WandB.
293
293
  `wandb-core` is a new backend for WandB that is faster and more efficient than the old backend.
294
294
  """
File without changes