nshtrainer 0.10.14__tar.gz → 0.10.16__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (84)
  1. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/PKG-INFO +1 -1
  2. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/pyproject.toml +1 -1
  3. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/_checkpoint/metadata.py +5 -9
  4. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/latest_epoch_checkpoint.py +4 -2
  5. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/model/config.py +4 -3
  6. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/README.md +0 -0
  7. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/__init__.py +0 -0
  8. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/_checkpoint/loader.py +0 -0
  9. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/_checkpoint/saver.py +0 -0
  10. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/_experimental/__init__.py +0 -0
  11. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/_experimental/flops/__init__.py +0 -0
  12. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/_experimental/flops/flop_counter.py +0 -0
  13. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/_experimental/flops/module_tracker.py +0 -0
  14. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/__init__.py +0 -0
  15. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/_throughput_monitor_callback.py +0 -0
  16. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/actsave.py +0 -0
  17. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/base.py +0 -0
  18. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/early_stopping.py +0 -0
  19. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/ema.py +0 -0
  20. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/finite_checks.py +0 -0
  21. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
  22. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/interval.py +0 -0
  23. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/log_epoch.py +0 -0
  24. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/model_checkpoint.py +0 -0
  25. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/norm_logging.py +0 -0
  26. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/on_exception_checkpoint.py +0 -0
  27. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/print_table.py +0 -0
  28. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/throughput_monitor.py +0 -0
  29. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/timer.py +0 -0
  30. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
  31. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/data/__init__.py +0 -0
  32. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
  33. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/data/transform.py +0 -0
  34. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/__init__.py +0 -0
  35. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/_experimental.py +0 -0
  36. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/actsave.py +0 -0
  37. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/callbacks.py +0 -0
  38. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/config.py +0 -0
  39. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/data.py +0 -0
  40. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/log.py +0 -0
  41. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/lr_scheduler.py +0 -0
  42. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/model.py +0 -0
  43. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/nn.py +0 -0
  44. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/optimizer.py +0 -0
  45. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/runner.py +0 -0
  46. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/snapshot.py +0 -0
  47. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/snoop.py +0 -0
  48. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/trainer.py +0 -0
  49. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/typecheck.py +0 -0
  50. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/ll/util.py +0 -0
  51. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
  52. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/lr_scheduler/_base.py +0 -0
  53. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
  54. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
  55. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/metrics/__init__.py +0 -0
  56. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/metrics/_config.py +0 -0
  57. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/model/__init__.py +0 -0
  58. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/model/base.py +0 -0
  59. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/callback.py +0 -0
  60. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/debug.py +0 -0
  61. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/distributed.py +0 -0
  62. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/logger.py +0 -0
  63. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/profiler.py +0 -0
  64. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/rlp_sanity_checks.py +0 -0
  65. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/model/modules/shared_parameters.py +0 -0
  66. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/nn/__init__.py +0 -0
  67. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/nn/mlp.py +0 -0
  68. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/nn/module_dict.py +0 -0
  69. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/nn/module_list.py +0 -0
  70. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/nn/nonlinearity.py +0 -0
  71. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/optimizer.py +0 -0
  72. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/runner.py +0 -0
  73. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/scripts/find_packages.py +0 -0
  74. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/trainer/__init__.py +0 -0
  75. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
  76. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/trainer/checkpoint_connector.py +0 -0
  77. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/trainer/signal_connector.py +0 -0
  78. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/trainer/trainer.py +0 -0
  79. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/util/_environment_info.py +0 -0
  80. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/util/environment.py +0 -0
  81. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/util/seed.py +0 -0
  82. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/util/slurm.py +0 -0
  83. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/util/typed.py +0 -0
  84. {nshtrainer-0.10.14 → nshtrainer-0.10.16}/src/nshtrainer/util/typing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nshtrainer
3
- Version: 0.10.14
3
+ Version: 0.10.16
4
4
  Summary:
5
5
  Author: Nima Shoghi
6
6
  Author-email: nimashoghi@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "nshtrainer"
3
- version = "0.10.14"
3
+ version = "0.10.16"
4
4
  description = ""
5
5
  authors = ["Nima Shoghi <nimashoghi@gmail.com>"]
6
6
  readme = "README.md"
@@ -105,10 +105,8 @@ def _write_checkpoint_metadata(
105
105
 
106
106
 
107
107
  def _remove_checkpoint_metadata(checkpoint_path: Path):
108
- for path in (
109
- checkpoint_path.with_suffix(METADATA_PATH_SUFFIX),
110
- checkpoint_path.with_suffix(HPARAMS_PATH_SUFFIX),
111
- ):
108
+ for suffix in (METADATA_PATH_SUFFIX, HPARAMS_PATH_SUFFIX):
109
+ path = checkpoint_path.with_suffix(suffix)
112
110
  try:
113
111
  path.unlink(missing_ok=True)
114
112
  except Exception as e:
@@ -122,11 +120,9 @@ def _link_checkpoint_metadata(checkpoint_path: Path, linked_checkpoint_path: Pat
122
120
  _remove_checkpoint_metadata(linked_checkpoint_path)
123
121
 
124
122
  # Link the metadata files to the new checkpoint
125
- for path in (
126
- checkpoint_path.with_suffix(METADATA_PATH_SUFFIX),
127
- checkpoint_path.with_suffix(HPARAMS_PATH_SUFFIX),
128
- ):
129
- linked_path = linked_checkpoint_path.with_suffix(path.suffix)
123
+ for suffix in (METADATA_PATH_SUFFIX, HPARAMS_PATH_SUFFIX):
124
+ path = checkpoint_path.with_suffix(suffix)
125
+ linked_path = linked_checkpoint_path.with_suffix(suffix)
130
126
  try:
131
127
  try:
132
128
  linked_path.symlink_to(path)
@@ -64,7 +64,7 @@ class LatestEpochCheckpoint(Checkpoint):
64
64
  filename = self.config.filename.format(
65
65
  epoch=trainer.current_epoch, step=trainer.global_step
66
66
  )
67
- filename = f"{self.PREFIX}{filename}.{self.EXTENSION}"
67
+ filename = f"{self.PREFIX}{filename}{self.EXTENSION}"
68
68
  return self.dirpath / filename
69
69
 
70
70
  def _remove_checkpoints(self, trainer: Trainer, ckpt_paths: list[Path]):
@@ -95,7 +95,9 @@ class LatestEpochCheckpoint(Checkpoint):
95
95
 
96
96
  def _save_new_checkpoint(self, trainer: Trainer):
97
97
  # Remove old checkpoints
98
- self._remove_old_checkpoints(trainer)
98
+ if trainer.is_global_zero:
99
+ self._remove_old_checkpoints(trainer)
100
+ trainer.strategy.barrier()
99
101
 
100
102
  # Save the new checkpoint
101
103
  filepath = self._ckpt_path(trainer)
@@ -315,9 +315,10 @@ class WandbLoggerConfig(CallbackConfigBase, BaseLoggerConfig):
315
315
  if pkg_resources.parse_version(
316
316
  wandb.__version__
317
317
  ) < pkg_resources.parse_version("0.17.5"):
318
- log.warning(
319
- "The version of WandB installed does not support the `wandb-core` backend. "
320
- "Unable to use the `wandb-core` backend for WandB."
318
+ raise ValueError(
319
+ "The version of WandB installed does not support the `wandb-core` backend "
320
+ f"(expected version >= 0.17.5, found version {wandb.__version__}). "
321
+ "Please either upgrade to a newer version of WandB or disable the `use_wandb_core` option."
321
322
  )
322
323
  else:
323
324
  wandb.require("core")
File without changes