nshtrainer 0.11.3__tar.gz → 0.11.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/PKG-INFO +1 -1
  2. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/pyproject.toml +1 -1
  3. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/best_checkpoint.py +27 -10
  4. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/README.md +0 -0
  5. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/__init__.py +0 -0
  6. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_checkpoint/loader.py +0 -0
  7. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_checkpoint/metadata.py +0 -0
  8. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_checkpoint/saver.py +0 -0
  9. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_experimental/__init__.py +0 -0
  10. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_experimental/flops/__init__.py +0 -0
  11. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_experimental/flops/flop_counter.py +0 -0
  12. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/_experimental/flops/module_tracker.py +0 -0
  13. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/__init__.py +0 -0
  14. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/_throughput_monitor_callback.py +0 -0
  15. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/actsave.py +0 -0
  16. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/base.py +0 -0
  17. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/__init__.py +0 -0
  18. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/latest_epoch_checkpoint.py +0 -0
  19. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/model_checkpoint.py +0 -0
  20. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py +0 -0
  21. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/early_stopping.py +0 -0
  22. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/ema.py +0 -0
  23. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/finite_checks.py +0 -0
  24. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
  25. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/interval.py +0 -0
  26. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/log_epoch.py +0 -0
  27. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/norm_logging.py +0 -0
  28. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/print_table.py +0 -0
  29. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/throughput_monitor.py +0 -0
  30. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/timer.py +0 -0
  31. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
  32. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/data/__init__.py +0 -0
  33. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
  34. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/data/transform.py +0 -0
  35. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/__init__.py +0 -0
  36. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/_experimental.py +0 -0
  37. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/actsave.py +0 -0
  38. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/callbacks.py +0 -0
  39. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/config.py +0 -0
  40. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/data.py +0 -0
  41. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/log.py +0 -0
  42. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/lr_scheduler.py +0 -0
  43. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/model.py +0 -0
  44. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/nn.py +0 -0
  45. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/optimizer.py +0 -0
  46. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/runner.py +0 -0
  47. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/snapshot.py +0 -0
  48. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/snoop.py +0 -0
  49. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/trainer.py +0 -0
  50. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/typecheck.py +0 -0
  51. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/ll/util.py +0 -0
  52. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
  53. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/lr_scheduler/_base.py +0 -0
  54. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
  55. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
  56. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/metrics/__init__.py +0 -0
  57. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/metrics/_config.py +0 -0
  58. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/__init__.py +0 -0
  59. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/base.py +0 -0
  60. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/config.py +0 -0
  61. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/callback.py +0 -0
  62. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/debug.py +0 -0
  63. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/distributed.py +0 -0
  64. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/logger.py +0 -0
  65. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/profiler.py +0 -0
  66. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/rlp_sanity_checks.py +0 -0
  67. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/model/modules/shared_parameters.py +0 -0
  68. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/nn/__init__.py +0 -0
  69. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/nn/mlp.py +0 -0
  70. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/nn/module_dict.py +0 -0
  71. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/nn/module_list.py +0 -0
  72. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/nn/nonlinearity.py +0 -0
  73. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/optimizer.py +0 -0
  74. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/runner.py +0 -0
  75. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/scripts/find_packages.py +0 -0
  76. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/trainer/__init__.py +0 -0
  77. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
  78. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/trainer/checkpoint_connector.py +0 -0
  79. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/trainer/signal_connector.py +0 -0
  80. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/trainer/trainer.py +0 -0
  81. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/_environment_info.py +0 -0
  82. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/_useful_types.py +0 -0
  83. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/environment.py +0 -0
  84. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/seed.py +0 -0
  85. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/slurm.py +0 -0
  86. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/typed.py +0 -0
  87. {nshtrainer-0.11.3 → nshtrainer-0.11.5}/src/nshtrainer/util/typing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nshtrainer
3
- Version: 0.11.3
3
+ Version: 0.11.5
4
4
  Summary:
5
5
  Author: Nima Shoghi
6
6
  Author-email: nimashoghi@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "nshtrainer"
3
- version = "0.11.3"
3
+ version = "0.11.5"
4
4
  description = ""
5
5
  authors = ["Nima Shoghi <nimashoghi@gmail.com>"]
6
6
  readme = "README.md"
@@ -106,6 +106,26 @@ class BestCheckpoint(Checkpoint):
106
106
  reverse=(self.metric.mode == "min"),
107
107
  )
108
108
 
109
+ def _create_symlink(self, trainer: Trainer, best_ckpt_path: Path):
110
+ # Resolve the symlink filename
111
+ if (symlink_filename := self._best_symlink_filename()) is None:
112
+ return
113
+
114
+ # If the symlink already exists and points to the best checkpoint,
115
+ # then we don't need to create a new symlink.
116
+ symlink_path = self.dirpath / symlink_filename
117
+ if symlink_path.exists() and symlink_path.resolve() == best_ckpt_path:
118
+ return
119
+
120
+ _link_checkpoint(
121
+ trainer,
122
+ best_ckpt_path,
123
+ symlink_path,
124
+ metadata=True,
125
+ barrier=False,
126
+ )
127
+ log.debug(f"Created best symlink: {symlink_path}")
128
+
109
129
  def _save_best_checkpoint(self, trainer: Trainer):
110
130
  if (current := self._get_metric_value(trainer.callback_metrics)) is None:
111
131
  log.warning(
@@ -130,6 +150,7 @@ class BestCheckpoint(Checkpoint):
130
150
  # Save the current model
131
151
  filepath = self._ckpt_path(trainer)
132
152
  trainer.save_checkpoint(filepath, self.config.save_weights_only)
153
+ log.debug(f"Saved best checkpoint: {filepath}")
133
154
 
134
155
  # Remove worst checkpoint if we've reached save_top_k
135
156
  # NOTE: We add 1 to save_top_k here because we have just saved a new checkpoint
@@ -143,13 +164,9 @@ class BestCheckpoint(Checkpoint):
143
164
  )
144
165
 
145
166
  # Create symlink to best model
146
- if (symlink_filename := self._best_symlink_filename()) is not None:
147
- symlink_path = self.dirpath / symlink_filename
148
- _link_checkpoint(
149
- trainer,
150
- filepath,
151
- symlink_path,
152
- barrier=True,
153
- metadata=True,
154
- )
155
- log.debug(f"Created best symlink: {symlink_path}")
167
+ if sorted_ckpts:
168
+ _, best_ckpt_path = sorted_ckpts[0]
169
+ self._create_symlink(trainer, best_ckpt_path)
170
+
171
+ # Barrier to ensure all processes have saved the checkpoint before continuing
172
+ trainer.strategy.barrier()
File without changes