nshtrainer 1.0.0b45__tar.gz → 1.0.0b46__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/PKG-INFO +1 -1
  2. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/pyproject.toml +1 -1
  3. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/_checkpoint/metadata.py +20 -5
  4. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/_checkpoint/saver.py +6 -2
  5. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/checkpoint/_base.py +1 -1
  6. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/path.py +2 -1
  7. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/README.md +0 -0
  8. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/.nshconfig.generated.json +0 -0
  9. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/__init__.py +0 -0
  10. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/_callback.py +0 -0
  11. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/_directory.py +0 -0
  12. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/_experimental/__init__.py +0 -0
  13. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/_hf_hub.py +0 -0
  14. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/__init__.py +0 -0
  15. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/actsave.py +0 -0
  16. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/base.py +0 -0
  17. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/checkpoint/__init__.py +0 -0
  18. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/checkpoint/best_checkpoint.py +0 -0
  19. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/checkpoint/last_checkpoint.py +0 -0
  20. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py +0 -0
  21. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/debug_flag.py +0 -0
  22. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/directory_setup.py +0 -0
  23. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/early_stopping.py +0 -0
  24. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/ema.py +0 -0
  25. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/finite_checks.py +0 -0
  26. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
  27. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/interval.py +0 -0
  28. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/log_epoch.py +0 -0
  29. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/lr_monitor.py +0 -0
  30. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/metric_validation.py +0 -0
  31. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/norm_logging.py +0 -0
  32. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/print_table.py +0 -0
  33. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/rlp_sanity_checks.py +0 -0
  34. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/shared_parameters.py +0 -0
  35. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/timer.py +0 -0
  36. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/wandb_upload_code.py +0 -0
  37. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
  38. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/.gitattributes +0 -0
  39. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/__init__.py +0 -0
  40. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/_checkpoint/__init__.py +0 -0
  41. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/_checkpoint/metadata/__init__.py +0 -0
  42. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/_directory/__init__.py +0 -0
  43. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/_hf_hub/__init__.py +0 -0
  44. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/__init__.py +0 -0
  45. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/actsave/__init__.py +0 -0
  46. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/base/__init__.py +0 -0
  47. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/checkpoint/__init__.py +0 -0
  48. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/checkpoint/_base/__init__.py +0 -0
  49. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/checkpoint/best_checkpoint/__init__.py +0 -0
  50. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/checkpoint/last_checkpoint/__init__.py +0 -0
  51. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/checkpoint/on_exception_checkpoint/__init__.py +0 -0
  52. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/debug_flag/__init__.py +0 -0
  53. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/directory_setup/__init__.py +0 -0
  54. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/early_stopping/__init__.py +0 -0
  55. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/ema/__init__.py +0 -0
  56. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/finite_checks/__init__.py +0 -0
  57. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/gradient_skipping/__init__.py +0 -0
  58. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/log_epoch/__init__.py +0 -0
  59. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/lr_monitor/__init__.py +0 -0
  60. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/metric_validation/__init__.py +0 -0
  61. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/norm_logging/__init__.py +0 -0
  62. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/print_table/__init__.py +0 -0
  63. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/rlp_sanity_checks/__init__.py +0 -0
  64. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/shared_parameters/__init__.py +0 -0
  65. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/timer/__init__.py +0 -0
  66. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/wandb_upload_code/__init__.py +0 -0
  67. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/callbacks/wandb_watch/__init__.py +0 -0
  68. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/loggers/__init__.py +0 -0
  69. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/loggers/actsave/__init__.py +0 -0
  70. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/loggers/base/__init__.py +0 -0
  71. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/loggers/csv/__init__.py +0 -0
  72. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/loggers/tensorboard/__init__.py +0 -0
  73. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/loggers/wandb/__init__.py +0 -0
  74. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/lr_scheduler/__init__.py +0 -0
  75. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/lr_scheduler/base/__init__.py +0 -0
  76. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/lr_scheduler/linear_warmup_cosine/__init__.py +0 -0
  77. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/lr_scheduler/reduce_lr_on_plateau/__init__.py +0 -0
  78. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/metrics/__init__.py +0 -0
  79. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/metrics/_config/__init__.py +0 -0
  80. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/nn/__init__.py +0 -0
  81. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/nn/mlp/__init__.py +0 -0
  82. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/nn/nonlinearity/__init__.py +0 -0
  83. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/optimizer/__init__.py +0 -0
  84. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/profiler/__init__.py +0 -0
  85. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/profiler/_base/__init__.py +0 -0
  86. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/profiler/advanced/__init__.py +0 -0
  87. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/profiler/pytorch/__init__.py +0 -0
  88. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/profiler/simple/__init__.py +0 -0
  89. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/__init__.py +0 -0
  90. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/_config/__init__.py +0 -0
  91. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/accelerator/__init__.py +0 -0
  92. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/plugin/__init__.py +0 -0
  93. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/plugin/base/__init__.py +0 -0
  94. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/plugin/environment/__init__.py +0 -0
  95. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/plugin/io/__init__.py +0 -0
  96. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/plugin/layer_sync/__init__.py +0 -0
  97. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/plugin/precision/__init__.py +0 -0
  98. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/strategy/__init__.py +0 -0
  99. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/trainer/trainer/__init__.py +0 -0
  100. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/util/__init__.py +0 -0
  101. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/util/_environment_info/__init__.py +0 -0
  102. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/util/config/__init__.py +0 -0
  103. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/util/config/dtype/__init__.py +0 -0
  104. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/configs/util/config/duration/__init__.py +0 -0
  105. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/data/__init__.py +0 -0
  106. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
  107. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/data/datamodule.py +0 -0
  108. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/data/transform.py +0 -0
  109. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/loggers/__init__.py +0 -0
  110. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/loggers/actsave.py +0 -0
  111. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/loggers/base.py +0 -0
  112. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/loggers/csv.py +0 -0
  113. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/loggers/tensorboard.py +0 -0
  114. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/loggers/wandb.py +0 -0
  115. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
  116. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/lr_scheduler/base.py +0 -0
  117. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
  118. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
  119. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/metrics/__init__.py +0 -0
  120. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/metrics/_config.py +0 -0
  121. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/model/__init__.py +0 -0
  122. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/model/base.py +0 -0
  123. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/model/mixins/callback.py +0 -0
  124. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/model/mixins/debug.py +0 -0
  125. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/model/mixins/logger.py +0 -0
  126. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/nn/__init__.py +0 -0
  127. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/nn/mlp.py +0 -0
  128. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/nn/module_dict.py +0 -0
  129. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/nn/module_list.py +0 -0
  130. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/nn/nonlinearity.py +0 -0
  131. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/optimizer.py +0 -0
  132. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/profiler/__init__.py +0 -0
  133. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/profiler/_base.py +0 -0
  134. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/profiler/advanced.py +0 -0
  135. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/profiler/pytorch.py +0 -0
  136. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/profiler/simple.py +0 -0
  137. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/__init__.py +0 -0
  138. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/_config.py +0 -0
  139. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
  140. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/accelerator.py +0 -0
  141. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/plugin/__init__.py +0 -0
  142. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/plugin/base.py +0 -0
  143. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/plugin/environment.py +0 -0
  144. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/plugin/io.py +0 -0
  145. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/plugin/layer_sync.py +0 -0
  146. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/plugin/precision.py +0 -0
  147. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/signal_connector.py +0 -0
  148. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/strategy.py +0 -0
  149. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/trainer/trainer.py +0 -0
  150. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/_environment_info.py +0 -0
  151. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/bf16.py +0 -0
  152. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/config/__init__.py +0 -0
  153. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/config/dtype.py +0 -0
  154. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/config/duration.py +0 -0
  155. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/environment.py +0 -0
  156. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/seed.py +0 -0
  157. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/slurm.py +0 -0
  158. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/typed.py +0 -0
  159. {nshtrainer-1.0.0b45 → nshtrainer-1.0.0b46}/src/nshtrainer/util/typing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: nshtrainer
3
- Version: 1.0.0b45
3
+ Version: 1.0.0b46
4
4
  Summary:
5
5
  Author: Nima Shoghi
6
6
  Author-email: nimashoghi@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "nshtrainer"
3
- version = "1.0.0-beta45"
3
+ version = "1.0.0-beta46"
4
4
  description = ""
5
5
  authors = ["Nima Shoghi <nimashoghi@gmail.com>"]
6
6
  readme = "README.md"
@@ -139,15 +139,30 @@ def remove_checkpoint_metadata(checkpoint_path: Path):
139
139
  log.debug(f"Removed {path}")
140
140
 
141
141
 
142
+ def remove_checkpoint_metadata_link(ckpt_link_path: Path):
143
+ path = _metadata_path(ckpt_link_path)
144
+ # If the metadata does not exist, we can safely ignore this
145
+ if not path.exists(follow_symlinks=False):
146
+ # This is EXTREMELY important here
147
+ # Otherwise, we've already deleted the file that the symlink
148
+ # used to point to, so this always returns False
149
+ log.debug(f"Metadata file does not exist: {path}")
150
+ return
151
+
152
+ # If the metadata exists, we can remove it
153
+ try:
154
+ path.unlink(missing_ok=True)
155
+ except Exception:
156
+ log.warning(f"Failed to remove {path}", exc_info=True)
157
+ else:
158
+ log.debug(f"Removed {path}")
159
+
160
+
142
161
  def link_checkpoint_metadata(checkpoint_path: Path, linked_checkpoint_path: Path):
143
162
  # First, remove any existing metadata files
144
- remove_checkpoint_metadata(linked_checkpoint_path)
163
+ remove_checkpoint_metadata_link(linked_checkpoint_path)
145
164
 
146
165
  # Link the metadata files to the new checkpoint
147
166
  path = _metadata_path(checkpoint_path)
148
167
  linked_path = _metadata_path(linked_checkpoint_path)
149
-
150
- if not path.exists():
151
- raise FileNotFoundError(f"Checkpoint path does not exist: {checkpoint_path}")
152
-
153
168
  try_symlink_or_copy(path, linked_path)
@@ -8,7 +8,11 @@ from pathlib import Path
8
8
  from lightning.pytorch import Trainer
9
9
 
10
10
  from ..util.path import try_symlink_or_copy
11
- from .metadata import link_checkpoint_metadata, remove_checkpoint_metadata
11
+ from .metadata import (
12
+ link_checkpoint_metadata,
13
+ remove_checkpoint_metadata,
14
+ remove_checkpoint_metadata_link,
15
+ )
12
16
 
13
17
  log = logging.getLogger(__name__)
14
18
 
@@ -39,7 +43,7 @@ def link_checkpoint(
39
43
  log.debug(f"Removed {linkpath=}")
40
44
 
41
45
  if metadata:
42
- remove_checkpoint_metadata(linkpath)
46
+ remove_checkpoint_metadata_link(linkpath)
43
47
 
44
48
  try_symlink_or_copy(filepath, linkpath)
45
49
  if metadata:
@@ -160,7 +160,7 @@ class CheckpointBase(Checkpoint, ABC, Generic[TConfig]):
160
160
  filepath = self.resolve_checkpoint_path(self.current_metrics(trainer))
161
161
  trainer.save_checkpoint(filepath, self.config.save_weights_only)
162
162
 
163
- if trainer.is_global_zero:
163
+ if trainer.hparams.save_checkpoint_metadata and trainer.is_global_zero:
164
164
  # Remove old checkpoints
165
165
  self.remove_old_checkpoints(trainer)
166
166
 
@@ -120,7 +120,8 @@ def try_symlink_or_copy(
120
120
  shutil.copy(file_path, link_path)
121
121
  else:
122
122
  link_path.symlink_to(
123
- symlink_target, target_is_directory=target_is_directory
123
+ symlink_target,
124
+ target_is_directory=target_is_directory,
124
125
  )
125
126
  except Exception:
126
127
  log.warning(
File without changes