nshtrainer 1.3.3__tar.gz → 1.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/PKG-INFO +2 -2
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/pyproject.toml +2 -2
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/_hf_hub.py +3 -11
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/early_stopping.py +17 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/wandb_upload_code.py +5 -17
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/signal_connector.py +12 -7
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/_environment_info.py +14 -6
- nshtrainer-1.3.5/src/nshtrainer/util/code_upload.py +40 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/README.md +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/.nshconfig.generated.json +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/_callback.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/_checkpoint/metadata.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/_checkpoint/saver.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/_directory.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/_experimental/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/actsave.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/base.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/checkpoint/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/checkpoint/_base.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/checkpoint/best_checkpoint.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/checkpoint/last_checkpoint.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/debug_flag.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/directory_setup.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/distributed_prediction_writer.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/ema.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/finite_checks.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/interval.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/log_epoch.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/lr_monitor.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/metric_validation.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/norm_logging.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/print_table.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/rlp_sanity_checks.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/shared_parameters.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/timer.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/.gitattributes +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/_checkpoint/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/_checkpoint/metadata/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/_directory/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/_hf_hub/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/actsave/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/base/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/checkpoint/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/checkpoint/_base/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/checkpoint/best_checkpoint/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/checkpoint/last_checkpoint/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/checkpoint/on_exception_checkpoint/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/debug_flag/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/directory_setup/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/distributed_prediction_writer/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/early_stopping/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/ema/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/finite_checks/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/gradient_skipping/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/log_epoch/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/lr_monitor/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/metric_validation/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/norm_logging/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/print_table/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/rlp_sanity_checks/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/shared_parameters/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/timer/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/wandb_upload_code/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/wandb_watch/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/loggers/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/loggers/actsave/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/loggers/base/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/loggers/csv/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/loggers/tensorboard/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/loggers/wandb/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/lr_scheduler/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/lr_scheduler/base/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/lr_scheduler/linear_warmup_cosine/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/lr_scheduler/reduce_lr_on_plateau/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/metrics/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/metrics/_config/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/nn/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/nn/mlp/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/nn/nonlinearity/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/nn/rng/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/optimizer/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/profiler/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/profiler/_base/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/profiler/advanced/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/profiler/pytorch/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/profiler/simple/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/_config/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/accelerator/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/plugin/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/plugin/base/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/plugin/environment/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/plugin/io/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/plugin/layer_sync/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/plugin/precision/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/strategy/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/trainer/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/util/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/util/_environment_info/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/util/config/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/util/config/dtype/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/util/config/duration/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/data/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/data/datamodule.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/data/transform.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/loggers/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/loggers/actsave.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/loggers/base.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/loggers/csv.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/loggers/tensorboard.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/loggers/wandb.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/lr_scheduler/base.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/metrics/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/metrics/_config.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/model/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/model/base.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/model/mixins/callback.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/model/mixins/debug.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/model/mixins/logger.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/nn/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/nn/mlp.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/nn/module_dict.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/nn/module_list.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/nn/nonlinearity.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/nn/rng.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/optimizer.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/profiler/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/profiler/_base.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/profiler/advanced.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/profiler/pytorch.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/profiler/simple.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/_config.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/_distributed_prediction_result.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/_log_hparams.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/accelerator.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/plugin/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/plugin/base.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/plugin/environment.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/plugin/io.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/plugin/layer_sync.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/plugin/precision.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/strategy.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/trainer.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/bf16.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/config/__init__.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/config/dtype.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/config/duration.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/environment.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/path.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/seed.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/slurm.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/typed.py +0 -0
- {nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/util/typing_utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: nshtrainer
|
3
|
-
Version: 1.3.3
|
3
|
+
Version: 1.3.5
|
4
4
|
Summary:
|
5
5
|
Author: Nima Shoghi
|
6
6
|
Author-email: nimashoghi@gmail.com
|
@@ -15,7 +15,7 @@ Requires-Dist: GitPython ; extra == "extra"
|
|
15
15
|
Requires-Dist: huggingface-hub ; extra == "extra"
|
16
16
|
Requires-Dist: lightning
|
17
17
|
Requires-Dist: nshconfig (>0.39)
|
18
|
-
Requires-Dist: nshrunner
|
18
|
+
Requires-Dist: nshrunner ; extra == "extra"
|
19
19
|
Requires-Dist: nshutils ; extra == "extra"
|
20
20
|
Requires-Dist: numpy
|
21
21
|
Requires-Dist: packaging
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "nshtrainer"
|
3
|
-
version = "1.3.3"
|
3
|
+
version = "1.3.5"
|
4
4
|
description = ""
|
5
5
|
authors = [{ name = "Nima Shoghi", email = "nimashoghi@gmail.com" }]
|
6
6
|
requires-python = ">=3.10,<4.0"
|
@@ -8,7 +8,6 @@ readme = "README.md"
|
|
8
8
|
|
9
9
|
dependencies = [
|
10
10
|
"nshconfig>0.39",
|
11
|
-
"nshrunner",
|
12
11
|
"psutil",
|
13
12
|
"numpy",
|
14
13
|
"torch",
|
@@ -20,6 +19,7 @@ dependencies = [
|
|
20
19
|
|
21
20
|
[project.optional-dependencies]
|
22
21
|
extra = [
|
22
|
+
"nshrunner",
|
23
23
|
"wrapt",
|
24
24
|
"GitPython",
|
25
25
|
"wandb",
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import contextlib
|
4
4
|
import logging
|
5
|
-
import os
|
6
5
|
import re
|
7
6
|
from dataclasses import dataclass
|
8
7
|
from functools import cached_property
|
@@ -10,7 +9,6 @@ from pathlib import Path
|
|
10
9
|
from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast
|
11
10
|
|
12
11
|
import nshconfig as C
|
13
|
-
from nshrunner._env import SNAPSHOT_DIR
|
14
12
|
from typing_extensions import assert_never, override
|
15
13
|
|
16
14
|
from ._callback import NTCallbackBase
|
@@ -19,6 +17,7 @@ from .callbacks.base import (
|
|
19
17
|
CallbackMetadataConfig,
|
20
18
|
callback_registry,
|
21
19
|
)
|
20
|
+
from .util.code_upload import get_code_dir
|
22
21
|
|
23
22
|
if TYPE_CHECKING:
|
24
23
|
from huggingface_hub import HfApi # noqa: F401
|
@@ -319,20 +318,13 @@ class HFHubCallback(NTCallbackBase):
|
|
319
318
|
def _save_code(self):
|
320
319
|
# If a snapshot has been taken (which can be detected using the SNAPSHOT_DIR env),
|
321
320
|
# then upload all contents within the snapshot directory to the repository.
|
322
|
-
if not (snapshot_dir := os.environ.get(SNAPSHOT_DIR)):
|
321
|
+
if (snapshot_dir := get_code_dir()) is None:
|
323
322
|
log.debug("No snapshot directory found. Skipping upload.")
|
324
323
|
return
|
325
324
|
|
326
325
|
with self._with_error_handling("save code"):
|
327
|
-
snapshot_dir = Path(snapshot_dir)
|
328
|
-
if not snapshot_dir.exists() or not snapshot_dir.is_dir():
|
329
|
-
log.warning(
|
330
|
-
f"Snapshot directory '{snapshot_dir}' does not exist or is not a directory."
|
331
|
-
)
|
332
|
-
return
|
333
|
-
|
334
326
|
self.api.upload_folder(
|
335
|
-
folder_path=str(snapshot_dir),
|
327
|
+
folder_path=str(snapshot_dir.absolute()),
|
336
328
|
repo_id=self.repo_id,
|
337
329
|
repo_type="model",
|
338
330
|
path_in_repo="code", # Prefix with "code" folder
|
@@ -43,6 +43,13 @@ class EarlyStoppingCallbackConfig(CallbackConfigBase):
|
|
43
43
|
the training will be stopped.
|
44
44
|
"""
|
45
45
|
|
46
|
+
skip_first_n_epochs: int = 0
|
47
|
+
"""
|
48
|
+
Number of initial epochs to skip before starting to monitor for early stopping.
|
49
|
+
This helps avoid false early stopping when the model might temporarily perform worse
|
50
|
+
during early training phases.
|
51
|
+
"""
|
52
|
+
|
46
53
|
strict: bool = True
|
47
54
|
"""
|
48
55
|
Whether to enforce that the monitored quantity must improve by at least `min_delta`
|
@@ -94,6 +101,16 @@ class EarlyStoppingCallback(_EarlyStopping):
|
|
94
101
|
if getattr(trainer, "fast_dev_run", False):
|
95
102
|
return
|
96
103
|
|
104
|
+
# Skip early stopping check for the first n epochs
|
105
|
+
if trainer.current_epoch < self.config.skip_first_n_epochs:
|
106
|
+
if self.verbose and trainer.current_epoch == 0:
|
107
|
+
self._log_info(
|
108
|
+
trainer,
|
109
|
+
f"Early stopping checks are disabled for the first {self.config.skip_first_n_epochs} epochs",
|
110
|
+
self.log_rank_zero_only,
|
111
|
+
)
|
112
|
+
return
|
113
|
+
|
97
114
|
should_stop, reason = False, None
|
98
115
|
|
99
116
|
if not should_stop:
|
@@ -1,16 +1,14 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import logging
|
4
|
-
import os
|
5
|
-
from pathlib import Path
|
6
4
|
from typing import Literal, cast
|
7
5
|
|
8
6
|
from lightning.pytorch import LightningModule, Trainer
|
9
7
|
from lightning.pytorch.callbacks.callback import Callback
|
10
8
|
from lightning.pytorch.loggers import WandbLogger
|
11
|
-
from nshrunner._env import SNAPSHOT_DIR
|
12
9
|
from typing_extensions import final, override
|
13
10
|
|
11
|
+
from ..util.code_upload import get_code_dir
|
14
12
|
from .base import CallbackConfigBase, callback_registry
|
15
13
|
|
16
14
|
log = logging.getLogger(__name__)
|
@@ -62,22 +60,12 @@ class WandbUploadCodeCallback(Callback):
|
|
62
60
|
log.warning("Wandb logger not found. Skipping code upload.")
|
63
61
|
return
|
64
62
|
|
65
|
-
|
66
|
-
|
67
|
-
run = cast(Run, logger.experiment)
|
68
|
-
|
69
|
-
# If a snapshot has been taken (which can be detected using the SNAPSHOT_DIR env),
|
70
|
-
# then upload all contents within the snapshot directory to the repository.
|
71
|
-
if not (snapshot_dir := os.environ.get(SNAPSHOT_DIR)):
|
72
|
-
log.debug("No snapshot directory found. Skipping upload.")
|
63
|
+
if (snapshot_dir := get_code_dir()) is None:
|
64
|
+
log.info("No nshrunner snapshot found. Skipping code upload.")
|
73
65
|
return
|
74
66
|
|
75
|
-
|
76
|
-
if not snapshot_dir.exists() or not snapshot_dir.is_dir():
|
77
|
-
log.warning(
|
78
|
-
f"Snapshot directory '{snapshot_dir}' does not exist or is not a directory."
|
79
|
-
)
|
80
|
-
return
|
67
|
+
from wandb.wandb_run import Run
|
81
68
|
|
69
|
+
run = cast(Run, logger.experiment)
|
82
70
|
log.info(f"Uploading code from snapshot directory '{snapshot_dir}'")
|
83
71
|
run.log_code(str(snapshot_dir.absolute()))
|
@@ -14,7 +14,6 @@ from pathlib import Path
|
|
14
14
|
from types import FrameType
|
15
15
|
from typing import Any
|
16
16
|
|
17
|
-
import nshrunner as nr
|
18
17
|
import torch.utils.data
|
19
18
|
from lightning.fabric.plugins.environments.lsf import LSFEnvironment
|
20
19
|
from lightning.fabric.plugins.environments.slurm import SLURMEnvironment
|
@@ -34,6 +33,12 @@ _IS_WINDOWS = platform.system() == "Windows"
|
|
34
33
|
|
35
34
|
|
36
35
|
def _resolve_requeue_signals():
|
36
|
+
try:
|
37
|
+
import nshrunner as nr
|
38
|
+
except ImportError:
|
39
|
+
log.debug("nshrunner not found. Skipping signal requeueing.")
|
40
|
+
return None
|
41
|
+
|
37
42
|
if (session := nr.Session.from_current_session()) is None:
|
38
43
|
return None
|
39
44
|
|
@@ -52,9 +57,9 @@ class _SignalConnector(_LightningSignalConnector):
|
|
52
57
|
|
53
58
|
signals_set = set(signals)
|
54
59
|
valid_signals: set[signal.Signals] = signal.valid_signals()
|
55
|
-
assert signals_set.issubset(
|
56
|
-
valid_signals
|
57
|
-
), f"Invalid signal(s) found: {signals_set - valid_signals}"
|
60
|
+
assert signals_set.issubset(valid_signals), (
|
61
|
+
f"Invalid signal(s) found: {signals_set - valid_signals}"
|
62
|
+
)
|
58
63
|
return signals
|
59
64
|
|
60
65
|
def _compose_and_register(
|
@@ -241,9 +246,9 @@ class _SignalConnector(_LightningSignalConnector):
|
|
241
246
|
"Writing requeue script to exit script directory."
|
242
247
|
)
|
243
248
|
exit_script_dir = Path(exit_script_dir)
|
244
|
-
assert (
|
245
|
-
exit_script_dir.is_dir()
|
246
|
-
), f"Exit script directory {exit_script_dir} does not exist"
|
249
|
+
assert exit_script_dir.is_dir(), (
|
250
|
+
f"Exit script directory {exit_script_dir} does not exist"
|
251
|
+
)
|
247
252
|
|
248
253
|
exit_script_path = exit_script_dir / f"requeue_{job_id}.sh"
|
249
254
|
log.info(f"Writing requeue script to {exit_script_path}")
|
@@ -356,12 +356,20 @@ class EnvironmentSnapshotConfig(C.Config):
|
|
356
356
|
|
357
357
|
@classmethod
|
358
358
|
def from_current_environment(cls):
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
359
|
+
try:
|
360
|
+
import nshrunner as nr
|
361
|
+
|
362
|
+
if (session := nr.Session.from_current_session()) is None:
|
363
|
+
log.warning("No active session found, skipping snapshot information")
|
364
|
+
return cls.empty()
|
365
|
+
|
366
|
+
draft = cls.draft()
|
367
|
+
draft.snapshot_dir = session.snapshot_dir
|
368
|
+
draft.modules = session.snapshot_modules
|
369
|
+
return draft.finalize()
|
370
|
+
except ImportError:
|
371
|
+
log.warning("nshrunner not found, skipping snapshot information")
|
372
|
+
return cls.empty()
|
365
373
|
|
366
374
|
|
367
375
|
class EnvironmentPackageConfig(C.Config):
|
@@ -0,0 +1,40 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
from pathlib import Path
|
5
|
+
|
6
|
+
log = logging.getLogger(__name__)
|
7
|
+
|
8
|
+
|
9
|
+
def get_code_dir() -> Path | None:
|
10
|
+
try:
|
11
|
+
import nshrunner as nr
|
12
|
+
|
13
|
+
if (session := nr.Session.from_current_session()) is None:
|
14
|
+
log.debug("No active session found. Skipping code upload.")
|
15
|
+
return None
|
16
|
+
|
17
|
+
# New versions of nshrunner will have the code_dir attribute
|
18
|
+
# in the session object. We should use that. Otherwise, use snapshot_dir.
|
19
|
+
try:
|
20
|
+
code_dir = session.code_dir # type: ignore
|
21
|
+
except AttributeError:
|
22
|
+
code_dir = session.snapshot_dir
|
23
|
+
|
24
|
+
if code_dir is None:
|
25
|
+
log.debug("No code directory found. Skipping code upload.")
|
26
|
+
return None
|
27
|
+
|
28
|
+
assert isinstance(code_dir, Path), (
|
29
|
+
f"Code directory should be a Path object. Got {type(code_dir)} instead."
|
30
|
+
)
|
31
|
+
if not code_dir.exists() or not code_dir.is_dir():
|
32
|
+
log.warning(
|
33
|
+
f"Code directory '{code_dir}' does not exist or is not a directory."
|
34
|
+
)
|
35
|
+
return None
|
36
|
+
|
37
|
+
return code_dir
|
38
|
+
except ImportError:
|
39
|
+
log.debug("nshrunner not found. Skipping code upload.")
|
40
|
+
return None
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/checkpoint/best_checkpoint.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/checkpoint/last_checkpoint.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/callbacks/distributed_prediction_writer.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/_checkpoint/metadata/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/checkpoint/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/checkpoint/_base/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/debug_flag/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/directory_setup/__init__.py
RENAMED
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/early_stopping/__init__.py
RENAMED
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/finite_checks/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/gradient_skipping/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/log_epoch/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/lr_monitor/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/metric_validation/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/norm_logging/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/print_table/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/rlp_sanity_checks/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/shared_parameters/__init__.py
RENAMED
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/wandb_upload_code/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/callbacks/wandb_watch/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/loggers/tensorboard/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/accelerator/__init__.py
RENAMED
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/plugin/base/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/plugin/environment/__init__.py
RENAMED
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/plugin/layer_sync/__init__.py
RENAMED
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/trainer/plugin/precision/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/util/_environment_info/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/configs/util/config/duration/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nshtrainer-1.3.3 → nshtrainer-1.3.5}/src/nshtrainer/trainer/_distributed_prediction_result.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|