nshtrainer 0.8.7__tar.gz → 0.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/PKG-INFO +1 -1
  2. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/pyproject.toml +1 -1
  3. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/model/config.py +47 -10
  4. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/README.md +0 -0
  5. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/__init__.py +0 -0
  6. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/_experimental/__init__.py +0 -0
  7. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/_experimental/flops/__init__.py +0 -0
  8. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/_experimental/flops/flop_counter.py +0 -0
  9. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/_experimental/flops/module_tracker.py +0 -0
  10. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/actsave/__init__.py +0 -0
  11. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/actsave/_callback.py +0 -0
  12. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/__init__.py +0 -0
  13. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/_throughput_monitor_callback.py +0 -0
  14. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/base.py +0 -0
  15. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/early_stopping.py +0 -0
  16. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/ema.py +0 -0
  17. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/finite_checks.py +0 -0
  18. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
  19. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/interval.py +0 -0
  20. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/latest_epoch_checkpoint.py +0 -0
  21. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/log_epoch.py +0 -0
  22. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/norm_logging.py +0 -0
  23. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/on_exception_checkpoint.py +0 -0
  24. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/print_table.py +0 -0
  25. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/throughput_monitor.py +0 -0
  26. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/timer.py +0 -0
  27. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
  28. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/data/__init__.py +0 -0
  29. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
  30. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/data/transform.py +0 -0
  31. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/__init__.py +0 -0
  32. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/_experimental.py +0 -0
  33. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/actsave.py +0 -0
  34. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/callbacks.py +0 -0
  35. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/config.py +0 -0
  36. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/data.py +0 -0
  37. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/log.py +0 -0
  38. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/lr_scheduler.py +0 -0
  39. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/model.py +0 -0
  40. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/nn.py +0 -0
  41. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/optimizer.py +0 -0
  42. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/runner.py +0 -0
  43. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/snapshot.py +0 -0
  44. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/snoop.py +0 -0
  45. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/trainer.py +0 -0
  46. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/typecheck.py +0 -0
  47. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/ll/util.py +0 -0
  48. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
  49. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/lr_scheduler/_base.py +0 -0
  50. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
  51. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
  52. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/model/__init__.py +0 -0
  53. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/model/base.py +0 -0
  54. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/model/modules/callback.py +0 -0
  55. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/model/modules/debug.py +0 -0
  56. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/model/modules/distributed.py +0 -0
  57. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/model/modules/logger.py +0 -0
  58. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/model/modules/profiler.py +0 -0
  59. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/model/modules/rlp_sanity_checks.py +0 -0
  60. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/model/modules/shared_parameters.py +0 -0
  61. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/nn/__init__.py +0 -0
  62. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/nn/mlp.py +0 -0
  63. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/nn/module_dict.py +0 -0
  64. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/nn/module_list.py +0 -0
  65. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/nn/nonlinearity.py +0 -0
  66. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/optimizer.py +0 -0
  67. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/runner.py +0 -0
  68. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/scripts/check_env.py +0 -0
  69. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/scripts/find_packages.py +0 -0
  70. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/trainer/__init__.py +0 -0
  71. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/trainer/signal_connector.py +0 -0
  72. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/trainer/trainer.py +0 -0
  73. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/util/environment.py +0 -0
  74. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/util/seed.py +0 -0
  75. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/util/slurm.py +0 -0
  76. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/util/typed.py +0 -0
  77. {nshtrainer-0.8.7 → nshtrainer-0.9.1}/src/nshtrainer/util/typing_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nshtrainer
3
- Version: 0.8.7
3
+ Version: 0.9.1
4
4
  Summary:
5
5
  Author: Nima Shoghi
6
6
  Author-email: nimashoghi@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "nshtrainer"
3
- version = "0.8.7"
3
+ version = "0.9.1"
4
4
  description = ""
5
5
  authors = ["Nima Shoghi <nimashoghi@gmail.com>"]
6
6
  readme = "README.md"
@@ -805,6 +805,36 @@ class CheckpointLoadingConfig(C.Config):
805
805
  """
806
806
 
807
807
 
808
+ def _create_symlink_to_nshrunner(base_dir: Path):
809
+ # Resolve the current nshrunner session directory
810
+ if not (session_dir := os.environ.get("NSHRUNNER_SESSION_DIR")):
811
+ log.warning("NSHRUNNER_SESSION_DIR is not set. Skipping symlink creation.")
812
+ return
813
+ session_dir = Path(session_dir)
814
+ if not session_dir.exists() or not session_dir.is_dir():
815
+ log.warning(
816
+ f"NSHRUNNER_SESSION_DIR is not a valid directory: {session_dir}. "
817
+ "Skipping symlink creation."
818
+ )
819
+ return
820
+
821
+ # Create the symlink
822
+ symlink_path = base_dir / "nshrunner"
823
+ if symlink_path.exists():
824
+ # If it already points to the correct directory, we're done
825
+ if symlink_path.resolve() == session_dir.resolve():
826
+ return
827
+
828
+ # Otherwise, we should log a warning and remove the existing symlink
829
+ log.warning(
830
+ f"A symlink pointing to {symlink_path.resolve()} already exists at {symlink_path}. "
831
+ "Removing the existing symlink."
832
+ )
833
+ symlink_path.unlink()
834
+
835
+ symlink_path.symlink_to(session_dir)
836
+
837
+
808
838
  class DirectoryConfig(C.Config):
809
839
  project_root: Path | None = None
810
840
  """
@@ -813,30 +843,33 @@ class DirectoryConfig(C.Config):
813
843
  This isn't specific to the run; it is the parent directory of all runs.
814
844
  """
815
845
 
846
+ create_symlink_to_nshrunner_root: bool = True
847
+ """Should we create a symlink to the root folder for the Runner (if we're in one)?"""
848
+
816
849
  log: Path | None = None
817
- """Base directory for all experiment tracking (e.g., WandB, Tensorboard, etc.) files. If None, will use lltrainer/{id}/log/."""
850
+ """Base directory for all experiment tracking (e.g., WandB, Tensorboard, etc.) files. If None, will use nshtrainer/{id}/log/."""
818
851
 
819
852
  stdio: Path | None = None
820
- """stdout/stderr log directory to use for the trainer. If None, will use lltrainer/{id}/stdio/."""
853
+ """stdout/stderr log directory to use for the trainer. If None, will use nshtrainer/{id}/stdio/."""
821
854
 
822
855
  checkpoint: Path | None = None
823
- """Checkpoint directory to use for the trainer. If None, will use lltrainer/{id}/checkpoint/."""
856
+ """Checkpoint directory to use for the trainer. If None, will use nshtrainer/{id}/checkpoint/."""
824
857
 
825
858
  activation: Path | None = None
826
- """Activation directory to use for the trainer. If None, will use lltrainer/{id}/activation/."""
859
+ """Activation directory to use for the trainer. If None, will use nshtrainer/{id}/activation/."""
827
860
 
828
861
  profile: Path | None = None
829
- """Directory to save profiling information to. If None, will use lltrainer/{id}/profile/."""
862
+ """Directory to save profiling information to. If None, will use nshtrainer/{id}/profile/."""
830
863
 
831
864
  def resolve_run_root_directory(self, run_id: str) -> Path:
832
865
  if (project_root_dir := self.project_root) is None:
833
866
  project_root_dir = Path.cwd()
834
867
 
835
- # The default base dir is $CWD/lltrainer/{id}/
836
- base_dir = project_root_dir / "lltrainer"
868
+ # The default base dir is $CWD/nshtrainer/{id}/
869
+ base_dir = project_root_dir / "nshtrainer"
837
870
  base_dir.mkdir(exist_ok=True)
838
871
 
839
- # Add a .gitignore file to the lltrainer directory
872
+ # Add a .gitignore file to the nshtrainer directory
840
873
  # which will ignore all files except for the .gitignore file itself
841
874
  gitignore_path = base_dir / ".gitignore"
842
875
  if not gitignore_path.exists():
@@ -846,6 +879,10 @@ class DirectoryConfig(C.Config):
846
879
  base_dir = base_dir / run_id
847
880
  base_dir.mkdir(exist_ok=True)
848
881
 
882
+ # Create a symlink to the root folder for the Runner
883
+ if self.create_symlink_to_nshrunner_root:
884
+ _create_symlink_to_nshrunner(base_dir)
885
+
849
886
  return base_dir
850
887
 
851
888
  def resolve_subdirectory(
@@ -854,7 +891,7 @@ class DirectoryConfig(C.Config):
854
891
  # subdirectory: Literal["log", "stdio", "checkpoint", "activation", "profile"],
855
892
  subdirectory: str,
856
893
  ) -> Path:
857
- # The subdir will be $CWD/lltrainer/{id}/{log, stdio, checkpoint, activation}/
894
+ # The subdir will be $CWD/nshtrainer/{id}/{log, stdio, checkpoint, activation}/
858
895
  if (subdir := getattr(self, subdirectory, None)) is not None:
859
896
  assert isinstance(
860
897
  subdir, Path
@@ -874,7 +911,7 @@ class DirectoryConfig(C.Config):
874
911
  if (log_dir := logger.log_dir) is not None:
875
912
  return log_dir
876
913
 
877
- # Save to lltrainer/{id}/log/{logger kind}/{id}/
914
+ # Save to nshtrainer/{id}/log/{logger kind}/{id}/
878
915
  log_dir = self.resolve_subdirectory(run_id, "log")
879
916
  log_dir = log_dir / logger.kind
880
917
 
File without changes