nshtrainer 0.35.1__tar.gz → 0.37.0__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/PKG-INFO +1 -1
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/pyproject.toml +1 -1
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/_checkpoint/saver.py +2 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/__init__.py +4 -1
- nshtrainer-0.37.0/src/nshtrainer/callbacks/wandb_upload_code.py +79 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/config.py +4 -1
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/loggers/wandb.py +43 -7
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/path.py +14 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/README.md +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/_callback.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/_checkpoint/loader.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/_checkpoint/metadata.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/_directory.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/_experimental/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/_hf_hub.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/_throughput_monitor_callback.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/actsave.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/base.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/checkpoint/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/checkpoint/_base.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/checkpoint/best_checkpoint.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/checkpoint/last_checkpoint.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/debug_flag.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/directory_setup.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/early_stopping.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/ema.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/finite_checks.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/interval.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/log_epoch.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/norm_logging.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/print_table.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/rlp_sanity_checks.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/shared_parameters.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/throughput_monitor.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/timer.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/data/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/data/transform.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/_experimental.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/actsave.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/callbacks.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/config.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/data.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/log.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/lr_scheduler.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/model.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/nn.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/optimizer.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/runner.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/snapshot.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/snoop.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/trainer.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/typecheck.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/ll/util.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/loggers/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/loggers/_base.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/loggers/csv.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/loggers/tensorboard.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/lr_scheduler/_base.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/metrics/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/metrics/_config.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/model/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/model/base.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/model/config.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/model/mixins/callback.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/model/mixins/logger.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/nn/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/nn/mlp.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/nn/module_dict.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/nn/module_list.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/nn/nonlinearity.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/optimizer.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/profiler/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/profiler/_base.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/profiler/advanced.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/profiler/pytorch.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/profiler/simple.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/runner.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/scripts/find_packages.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/trainer/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/trainer/_config.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/trainer/checkpoint_connector.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/trainer/signal_connector.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/trainer/trainer.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/_environment_info.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/_useful_types.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/bf16.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/config/__init__.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/config/dtype.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/config/duration.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/environment.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/seed.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/slurm.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/typed.py +0 -0
- {nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/util/typing_utils.py +0 -0
|
@@ -39,6 +39,8 @@ from .shared_parameters import SharedParametersConfig as SharedParametersConfig
|
|
|
39
39
|
from .throughput_monitor import ThroughputMonitorConfig as ThroughputMonitorConfig
|
|
40
40
|
from .timer import EpochTimer as EpochTimer
|
|
41
41
|
from .timer import EpochTimerConfig as EpochTimerConfig
|
|
42
|
+
from .wandb_upload_code import WandbUploadCodeCallback as WandbUploadCodeCallback
|
|
43
|
+
from .wandb_upload_code import WandbUploadCodeConfig as WandbUploadCodeConfig
|
|
42
44
|
from .wandb_watch import WandbWatchCallback as WandbWatchCallback
|
|
43
45
|
from .wandb_watch import WandbWatchConfig as WandbWatchConfig
|
|
44
46
|
|
|
@@ -57,6 +59,7 @@ CallbackConfig = Annotated[
|
|
|
57
59
|
| OnExceptionCheckpointCallbackConfig
|
|
58
60
|
| SharedParametersConfig
|
|
59
61
|
| RLPSanityChecksConfig
|
|
60
|
-
| WandbWatchConfig,
|
|
62
|
+
| WandbWatchConfig
|
|
63
|
+
| WandbUploadCodeConfig,
|
|
61
64
|
C.Field(discriminator="name"),
|
|
62
65
|
]
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Literal, cast
|
|
5
|
+
|
|
6
|
+
from lightning.pytorch import LightningModule, Trainer
|
|
7
|
+
from lightning.pytorch.callbacks.callback import Callback
|
|
8
|
+
from lightning.pytorch.loggers import WandbLogger
|
|
9
|
+
from nshrunner._env import SNAPSHOT_DIR
|
|
10
|
+
from typing_extensions import override
|
|
11
|
+
|
|
12
|
+
from .base import CallbackConfigBase
|
|
13
|
+
|
|
14
|
+
log = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class WandbUploadCodeConfig(CallbackConfigBase):
|
|
18
|
+
name: Literal["wandb_upload_code"] = "wandb_upload_code"
|
|
19
|
+
|
|
20
|
+
enabled: bool = True
|
|
21
|
+
"""Enable uploading the code to wandb."""
|
|
22
|
+
|
|
23
|
+
def __bool__(self):
|
|
24
|
+
return self.enabled
|
|
25
|
+
|
|
26
|
+
@override
|
|
27
|
+
def create_callbacks(self, root_config):
|
|
28
|
+
if not self:
|
|
29
|
+
return
|
|
30
|
+
|
|
31
|
+
yield WandbUploadCodeCallback(self)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class WandbUploadCodeCallback(Callback):
|
|
35
|
+
def __init__(self, config: WandbUploadCodeConfig):
|
|
36
|
+
super().__init__()
|
|
37
|
+
|
|
38
|
+
self.config = config
|
|
39
|
+
|
|
40
|
+
@override
|
|
41
|
+
def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str):
|
|
42
|
+
if not self.config:
|
|
43
|
+
return
|
|
44
|
+
|
|
45
|
+
if not trainer.is_global_zero:
|
|
46
|
+
return
|
|
47
|
+
|
|
48
|
+
if (
|
|
49
|
+
logger := next(
|
|
50
|
+
(
|
|
51
|
+
logger
|
|
52
|
+
for logger in trainer.loggers
|
|
53
|
+
if isinstance(logger, WandbLogger)
|
|
54
|
+
),
|
|
55
|
+
None,
|
|
56
|
+
)
|
|
57
|
+
) is None:
|
|
58
|
+
log.warning("Wandb logger not found. Skipping code upload.")
|
|
59
|
+
return
|
|
60
|
+
|
|
61
|
+
from wandb.wandb_run import Run
|
|
62
|
+
|
|
63
|
+
run = cast(Run, logger.experiment)
|
|
64
|
+
|
|
65
|
+
# If a snapshot has been taken (which can be detected using the SNAPSHOT_DIR env),
|
|
66
|
+
# then upload all contents within the snapshot directory to the repository.
|
|
67
|
+
if not (snapshot_dir := os.environ.get(SNAPSHOT_DIR)):
|
|
68
|
+
log.debug("No snapshot directory found. Skipping upload.")
|
|
69
|
+
return
|
|
70
|
+
|
|
71
|
+
snapshot_dir = Path(snapshot_dir)
|
|
72
|
+
if not snapshot_dir.exists() or not snapshot_dir.is_dir():
|
|
73
|
+
log.warning(
|
|
74
|
+
f"Snapshot directory '{snapshot_dir}' does not exist or is not a directory."
|
|
75
|
+
)
|
|
76
|
+
return
|
|
77
|
+
|
|
78
|
+
log.info(f"Uploading code from snapshot directory '{snapshot_dir}'")
|
|
79
|
+
run.log_code(str(snapshot_dir.absolute()))
|
|
@@ -61,14 +61,17 @@ from nshtrainer.callbacks.throughput_monitor import (
|
|
|
61
61
|
ThroughputMonitorConfig as ThroughputMonitorConfig,
|
|
62
62
|
)
|
|
63
63
|
from nshtrainer.callbacks.timer import EpochTimerConfig as EpochTimerConfig
|
|
64
|
+
from nshtrainer.callbacks.wandb_upload_code import (
|
|
65
|
+
WandbUploadCodeConfig as WandbUploadCodeConfig,
|
|
66
|
+
)
|
|
64
67
|
from nshtrainer.callbacks.wandb_watch import WandbWatchConfig as WandbWatchConfig
|
|
68
|
+
from nshtrainer.config import LRSchedulerConfig as LRSchedulerConfig
|
|
65
69
|
from nshtrainer.loggers._base import BaseLoggerConfig as BaseLoggerConfig
|
|
66
70
|
from nshtrainer.loggers.csv import CSVLoggerConfig as CSVLoggerConfig
|
|
67
71
|
from nshtrainer.loggers.tensorboard import (
|
|
68
72
|
TensorboardLoggerConfig as TensorboardLoggerConfig,
|
|
69
73
|
)
|
|
70
74
|
from nshtrainer.loggers.wandb import WandbLoggerConfig as WandbLoggerConfig
|
|
71
|
-
from nshtrainer.lr_scheduler import LRSchedulerConfig as LRSchedulerConfig
|
|
72
75
|
from nshtrainer.lr_scheduler._base import LRSchedulerConfigBase as LRSchedulerConfigBase
|
|
73
76
|
from nshtrainer.lr_scheduler.linear_warmup_cosine import (
|
|
74
77
|
DurationConfig as DurationConfig,
|
|
@@ -5,9 +5,10 @@ from typing import TYPE_CHECKING, Literal
|
|
|
5
5
|
import nshconfig as C
|
|
6
6
|
from lightning.pytorch import Callback, LightningModule, Trainer
|
|
7
7
|
from packaging import version
|
|
8
|
-
from typing_extensions import override
|
|
8
|
+
from typing_extensions import assert_never, override
|
|
9
9
|
|
|
10
10
|
from ..callbacks.base import CallbackConfigBase
|
|
11
|
+
from ..callbacks.wandb_upload_code import WandbUploadCodeConfig
|
|
11
12
|
from ..callbacks.wandb_watch import WandbWatchConfig
|
|
12
13
|
from ._base import BaseLoggerConfig
|
|
13
14
|
|
|
@@ -82,15 +83,18 @@ class WandbLoggerConfig(CallbackConfigBase, BaseLoggerConfig):
|
|
|
82
83
|
project: str | None = None
|
|
83
84
|
"""WandB project name to use for the logger. If None, will use the root config's project name."""
|
|
84
85
|
|
|
85
|
-
log_model:
|
|
86
|
+
log_model: Literal["all", "latest", "none"] | bool = False
|
|
86
87
|
"""
|
|
87
88
|
Whether to log the model checkpoints to wandb.
|
|
88
89
|
Valid values are:
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
90
|
+
- "all": Log all checkpoints.
|
|
91
|
+
- "latest" or True: Log only the latest checkpoint.
|
|
92
|
+
- "none" or False: Do not log any checkpoints
|
|
92
93
|
"""
|
|
93
94
|
|
|
95
|
+
log_code: WandbUploadCodeConfig | None = None
|
|
96
|
+
"""WandB code upload configuration. Used to upload code to WandB."""
|
|
97
|
+
|
|
94
98
|
watch: WandbWatchConfig | None = WandbWatchConfig()
|
|
95
99
|
"""WandB model watch configuration. Used to log model architecture, gradients, and parameters."""
|
|
96
100
|
|
|
@@ -110,6 +114,18 @@ class WandbLoggerConfig(CallbackConfigBase, BaseLoggerConfig):
|
|
|
110
114
|
self.use_wandb_core = value
|
|
111
115
|
return self
|
|
112
116
|
|
|
117
|
+
@property
|
|
118
|
+
def _lightning_log_model(self) -> Literal["all"] | bool:
|
|
119
|
+
match self.log_model:
|
|
120
|
+
case "all":
|
|
121
|
+
return "all"
|
|
122
|
+
case "latest" | True:
|
|
123
|
+
return True
|
|
124
|
+
case "none" | False:
|
|
125
|
+
return False
|
|
126
|
+
case _:
|
|
127
|
+
assert_never(self.log_model)
|
|
128
|
+
|
|
113
129
|
@override
|
|
114
130
|
def create_logger(self, root_config):
|
|
115
131
|
if not self.enabled:
|
|
@@ -128,11 +144,28 @@ class WandbLoggerConfig(CallbackConfigBase, BaseLoggerConfig):
|
|
|
128
144
|
f"(expected version >= 0.17.5, found version {wandb.__version__}). "
|
|
129
145
|
"Please either upgrade to a newer version of WandB or disable the `use_wandb_core` option."
|
|
130
146
|
)
|
|
131
|
-
|
|
147
|
+
# W&B versions 0.18.0 and later use wandb-core by default
|
|
148
|
+
elif wandb_version < version.parse("0.18.0"):
|
|
132
149
|
wandb.require("core") # type: ignore
|
|
133
150
|
log.critical("Using the `wandb-core` backend for WandB.")
|
|
134
151
|
except ImportError:
|
|
135
152
|
pass
|
|
153
|
+
else:
|
|
154
|
+
# W&B versions 0.18.0 and later use wandb-core by default,
|
|
155
|
+
# so if `use_wandb_core` is False, we should use the old backend
|
|
156
|
+
# explicitly.
|
|
157
|
+
wandb_version = version.parse(importlib.metadata.version("wandb"))
|
|
158
|
+
if wandb_version >= version.parse("0.18.0"):
|
|
159
|
+
log.warning(
|
|
160
|
+
"Explicitly using the old backend for WandB. "
|
|
161
|
+
"If you want to use the new `wandb-core` backend, set `use_wandb_core=True`."
|
|
162
|
+
)
|
|
163
|
+
try:
|
|
164
|
+
import wandb # type: ignore
|
|
165
|
+
|
|
166
|
+
wandb.require("legacy-service") # type: ignore
|
|
167
|
+
except ImportError:
|
|
168
|
+
pass
|
|
136
169
|
|
|
137
170
|
from lightning.pytorch.loggers.wandb import WandbLogger
|
|
138
171
|
|
|
@@ -145,7 +178,7 @@ class WandbLoggerConfig(CallbackConfigBase, BaseLoggerConfig):
|
|
|
145
178
|
project=self.project or _project_name(root_config),
|
|
146
179
|
name=root_config.run_name,
|
|
147
180
|
version=root_config.id,
|
|
148
|
-
log_model=self.log_model,
|
|
181
|
+
log_model=self._lightning_log_model,
|
|
149
182
|
notes=(
|
|
150
183
|
"\n".join(f"- {note}" for note in root_config.notes)
|
|
151
184
|
if root_config.notes
|
|
@@ -161,3 +194,6 @@ class WandbLoggerConfig(CallbackConfigBase, BaseLoggerConfig):
|
|
|
161
194
|
|
|
162
195
|
if self.watch:
|
|
163
196
|
yield from self.watch.create_callbacks(root_config)
|
|
197
|
+
|
|
198
|
+
if self.log_code:
|
|
199
|
+
yield from self.log_code.create_callbacks(root_config)
|
|
@@ -80,11 +80,25 @@ def try_symlink_or_copy(
|
|
|
80
80
|
link_path: Path,
|
|
81
81
|
target_is_directory: bool = False,
|
|
82
82
|
relative: bool = True,
|
|
83
|
+
remove_existing: bool = True,
|
|
83
84
|
):
|
|
84
85
|
"""
|
|
85
86
|
Symlinks on Unix, copies on Windows.
|
|
86
87
|
"""
|
|
87
88
|
|
|
89
|
+
# If the link already exists, remove it
|
|
90
|
+
if remove_existing:
|
|
91
|
+
try:
|
|
92
|
+
if link_path.exists():
|
|
93
|
+
if link_path.is_dir():
|
|
94
|
+
shutil.rmtree(link_path)
|
|
95
|
+
else:
|
|
96
|
+
link_path.unlink(missing_ok=True)
|
|
97
|
+
except Exception:
|
|
98
|
+
log.warning(f"Failed to remove {link_path}", exc_info=True)
|
|
99
|
+
else:
|
|
100
|
+
log.debug(f"Removed {link_path=}")
|
|
101
|
+
|
|
88
102
|
symlink_target = get_relative_path(link_path, file_path) if relative else file_path
|
|
89
103
|
try:
|
|
90
104
|
if platform.system() == "Windows":
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/_throughput_monitor_callback.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/checkpoint/best_checkpoint.py
RENAMED
|
File without changes
|
{nshtrainer-0.35.1 → nshtrainer-0.37.0}/src/nshtrainer/callbacks/checkpoint/last_checkpoint.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|