nshtrainer 1.3.6__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nshtrainer/_checkpoint/metadata.py +4 -1
- nshtrainer/_hf_hub.py +3 -0
- nshtrainer/callbacks/checkpoint/_base.py +173 -40
- nshtrainer/callbacks/lr_monitor.py +9 -1
- nshtrainer/trainer/_config.py +8 -2
- nshtrainer/trainer/trainer.py +10 -2
- {nshtrainer-1.3.6.dist-info → nshtrainer-1.4.0.dist-info}/METADATA +1 -1
- {nshtrainer-1.3.6.dist-info → nshtrainer-1.4.0.dist-info}/RECORD +9 -9
- {nshtrainer-1.3.6.dist-info → nshtrainer-1.4.0.dist-info}/WHEEL +0 -0
nshtrainer/_checkpoint/metadata.py CHANGED

@@ -85,6 +85,7 @@ def _generate_checkpoint_metadata(
     trainer: Trainer,
     checkpoint_path: Path,
     metadata_path: Path,
+    compute_checksum: bool = True,
 ):
     checkpoint_timestamp = datetime.datetime.now()
     start_timestamp = trainer.start_time()
@@ -105,7 +106,9 @@ def _generate_checkpoint_metadata(
         # moving the checkpoint directory
         checkpoint_path=checkpoint_path.relative_to(metadata_path.parent),
         checkpoint_filename=checkpoint_path.name,
-        checkpoint_checksum=compute_file_checksum(checkpoint_path),
+        checkpoint_checksum=compute_file_checksum(checkpoint_path)
+        if compute_checksum
+        else "",
         run_id=trainer.hparams.id,
         name=trainer.hparams.full_name,
         project=trainer.hparams.project,
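The new compute_checksum flag lets metadata be generated without hashing the checkpoint file; the checkpoint callback further down uses this to build "hypothetical" metadata for a checkpoint that has not been written to disk yet. A minimal sketch of the guarded checksum, assuming a chunked SHA-256 hash in the spirit of compute_file_checksum (the helper names below are illustrative, not nshtrainer's API):

import hashlib
from pathlib import Path


def file_checksum(path: Path, chunk_size: int = 1 << 20) -> str:
    """Hash a file in chunks so large checkpoints never need to fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def checksum_or_empty(path: Path, compute_checksum: bool = True) -> str:
    # Mirror the guarded call in the diff: skip the potentially expensive hash
    # when the metadata is only needed to rank a checkpoint, not to verify it.
    return file_checksum(path) if compute_checksum else ""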

nshtrainer/_hf_hub.py CHANGED

@@ -91,6 +91,9 @@ class HuggingFaceHubConfig(CallbackConfigBase):

     @override
     def create_callbacks(self, trainer_config):
+        if not self:
+            return
+
         # Attempt to login. If it fails, we'll log a warning or error based on the configuration.
         try:
             api = _api(self.token)
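The early return makes create_callbacks a no-op when the config is falsy, so a disabled Hugging Face Hub configuration contributes no callbacks at all. A rough sketch of that generator pattern, assuming for illustration a config whose truthiness is driven by an enabled flag (this is not nshtrainer's actual class):

from __future__ import annotations

from dataclasses import dataclass


@dataclass
class HubConfigSketch:
    enabled: bool = False
    token: str | None = None

    def __bool__(self) -> bool:
        # The config's truthiness decides whether any callbacks are created.
        return self.enabled

    def create_callbacks(self, trainer_config):
        if not self:
            return  # disabled: the generator yields nothing
        yield f"hub-callback(token={self.token!r})"  # stand-in for the real callback


assert list(HubConfigSketch(enabled=False).create_callbacks(None)) == []
assert list(HubConfigSketch(enabled=True, token="hf_x").create_callbacks(None)) != []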

nshtrainer/callbacks/checkpoint/_base.py CHANGED

@@ -1,17 +1,19 @@
 from __future__ import annotations

 import logging
+import string
 from abc import ABC, abstractmethod
+from collections.abc import Callable
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generic, Literal
+from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar

 import numpy as np
 import torch
 from lightning.pytorch import Trainer
 from lightning.pytorch.callbacks import Checkpoint
-from typing_extensions import
+from typing_extensions import override

-from ..._checkpoint.metadata import CheckpointMetadata
+from ..._checkpoint.metadata import CheckpointMetadata, _generate_checkpoint_metadata
 from ..._checkpoint.saver import link_checkpoint, remove_checkpoint
 from ..base import CallbackConfigBase

@@ -22,6 +24,81 @@ if TYPE_CHECKING:
 log = logging.getLogger(__name__)


+class _FormatDict(dict):
+    """A dictionary that returns an empty string for missing keys when formatting."""
+
+    def __missing__(self, key):
+        log.debug(
+            f"Missing format key '{key}' in checkpoint filename, using empty string"
+        )
+        return ""
+
+
+def _get_checkpoint_metadata(dirpath: Path) -> list[CheckpointMetadata]:
+    """Get all checkpoint metadata from a directory."""
+    return [
+        CheckpointMetadata.from_file(p)
+        for p in dirpath.glob(f"*{CheckpointMetadata.PATH_SUFFIX}")
+        if p.is_file() and not p.is_symlink()
+    ]
+
+
+def _sort_checkpoint_metadata(
+    metas: list[CheckpointMetadata],
+    key_fn: Callable[[CheckpointMetadata], Any],
+    reverse: bool = False,
+) -> list[CheckpointMetadata]:
+    """Sort checkpoint metadata by the given key function."""
+    return sorted(metas, key=key_fn, reverse=reverse)
+
+
+def _remove_checkpoints(
+    trainer: Trainer,
+    dirpath: Path,
+    metas_to_remove: list[CheckpointMetadata],
+) -> None:
+    """Remove checkpoint files and their metadata."""
+    for meta in metas_to_remove:
+        ckpt_path = dirpath / meta.checkpoint_filename
+        if not ckpt_path.exists():
+            log.warning(
+                f"Checkpoint file not found: {ckpt_path}\n"
+                "Skipping removal of the checkpoint metadata."
+            )
+            continue
+
+        remove_checkpoint(trainer, ckpt_path, metadata=True)
+        log.debug(f"Removed checkpoint: {ckpt_path}")
+
+
+def _update_symlink(
+    dirpath: Path,
+    symlink_path: Path | None,
+    sort_key_fn: Callable[[CheckpointMetadata], Any],
+    sort_reverse: bool,
+) -> None:
+    """Update symlink to point to the best checkpoint."""
+    if symlink_path is None:
+        return
+
+    # Get all checkpoint metadata after any removals
+    remaining_metas = _get_checkpoint_metadata(dirpath)
+
+    if remaining_metas:
+        # Sort by the key function
+        remaining_metas = _sort_checkpoint_metadata(
+            remaining_metas, sort_key_fn, sort_reverse
+        )
+
+        # Link to the best checkpoint
+        best_meta = remaining_metas[0]
+        best_filepath = dirpath / best_meta.checkpoint_filename
+        link_checkpoint(best_filepath, symlink_path, metadata=True)
+        log.debug(f"Updated symlink {symlink_path.name} -> {best_filepath.name}")
+    else:
+        log.warning(f"No checkpoints found in {dirpath} to create symlink.")
+
+
 class BaseCheckpointCallbackConfig(CallbackConfigBase, ABC):
     dirpath: str | Path | None = None
     """Directory path to save the checkpoint file."""
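_FormatDict works because dict.__missing__ is consulted by string.Formatter.vformat, which resolves fields through the mapping's item lookup instead of raising KeyError the way str.format does with keyword arguments. A small standard-library-only demonstration of that behaviour (the template and metric names are made up):

import string


class DefaultEmptyDict(dict):
    """Return an empty string for any missing format field (same idea as _FormatDict)."""

    def __missing__(self, key):
        return ""


template = "epoch{epoch:03d}-step{step}-loss{val/loss}"
values = DefaultEmptyDict({"epoch": 3, "step": 1200})

# 'val/loss' is not in the mapping, so __missing__ supplies "" instead of raising.
print(string.Formatter().vformat(template, (), values))  # epoch003-step1200-loss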
@@ -95,35 +172,27 @@ class CheckpointBase(Checkpoint, ABC, Generic[TConfig]):
     def resolve_checkpoint_path(self, current_metrics: dict[str, Any]) -> Path:
         if (filename := self.config.filename) is None:
             filename = self.default_filename()
-        filename = filename.format(**current_metrics)
-        return self.dirpath / f"{filename}{self.extension()}"
-
-    def remove_old_checkpoints(self, trainer: Trainer):
-        if (topk := self.config.topk) == "all":
-            return

-        #
-
-
-            for p in self.dirpath.glob(f"*{CheckpointMetadata.PATH_SUFFIX}")
-            if p.is_file() and not p.is_symlink()
+        # Extract all field names from the format string
+        field_names = [
+            fname for _, fname, _, _ in string.Formatter().parse(filename) if fname
         ]

-        #
-
+        # Filter current_metrics to only include keys that are in the format string
+        format_dict = {k: v for k, v in current_metrics.items() if k in field_names}

-
-
-
-
-
-
-
-
-
+        try:
+            formatted_filename = filename.format(**format_dict)
+        except KeyError as e:
+            log.warning(
+                f"Missing key {e} in {filename=} with {format_dict=}. Using default values."
+            )
+            # Provide a simple fallback for missing keys
+            formatted_filename = string.Formatter().vformat(
+                filename, (), _FormatDict(format_dict)
+            )

-
-            log.debug(f"Removed old checkpoint: {old_ckpt_path}")
+        return self.dirpath / f"{formatted_filename}{self.extension()}"

     def current_metrics(self, trainer: Trainer) -> dict[str, Any]:
         current_metrics: dict[str, Any] = {
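The rewritten resolve_checkpoint_path first reads the placeholder names out of the filename template and then formats with only the metrics the template actually references, so unrelated metrics can no longer break filename formatting. A minimal sketch of that extraction step using only the standard library (template and metric names are invented):

import string

template = "epoch{epoch:03d}-val_loss{val_loss:.4f}"
metrics = {"epoch": 7, "val_loss": 0.123456, "train_loss": 0.2, "lr": 1e-3}

# Collect every named field that appears in the template ...
field_names = [fname for _, fname, _, _ in string.Formatter().parse(template) if fname]
# ... and keep only the metrics the template asks for.
format_dict = {k: v for k, v in metrics.items() if k in field_names}

print(field_names)                     # ['epoch', 'val_loss']
print(template.format(**format_dict))  # epoch007-val_loss0.1235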
@@ -142,9 +211,22 @@ class CheckpointBase(Checkpoint, ABC, Generic[TConfig]):

             current_metrics[name] = value

+        log.debug(
+            f"Current metrics: {current_metrics}, {trainer.callback_metrics=}, {trainer.logged_metrics=}"
+        )
         return current_metrics

     def save_checkpoints(self, trainer: Trainer):
+        log.debug(
+            f"{type(self).__name__}.save_checkpoints() called at {trainer.current_epoch=}, {trainer.global_step=}"
+        )
+        # Also print out the current stack trace for debugging
+        if log.isEnabledFor(logging.DEBUG):
+            import traceback
+
+            stack = traceback.extract_stack()
+            log.debug(f"Stack trace: {''.join(traceback.format_list(stack))}")
+
         if self._should_skip_saving_checkpoint(trainer):
             return

@@ -156,22 +238,73 @@ class CheckpointBase(Checkpoint, ABC, Generic[TConfig]):
             f"but got {type(trainer).__name__}"
         )

-
-        filepath = self.resolve_checkpoint_path(
-
+        current_metrics = self.current_metrics(trainer)
+        filepath = self.resolve_checkpoint_path(current_metrics)
+
+        # Get all existing checkpoint metadata
+        existing_metas = _get_checkpoint_metadata(self.dirpath)
+
+        # Determine which checkpoints to remove
+        to_remove: list[CheckpointMetadata] = []
+        should_save = True
+
+        # Check if we should save this checkpoint
+        if (topk := self.config.topk) != "all" and len(existing_metas) >= topk:
+            # Generate hypothetical metadata for the current checkpoint
+            hypothetical_meta = _generate_checkpoint_metadata(
+                trainer=trainer,
+                checkpoint_path=filepath,
+                metadata_path=filepath.with_suffix(CheckpointMetadata.PATH_SUFFIX),
+                compute_checksum=False,
+            )
+
+            # Add the hypothetical metadata to the list and sort
+            metas = _sort_checkpoint_metadata(
+                [*existing_metas, hypothetical_meta],
+                self.topk_sort_key,
+                self.topk_sort_reverse(),
+            )
+
+            # If the hypothetical metadata is not in the top-k, skip saving
+            if hypothetical_meta not in metas[:topk]:
+                log.debug(
+                    f"Skipping checkpoint save: would not make top {topk} "
+                    f"based on {self.topk_sort_key.__name__}"
+                )
+                should_save = False
+            else:
+                # Determine which existing checkpoints to remove
+                to_remove = metas[topk:]
+                assert hypothetical_meta not in to_remove, (
+                    "Hypothetical metadata should not be in the to_remove list."
+                )
+                log.debug(
+                    f"Removing checkpoints: {[meta.checkpoint_filename for meta in to_remove]} "
+                    f"and saving the new checkpoint: {hypothetical_meta.checkpoint_filename}"
+                )

-        if
-
-
+        # Only save if it would make it into the top-k
+        if should_save:
+            # Save the new checkpoint
+            trainer.save_checkpoint(
+                filepath,
+                weights_only=self.config.save_weights_only,
+            )

-
-
-
-
-
+        if trainer.is_global_zero:
+            # Remove old checkpoints that should be deleted
+            if to_remove:
+                _remove_checkpoints(trainer, self.dirpath, to_remove)
+
+            # Update the symlink to point to the best checkpoint
+            _update_symlink(
+                self.dirpath,
+                self.symlink_path(),
+                self.topk_sort_key,
+                self.topk_sort_reverse(),
+            )

-        # Barrier to ensure all processes have
-        # deleted the old checkpoints, and created the symlink before continuing
+        # Barrier to ensure all processes have completed checkpoint operations
         trainer.strategy.barrier()

     def _should_skip_saving_checkpoint(self, trainer: Trainer) -> bool:
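The new save path ranks the would-be checkpoint against the existing ones before writing anything, instead of saving unconditionally and pruning afterwards: if the candidate would not make the top-k it is skipped, otherwise whatever falls out of the top-k is removed. A standalone sketch of that decision (MetaSketch, score, and plan_topk_save are illustrative names, not nshtrainer's API; lower score is treated as better):

from dataclasses import dataclass


@dataclass(frozen=True)
class MetaSketch:
    filename: str
    score: float  # e.g. the monitored validation metric


def plan_topk_save(existing: list[MetaSketch], candidate: MetaSketch, topk: int):
    """Return (should_save, to_remove) for a 'lower score is better' ranking."""
    if len(existing) < topk:
        return True, []  # still room: always save, nothing to delete
    ranked = sorted([*existing, candidate], key=lambda m: m.score)
    if candidate not in ranked[:topk]:
        return False, []  # would not make the cut: skip saving entirely
    return True, ranked[topk:]  # save, then drop whatever fell out of the top-k


existing = [MetaSketch("a.ckpt", 0.30), MetaSketch("b.ckpt", 0.25)]
print(plan_topk_save(existing, MetaSketch("c.ckpt", 0.20), topk=2))  # keeps c, removes a.ckpt
print(plan_topk_save(existing, MetaSketch("d.ckpt", 0.40), topk=2))  # (False, [])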

nshtrainer/callbacks/lr_monitor.py CHANGED

@@ -1,12 +1,15 @@
 from __future__ import annotations

+import logging
 from typing import Literal

 from lightning.pytorch.callbacks import LearningRateMonitor
-from typing_extensions import final
+from typing_extensions import final, override

 from .base import CallbackConfigBase, callback_registry

+log = logging.getLogger(__name__)
+

 @final
 @callback_registry.register

@@ -28,7 +31,12 @@ class LearningRateMonitorConfig(CallbackConfigBase):
     Option to also log the weight decay values of the optimizer. Defaults to False.
     """

+    @override
     def create_callbacks(self, trainer_config):
+        if not list(trainer_config.enabled_loggers()):
+            log.warning("No loggers enabled. LearningRateMonitor will not be used.")
+            return
+
         yield LearningRateMonitor(
             logging_interval=self.logging_interval,
             log_momentum=self.log_momentum,

nshtrainer/trainer/_config.py CHANGED

@@ -717,8 +717,9 @@ class TrainerConfig(C.Config):

     auto_set_default_root_dir: bool = True
     """If enabled, will automatically set the default root dir to [cwd/lightning_logs/<id>/]. There is basically no reason to disable this."""
-    save_checkpoint_metadata:
-    """
+    save_checkpoint_metadata: Literal[True] = True
+    """Will save additional metadata whenever a checkpoint is saved.
+    This is a core feature of nshtrainer and cannot be disabled."""
     auto_set_debug_flag: DebugFlagCallbackConfig | None = DebugFlagCallbackConfig()
     """If enabled, will automatically set the debug flag to True if:
     - The trainer is running in fast_dev_run mode.

@@ -1308,6 +1309,11 @@ class TrainerConfig(C.Config):
         if self.barebones and self.shared_parameters:
             raise ValueError("shared_parameters is not supported under barebones mode")

+        if not self.save_checkpoint_metadata:
+            raise ValueError(
+                "save_checkpoint_metadata must be True. This is a core feature of nshtrainer and cannot be disabled."
+            )
+
     def _nshtrainer_set_id_if_missing(self):
         """
         Set the ID for the configuration object if it is missing.
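Typing save_checkpoint_metadata as Literal[True] turns "metadata is always written" into something config validation can enforce, and the added runtime check raises a clear error if the field is ever forced to False. A rough analogy using a plain pydantic v2 model (nshtrainer's own config base class is not reproduced here):

from typing import Literal

from pydantic import BaseModel, ValidationError


class ConfigSketch(BaseModel):
    save_checkpoint_metadata: Literal[True] = True


print(ConfigSketch())  # save_checkpoint_metadata=True

try:
    ConfigSketch(save_checkpoint_metadata=False)
except ValidationError as err:
    # pydantic rejects any value other than the literal True
    print("rejected:", err.errors()[0]["type"])  # rejected: literal_error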

nshtrainer/trainer/trainer.py CHANGED

@@ -45,6 +45,9 @@ patch_log_hparams_function()


 class Trainer(LightningTrainer):
+    profiler: Profiler
+    """Profiler used for profiling the training process."""
+
     CHECKPOINT_HYPER_PARAMS_KEY = "trainer_hyper_parameters"

     @property

@@ -469,6 +472,11 @@ class Trainer(LightningTrainer):
         weights_only: bool = False,
         storage_options: Any | None = None,
     ):
+        assert self.hparams.save_checkpoint_metadata, (
+            "Checkpoint metadata is not enabled. "
+            "Please set `hparams.save_checkpoint_metadata=True`."
+        )
+
         filepath = Path(filepath)

         if self.model is None:

@@ -476,7 +484,7 @@ class Trainer(LightningTrainer):
             "Saving a checkpoint is only possible if a model is attached to the Trainer. Did you call"
             " `Trainer.save_checkpoint()` before calling `Trainer.{fit,validate,test,predict}`?"
         )
-        with self.profiler.profile("save_checkpoint"):
+        with self.profiler.profile("save_checkpoint"):
             checkpoint = self._checkpoint_connector.dump_checkpoint(weights_only)
         # Update the checkpoint for the trainer hyperparameters
         checkpoint[self.CHECKPOINT_HYPER_PARAMS_KEY] = self.hparams.model_dump(

@@ -489,7 +497,7 @@ class Trainer(LightningTrainer):

         # Save the checkpoint metadata
         metadata_path = None
-        if self.
+        if self.is_global_zero:
             # Generate the metadata and write to disk
             metadata_path = write_checkpoint_metadata(self, filepath)


{nshtrainer-1.3.6.dist-info → nshtrainer-1.4.0.dist-info}/RECORD CHANGED

@@ -1,15 +1,15 @@
 nshtrainer/.nshconfig.generated.json,sha256=yZd6cn1RhvNNJUgiUTRYut8ofZYvbulnpPG-rZIRhi4,106
 nshtrainer/__init__.py,sha256=RI_2B_IUWa10B6H5TAuWtE5FWX1X4ue-J4dTDaF2-lQ,1035
 nshtrainer/_callback.py,sha256=ZDppiJ4d65tRXTEWYPZLH_F1xFizdz1pkWJe_sQ5uII,12564
-nshtrainer/_checkpoint/metadata.py,sha256=
+nshtrainer/_checkpoint/metadata.py,sha256=El9Ip8jGA7mAN5rAMpVfg1dfUe2dGoOOfvF1JfYJGHM,5676
 nshtrainer/_checkpoint/saver.py,sha256=utcrYKSosd04N9m2GIylufO5DO05D90qVU3mvadfApU,1658
 nshtrainer/_experimental/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-nshtrainer/_hf_hub.py,sha256=
+nshtrainer/_hf_hub.py,sha256=OB4252GJ6AbKNCRmHVvEglvjYVMUN822BFYECABxfZU,14037
 nshtrainer/callbacks/__init__.py,sha256=m6eJuprZfBELuKpngKXre33B9yPXkG7jlKVmI-0yXRQ,4000
 nshtrainer/callbacks/actsave.py,sha256=NSXIIu62MNYe5gz479SMW33bdoKYoYtWtd_iTWFpKpc,3881
 nshtrainer/callbacks/base.py,sha256=K9aom1WVVRYxl-tHWgtmDUQZ1o63NgznvLsjauTKcCc,4225
 nshtrainer/callbacks/checkpoint/__init__.py,sha256=l8tkHc83_mLiU0-wT09SWdRzwpm2ulbkLzcuCmuTwzE,620
-nshtrainer/callbacks/checkpoint/_base.py,sha256=
+nshtrainer/callbacks/checkpoint/_base.py,sha256=BjgfCXsf4Ihf1MNKkHBUwjHMLwc04PZO-2Bx-LdAazg,11010
 nshtrainer/callbacks/checkpoint/best_checkpoint.py,sha256=aCs3E1eucfDlUeW2Iq_Ke7hb96BxHanmvn7PCCbqq0E,2648
 nshtrainer/callbacks/checkpoint/last_checkpoint.py,sha256=vn-as3ex7kaTRcKsIurVtM6kUSHYNwHJeYG82j2dMcc,3554
 nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py,sha256=nljzETqkHwA-4g8mxaeFK5HxA8My0dlIPzIUscSMWyk,3525

@@ -22,7 +22,7 @@ nshtrainer/callbacks/finite_checks.py,sha256=3lZ3kEIjmYQfqTF0DcrgZ9_98ZLQhQj8usH
 nshtrainer/callbacks/gradient_skipping.py,sha256=8g7oC7PF0LTAEzwiNoaS5tWOnkjk_EB0QG3JdHkQ8ek,3523
 nshtrainer/callbacks/interval.py,sha256=UCzUzt3XCFVyQyCWL9lOrStkkxesvduNOYk8yMrGTTk,8116
 nshtrainer/callbacks/log_epoch.py,sha256=B5Dm8XVZwCzKUhUWfT_5PDdDac993191OsbcxxuSVJE,1457
-nshtrainer/callbacks/lr_monitor.py,sha256=
+nshtrainer/callbacks/lr_monitor.py,sha256=v45ehnwNO987087HfiOY5aIrVRbwdKMgPYRFHs1fyEE,1444
 nshtrainer/callbacks/metric_validation.py,sha256=4RDr1FuNKfro-6QEtmcFqT4iNf2twmJVNk9y-8nq9bg,2882
 nshtrainer/callbacks/norm_logging.py,sha256=nVIDWe-ASl5zN830-ODR8QMCqI1ma-QPCIwoy0Wb-Nk,6390
 nshtrainer/callbacks/print_table.py,sha256=VaS4JgI963do79laXK4lUkFQx8v6aRSy22W0zyal_LA,3035

@@ -133,7 +133,7 @@ nshtrainer/profiler/advanced.py,sha256=XrM3FX0ThCv5UwUrrH0l4Ow4LGAtpiBww2N8QAU5N
 nshtrainer/profiler/pytorch.py,sha256=8K37XvPnCApUpIK8tA2zNMFIaIiTLSoxKQoiyCPBm1Q,2757
 nshtrainer/profiler/simple.py,sha256=PimjqcU-JuS-8C0ZGHAdwCxgNLij4x0FH6WXsjBQzZs,1005
 nshtrainer/trainer/__init__.py,sha256=jRaHdaFK8wxNrN1bleT9cf29iZahL_-XkWo5TWz2CmA,550
-nshtrainer/trainer/_config.py,sha256=
+nshtrainer/trainer/_config.py,sha256=FWEspBYt_bjLhUSkJApkC9pfYBTlFBHmIQRFNGpGjAc,45849
 nshtrainer/trainer/_distributed_prediction_result.py,sha256=bQw8Z6PT694UUf-zQPkech6CxyUSy8bAIexfSfPej0U,2507
 nshtrainer/trainer/_log_hparams.py,sha256=XH2lZ4U_3AZBhOt91ocsEhdL_NRz35oWvqLCUFDohUs,2389
 nshtrainer/trainer/_runtime_callback.py,sha256=6F2Gq27Q8OFfN3RtdNC6QRA8ac0LC1hh4DUE3V5WgbI,4217

@@ -146,7 +146,7 @@ nshtrainer/trainer/plugin/layer_sync.py,sha256=-BbEyWZ063O7tZme7Gdu1lVxK6p1NeuLc
 nshtrainer/trainer/plugin/precision.py,sha256=7lf7KZd_yFyPmhLApjEIv0pkoDB5zdxi-7in0wRj3z8,5436
 nshtrainer/trainer/signal_connector.py,sha256=ZgbSkbthoe8MYN6rBoFf-7UDpQtc9fs9pG_FNvTYSfs,10962
 nshtrainer/trainer/strategy.py,sha256=VPTn5z3zvXTydY8IJchjhjcOfpvtoejnvUkq5E4WTus,1368
-nshtrainer/trainer/trainer.py,sha256=
+nshtrainer/trainer/trainer.py,sha256=G_tHqzZCHJazhROcoKeOI5rZ5A8F8XlghiIWkdMbPR0,24387
 nshtrainer/util/_environment_info.py,sha256=j-wyEHKirsu3rIXTtqC2kLmIIkRe6obWjxPVWaqg2ow,24887
 nshtrainer/util/bf16.py,sha256=9QhHZCkYSfYpIcxwAMoXyuh2yTSHBzT-EdLQB297jEs,762
 nshtrainer/util/code_upload.py,sha256=CpbZEBbA8EcBElUVoCPbP5zdwtNzJhS20RLaOB-q-2k,1257

@@ -159,6 +159,6 @@ nshtrainer/util/seed.py,sha256=diMV8iwBKN7Xxt5pELmui-gyqyT80_CZzomrWhNss0k,316
 nshtrainer/util/slurm.py,sha256=HflkP5iI_r4UHMyPjw9R4dD5AHsJUpcfJw5PLvGYBRM,1603
 nshtrainer/util/typed.py,sha256=Xt5fUU6zwLKSTLUdenovnKK0N8qUq89Kddz2_XeykVQ,164
 nshtrainer/util/typing_utils.py,sha256=MjY-CUX9R5Tzat-BlFnQjwl1PQ_W2yZQoXhkYHlJ_VA,442
-nshtrainer-1.
-nshtrainer-1.
-nshtrainer-1.
+nshtrainer-1.4.0.dist-info/METADATA,sha256=PIV_5Swp1HhgFU2ZBj_X1tCeOBfNhrhTXOFB1vgunno,979
+nshtrainer-1.4.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+nshtrainer-1.4.0.dist-info/RECORD,,
File without changes
|