nshtrainer 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nshtrainer/__init__.py +64 -0
- nshtrainer/_experimental/__init__.py +2 -0
- nshtrainer/_experimental/flops/__init__.py +48 -0
- nshtrainer/_experimental/flops/flop_counter.py +787 -0
- nshtrainer/_experimental/flops/module_tracker.py +140 -0
- nshtrainer/_snoop.py +216 -0
- nshtrainer/_submit/print_environment_info.py +31 -0
- nshtrainer/_submit/session/_output.py +12 -0
- nshtrainer/_submit/session/_script.py +109 -0
- nshtrainer/_submit/session/lsf.py +467 -0
- nshtrainer/_submit/session/slurm.py +573 -0
- nshtrainer/_submit/session/unified.py +350 -0
- nshtrainer/actsave/__init__.py +7 -0
- nshtrainer/actsave/_callback.py +75 -0
- nshtrainer/actsave/_loader.py +144 -0
- nshtrainer/actsave/_saver.py +337 -0
- nshtrainer/callbacks/__init__.py +35 -0
- nshtrainer/callbacks/_throughput_monitor_callback.py +549 -0
- nshtrainer/callbacks/base.py +113 -0
- nshtrainer/callbacks/early_stopping.py +112 -0
- nshtrainer/callbacks/ema.py +383 -0
- nshtrainer/callbacks/finite_checks.py +75 -0
- nshtrainer/callbacks/gradient_skipping.py +103 -0
- nshtrainer/callbacks/interval.py +322 -0
- nshtrainer/callbacks/latest_epoch_checkpoint.py +45 -0
- nshtrainer/callbacks/log_epoch.py +35 -0
- nshtrainer/callbacks/norm_logging.py +187 -0
- nshtrainer/callbacks/on_exception_checkpoint.py +44 -0
- nshtrainer/callbacks/print_table.py +90 -0
- nshtrainer/callbacks/throughput_monitor.py +56 -0
- nshtrainer/callbacks/timer.py +157 -0
- nshtrainer/callbacks/wandb_watch.py +103 -0
- nshtrainer/config.py +289 -0
- nshtrainer/data/__init__.py +4 -0
- nshtrainer/data/balanced_batch_sampler.py +132 -0
- nshtrainer/data/transform.py +67 -0
- nshtrainer/lr_scheduler/__init__.py +18 -0
- nshtrainer/lr_scheduler/_base.py +101 -0
- nshtrainer/lr_scheduler/linear_warmup_cosine.py +138 -0
- nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +73 -0
- nshtrainer/model/__init__.py +44 -0
- nshtrainer/model/base.py +641 -0
- nshtrainer/model/config.py +2064 -0
- nshtrainer/model/modules/callback.py +157 -0
- nshtrainer/model/modules/debug.py +42 -0
- nshtrainer/model/modules/distributed.py +70 -0
- nshtrainer/model/modules/logger.py +170 -0
- nshtrainer/model/modules/profiler.py +24 -0
- nshtrainer/model/modules/rlp_sanity_checks.py +202 -0
- nshtrainer/model/modules/shared_parameters.py +72 -0
- nshtrainer/nn/__init__.py +19 -0
- nshtrainer/nn/mlp.py +106 -0
- nshtrainer/nn/module_dict.py +66 -0
- nshtrainer/nn/module_list.py +50 -0
- nshtrainer/nn/nonlinearity.py +157 -0
- nshtrainer/optimizer.py +62 -0
- nshtrainer/runner.py +21 -0
- nshtrainer/scripts/check_env.py +41 -0
- nshtrainer/scripts/find_packages.py +51 -0
- nshtrainer/trainer/__init__.py +1 -0
- nshtrainer/trainer/signal_connector.py +208 -0
- nshtrainer/trainer/trainer.py +340 -0
- nshtrainer/typecheck.py +144 -0
- nshtrainer/util/environment.py +119 -0
- nshtrainer/util/seed.py +11 -0
- nshtrainer/util/singleton.py +89 -0
- nshtrainer/util/slurm.py +49 -0
- nshtrainer/util/typed.py +2 -0
- nshtrainer/util/typing_utils.py +19 -0
- nshtrainer-0.1.0.dist-info/METADATA +18 -0
- nshtrainer-0.1.0.dist-info/RECORD +72 -0
- nshtrainer-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from contextlib import contextmanager
|
|
3
|
+
from logging import getLogger
|
|
4
|
+
|
|
5
|
+
log = getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@contextmanager
def remove_slurm_environment_variables():
    """
    Temporarily strip every "SLURM_" variable from ``os.environ``.

    SLURM sets variables such as SLURM_CPU_BIND_* in the environment of the
    current job. When new SLURM runs are created from that environment they
    would inherit those variables and can fail with an error like:

        srun: error: CPU binding outside of job step allocation, allocated CPUs are: 0x01F000000001F0000000.
        srun: error: Task launch for StepId=5216715.0 failed on node learnfair0537: Unable to satisfy cpu bind request
        srun: error: Application launch failed: Unable to satisfy cpu bind request
        srun: Job step aborted

    To keep things simple, all variables whose name starts with "SLURM_" are
    removed on entry and written back on exit (also when the body raises).

    See https://www.mail-archive.com/slurm-users@lists.schedmd.com/msg09157.html for more details.
    """
    # Collect the names first so the environment is not mutated while it is
    # being iterated.
    slurm_names = [name for name in os.environ if name.startswith("SLURM_")]
    stashed = {name: os.environ.pop(name) for name in slurm_names}

    log.debug(
        f"Removed environment variables before launching new SLURM job: {list(stashed.keys())}"
    )
    try:
        yield
    finally:
        # Put back exactly what was taken out, overwriting anything the body
        # may have set under the same names.
        os.environ.update(stashed)
        log.debug(
            f"Restored environment variables after launching new SLURM job: {list(stashed.keys())}"
        )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@contextmanager
def remove_lsf_environment_variables():
    """
    Temporarily strip LSF scheduler variables from ``os.environ``.

    Every variable whose name starts with "LS_", "LSF_", "LSB_" or "BSUB_"
    is removed on entry and written back on exit (also when the body raises),
    so that a newly launched LSF job does not inherit them.
    """
    lsf_prefixes = ("LS_", "LSF_", "LSB_", "BSUB_")

    # list(...) snapshots the names so popping does not disturb the iteration.
    saved = {
        name: os.environ.pop(name)
        for name in list(os.environ)
        if name.startswith(lsf_prefixes)
    }

    log.debug(
        f"Removed environment variables before launching new LSF job: {list(saved.keys())}"
    )
    try:
        yield
    finally:
        os.environ.update(saved)
        log.debug(
            f"Restored environment variables after launching new LSF job: {list(saved.keys())}"
        )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@contextmanager
def remove_wandb_environment_variables():
    """
    Temporarily strip every "WANDB_" variable from ``os.environ``.

    The variables are removed on entry and written back on exit, also when
    the body raises.
    """
    stash = {}
    # Iterate over a snapshot of the names because entries are popped below.
    for name in tuple(os.environ.keys()):
        if name.startswith("WANDB_"):
            stash[name] = os.environ.pop(name)

    # NOTE: the log text mentions SLURM although this helper handles WANDB_
    # variables; the wording is kept as-is.
    log.debug(
        f"Removed environment variables before launching new SLURM job: {list(stash.keys())}"
    )
    try:
        yield
    finally:
        os.environ.update(stash)
        log.debug(
            f"Restored environment variables after launching new SLURM job: {list(stash.keys())}"
        )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@contextmanager
|
|
92
|
+
def set_additional_env_vars(additional_env_vars: dict[str, str] | None = None):
|
|
93
|
+
"""
|
|
94
|
+
Set additional environment variables for the run.
|
|
95
|
+
Newly set environment variables will be removed after the run is finished.
|
|
96
|
+
Existing environment variables will be restored to their original values after the run is finished.
|
|
97
|
+
"""
|
|
98
|
+
if additional_env_vars is None:
|
|
99
|
+
additional_env_vars = {}
|
|
100
|
+
|
|
101
|
+
removed_env_vars = {}
|
|
102
|
+
for key, value in additional_env_vars.items():
|
|
103
|
+
removed_env_vars[key] = os.environ.pop(key, None)
|
|
104
|
+
os.environ[key] = value
|
|
105
|
+
|
|
106
|
+
log.debug(
|
|
107
|
+
f"Set additional environment variables for the run: {list(additional_env_vars.keys())}"
|
|
108
|
+
)
|
|
109
|
+
try:
|
|
110
|
+
yield
|
|
111
|
+
finally:
|
|
112
|
+
for key in additional_env_vars.keys():
|
|
113
|
+
if removed_env_vars[key] is None:
|
|
114
|
+
del os.environ[key]
|
|
115
|
+
else:
|
|
116
|
+
os.environ[key] = removed_env_vars[key]
|
|
117
|
+
log.debug(
|
|
118
|
+
f"Restored environment variables after launching new SLURM job: {list(additional_env_vars.keys())}"
|
|
119
|
+
)
|
nshtrainer/util/seed.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from logging import getLogger
|
|
2
|
+
|
|
3
|
+
import lightning.fabric.utilities.seed as LS
|
|
4
|
+
|
|
5
|
+
log = getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def seed_everything(seed: int | None, *, workers: bool = False):
    """Seed through Lightning Fabric's ``seed_everything`` and log the result.

    Args:
        seed: Seed forwarded to Lightning; how ``None`` is resolved is up to
            Lightning.
        workers: Forwarded unchanged to Lightning's ``workers`` argument.

    Returns:
        The seed value returned by Lightning's ``seed_everything``.
    """
    resolved_seed = LS.seed_everything(seed, workers=workers)
    # Logged at CRITICAL level, as in the original implementation.
    log.critical(f"Set global seed to {resolved_seed}.")
    return resolved_seed
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from logging import getLogger
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from typing_extensions import Self, TypeVar, override
|
|
5
|
+
|
|
6
|
+
log = getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Singleton:
|
|
10
|
+
singleton_key = "_singleton_instance"
|
|
11
|
+
|
|
12
|
+
@classmethod
|
|
13
|
+
def get(cls) -> Self | None:
|
|
14
|
+
return getattr(cls, cls.singleton_key, None)
|
|
15
|
+
|
|
16
|
+
@classmethod
|
|
17
|
+
def set(cls, instance: Self) -> None:
|
|
18
|
+
if cls.get() is not None:
|
|
19
|
+
log.warning(f"{cls.__qualname__} instance is already set")
|
|
20
|
+
|
|
21
|
+
setattr(cls, cls.singleton_key, instance)
|
|
22
|
+
|
|
23
|
+
@classmethod
|
|
24
|
+
def reset(cls) -> None:
|
|
25
|
+
if cls.get() is not None:
|
|
26
|
+
delattr(cls, cls.singleton_key)
|
|
27
|
+
|
|
28
|
+
@classmethod
|
|
29
|
+
def register(cls, instance: Self) -> None:
|
|
30
|
+
cls.set(instance)
|
|
31
|
+
|
|
32
|
+
def register_self(self):
|
|
33
|
+
self.register(self)
|
|
34
|
+
|
|
35
|
+
@classmethod
|
|
36
|
+
def instance(cls) -> Self:
|
|
37
|
+
instance = cls.get()
|
|
38
|
+
if instance is None:
|
|
39
|
+
raise RuntimeError(f"{cls.__qualname__} instance is not set")
|
|
40
|
+
|
|
41
|
+
return instance
|
|
42
|
+
|
|
43
|
+
@override
|
|
44
|
+
def __init_subclass__(cls, *args, **kwargs) -> None:
|
|
45
|
+
super().__init_subclass__(*args, **kwargs)
|
|
46
|
+
|
|
47
|
+
cls.reset()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
T = TypeVar("T", infer_variance=True)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class Registry:
|
|
54
|
+
_registry: dict[type, Any] = {}
|
|
55
|
+
|
|
56
|
+
@staticmethod
|
|
57
|
+
def register(cls_: type[T], instance: T):
|
|
58
|
+
if not isinstance(instance, cls_):
|
|
59
|
+
raise ValueError(f"{instance} is not an instance of {cls_.__qualname__}")
|
|
60
|
+
|
|
61
|
+
if cls_ in Registry._registry:
|
|
62
|
+
raise ValueError(f"{cls_.__qualname__} is already registered")
|
|
63
|
+
|
|
64
|
+
Registry._registry[cls_] = instance
|
|
65
|
+
|
|
66
|
+
@staticmethod
|
|
67
|
+
def try_get(cls_: type[T]) -> T | None:
|
|
68
|
+
return Registry._registry.get(cls_)
|
|
69
|
+
|
|
70
|
+
@staticmethod
|
|
71
|
+
def get(cls_: type[T]) -> T:
|
|
72
|
+
instance = Registry.try_get(cls_)
|
|
73
|
+
if instance is None:
|
|
74
|
+
raise ValueError(f"{cls_.__qualname__} is not registered")
|
|
75
|
+
|
|
76
|
+
return instance
|
|
77
|
+
|
|
78
|
+
@staticmethod
|
|
79
|
+
def instance(cls_: type[T]) -> T:
|
|
80
|
+
return Registry.get(cls_)
|
|
81
|
+
|
|
82
|
+
@staticmethod
|
|
83
|
+
def reset(cls_: type[T]):
|
|
84
|
+
if cls_ in Registry._registry:
|
|
85
|
+
del Registry._registry[cls_]
|
|
86
|
+
|
|
87
|
+
@staticmethod
|
|
88
|
+
def reset_all():
|
|
89
|
+
Registry._registry.clear()
|
nshtrainer/util/slurm.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
class SlurmParseException(Exception):
    """Exception type named for SLURM parsing failures.

    NOTE(review): nothing in the visible helpers raises it - confirm where it
    is used.
    """
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def _expand_id_suffix(suffix_parts: str) -> list[str]:
|
|
6
|
+
"""Parse the a suffix formatted like "1-3,5,8" into
|
|
7
|
+
the list of numeric values 1,2,3,5,8.
|
|
8
|
+
"""
|
|
9
|
+
suffixes = []
|
|
10
|
+
for suffix_part in suffix_parts.split(","):
|
|
11
|
+
if "-" in suffix_part:
|
|
12
|
+
low, high = suffix_part.split("-")
|
|
13
|
+
int_length = len(low)
|
|
14
|
+
for num in range(int(low), int(high) + 1):
|
|
15
|
+
suffixes.append(f"{num:0{int_length}}")
|
|
16
|
+
else:
|
|
17
|
+
suffixes.append(suffix_part)
|
|
18
|
+
return suffixes
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _parse_node_group(node_list: str, pos: int, parsed: list[str]) -> int:
|
|
22
|
+
"""Parse a node group of the form PREFIX[1-3,5,8] and return
|
|
23
|
+
the position in the string at which the parsing stopped
|
|
24
|
+
"""
|
|
25
|
+
prefixes = [""]
|
|
26
|
+
while pos < len(node_list):
|
|
27
|
+
c = node_list[pos]
|
|
28
|
+
if c == ",":
|
|
29
|
+
parsed.extend(prefixes)
|
|
30
|
+
return pos + 1
|
|
31
|
+
if c == "[":
|
|
32
|
+
last_pos = node_list.index("]", pos)
|
|
33
|
+
suffixes = _expand_id_suffix(node_list[pos + 1 : last_pos])
|
|
34
|
+
prefixes = [prefix + suffix for prefix in prefixes for suffix in suffixes]
|
|
35
|
+
pos = last_pos + 1
|
|
36
|
+
else:
|
|
37
|
+
for i, prefix in enumerate(prefixes):
|
|
38
|
+
prefixes[i] = prefix + c
|
|
39
|
+
pos += 1
|
|
40
|
+
parsed.extend(prefixes)
|
|
41
|
+
return pos
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def parse_slurm_node_list(node_list: str):
    """Expand a node list string into a list of node names.

    The string is consumed one comma-separated group at a time by
    ``_parse_node_group``, which appends the expanded names of each group to
    the result. An empty string yields an empty list.
    """
    hostnames: list[str] = []
    cursor = 0
    while cursor < len(node_list):
        # Each call consumes one group and reports where the next one starts.
        cursor = _parse_node_group(node_list, cursor, hostnames)
    return hostnames
|
nshtrainer/util/typed.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
|
+
|
|
3
|
+
from typing_extensions import TypeVar
|
|
4
|
+
|
|
5
|
+
TBase = TypeVar("TBase")


def mixin_base_type(base_class: type[TBase]) -> type[TBase]:
    """
    Helper for writing mixins that are type-hinted against a base class.

    ```
    class ReadonlyMixin(mixin_base_type(BaseAdmin)):
        ...
    ```

    Under ``TYPE_CHECKING`` the given ``base_class`` is returned; at runtime
    the function returns ``object``, so the mixin does not actually inherit
    from ``base_class``.
    """
    if not TYPE_CHECKING:
        return object
    return base_class
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: nshtrainer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary:
|
|
5
|
+
Author: Nima Shoghi
|
|
6
|
+
Author-email: nimashoghi@gmail.com
|
|
7
|
+
Requires-Python: >=3.10,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Requires-Dist: nshconfig (>=0.2.0,<0.3.0)
|
|
13
|
+
Requires-Dist: nshrunner (>=0.1.0,<0.2.0)
|
|
14
|
+
Requires-Dist: torch
|
|
15
|
+
Requires-Dist: typing-extensions
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
nshtrainer/__init__.py,sha256=o39TbnjwUYzE4POcncUiDx02Ey-Hzx8UGuwJDjMcKZU,2971
|
|
2
|
+
nshtrainer/_experimental/__init__.py,sha256=2tQIcrWT8U8no_AeBTYnozaTmxN40kuAJdGQ4b-PoWM,120
|
|
3
|
+
nshtrainer/_experimental/flops/__init__.py,sha256=edo9Ez3LlrnxkNRX9W6YBhPkRPKYGLpkpnl5gx7sEX8,1550
|
|
4
|
+
nshtrainer/_experimental/flops/flop_counter.py,sha256=-sL0Fy6poXa__hyzUMdZScjPULp4coQELQpPU6p6dXU,25736
|
|
5
|
+
nshtrainer/_experimental/flops/module_tracker.py,sha256=bUL-IRTd0aF_DwmXkZjHZAA31p4ZEhyqhc26XWKQUUY,4922
|
|
6
|
+
nshtrainer/_snoop.py,sha256=Rofv1Rd92E0LY40G3A-o9Hu0ZI73RR59wJD5l4Q3PDM,7022
|
|
7
|
+
nshtrainer/_submit/print_environment_info.py,sha256=enbJGl_iHIlhKN8avzKnoZSb0zUQ_fUdnsQ8a_9tbYk,963
|
|
8
|
+
nshtrainer/_submit/session/_output.py,sha256=CNGH5W6_XxAC5-TRvMAMxOHd3fjGpJhK-7RGTDyvMu4,245
|
|
9
|
+
nshtrainer/_submit/session/_script.py,sha256=0AeBgBduDsoIEBrY9kebARiBUEGc50JAD9oE_IDiLnA,3775
|
|
10
|
+
nshtrainer/_submit/session/lsf.py,sha256=p19EP6OhROZxqfRhzeTD7GDmfYaREIKMXMOI8G933FE,14307
|
|
11
|
+
nshtrainer/_submit/session/slurm.py,sha256=JpAjQvck4LjGN8o8fOvIeMuFqrg1cioANoVsX5hU-3g,17594
|
|
12
|
+
nshtrainer/_submit/session/unified.py,sha256=gfh-AtnMyFHzcQOUlhlAR__vaWDk1r9XCivz_t_lHKk,11695
|
|
13
|
+
nshtrainer/actsave/__init__.py,sha256=G1T-fELuGWkVqdhdyoePtj2dTOUtcIOW4VgsXv9JNTA,338
|
|
14
|
+
nshtrainer/actsave/_callback.py,sha256=QoTa60F70f1RxB41VKixN9l5_htfFQxXDPHHSNFreuk,2770
|
|
15
|
+
nshtrainer/actsave/_loader.py,sha256=fAhD32DrJa4onkYfcwc21YIeGEYzOSXCK_HVo9SZLgQ,4604
|
|
16
|
+
nshtrainer/actsave/_saver.py,sha256=0EHmQDhqVxQWRWWSyt03eP1K9ETiACMQYmsZkDMt6HY,9451
|
|
17
|
+
nshtrainer/callbacks/__init__.py,sha256=ohE_MO_kX1o4SZwcipIXUA9m7XYcijEKJtGcoU8dTkY,1667
|
|
18
|
+
nshtrainer/callbacks/_throughput_monitor_callback.py,sha256=aJo_11rc4lo0IYOd-kHmPDtzdC4ctgXyRudkRJqH4m4,23184
|
|
19
|
+
nshtrainer/callbacks/base.py,sha256=WESZz1VSTl1xSGVXBmxFqWwbLxXcJp97jpg9zrE0EsY,3560
|
|
20
|
+
nshtrainer/callbacks/early_stopping.py,sha256=jriSU761wf_qTJ9Bos0D3h5aDvZHYpRqK62Ne8aWp5I,3768
|
|
21
|
+
nshtrainer/callbacks/ema.py,sha256=zKCtvzZFo0ORlwNZHjaMk-sJoxrlTtFWOzR-yGy95W0,12134
|
|
22
|
+
nshtrainer/callbacks/finite_checks.py,sha256=kX3TIJsxyqx0GuLJfYsqVgKU27zwjG9Z8324lyCFtwM,2087
|
|
23
|
+
nshtrainer/callbacks/gradient_skipping.py,sha256=ModaIXpb69LbA8TpEXKRLdr4Sq7-l0CWnN6fvpaV188,3477
|
|
24
|
+
nshtrainer/callbacks/interval.py,sha256=smz5Zl8cN6X6yHKVsMRS2e3SEkzRCP3LvwE1ONvLfaw,8080
|
|
25
|
+
nshtrainer/callbacks/latest_epoch_checkpoint.py,sha256=ZT0bn7X0BZbQXbk6fos47NsbbhD4Z9c9YmFqdcUEqus,1503
|
|
26
|
+
nshtrainer/callbacks/log_epoch.py,sha256=fTa_K_Y8A7g09630cG4YkDE6AzSMPkjb9bpPm4gtqos,1120
|
|
27
|
+
nshtrainer/callbacks/norm_logging.py,sha256=IMrK0WiVSDFyspwyPpwELMK4mmd5Jpx4enAW_GsWbi4,6284
|
|
28
|
+
nshtrainer/callbacks/on_exception_checkpoint.py,sha256=eDyB7qkpPdAaKjAY2uFMMY8Nht6TGeuDnsgHuKtp8eA,1615
|
|
29
|
+
nshtrainer/callbacks/print_table.py,sha256=FcA-CBWwMf9c1NNRinvYpZC400RNQxuP28bJfgniT3Q,2840
|
|
30
|
+
nshtrainer/callbacks/throughput_monitor.py,sha256=YQLdpX3LGybIiD814yT9yCCVSEXRWf8WwsvVaN5aDBE,1848
|
|
31
|
+
nshtrainer/callbacks/timer.py,sha256=sDXPPcdDKu5xnuK_bjr8plIq9MBuluNJ42Mt9LvPZzc,4610
|
|
32
|
+
nshtrainer/callbacks/wandb_watch.py,sha256=pUpMsNxd03ex1rzOmFw2HzGOXjnQGaH84m8cc2dXo4g,2937
|
|
33
|
+
nshtrainer/config.py,sha256=0Fj5w-ry0BRl2_zJI6jwCnmMWE3p_eD8_Wn-NyFkTqU,10442
|
|
34
|
+
nshtrainer/data/__init__.py,sha256=7mk1tr7SWUZ7ySbsf0y0ZPszk7u4QznPhQ-7wnpH9ec,149
|
|
35
|
+
nshtrainer/data/balanced_batch_sampler.py,sha256=bcJBcQjh1hB1yKF_xSlT9AtEWv0BJjYc1CuH2BF-ea8,4392
|
|
36
|
+
nshtrainer/data/transform.py,sha256=JeGxvytQly8hougrsdMmKG8gJ6qvFPDglJCO4Tp6STk,1795
|
|
37
|
+
nshtrainer/lr_scheduler/__init__.py,sha256=GNGmkcJD3jgCMk7pfaanAYrKz9957qkx6_Q0rssiHK0,738
|
|
38
|
+
nshtrainer/lr_scheduler/_base.py,sha256=1tWMABevKZAuGhJN8Me2E9eqEyqoLtsG0bADPjED7a4,3752
|
|
39
|
+
nshtrainer/lr_scheduler/linear_warmup_cosine.py,sha256=VhsxZJ_Mw9zjkAGunFQ1KRub5_QM5NRqaEFWtmedFp8,5212
|
|
40
|
+
nshtrainer/lr_scheduler/reduce_lr_on_plateau.py,sha256=Ct-uLo8Q4t7lJ_HwoLRhNmudnCw4cSnblpBEg22aVTI,2691
|
|
41
|
+
nshtrainer/model/__init__.py,sha256=PdvZkpAVkqvCLipGJvEHFU3WxnSMxYpvtuOkvLIenxg,2078
|
|
42
|
+
nshtrainer/model/base.py,sha256=bhngGHxr0suQB9Ezi_3d5JgDWYqS_yPgGJZrGmc1TnI,23571
|
|
43
|
+
nshtrainer/model/config.py,sha256=RMDdrbtvwm5vTFPxQ2x1hqiBIEEE-OAknhF6KTWfkkk,70293
|
|
44
|
+
nshtrainer/model/modules/callback.py,sha256=JF59U9-CjJsAIspEhTJbVaGN0wGctZG7UquE3IS7R8A,6408
|
|
45
|
+
nshtrainer/model/modules/debug.py,sha256=DTVty8cKnzj1GCULRyGx_sWTTsq9NLi30dzqjRTnuCU,1127
|
|
46
|
+
nshtrainer/model/modules/distributed.py,sha256=ABpR9d-3uBS_fivfy_WYW-dExW6vp5BPaoPQnOudHng,1725
|
|
47
|
+
nshtrainer/model/modules/logger.py,sha256=XEeo3QrplTNKZqfl6iWZf3fze3R4YOeOvs-RKVHFoQs,5527
|
|
48
|
+
nshtrainer/model/modules/profiler.py,sha256=rQ_jRMcM1Z2AIROZlRnBRHM5rkTpq67afZPD6CIRfXs,825
|
|
49
|
+
nshtrainer/model/modules/rlp_sanity_checks.py,sha256=o6gUceFwsuDHmL8eLOYuT3JGXFzq_qc4awl2RWaBygU,8900
|
|
50
|
+
nshtrainer/model/modules/shared_parameters.py,sha256=mD5wrlBE3c025vzVdTpnSyC8yxzuI-aUWMmPhqPT0a0,2694
|
|
51
|
+
nshtrainer/nn/__init__.py,sha256=57LPaP3G-BBGD2eGxbBUABNgYl3s_oASwrtOSS4bzTs,1339
|
|
52
|
+
nshtrainer/nn/mlp.py,sha256=i-dHk0tomO_XlU6cKN4CC4HxTaYb-ukBCAgY1ySXl4I,3963
|
|
53
|
+
nshtrainer/nn/module_dict.py,sha256=NOY0B6WDTnktyWH4GthsprMQo0bpehC-hCq9SfD8paE,2329
|
|
54
|
+
nshtrainer/nn/module_list.py,sha256=fb2u5Rqdjff8Pekyr9hkCPkBorQ-fldzzFAjsgWAm30,1719
|
|
55
|
+
nshtrainer/nn/nonlinearity.py,sha256=IhIR8NCTY3Np9dMDnUouERR8ZhWpK3S0hTbT0i8HezU,3645
|
|
56
|
+
nshtrainer/optimizer.py,sha256=JiLNRtcfYxyhAab1Z1QcEzmrX9S_JyrBS67TXy12kXI,1557
|
|
57
|
+
nshtrainer/runner.py,sha256=9HsYB58aasY9RVvya_gPECDs_MBhM1fl4cbM3iJYTDc,600
|
|
58
|
+
nshtrainer/scripts/check_env.py,sha256=IMl6dSqsLYppI0XuCsVq8lK4bYqXwY9KHJkzsShz4Kg,806
|
|
59
|
+
nshtrainer/scripts/find_packages.py,sha256=FbdlfmAefttFSMfaT0A46a-oHLP_ioaQKihwBfBeWeA,1467
|
|
60
|
+
nshtrainer/trainer/__init__.py,sha256=P2rmr8oBVTHk-HJHYPcUwWqDEArMbPR4_rPpATbWK3E,40
|
|
61
|
+
nshtrainer/trainer/signal_connector.py,sha256=aGg6kRiHiqtAdGlEvEvGLmOy7AvRHTSkXdTmZpRXbjU,8435
|
|
62
|
+
nshtrainer/trainer/trainer.py,sha256=oi8KdHF1AdZ54KFbCFAEI7W-C7qRtRe-KtOjNwBuS3M,14033
|
|
63
|
+
nshtrainer/typecheck.py,sha256=CFkmPIxCU24CHk_7_pykb-Y1PRNhpLgsVZw1zuuOS_U,4614
|
|
64
|
+
nshtrainer/util/environment.py,sha256=_SEtiQ_s5bL5pllUlf96AOUv15kNvCPvocVC13S7mIk,4166
|
|
65
|
+
nshtrainer/util/seed.py,sha256=HEXgVs-wldByahOysKwq7506OHxdYTEgmP-tDQVAEkQ,287
|
|
66
|
+
nshtrainer/util/singleton.py,sha256=nLhpuMZxl0zdNsnvS97o4ASUnKzCWYEKLzR_j9oP_xs,2208
|
|
67
|
+
nshtrainer/util/slurm.py,sha256=rofIU26z3SdL79SF45tNez6juou1cyDLz07oXEZb9Hg,1566
|
|
68
|
+
nshtrainer/util/typed.py,sha256=NGuDkDzFlc1fAoaXjOFZVbmj0mRFjsQi1E_hPa7Bn5U,128
|
|
69
|
+
nshtrainer/util/typing_utils.py,sha256=8ptjSSLZxlmy4FY6lzzkoGoF5fGNClo8-B_c0XHQaNU,385
|
|
70
|
+
nshtrainer-0.1.0.dist-info/METADATA,sha256=3zdNPxyB-I6Gudq2gTaU0crdgmDCcGCp6Zudef0DtuM,529
|
|
71
|
+
nshtrainer-0.1.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
72
|
+
nshtrainer-0.1.0.dist-info/RECORD,,
|