libinephany 0.18.1__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- libinephany-1.0.0/CODE_VERSION.cfg +1 -0
- {libinephany-0.18.1/libinephany.egg-info → libinephany-1.0.0}/PKG-INFO +1 -1
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observation_utils.py +19 -2
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/base_observers.py +20 -8
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/global_observers/__init__.py +19 -1
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/global_observers/constants.py +2 -0
- libinephany-1.0.0/libinephany/observations/observers/global_observers/gradient_observers.py +510 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/global_observers/hyperparameter_observers.py +26 -18
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/global_observers/model_observers.py +220 -6
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/global_observers/progress_observers.py +7 -1
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/local_observers.py +158 -25
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/statistic_trackers.py +435 -23
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/schemas/tensor_statistics.py +33 -32
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/states/hyperparameter_states.py +32 -30
- {libinephany-0.18.1 → libinephany-1.0.0/libinephany.egg-info}/PKG-INFO +1 -1
- libinephany-0.18.1/CODE_VERSION.cfg +0 -1
- libinephany-0.18.1/libinephany/observations/observers/global_observers/gradient_observers.py +0 -193
- {libinephany-0.18.1 → libinephany-1.0.0}/LICENSE +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/MANIFEST.in +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/README.md +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/aws/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/aws/s3_functions.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observer_pipeline.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/global_observers/base_classes.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/global_observers/loss_observers.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/observer_containers.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/pipeline_coordinator.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/post_processors/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/post_processors/postprocessors.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/statistic_manager.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/configs/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/configs/hyperparameter_configs.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/configs/observer_config.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/configs/outer_model_config.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/schemas/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/schemas/agent_info.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/schemas/inner_task_profile.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/schemas/observation_models.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/schemas/request_schemas.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/schemas/response_schemas.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/pydantic_models/states/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/agent_utils.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/asyncio_worker.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/backend_statuses.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/constants.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/directory_utils.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/dropout_utils.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/enums.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/error_severities.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/exceptions.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/import_utils.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/optim_utils.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/random_seeds.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/samplers.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/standardizers.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/torch_distributed_utils.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/torch_utils.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/transforms.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/utils/typing.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/web_apps/__init__.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/web_apps/error_logger.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany/web_apps/web_app_utils.py +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany.egg-info/SOURCES.txt +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany.egg-info/dependency_links.txt +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany.egg-info/requires.txt +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/libinephany.egg-info/top_level.txt +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/pyproject.toml +0 -0
- {libinephany-0.18.1 → libinephany-1.0.0}/setup.cfg +0 -0
libinephany-1.0.0/CODE_VERSION.cfg
ADDED
@@ -0,0 +1 @@
+1.0.0
{libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observation_utils.py
RENAMED
@@ -25,6 +25,7 @@ from libinephany.utils import optim_utils
 # ======================================================================================================================
 
 EXP_AVERAGE = "exp_avg"
+MOMENTUM_BUFFER = "momentum_buffer"
 MIN_DECAY_FACTOR = 1e-10
 
 MIN_TOTAL_WEIGHT = 1e-15  # Minimum total weight threshold for numerical stability
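For context, these keys index PyTorch's per-parameter optimizer state: Adam-family optimizers store their first-moment estimate under `exp_avg`, while SGD with momentum stores its buffer under `momentum_buffer`. A minimal sketch (not from this package) showing where the new key appears:

```python
import torch

param = torch.nn.Parameter(torch.randn(4))
optimizer = torch.optim.SGD([param], lr=0.1, momentum=0.9)

param.sum().backward()
optimizer.step()  # the first step populates the per-parameter momentum state

# The buffer the new MOMENTUM_BUFFER constant refers to:
print(optimizer.state[param]["momentum_buffer"].shape)  # torch.Size([4])
```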
@@ -64,10 +65,8 @@ def get_exponential_weighted_average(values: list[int | float]) -> float:
     :param values: List of values to average via EWA.
     :return: EWA of the given values.
     """
-
     exp_weighted_average = pd.Series(values).ewm(alpha=0.1).mean().iloc[-1]
     assert isinstance(exp_weighted_average, float)
-
     return exp_weighted_average
 
 
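The cleanup above leaves the EWA logic unchanged. A minimal sketch of what it computes, assuming only pandas: `ewm(alpha=0.1)` builds an exponentially weighted window that favours recent values, and `.iloc[-1]` takes the running mean at the final element.

```python
import pandas as pd

values = [1.0, 2.0, 3.0, 4.0]

# Same expression as get_exponential_weighted_average uses internally.
ewa = pd.Series(values).ewm(alpha=0.1).mean().iloc[-1]
print(float(ewa))  # ≈ 2.63, biased toward the later values
```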
@@ -232,6 +231,24 @@ def form_update_tensor(
         raise NotImplementedError(f"Optimizer {type(optimizer).__name__} is not supported!")
 
 
+def form_momentum_tensor(
+    optimizer: optim.Optimizer, parameters: list[torch.Tensor], parameter_group: dict[str, Any]
+) -> None | torch.Tensor:
+    """
+    :param optimizer: Optimizer to form the momentum tensor from.
+    :param parameters: Parameters to create the momentum tensor from.
+    :param parameter_group: Parameter group within the optimizer the given parameters came from.
+    """
+    if type(optimizer) in optim_utils.ADAM_OPTIMISERS:
+        momentum_list = [optimizer.state[p][EXP_AVERAGE].view(-1) for p in parameters if tensor_on_local_rank(p)]
+        return torch.cat(momentum_list) if momentum_list else None
+    elif type(optimizer) in optim_utils.SGD_OPTIMISERS:
+        momentum_list = [optimizer.state[p][MOMENTUM_BUFFER].view(-1) for p in parameters if tensor_on_local_rank(p)]
+        return torch.cat(momentum_list) if momentum_list else None
+    else:
+        raise NotImplementedError(f"Optimizer {type(optimizer).__name__} is not supported!")
+
+
 def null_standardizer(value_to_standardize: float, **kwargs) -> float:
     """
     :param value_to_standardize: Value to mock the standardization of.
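A hedged usage sketch for the new helper: the model, the single training step, and the cosine-similarity combination below are illustrative assumptions (presumably close to what the new CosineSimilarityObserverOfGradientAndMomentum does), not code from the package.

```python
import torch
from torch import nn, optim

from libinephany.observations import observation_utils

model = nn.Linear(8, 2)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

model(torch.randn(4, 8)).sum().backward()
optimizer.step()  # populates the "exp_avg" state read by form_momentum_tensor

group = optimizer.param_groups[0]
parameters = group["params"]

momentum = observation_utils.form_momentum_tensor(optimizer, parameters, group)
gradient = torch.cat([p.grad.view(-1) for p in parameters if p.grad is not None])

if momentum is not None:
    # One plausible downstream use: gradient/momentum alignment.
    cosine = torch.nn.functional.cosine_similarity(gradient, momentum, dim=0)
```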
{libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/base_observers.py
RENAMED
@@ -43,15 +43,15 @@ class Observer(ABC):
         standardizer: Standardizer | None,
         observer_config: ObserverConfig,
         should_standardize: bool = True,
-
+        include_statistics: list[str] | None = None,
         **kwargs,
     ) -> None:
         """
         :param standardizer: None or the standardizer to apply to the returned observations.
         :param global_config: ObserverConfig that can be used to inform various observation calculations.
         :param should_standardize: Whether standardization should be applied to returned values.
-        :param
-            fields in the model to
+        :param include_statistics: If the observation uses the TensorStatistic model to return observations, names of the
+            fields in the model to include in returned observations.
         :param kwargs: Miscellaneous keyword arguments.
         """
 
@@ -63,7 +63,10 @@ class Observer(ABC):
         self.standardize = standardizer if standardizer is not None else observation_utils.null_standardizer
         self.should_standardize = should_standardize and self.can_standardize
 
-        self.
+        self.include_statistics: list[str] | None = None
+
+        if include_statistics is not None:
+            self.include_statistics = TensorStatistics.filter_include_statistics(include_statistics=include_statistics)
 
     @final
     @property
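The diff calls `TensorStatistics.filter_include_statistics` without showing its body. A plausible reading, purely an assumption, is that it drops names that are not actual fields of the model; the stand-in class and fields below are hypothetical.

```python
from pydantic import BaseModel


class TensorStatisticsSketch(BaseModel):
    # Hypothetical fields; the real TensorStatistics schema is not in this diff.
    mean: float = 0.0
    std: float = 0.0

    @classmethod
    def filter_include_statistics(cls, include_statistics: list[str]) -> list[str]:
        # Keep only names that correspond to declared model fields.
        return [name for name in include_statistics if name in cls.model_fields]


print(TensorStatisticsSketch.filter_include_statistics(["mean", "typo"]))  # ['mean']
```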
@@ -102,7 +105,10 @@ class Observer(ABC):
         observation_format = self.observation_format
 
         if observation_format is StatisticStorageTypes.TENSOR_STATISTICS:
-
+            if self.include_statistics is None:
+                raise ValueError(f"{self.__class__.__name__} must be provided with include_statistics.")
+
+            return len([field for field in TensorStatistics.model_fields.keys() if field in self.include_statistics])
 
         elif observation_format is StatisticStorageTypes.FLOAT:
             return 1
@@ -231,10 +237,13 @@ class Observer(ABC):
         self._cached_observation = deepcopy(observations)
 
         if self.observation_format is StatisticStorageTypes.TENSOR_STATISTICS:
+            if self.include_statistics is None:
+                raise ValueError(f"{self.__class__.__name__} must be provided with include_statistics.")
+
             if return_dict:
                 observations_dict = observations.as_observation_dict()  # type: ignore
 
-            observations = observations.to_list(
+            observations = observations.to_list(include_statistics=self.include_statistics)  # type: ignore
 
             observations = [observations] if not isinstance(observations, list) else observations  # type: ignore
 
@@ -256,7 +265,7 @@ class Observer(ABC):
     def inform(self) -> float | int | dict[str, float] | None:
         """
         :return: The cached observation. If the observation format is TensorStatistics then it is converted to a
-            dictionary with the statistics specified in
+            dictionary with the statistics specified in include_statistics included.
         """
 
         if not self.can_inform:
@@ -269,7 +278,10 @@ class Observer(ABC):
         )
 
         if self.observation_format is StatisticStorageTypes.TENSOR_STATISTICS:
-
+            if self.include_statistics is None:
+                raise ValueError(f"{self.__class__.__name__} must be provided with include_statistics.")
+
+            observation = self._cached_observation.model_dump(include=set(self.include_statistics))  # type: ignore
 
         else:
             observation = self._cached_observation
{libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/global_observers/__init__.py
RENAMED
@@ -8,7 +8,15 @@
 # ======================================================================================================================
 
 
-from .gradient_observers import
+from .gradient_observers import (
+    CosineSimilarityObserverOfGradientAndMomentum,
+    CosineSimilarityObserverOfGradientAndUpdate,
+    CosineSimilarityOfGradientAndParameter,
+    GlobalFirstOrderGradients,
+    GlobalSecondOrderGradients,
+    LHOPTGradientVarianceFraction,
+    LHOPTMomentumGradientRatio,
+)
 from .hyperparameter_observers import (
     InitialHyperparameters,
     LHOPTHyperparameterRatio,
@@ -31,8 +39,11 @@ from .model_observers import (
     GlobalLAMBTrustRatio,
     GlobalParameters,
     GlobalParameterUpdates,
+    LHOPTAverageParameterUpdateMagnitudeObserver,
+    LHOPTGlobalLAMBTrustRatio,
     LogRatioOfPreviousAndCurrentParamNormEnvStepObserver,
     LogRatioOfUpdateAndPreviousParamNormEnvStepObserver,
+    LogRatioOfUpdateAndPreviousParamNormInnerStepObserver,
     NumberOfLayers,
     NumberOfParameters,
 )
@@ -51,14 +62,17 @@ __all__ = [
     GlobalFirstOrderGradients.__name__,
     GlobalSecondOrderGradients.__name__,
     LHOPTGradientVarianceFraction.__name__,
+    LHOPTMomentumGradientRatio.__name__,
     GlobalActivations.__name__,
     GlobalParameterUpdates.__name__,
     GlobalParameters.__name__,
     GlobalLAMBTrustRatio.__name__,
     NumberOfParameters.__name__,
     NumberOfLayers.__name__,
+    LHOPTAverageParameterUpdateMagnitudeObserver.__name__,
     LogRatioOfPreviousAndCurrentParamNormEnvStepObserver.__name__,
     LogRatioOfUpdateAndPreviousParamNormEnvStepObserver.__name__,
+    LogRatioOfUpdateAndPreviousParamNormInnerStepObserver.__name__,
     TrainingProgress.__name__,
     EpochsCompleted.__name__,
     ProgressAtEachCheckpoint.__name__,
@@ -66,4 +80,8 @@ __all__ = [
     LHOPTValidationLoss.__name__,
     LHOPTLossRatio.__name__,
     PercentileOfLossAtEachCheckpoint.__name__,
+    LHOPTGlobalLAMBTrustRatio.__name__,
+    CosineSimilarityObserverOfGradientAndMomentum.__name__,
+    CosineSimilarityObserverOfGradientAndUpdate.__name__,
+    CosineSimilarityOfGradientAndParameter.__name__,
 ]
{libinephany-0.18.1 → libinephany-1.0.0}/libinephany/observations/observers/global_observers/constants.py
RENAMED
@@ -20,6 +20,7 @@ class LHOPTConstants(TypedDict):
     ZERO_DIVISION_TOLERANCE: float
     DEFAULT_SAMPLE_FREQUENCY: int
     DEFAULT_VARIANCE_THRESHOLD: float
+    DEFAULT_ENV_STEP_SAMPLE_FREQUENCY: int
 
 
 # Create the constants instance
@@ -36,4 +37,5 @@ LHOPT_CONSTANTS: LHOPTConstants = LHOPTConstants(
     ZERO_DIVISION_TOLERANCE=1e-8,
     DEFAULT_SAMPLE_FREQUENCY=4,
     DEFAULT_VARIANCE_THRESHOLD=1e-6,
+    DEFAULT_ENV_STEP_SAMPLE_FREQUENCY=10,
 )
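A sketch of how a sample frequency like this is typically consumed: gate work on every Nth environment step. The loop is illustrative; only LHOPT_CONSTANTS and its new key come from the module above.

```python
from libinephany.observations.observers.global_observers.constants import LHOPT_CONSTANTS

frequency = LHOPT_CONSTANTS["DEFAULT_ENV_STEP_SAMPLE_FREQUENCY"]  # 10

for env_step in range(25):
    if env_step % frequency == 0:
        ...  # sample the observation on steps 0, 10, 20
```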