libinephany 0.16.4__tar.gz → 0.17.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. libinephany-0.17.0/CODE_VERSION.cfg +1 -0
  2. {libinephany-0.16.4/libinephany.egg-info → libinephany-0.17.0}/PKG-INFO +1 -1
  3. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observation_utils.py +2 -0
  4. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observer_pipeline.py +4 -2
  5. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/global_observers/__init__.py +13 -2
  6. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/global_observers/base_classes.py +42 -1
  7. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/global_observers/constants.py +6 -0
  8. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/global_observers/gradient_observers.py +81 -0
  9. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/global_observers/hyperparameter_observers.py +114 -3
  10. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/global_observers/loss_observers.py +4 -4
  11. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/global_observers/model_observers.py +142 -0
  12. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/local_observers.py +88 -0
  13. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/statistic_trackers.py +75 -0
  14. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/states/hyperparameter_states.py +17 -0
  15. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/constants.py +1 -0
  16. {libinephany-0.16.4 → libinephany-0.17.0/libinephany.egg-info}/PKG-INFO +1 -1
  17. libinephany-0.16.4/CODE_VERSION.cfg +0 -1
  18. {libinephany-0.16.4 → libinephany-0.17.0}/LICENSE +0 -0
  19. {libinephany-0.16.4 → libinephany-0.17.0}/MANIFEST.in +0 -0
  20. {libinephany-0.16.4 → libinephany-0.17.0}/README.md +0 -0
  21. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/__init__.py +0 -0
  22. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/aws/__init__.py +0 -0
  23. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/aws/s3_functions.py +0 -0
  24. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/__init__.py +0 -0
  25. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/__init__.py +0 -0
  26. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/base_observers.py +0 -0
  27. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/global_observers/progress_observers.py +0 -0
  28. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/observers/observer_containers.py +0 -0
  29. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/pipeline_coordinator.py +0 -0
  30. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/post_processors/__init__.py +0 -0
  31. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/post_processors/postprocessors.py +0 -0
  32. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/observations/statistic_manager.py +0 -0
  33. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/__init__.py +0 -0
  34. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/configs/__init__.py +0 -0
  35. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/configs/hyperparameter_configs.py +0 -0
  36. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/configs/observer_config.py +0 -0
  37. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/configs/outer_model_config.py +0 -0
  38. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/schemas/__init__.py +0 -0
  39. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/schemas/agent_info.py +0 -0
  40. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/schemas/inner_task_profile.py +0 -0
  41. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/schemas/observation_models.py +0 -0
  42. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/schemas/request_schemas.py +0 -0
  43. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/schemas/response_schemas.py +0 -0
  44. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/schemas/tensor_statistics.py +0 -0
  45. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/pydantic_models/states/__init__.py +0 -0
  46. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/__init__.py +0 -0
  47. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/agent_utils.py +0 -0
  48. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/asyncio_worker.py +0 -0
  49. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/backend_statuses.py +0 -0
  50. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/directory_utils.py +0 -0
  51. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/dropout_utils.py +0 -0
  52. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/enums.py +0 -0
  53. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/error_severities.py +0 -0
  54. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/exceptions.py +0 -0
  55. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/import_utils.py +0 -0
  56. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/optim_utils.py +0 -0
  57. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/random_seeds.py +0 -0
  58. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/samplers.py +0 -0
  59. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/standardizers.py +0 -0
  60. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/torch_distributed_utils.py +0 -0
  61. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/torch_utils.py +0 -0
  62. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/transforms.py +0 -0
  63. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/utils/typing.py +0 -0
  64. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/web_apps/__init__.py +0 -0
  65. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/web_apps/error_logger.py +0 -0
  66. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany/web_apps/web_app_utils.py +0 -0
  67. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany.egg-info/SOURCES.txt +0 -0
  68. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany.egg-info/dependency_links.txt +0 -0
  69. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany.egg-info/requires.txt +0 -0
  70. {libinephany-0.16.4 → libinephany-0.17.0}/libinephany.egg-info/top_level.txt +0 -0
  71. {libinephany-0.16.4 → libinephany-0.17.0}/pyproject.toml +0 -0
  72. {libinephany-0.16.4 → libinephany-0.17.0}/setup.cfg +0 -0
@@ -0,0 +1 @@
+ 0.17.0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: libinephany
- Version: 0.16.4
+ Version: 0.17.0
  Summary: Inephany library containing code commonly used by multiple subpackages.
  Author-email: Inephany <info@inephany.com>
  License: Apache 2.0
@@ -64,8 +64,10 @@ def get_exponential_weighted_average(values: list[int | float]) -> float:
      :param values: List of values to average via EWA.
      :return: EWA of the given values.
      """
+
      exp_weighted_average = pd.Series(values).ewm(alpha=0.1).mean().iloc[-1]
      assert isinstance(exp_weighted_average, float)
+
      return exp_weighted_average
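Note on the function touched above: with pandas' default adjust=True, ewm(alpha=0.1).mean() is a normalized weighted mean whose weights decay by a factor of 0.9 per step. A minimal standalone check:

import pandas as pd

# Recent values dominate, but alpha=0.1 gives the average a long memory.
values = [1.0, 2.0, 3.0, 4.0]
ewa = pd.Series(values).ewm(alpha=0.1).mean().iloc[-1]
print(round(ewa, 2))  # 2.63 -- pulled toward the latest value (4.0) but smoothed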
@@ -226,8 +226,10 @@ class ObserverPipeline:
          names to floats or TensorStatistic models.
          :param actions_taken: Dictionary mapping agent IDs to actions taken by that agent.
          :param return_dict: Whether to return a dictionary of observations as well as the normal vector.
-         :return: Tuple of a dictionary mapping agent ID to that agent's completed observation vector, a boolean
-             indicating whether an observation clip occurred and a dictionary of observations.
+         :return: Tuple of:
+             - A dictionary mapping agent ID to that agent's completed observation vector,
+             - A boolean indicating whether an observation clip occurred,
+             - A dictionary mapping agent ID to a dictionary mapping observer name to that observer's observation vector.
          """

          global_obs, global_obs_dict = self.global_observers.observe(
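The enclosing method's name and full signature are not shown in this hunk, so the following is only a toy stand-in illustrating the documented three-part return contract:

# Toy stub; only the docstring above is authoritative for the real method.
def observe_stub() -> tuple[dict[str, list[float]], bool, dict[str, dict[str, list[float]]]]:
    vectors = {"agent_0": [0.1, 0.2]}
    clipped = False
    per_observer = {"agent_0": {"TrainingLoss": [0.1], "GlobalParameters": [0.2]}}
    return vectors, clipped, per_observer

vectors, clipped, per_observer = observe_stub()
breakdown = per_observer["agent_0"]  # observer name -> that observer's observation vector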
@@ -8,8 +8,13 @@
  # ======================================================================================================================


- from .gradient_observers import GlobalFirstOrderGradients, GlobalSecondOrderGradients
- from .hyperparameter_observers import InitialHyperparameters, ModelFamilyOneHot, OptimizerTypeOneHot
+ from .gradient_observers import GlobalFirstOrderGradients, GlobalSecondOrderGradients, LHOPTGradientVarianceFraction
+ from .hyperparameter_observers import (
+     InitialHyperparameters,
+     LHOPTHyperparameterRatio,
+     ModelFamilyOneHot,
+     OptimizerTypeOneHot,
+ )
  from .loss_observers import (
      LHOPTLossRatio,
      LHOPTTrainingLoss,
@@ -26,6 +31,8 @@ from .model_observers import (
      GlobalLAMBTrustRatio,
      GlobalParameters,
      GlobalParameterUpdates,
+     LogRatioOfPreviousAndCurrentParamNormEnvStepObserver,
+     LogRatioOfUpdateAndPreviousParamNormEnvStepObserver,
      NumberOfLayers,
      NumberOfParameters,
  )
@@ -33,6 +40,7 @@ from .progress_observers import EpochsCompleted, ProgressAtEachCheckpoint, Train

  __all__ = [
      InitialHyperparameters.__name__,
+     LHOPTHyperparameterRatio.__name__,
      OptimizerTypeOneHot.__name__,
      ModelFamilyOneHot.__name__,
      TrainingLoss.__name__,
@@ -42,12 +50,15 @@ __all__ = [
      ValidationScore.__name__,
      GlobalFirstOrderGradients.__name__,
      GlobalSecondOrderGradients.__name__,
+     LHOPTGradientVarianceFraction.__name__,
      GlobalActivations.__name__,
      GlobalParameterUpdates.__name__,
      GlobalParameters.__name__,
      GlobalLAMBTrustRatio.__name__,
      NumberOfParameters.__name__,
      NumberOfLayers.__name__,
+     LogRatioOfPreviousAndCurrentParamNormEnvStepObserver.__name__,
+     LogRatioOfUpdateAndPreviousParamNormEnvStepObserver.__name__,
      TrainingProgress.__name__,
      EpochsCompleted.__name__,
      ProgressAtEachCheckpoint.__name__,
@@ -4,6 +4,7 @@
  #
  # ======================================================================================================================

+ import math
  from abc import ABC, abstractmethod
  from typing import Any

@@ -15,7 +16,7 @@ from libinephany.pydantic_models.schemas.tensor_statistics import TensorStatisti
  from libinephany.pydantic_models.states.hyperparameter_states import HyperparameterStates


- class LHOPTOuterStepBaseObserver(GlobalObserver, ABC):
+ class LHOPTBaseObserver(GlobalObserver, ABC):
      """
      Base class for LHOPT outer step observers to eliminate duplicate code.
      """
@@ -95,6 +96,26 @@ class LHOPTOuterStepBaseObserver(GlobalObserver, ABC):
          """
          raise NotImplementedError

+     def _compute_log_ratio(self, numerator: float, denominator: float) -> float:
+         """
+         Compute the log ratio.
+
+         :param numerator: Numerator value
+         :param denominator: Denominator value
+         :return: Log ratio value
+         """
+         # Calculate the ratio of numerator to denominator
+
+         if denominator <= LHOPT_CONSTANTS["ZERO_DIVISION_TOLERANCE"]:
+             return 0.0
+
+         ratio = numerator / denominator
+
+         if ratio <= 0:
+             return 0.0
+
+         return math.log(ratio)
+

  class LHOPTCheckpointBaseObserver(GlobalObserver, ABC):
      """
@@ -181,3 +202,23 @@ class LHOPTCheckpointBaseObserver(GlobalObserver, ABC):
          :param action_taken: Action taken by the agent this class instance is assigned to.
          """
          raise NotImplementedError
+
+     def _compute_log_ratio(self, numerator: float, denominator: float) -> float:
+         """
+         Compute the log ratio.
+
+         :param numerator: Numerator value
+         :param denominator: Denominator value
+         :return: Log ratio value
+         """
+         # Calculate the ratio of numerator to denominator
+
+         if denominator <= LHOPT_CONSTANTS["ZERO_DIVISION_TOLERANCE"]:
+             return 0.0
+
+         ratio = numerator / denominator
+
+         if ratio <= 0:
+             return 0.0
+
+         return math.log(ratio)
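The guarded log ratio added to both base classes above can be checked in isolation; a minimal sketch with the tolerance inlined (ZERO_DIVISION_TOLERANCE is 1e-8 per the constants hunk below):

import math

def compute_log_ratio(numerator: float, denominator: float, tol: float = 1e-8) -> float:
    if denominator <= tol:   # avoid dividing by a (near-)zero denominator
        return 0.0
    ratio = numerator / denominator
    if ratio <= 0:           # log is undefined for non-positive ratios
        return 0.0
    return math.log(ratio)

assert compute_log_ratio(2.0, 1.0) == math.log(2.0)
assert compute_log_ratio(1.0, 0.0) == 0.0   # guarded: zero denominator
assert compute_log_ratio(-1.0, 1.0) == 0.0  # guarded: non-positive ratio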
@@ -17,6 +17,9 @@ class LHOPTConstants(TypedDict):
      DEFAULT_TIME_WINDOW: int
      DEFAULT_CHECKPOINT_INTERVAL: int
      DEFAULT_PERCENTILE: float
+     ZERO_DIVISION_TOLERANCE: float
+     DEFAULT_SAMPLE_FREQUENCY: int
+     DEFAULT_VARIANCE_THRESHOLD: float


  # Create the constants instance
@@ -30,4 +33,7 @@ LHOPT_CONSTANTS: LHOPTConstants = LHOPTConstants(
      DEFAULT_TIME_WINDOW=32,
      DEFAULT_CHECKPOINT_INTERVAL=100,
      DEFAULT_PERCENTILE=0.6,
+     ZERO_DIVISION_TOLERANCE=1e-8,
+     DEFAULT_SAMPLE_FREQUENCY=4,
+     DEFAULT_VARIANCE_THRESHOLD=1e-6,
  )
@@ -9,6 +9,8 @@ from typing import Any
  from libinephany.observations import observation_utils, statistic_trackers
  from libinephany.observations.observation_utils import StatisticStorageTypes
  from libinephany.observations.observers.base_observers import GlobalObserver
+ from libinephany.observations.observers.global_observers.base_classes import LHOPTBaseObserver
+ from libinephany.observations.observers.global_observers.constants import LHOPT_CONSTANTS
  from libinephany.pydantic_models.schemas.observation_models import ObservationInputs
  from libinephany.pydantic_models.schemas.tensor_statistics import TensorStatistics
  from libinephany.pydantic_models.states.hyperparameter_states import HyperparameterStates
@@ -110,3 +112,82 @@ class GlobalSecondOrderGradients(GlobalObserver):
              skip_statistics=self.skip_statistics, compute_hessian_diagonal=self.compute_hessian_diagonal
          )
      }
+
+
+ class LHOPTGradientVarianceFraction(LHOPTBaseObserver):
+     """
+     This is a global observer from the OpenAI paper "Learning to Optimize with Reinforcement Learning"
+     https://arxiv.org/abs/2305.18291.
+
+     It returns two-dimensional observations: [raw_value, cdf_feature] for gradient variance fraction values.
+     """
+
+     def __init__(
+         self,
+         *,
+         variance_threshold: float = LHOPT_CONSTANTS["DEFAULT_VARIANCE_THRESHOLD"],
+         **kwargs,
+     ) -> None:
+         """
+         :param variance_threshold: Threshold for variance comparison in gradient variance fraction calculation.
+         :param kwargs: Other observation keyword arguments.
+         """
+         super().__init__(**kwargs)
+         self.variance_threshold = variance_threshold
+
+     @property
+     def can_standardize(self) -> bool:
+         """
+         This observer has its own CDF calculation, so there is no need to standardize.
+
+         :return: Whether the observation can be standardized.
+         """
+         return False
+
+     def _get_observation_format(self) -> StatisticStorageTypes:
+         """
+         :return: Format the observation returns data in. Must be one of the enum attributes in the
+             StatisticStorageTypes enumeration class.
+         """
+         return StatisticStorageTypes.VECTOR
+
+     @property
+     def vector_length(self) -> int:
+         """
+         :return: Length of the vector returned by this observation if it returns a vector.
+         """
+         return 2  # [raw_value, cdf_feature]
+
+     def _observe(
+         self,
+         observation_inputs: ObservationInputs,
+         hyperparameter_states: HyperparameterStates,
+         tracked_statistics: dict[str, dict[str, float | TensorStatistics]],
+         action_taken: float | int | None,
+     ) -> float | int | list[int | float] | TensorStatistics:
+         """
+         :param observation_inputs: Observation input metrics not calculated with statistic trackers.
+         :param hyperparameter_states: HyperparameterStates that manages the hyperparameters.
+         :param tracked_statistics: Dictionary mapping statistic tracker class names to dictionaries mapping module
+             names to floats or TensorStatistic models.
+         :param action_taken: Action taken by the agent this class instance is assigned to.
+         :return: Single float/int, list of floats/ints or TensorStatistics model to add to the observation vector.
+         """
+         if statistic_trackers.GradientVarianceFraction.__name__ not in tracked_statistics:
+             return [0.0, 0.0]
+
+         raw_value = list(tracked_statistics[statistic_trackers.GradientVarianceFraction.__name__].values())[0]  # type: ignore[list-item]
+
+         cdf_feature = self._compute_cdf_feature(raw_value)  # type: ignore[arg-type]
+         self._update_time()
+
+         return [raw_value, cdf_feature]  # type: ignore[list-item]
+
+     def get_required_trackers(self) -> dict[str, dict[str, Any] | None]:
+         """
+         :return: Dictionary mapping statistic tracker class names to kwargs for the class or None if no kwargs are
+             needed.
+         """
+
+         return {
+             statistic_trackers.GradientVarianceFraction.__name__: dict(variance_threshold=self.variance_threshold),
+         }
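The _compute_cdf_feature used above is presumably inherited from the LHOPT base class; its body predates this diff and is not shown here. Purely as an illustration of the idea (ranking a new value against its recent history), a stand-in using a normal CDF over a running mean and standard deviation:

import math

# Illustrative stand-in only; the library's actual CDF feature may differ.
class RunningCDF:
    def __init__(self) -> None:
        self.history: list[float] = []

    def __call__(self, value: float) -> float:
        self.history.append(value)
        n = len(self.history)
        mean = sum(self.history) / n
        var = sum((x - mean) ** 2 for x in self.history) / max(n - 1, 1)
        std = math.sqrt(var) or 1.0  # fall back to 1.0 when history is degenerate
        return 0.5 * (1.0 + math.erf((value - mean) / (std * math.sqrt(2.0))))

cdf = RunningCDF()
print(cdf(0.2), cdf(0.4), cdf(0.9))  # each value is ranked against the history so far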
@@ -12,6 +12,7 @@ from torch.optim import SGD, Adam, AdamW
  from libinephany.observations import observation_utils
  from libinephany.observations.observation_utils import StatisticStorageTypes
  from libinephany.observations.observers.base_observers import GlobalObserver
+ from libinephany.observations.observers.global_observers.base_classes import LHOPT_CONSTANTS
  from libinephany.pydantic_models.schemas.observation_models import ObservationInputs
  from libinephany.pydantic_models.schemas.tensor_statistics import TensorStatistics
  from libinephany.pydantic_models.states.hyperparameter_states import HyperparameterStates
@@ -42,9 +43,7 @@ class InitialHyperparameters(GlobalObserver):

          available_hparams = HyperparameterStates.get_all_hyperparameters()

-         return len(
-             [hparam for hparam in available_hparams if not any(skipped in hparam for skipped in self.skip_hparams)]
-         )
+         return len([hparam for hparam in available_hparams if hparam not in self.skip_hparams])

      @property
      def can_standardize(self) -> bool:
@@ -284,3 +283,115 @@ class ModelFamilyOneHot(GlobalObserver):
          """

          self._sample_zero_vector()
+
+
+ class LHOPTHyperparameterRatio(GlobalObserver):
+     """
+     LHOPT-specific hyperparameter ratio observer that returns the ratio of current value to initial value
+     for all hyperparameter actions.
+
+     This observer computes current_value / initial_value for each hyperparameter, providing insight into
+     how much hyperparameters have changed from their starting values.
+     """
+
+     def __init__(self, skip_hparams: list[str] | None = None, pad_with: float = 0.0, **kwargs) -> None:
+         """
+         :param skip_hparams: Names of the hyperparameters to not include in the ratio vector returned by
+             this observation.
+         :param pad_with: Value used in place of hyperparameter values that are None.
+         :param kwargs: Miscellaneous keyword arguments.
+         """
+
+         super().__init__(**kwargs)
+
+         force_skip = ["samples", "gradient_accumulation"]
+         self.skip_hparams = force_skip if skip_hparams is None else skip_hparams + force_skip
+         self.pad_with = pad_with
+
+     @property
+     def vector_length(self) -> int:
+         """
+         :return: Length of the vector returned by this observation if it returns a vector.
+         """
+
+         available_hparams = HyperparameterStates.get_all_hyperparameters()
+
+         return len([hparam for hparam in available_hparams if hparam not in self.skip_hparams])
+
+     @property
+     def can_standardize(self) -> bool:
+         """
+         :return: Whether the observation can be standardized.
+         """
+
+         return False
+
+     @property
+     def can_inform(self) -> bool:
+         """
+         :return: Whether observations from the observer can be used in the agent info dictionary.
+         """
+
+         return False
+
+     def _get_observation_format(self) -> StatisticStorageTypes:
+         """
+         :return: Format the observation returns data in. Must be one of the enum attributes in the
+             StatisticStorageTypes enumeration class.
+         """
+
+         return StatisticStorageTypes.VECTOR
+
+     def _observe(
+         self,
+         observation_inputs: ObservationInputs,
+         hyperparameter_states: HyperparameterStates,
+         tracked_statistics: dict[str, dict[str, float | TensorStatistics]],
+         action_taken: float | int | None,
+     ) -> float | int | list[int | float] | TensorStatistics:
+         """
+         :param observation_inputs: Observation input metrics not calculated with statistic trackers.
+         :param hyperparameter_states: HyperparameterStates that manages the hyperparameters.
+         :param tracked_statistics: Dictionary mapping statistic tracker class names to dictionaries mapping module
+             names to floats or TensorStatistic models.
+         :param action_taken: Action taken by the agent this class instance is assigned to.
+         :return: Single float/int, list of floats/ints or TensorStatistics model to add to the observation vector.
+         """
+
+         # Get initial and current hyperparameter values.
+         initial_values = hyperparameter_states.get_initial_internal_values(self.skip_hparams)
+         initial_values = {
+             hparam_name: self.pad_with if initial_value is None else initial_value
+             for hparam_name, initial_value in initial_values.items()
+             if hparam_name not in self.skip_hparams
+         }
+         current_values = hyperparameter_states.get_current_internal_values(self.skip_hparams)
+         current_values = {
+             hparam_name: self.pad_with if current_value is None else current_value
+             for hparam_name, current_value in current_values.items()
+             if hparam_name not in self.skip_hparams
+         }
+
+         ratios = []
+
+         for hparam_name in initial_values.keys():
+             initial_value = initial_values[hparam_name]
+             current_value = current_values[hparam_name]
+
+             if initial_value is None or current_value is None:
+                 ratios.append(0.0)
+                 continue
+
+             if abs(initial_value) < LHOPT_CONSTANTS["ZERO_DIVISION_TOLERANCE"]:
+                 ratios.append(0.0)
+             else:
+                 ratios.append(current_value / initial_value)
+
+         return ratios
+
+     def get_required_trackers(self) -> dict[str, dict[str, Any] | None]:
+         """
+         :return: Dictionary mapping statistic tracker class names to kwargs for the class or None if no kwargs are
+             needed.
+         """
+
+         return {}
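A minimal sketch of the ratio rule used by LHOPTHyperparameterRatio._observe above, with plain dicts standing in for the hyperparameter state models (names here are illustrative):

TOL = 1e-8  # LHOPT_CONSTANTS["ZERO_DIVISION_TOLERANCE"]

initial = {"learning_rate": 1e-3, "weight_decay": 0.0}
current = {"learning_rate": 5e-4, "weight_decay": 0.01}

ratios = []
for name, init_value in initial.items():
    curr_value = current[name]
    if abs(init_value) < TOL:           # a zero initial value yields 0.0, not inf
        ratios.append(0.0)
    else:
        ratios.append(curr_value / init_value)

print(ratios)  # [0.5, 0.0] -> LR halved; weight-decay ratio suppressed by the zero guard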
@@ -10,8 +10,8 @@ from typing import Any
  from libinephany.observations.observation_utils import StatisticStorageTypes
  from libinephany.observations.observers.base_observers import GlobalObserver
  from libinephany.observations.observers.global_observers.base_classes import (
+     LHOPTBaseObserver,
      LHOPTCheckpointBaseObserver,
-     LHOPTOuterStepBaseObserver,
  )
  from libinephany.observations.observers.global_observers.constants import LHOPT_CONSTANTS
  from libinephany.pydantic_models.schemas.observation_models import ObservationInputs
@@ -229,7 +229,7 @@ class ValidationScore(GlobalObserver):
          return {}


- class LHOPTTrainingLoss(LHOPTOuterStepBaseObserver):
+ class LHOPTTrainingLoss(LHOPTBaseObserver):
      """
      This is a global observer from the OpenAI paper "Learning to Optimize with Reinforcement Learning"
      https://arxiv.org/abs/2305.18291.
@@ -284,7 +284,7 @@ class LHOPTTrainingLoss(LHOPTOuterStepBaseObserver):
          return 3  # [is_nan, is_inf, cdf_feature]


- class LHOPTValidationLoss(LHOPTOuterStepBaseObserver):
+ class LHOPTValidationLoss(LHOPTBaseObserver):
      """
      This is a global observer from the OpenAI paper "Learning to Optimize with Reinforcement Learning"
      https://arxiv.org/abs/2305.18291.
@@ -339,7 +339,7 @@ class LHOPTValidationLoss(LHOPTOuterStepBaseObserver):
          return 3  # [is_nan, is_inf, cdf_feature]


- class LHOPTLossRatio(LHOPTOuterStepBaseObserver):
+ class LHOPTLossRatio(LHOPTBaseObserver):
      """
      This is a global observer from the OpenAI paper "Learning to Optimize with Reinforcement Learning"
      https://arxiv.org/abs/2305.18291.
@@ -10,6 +10,8 @@ from typing import Any
  from libinephany.observations import observation_utils, statistic_trackers
  from libinephany.observations.observation_utils import StatisticStorageTypes
  from libinephany.observations.observers.base_observers import GlobalObserver
+ from libinephany.observations.observers.global_observers.base_classes import LHOPTBaseObserver
+ from libinephany.observations.observers.global_observers.constants import LHOPT_CONSTANTS
  from libinephany.pydantic_models.schemas.observation_models import ObservationInputs
  from libinephany.pydantic_models.schemas.tensor_statistics import TensorStatistics
  from libinephany.pydantic_models.states.hyperparameter_states import HyperparameterStates
@@ -325,3 +327,143 @@ class NumberOfLayers(GlobalObserver):
          """

          return {statistic_trackers.NumberOfLayers.__name__: dict(trainable_only=self.trainable_only)}
+
+
+ class LogRatioOfPreviousAndCurrentParamNormEnvStepObserver(LHOPTBaseObserver):
+
+     def __init__(self, **kwargs):
+         super().__init__(**kwargs)
+         self._previous_param_norm = None
+
+     @property
+     def vector_length(self) -> int:
+         """
+         :return: Length of the vector returned by this observation if it returns a vector.
+         """
+         return 2  # [tanh_feature, cdf_feature]
+
+     def _observe(
+         self,
+         observation_inputs: ObservationInputs,
+         hyperparameter_states: HyperparameterStates,
+         tracked_statistics: dict[str, dict[str, float | TensorStatistics]],
+         action_taken: float | int | None,
+     ) -> float | int | list[int | float] | TensorStatistics:
+         """
+         :param observation_inputs: Observation input metrics not calculated with statistic trackers.
+         :param hyperparameter_states: HyperparameterStates that manages the hyperparameters.
+         :param tracked_statistics: Dictionary mapping statistic tracker class names to dictionaries mapping module
+             names to floats or TensorStatistic models.
+         :param action_taken: Action taken by the agent this class instance is assigned to.
+         :return: Single float/int, list of floats/ints or TensorStatistics model to add to the observation vector.
+         """
+
+         statistics = tracked_statistics[statistic_trackers.ParameterStatistics.__name__]
+
+         current_param_norm = observation_utils.average_tensor_statistics(
+             tensor_statistics=[stats for stats in statistics.values() if isinstance(stats, TensorStatistics)]
+         ).norm_
+
+         if self._previous_param_norm is None:
+             self._previous_param_norm = current_param_norm
+             self._compute_cdf_feature(0.0)  # Default value since we cannot compute a log ratio yet.
+             self._update_time()
+             return [0.0, 0.0]
+
+         log_ratio = self._compute_log_ratio(current_param_norm, self._previous_param_norm)
+         tanh_feature = math.tanh(max(-LHOPT_CONSTANTS["TANH_BOUND"], min(LHOPT_CONSTANTS["TANH_BOUND"], log_ratio)))
+         cdf_feature = self._compute_cdf_feature(log_ratio)
+         self._update_time()
+         self._previous_param_norm = current_param_norm
+
+         return [tanh_feature, cdf_feature]
+
+     def get_required_trackers(self) -> dict[str, dict[str, Any] | None]:
+         """
+         :return: Dictionary mapping statistic tracker class names to kwargs for the class or None if no kwargs are
+             needed.
+         """
+
+         return {
+             statistic_trackers.ParameterStatistics.__name__: dict(skip_statistics=self.skip_statistics),
+         }
+
+     def reset(self) -> None:
+         """
+         Reset the observer by clearing the previous parameter norm and time series.
+         """
+
+         super().reset()
+         self._previous_param_norm = None
+
+
+ class LogRatioOfUpdateAndPreviousParamNormEnvStepObserver(LHOPTBaseObserver):
+
+     def __init__(self, **kwargs):
+         super().__init__(**kwargs)
+         self._previous_param_norm = None
+
+     @property
+     def vector_length(self) -> int:
+         """
+         :return: Length of the vector returned by this observation if it returns a vector.
+         """
+         return 2  # [tanh_feature, cdf_feature]
+
+     def _observe(
+         self,
+         observation_inputs: ObservationInputs,
+         hyperparameter_states: HyperparameterStates,
+         tracked_statistics: dict[str, dict[str, float | TensorStatistics]],
+         action_taken: float | int | None,
+     ) -> float | int | list[int | float] | TensorStatistics:
+         """
+         :param observation_inputs: Observation input metrics not calculated with statistic trackers.
+         :param hyperparameter_states: HyperparameterStates that manages the hyperparameters.
+         :param tracked_statistics: Dictionary mapping statistic tracker class names to dictionaries mapping module
+             names to floats or TensorStatistics models.
+         :param action_taken: Action taken by the agent this class instance is assigned to.
+         :return: List containing [tanh_feature, cdf_feature].
+         """
+
+         update_statistics = tracked_statistics[statistic_trackers.ParameterUpdateStatistics.__name__]
+         param_statistics = tracked_statistics[statistic_trackers.ParameterStatistics.__name__]
+
+         update_norm = observation_utils.average_tensor_statistics(
+             tensor_statistics=[stats for stats in update_statistics.values() if isinstance(stats, TensorStatistics)]
+         ).norm_
+
+         current_param_norm = observation_utils.average_tensor_statistics(
+             tensor_statistics=[stats for stats in param_statistics.values() if isinstance(stats, TensorStatistics)]
+         ).norm_
+
+         if self._previous_param_norm is None:
+             self._previous_param_norm = current_param_norm
+             self._compute_cdf_feature(0.0)  # Default value since we cannot compute a log ratio yet.
+             self._update_time()
+             return [0.0, 0.0]
+
+         log_ratio = self._compute_log_ratio(update_norm, self._previous_param_norm)
+         tanh_feature = math.tanh(max(-LHOPT_CONSTANTS["TANH_BOUND"], min(LHOPT_CONSTANTS["TANH_BOUND"], log_ratio)))
+         cdf_feature = self._compute_cdf_feature(log_ratio)
+         self._update_time()
+         self._previous_param_norm = current_param_norm
+
+         return [tanh_feature, cdf_feature]
+
+     def get_required_trackers(self) -> dict[str, dict[str, Any] | None]:
+         """
+         :return: Dictionary mapping statistic tracker class names to kwargs for the class or None if no kwargs are
+             needed.
+         """
+
+         return {
+             statistic_trackers.ParameterUpdateStatistics.__name__: dict(skip_statistics=self.skip_statistics),
+             statistic_trackers.ParameterStatistics.__name__: dict(skip_statistics=self.skip_statistics),
+         }
+
+     def reset(self) -> None:
+         """
+         Reset the observer by clearing the previous parameter norm and time series.
+         """
+
+         super().reset()
+         self._previous_param_norm = None
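A sketch of the tanh feature computed by both observers above. TANH_BOUND is referenced from LHOPT_CONSTANTS but its value is not shown in this diff, so 10.0 below is an illustrative placeholder:

import math

TANH_BOUND = 10.0  # placeholder; the library's actual bound is defined elsewhere

def tanh_feature(log_ratio: float) -> float:
    clamped = max(-TANH_BOUND, min(TANH_BOUND, log_ratio))  # bound before squashing
    return math.tanh(clamped)

# A shrinking parameter norm gives a negative log ratio and a feature in (-1, 0).
print(tanh_feature(math.log(0.9 / 1.0)))  # ~ -0.105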
@@ -469,6 +469,94 @@ class ActionSchemeOneHot(LocalObserver):
          return {}


+ class PreviousActionRepresentation(LocalObserver):
+     """
+     Observer that returns the representation of the previous action taken by the agent.
+
+     This observer tracks the previous action and returns it in an appropriate format:
+     - For discrete actions: returns one-hot encoding of the previous action
+     - For continuous actions: returns the previous action value directly
+     """
+
+     DISCRETE_INDEX = 0
+
+     def __init__(self, **kwargs) -> None:
+         super().__init__(**kwargs)
+         self._previous_action: float | int | None = None
+
+     @property
+     def vector_length(self) -> int:
+         """
+         :return: Length of the vector returned by this observation if it returns a vector.
+         """
+         return self.number_of_discrete_actions if self.is_discrete else 1
+
+     @property
+     def is_discrete(self) -> bool:
+         """
+         :return: Whether the agent is using discrete actions.
+         """
+
+         valid_actions = self.number_of_discrete_actions is not None and self.number_of_discrete_actions > 0
+         return self.action_scheme_index == self.DISCRETE_INDEX and valid_actions
+
+     @property
+     def can_inform(self) -> bool:
+         """
+         :return: Whether observations from the observer can be used in the agent info dictionary.
+         """
+         return False
+
+     def _get_observation_format(self) -> StatisticStorageTypes:
+         """
+         :return: Format the observation returns data in.
+         """
+         return StatisticStorageTypes.VECTOR
+
+     def _observe(
+         self,
+         observation_inputs: ObservationInputs,
+         hyperparameter_states: HyperparameterStates,
+         tracked_statistics: dict[str, dict[str, float | TensorStatistics]],
+         action_taken: float | int | None,
+     ) -> float | int | list[int | float] | TensorStatistics:
+         """
+         Returns the representation of the previous action.
+
+         :param observation_inputs: Observation input metrics not calculated with statistic trackers.
+         :param hyperparameter_states: HyperparameterStates that manages the hyperparameters.
+         :param tracked_statistics: Dictionary mapping statistic tracker class names to dictionaries mapping module
+             names to floats or TensorStatistic models.
+         :param action_taken: Action taken by the agent this class instance is assigned to.
+         :return: Previous action representation (one-hot vector for discrete, float for continuous).
+         """
+
+         if self._previous_action is None:
+             result = [0.0] * self.vector_length
+         else:
+             if self.is_discrete:
+                 result = observation_utils.create_one_hot_observation(
+                     vector_length=self.vector_length, one_hot_index=int(self._previous_action)
+                 )
+             else:
+                 result = [float(self._previous_action)]
+
+         self._previous_action = action_taken
+
+         return result
+
+     def get_required_trackers(self) -> dict[str, dict[str, Any] | None]:
+         """
+         :return: Dictionary mapping statistic tracker class names to kwargs for the class or None if no kwargs are
+             needed.
+         """
+         return {}
+
+     def reset(self) -> None:
+         """Resets the observer by clearing the previous action."""
+         self._previous_action = None
+
+
  class DepthOneHot(LocalObserver):

      @property
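A sketch of the encoding branch in PreviousActionRepresentation._observe, with a hypothetical helper mirroring what observation_utils.create_one_hot_observation appears to do:

# Hypothetical helper; the library's own implementation is not shown in this diff.
def one_hot(vector_length: int, one_hot_index: int) -> list[float]:
    vector = [0.0] * vector_length
    vector[one_hot_index] = 1.0
    return vector

# Discrete scheme with 4 actions: previous action 2 becomes a one-hot vector.
print(one_hot(4, 2))  # [0.0, 0.0, 1.0, 0.0]
# Continuous scheme: the previous action value is passed through as [float(action)].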
@@ -16,6 +16,7 @@ from torch.distributed import ReduceOp

  from libinephany.observations import observation_utils
  from libinephany.observations.observation_utils import StatisticStorageTypes
+ from libinephany.observations.observers.global_observers.constants import LHOPT_CONSTANTS
  from libinephany.pydantic_models.schemas.tensor_statistics import TensorStatistics
  from libinephany.utils import torch_distributed_utils
  from libinephany.utils.constants import PARAMS, SCHEDULER_GROUP_NAME
@@ -883,3 +884,77 @@ class NumberOfLayers(Statistic):
          self._count = count

          return count
+
+
+ class GradientVarianceFraction(Statistic):
+
+     def __init__(
+         self,
+         *,
+         variance_threshold: float = LHOPT_CONSTANTS["DEFAULT_VARIANCE_THRESHOLD"],
+         **kwargs,
+     ) -> None:
+         """
+         :param variance_threshold: Threshold for variance comparison in gradient variance fraction calculation.
+         :param kwargs: Other observation keyword arguments.
+         """
+
+         super().__init__(**kwargs)
+         self.variance_threshold = variance_threshold
+
+     def _get_storage_format(self) -> StatisticStorageTypes:
+         """
+         :return: Storage format this observation stores data in. Must be one of the enum attributes in the
+             StatisticStorageTypes enumeration class.
+         """
+
+         return StatisticStorageTypes.FLOAT
+
+     def _gather(
+         self,
+         *,
+         optimizer: optim.Optimizer,
+         model: nn.Module,
+         parameters: list[torch.Tensor],
+         parameter_group: dict[str, Any],
+     ) -> torch.Tensor | TensorStatistics | float | None:
+         """
+         :param optimizer: Optimizer the given parameters and parameter group came from.
+         :param model: Inner model to gather statistics from.
+         :param parameters: List of parameters to gather statistics from.
+         :param parameter_group: Parameter group the parameters originate from.
+         :return: TensorStatistics model or a float.
+         """
+
+         gradients = [p.grad for p in parameters if observation_utils.tensor_on_local_rank(p) and p.grad is not None]
+
+         if not gradients:
+             return 0.0  # Return 0.0 instead of None when there are no gradients.
+
+         # Calculate the variance fraction.
+         return self._calculate_variance_fraction(gradients)
+
+     def _calculate_variance_fraction(self, gradients: list[torch.Tensor]) -> float:
+         """
+         Calculate the fraction of parameters whose gradient tensor satisfies sqrt(variance) >= threshold.
+
+         :param gradients: List of gradient tensors.
+         :return: Fraction of parameters with high variance (0.0 to 1.0).
+         """
+         total_parameters = 0
+         variance_parameters = 0
+
+         for grad in gradients:
+             parameter_count = grad.numel()
+             total_parameters += parameter_count
+
+             variance = grad.var().item()
+
+             if math.sqrt(variance) >= self.variance_threshold:
+                 variance_parameters += parameter_count
+
+         if total_parameters == 0:
+             return 0.0
+
+         return variance_parameters / total_parameters
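A standalone sketch of _calculate_variance_fraction: per gradient tensor, the variance is computed once, and every element of a tensor whose standard deviation meets the threshold counts toward the high-variance fraction:

import math
import torch

def variance_fraction(gradients: list[torch.Tensor], threshold: float = 1e-6) -> float:
    total = 0
    above = 0
    for grad in gradients:
        total += grad.numel()
        if math.sqrt(grad.var().item()) >= threshold:
            above += grad.numel()  # all elements of this tensor count as high-variance
    return above / total if total else 0.0

grads = [torch.zeros(10), torch.randn(30)]  # the first tensor has zero variance
print(variance_fraction(grads))             # 30 / 40 = 0.75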
@@ -855,3 +855,20 @@ class HyperparameterStates(BaseModel):
          }

          return initial_internal_values
+
+     def get_current_internal_values(self, skip_hparams: list[str] | None = None) -> dict[str, float | int | None]:
+         """
+         :param skip_hparams: Hyperparameters to ignore while retrieving current values.
+         :return: Dictionary mapping hyperparameter names to their current values during training.
+         """
+
+         current_internal_values = {
+             **self.global_hparams.get_current_internal_values(skip_hparams),
+             **next(iter(self.parameter_group_hparams.values())).get_current_internal_values(skip_hparams),
+         }
+         current_internal_values = {
+             hparam_name: current_internal_values.get(hparam_name, None)
+             for hparam_name in self.initial_hyperparameter_internal_values
+         }
+
+         return current_internal_values
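A sketch of the merge-and-align logic in get_current_internal_values, with plain dicts in place of the pydantic sub-models (names below are illustrative): global values and the first parameter group's values are merged, then re-keyed to the recorded initial hyperparameters so missing entries surface as None.

global_vals = {"learning_rate": 5e-4}
first_group_vals = {"weight_decay": 0.01}
initial_keys = ["learning_rate", "weight_decay", "dropout"]

merged = {**global_vals, **first_group_vals}
current = {name: merged.get(name, None) for name in initial_keys}
print(current)  # {'learning_rate': 0.0005, 'weight_decay': 0.01, 'dropout': None}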
@@ -8,6 +8,7 @@ KEY_HEADER_CASE = "X-API-Key"
  KEY_HEADER_NO_CASE = KEY_HEADER_CASE.lower()

  TIMESTAMP_FORMAT = "%Y-%m-%d-%H-%M-%S"
+ TIMESTAMP_FORMAT_WITH_MS = "%Y-%m-%d-%H-%M-%S-%f"

  RLLIB_TRUNC_EPISODES = "truncate_episodes"
  RLLIB_COMP_EPISODES = "complete_episodes"
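The new constant simply appends microseconds (%f) to the existing timestamp format:

from datetime import datetime

TIMESTAMP_FORMAT = "%Y-%m-%d-%H-%M-%S"
TIMESTAMP_FORMAT_WITH_MS = "%Y-%m-%d-%H-%M-%S-%f"

now = datetime(2024, 1, 2, 3, 4, 5, 678901)
print(now.strftime(TIMESTAMP_FORMAT))          # 2024-01-02-03-04-05
print(now.strftime(TIMESTAMP_FORMAT_WITH_MS))  # 2024-01-02-03-04-05-678901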
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: libinephany
- Version: 0.16.4
+ Version: 0.17.0
  Summary: Inephany library containing code commonly used by multiple subpackages.
  Author-email: Inephany <info@inephany.com>
  License: Apache 2.0
@@ -1 +0,0 @@
- 0.16.4