libinephany 0.19.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -76,7 +76,7 @@ class Statistic(ABC):
         self.max_cache_size = max_statistic_cache_size
         self.downsample_percent = tensor_stats_downsample_percentage
         self.sample_frequency = statistic_sample_frequency
-        self.skip_statistics: list[str] | None = None
+        self.include_statistics: list[str] | None = None

     @final
     @property
@@ -195,12 +195,17 @@ class Statistic(ABC):
         Processes the tensor cache to build a TensorStatistic model.
         """

+        if not self.include_statistics:
+            raise ValueError(f"{self.__class__.__name__} must be provided with include_statistics.")
+
         if self._tensor_cache:
             concatenated = torch.cat(self._tensor_cache)
             self._tensor_cache = []

             statistics = TensorStatistics.build(
-                tensor=concatenated, skip_statistics=self.skip_statistics, sample_percentage=self.downsample_percent
+                tensor=concatenated,
+                include_statistics=self.include_statistics,
+                sample_percentage=self.downsample_percent,
             )
             self._data.append(statistics)  # type: ignore

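With this change, statistic gathering fails fast when no include list is configured instead of silently emitting every field. A minimal sketch of the new calling convention, not taken from the package itself; the only field name known from this diff is inter_quartile_range_ (visible in the TensorStatistics hunks below), and the trailing underscore is optional because filter_include_statistics normalises it:

import torch

# Hypothetical usage; assumes TensorStatistics is importable from libinephany.
stats = TensorStatistics.build(
    tensor=torch.randn(1_000),
    include_statistics=["inter_quartile_range"],  # "inter_quartile_range_" also accepted
    sample_percentage=0.01,
)
values = stats.to_list(include_statistics=["inter_quartile_range"])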
@@ -368,18 +373,18 @@ class FirstOrderGradients(Statistic):
     def __init__(
         self,
         *,
-        skip_statistics: list[str] | None = None,
+        include_statistics: list[str] | None = None,
         **kwargs,
     ) -> None:
         """
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
+        :param include_statistics: If the observation uses the TensorStatistic model to return observations, names of the
+            fields in the model to include in returned observations.
         :param kwargs: Other observation keyword arguments.
         """

         super().__init__(**kwargs)

-        self.skip_statistics = skip_statistics
+        self.include_statistics = include_statistics

     def _get_storage_format(self) -> StatisticStorageTypes:
         """
@@ -422,22 +427,22 @@ class SecondOrderGradients(Statistic):
     def __init__(
         self,
         *,
+        include_statistics: list[str] | None = None,
         compute_hessian_diagonal: bool = False,
-        skip_statistics: list[str] | None = None,
         **kwargs,
     ) -> None:
         """
+        :param include_statistics: If the observation uses the TensorStatistic model to return observations, names of the
+            fields in the model to include in returned observations.
         :param compute_hessian_diagonal: Whether to compute the Hessian diagonal to determine second order gradients
             or use the squared first order gradients as approximations in the same way Adam does.
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
         :param kwargs: Other observation keyword arguments.
         """

         super().__init__(**kwargs)

         self.compute_hessian_diagonal = compute_hessian_diagonal
-        self.skip_statistics = skip_statistics
+        self.include_statistics = include_statistics

     @property
     def requires_gradient_graphs(self) -> bool:
@@ -520,18 +525,18 @@ class ActivationStatistics(Statistic):
     def __init__(
         self,
         *,
-        skip_statistics: list[str] | None = None,
+        include_statistics: list[str] | None = None,
         **kwargs,
     ) -> None:
         """
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
+        :param include_statistics: If the observation uses the TensorStatistic model to return observations, names of the
+            fields in the model to include in returned observations.
         :param kwargs: Other observation keyword arguments.
         """

         super().__init__(**kwargs)

-        self.skip_statistics = skip_statistics
+        self.include_statistics = include_statistics

     @property
     def uses_forward_hook(self) -> bool:
@@ -554,6 +559,9 @@ class ActivationStatistics(Statistic):
         :return: Forward hook to register the function with.
         """

+        if self.include_statistics is None:
+            raise ValueError("include_statistics is required to use forward hooks!")
+
         def hook(module: nn.Module, layer_input: torch.Tensor, layer_output: torch.Tensor) -> None:
             """
             :param module: Module the hook was registered with. Not used here.
@@ -563,7 +571,9 @@ class ActivationStatistics(Statistic):

             if self._sample_number % self.sample_frequency == 0:
                 statistics = TensorStatistics.build(
-                    tensor=layer_output, skip_statistics=self.skip_statistics, sample_percentage=self.downsample_percent
+                    tensor=layer_output,
+                    include_statistics=self.include_statistics,
+                    sample_percentage=self.downsample_percent,
                 )
                 self._data.append(statistics)  # type: ignore

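Together, the two hook hunks move validation to registration time: the closure captures include_statistics once, so raising before the hook is registered avoids a failure in the middle of a training forward pass. A registration sketch under assumed names, since this diff shows only the hook body:

import torch
from torch import nn

# Hypothetical wiring; the ActivationStatistics constructor and the method that
# returns the hook are assumptions.
stat = ActivationStatistics(include_statistics=["inter_quartile_range"])
layer = nn.Linear(8, 4)
handle = layer.register_forward_hook(stat.get_forward_hook())  # assumed method name
layer(torch.randn(2, 8))  # the hook builds TensorStatistics from layer_output
handle.remove()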
@@ -598,23 +608,23 @@ class ActivationStatistics(Statistic):
         return None


-class InnerStepParameterUpdateStatistics(Statistic):
+class ParameterUpdateStatistics(Statistic):

     def __init__(
         self,
         *,
-        skip_statistics: list[str] | None = None,
+        include_statistics: list[str] | None = None,
         **kwargs,
     ) -> None:
         """
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
+        :param include_statistics: If the observation uses the TensorStatistic model to return observations, names of the
+            fields in the model to include in returned observations.
         :param kwargs: Other observation keyword arguments.
         """

         super().__init__(**kwargs)

-        self.skip_statistics = skip_statistics
+        self.include_statistics = include_statistics

     def _get_storage_format(self) -> StatisticStorageTypes:
         """
@@ -650,56 +660,9 @@ class InnerStepParameterUpdateStatistics(Statistic):
         return update_tensor


-class ParameterUpdateStatistics(Statistic):
-
-    def __init__(
-        self,
-        *,
-        skip_statistics: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        """
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
-        :param kwargs: Other observation keyword arguments.
-        """
-
-        super().__init__(**kwargs)
-
-        self.skip_statistics = skip_statistics
-
-    def _get_storage_format(self) -> StatisticStorageTypes:
-        """
-        :return: Storage format this observation stores data in. Must be one of the enum attributes in the
-            StatisticStorageTypes enumeration class.
-        """
-
-        return StatisticStorageTypes.TENSOR_STATISTICS
-
-    def _gather(
-        self,
-        *,
-        optimizer: optim.Optimizer,
-        model: nn.Module,
-        parameters: list[torch.Tensor],
-        parameter_group: dict[str, Any],
-    ) -> torch.Tensor | TensorStatistics | float | None:
-        """
-        :param optimizer: Optimizer the given parameters and parameter group came from.
-        :param model: Inner model to gather statistics from.
-        :param parameters: List of parameters to gather statistics from.
-        :param parameter_group: Parameter group the parameters originate from.
-        :return: None, TensorStatistics model or a float.
-        """
-
-        update_tensor = observation_utils.form_update_tensor(
-            optimizer=optimizer, parameters=parameters, parameter_group=parameter_group
-        )
-
-        if update_tensor is None:
-            update_tensor = torch.cat([torch.zeros(p.view(-1).shape, device=p.device) for p in parameters])
+class LHOPTParameterUpdateStatistics(ParameterUpdateStatistics):

-        return update_tensor
+    pass


 class ParameterStatistics(Statistic):
@@ -707,18 +670,18 @@ class ParameterStatistics(Statistic):
     def __init__(
         self,
         *,
-        skip_statistics: list[str] | None = None,
+        include_statistics: list[str] | None = None,
         **kwargs,
     ) -> None:
         """
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
+        :param include_statistics: If the observation uses the TensorStatistic model to return observations, names of the
+            fields in the model to include in returned observations.
         :param kwargs: Other observation keyword arguments.
         """

         super().__init__(**kwargs)

-        self.skip_statistics = skip_statistics
+        self.include_statistics = include_statistics

     def _get_storage_format(self) -> StatisticStorageTypes:
         """
@@ -747,49 +710,9 @@ class ParameterStatistics(Statistic):
         return torch.cat([p.data.view(-1) for p in parameters if observation_utils.tensor_on_local_rank(p)])


-class InnerStepParameterStatistics(Statistic):
+class LHOPTParameterStatistics(ParameterStatistics):

-    def __init__(
-        self,
-        *,
-        skip_statistics: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        """
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
-        :param kwargs: Other observation keyword arguments.
-        """
-
-        super().__init__(**kwargs)
-
-        self.skip_statistics = skip_statistics
-
-    def _get_storage_format(self) -> StatisticStorageTypes:
-        """
-        :return: Storage format this observation stores data in. Must be one of the enum attributes in the
-            StatisticStorageTypes enumeration class.
-        """
-
-        return StatisticStorageTypes.TENSOR_STATISTICS
-
-    def _gather(
-        self,
-        *,
-        optimizer: optim.Optimizer,
-        model: nn.Module,
-        parameters: list[torch.Tensor],
-        parameter_group: dict[str, Any],
-    ) -> torch.Tensor | TensorStatistics | float | None:
-        """
-        :param optimizer: Optimizer the given parameters and parameter group came from.
-        :param model: Inner model to gather statistics from.
-        :param parameters: List of parameters to gather statistics from.
-        :param parameter_group: Parameter group the parameters originate from.
-        :return: None, TensorStatistics model or a float.
-        """
-
-        return torch.cat([p.data.view(-1) for p in parameters if observation_utils.tensor_on_local_rank(p)])
+    pass


 class LAMBTrustRatioStatistics(Statistic):
@@ -797,16 +720,20 @@ class LAMBTrustRatioStatistics(Statistic):
     def __init__(
         self,
         *,
+        include_statistics: list[str] | None = None,
         use_log_transform: bool = False,
         **kwargs,
     ) -> None:
         """
+        :param include_statistics: If the observation uses the TensorStatistic model to return observations, names of the
+            fields in the model to include in returned observations.
         :param use_log_transform: Whether to transform the LAMB trust ratio by taking ln(1 + R).
         :param kwargs: Other observation keyword arguments.
         """

         super().__init__(**kwargs)

+        self.include_statistics = include_statistics
         self.use_log_transform = use_log_transform

     def _get_storage_format(self) -> StatisticStorageTypes:
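For context, the trust ratio these classes emit is the LAMB ratio of weight norm to update norm, optionally passed through ln(1 + R); this sketch mirrors the _gather body that the next hunk deletes from the now-redundant subclass:

import math
import torch

def lamb_trust_ratio(weights: torch.Tensor, updates: torch.Tensor, use_log_transform: bool = False) -> float:
    # R = ||w||_2 / ||u||_2, guarded against a zero update norm.
    update_norm = torch.norm(updates, p=2).item()
    weight_norm = torch.norm(weights, p=2).item()
    ratio = weight_norm / update_norm if update_norm > 0 else 0.0
    # Optional ln(1 + R) transform, matching use_log_transform.
    return math.log(1 + ratio) if use_log_transform else ratio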
@@ -857,69 +784,9 @@ class LAMBTrustRatioStatistics(Statistic):
         return lamb_trust_ratio


-class LHOPTLAMBTrustRatioStatistics(Statistic):
+class LHOPTLAMBTrustRatioStatistics(LAMBTrustRatioStatistics):

-    def __init__(
-        self,
-        *,
-        use_log_transform: bool = False,
-        **kwargs,
-    ) -> None:
-        """
-        :param use_log_transform: Whether to transform the LAMB trust ratio by taking ln(1 + R).
-        :param kwargs: Other observation keyword arguments.
-        """
-
-        super().__init__(**kwargs)
-
-        self.use_log_transform = use_log_transform
-
-    def _get_storage_format(self) -> StatisticStorageTypes:
-        """
-        :return: Storage format this observation stores data in. Must be one of the enum attributes in the
-            StatisticStorageTypes enumeration class.
-        """
-
-        return StatisticStorageTypes.FLOAT
-
-    def _gather(
-        self,
-        *,
-        optimizer: optim.Optimizer,
-        model: nn.Module,
-        parameters: list[torch.Tensor],
-        parameter_group: dict[str, Any],
-    ) -> torch.Tensor | TensorStatistics | float | None:
-        """
-        :param optimizer: Optimizer the given parameters and parameter group came from.
-        :param model: Inner model to gather statistics from.
-        :param parameters: List of parameters to gather statistics from.
-        :param parameter_group: Parameter group the parameters originate from.
-        :return: None, TensorStatistics model or a float.
-        """
-
-        weights_list = [p.data.view(-1) for p in parameters if observation_utils.tensor_on_local_rank(p)]
-        if weights_list:
-            weights = torch.cat(weights_list)
-
-        else:
-            weights = None
-
-        updates = observation_utils.form_update_tensor(
-            optimizer=optimizer, parameters=parameters, parameter_group=parameter_group
-        )
-
-        update_norm = torch.norm(updates, p=2).item() if updates is not None else 0
-        weight_norm = torch.norm(weights, p=2).item() if weights is not None else 0
-
-        lamb_trust_ratio = 0.0
-        if update_norm > 0:
-            lamb_trust_ratio = weight_norm / update_norm
-
-        if self.use_log_transform:
-            lamb_trust_ratio = math.log(1 + lamb_trust_ratio)
-
-        return lamb_trust_ratio
+    pass


 class NumberOfParameters(Statistic):
@@ -1125,22 +992,6 @@ class GradientVarianceFraction(Statistic):

 class AverageParameterUpdateMagnitudeStatistics(Statistic):

-    def __init__(
-        self,
-        *,
-        skip_statistics: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        """
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
-        :param kwargs: Other observation keyword arguments.
-        """
-
-        super().__init__(**kwargs)
-
-        self.skip_statistics = skip_statistics
-
     def _get_storage_format(self) -> StatisticStorageTypes:
         """
         :return: Storage format this observation stores data in. Must be one of the enum attributes in the
@@ -1183,22 +1034,6 @@ class AverageParameterUpdateMagnitudeStatistics(Statistic):

 class MomentumGradientRatioStatistics(Statistic):

-    def __init__(
-        self,
-        *,
-        skip_statistics: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        """
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
-        :param kwargs: Other observation keyword arguments.
-        """
-
-        super().__init__(**kwargs)
-
-        self.skip_statistics = skip_statistics
-
     def _get_storage_format(self) -> StatisticStorageTypes:
         """
         :return: Storage format this observation stores data in. Must be one of the enum attributes in the
@@ -1263,26 +1098,8 @@ class LogOfNoiseScaleStatistics(Statistic):
         - Σ is the noise covariance matrix
         - B is the batch size
         - ε is the learning rate
-
-
     """

-    def __init__(
-        self,
-        *,
-        skip_statistics: list[str] | None = None,
-        **kwargs,
-    ) -> None:
-        """
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
-        :param kwargs: Other observation keyword arguments.
-        """
-
-        super().__init__(**kwargs)
-
-        self.skip_statistics = skip_statistics
-
     @property
     def requires_gradient_graphs(self) -> bool:
         """
1370
1187
  # This is a common assumption when the exact noise structure is unknown
1371
1188
  noise_covariance = torch.ones_like(hessian_diagonals)
1372
1189
 
1373
- # Compute tr(HΣ)
1374
- trace_hessian_noise_covariance = torch.sum(hessian_diagonals * noise_covariance)
1375
-
1376
- # Avoid division by zero and log of zero
1377
- if trace_hessian_noise_covariance <= 0:
1378
- return None
1190
+ # Compute tr(HΣ), add zero division tolerance to avoid log of zero when gradient is too small
1191
+ trace_hessian_noise_covariance = (
1192
+ torch.sum(hessian_diagonals * noise_covariance) + LHOPT_CONSTANTS["ZERO_DIVISION_TOLERANCE"]
1193
+ )
1379
1194
 
1380
1195
  log_trace_hessian_noise_covariance = torch.log(trace_hessian_noise_covariance).item()
1381
1196
 
1382
- # Compute tr(H^3 Σ)
1383
- trace_hessian_cubed_noise_covariance = torch.sum(hessian_diagonals**3 * noise_covariance)
1384
- if trace_hessian_cubed_noise_covariance <= 0:
1385
- return None
1197
+ # Compute tr(H^3 Σ), add zero division tolerance to avoid log of zero when gradient is too small
1198
+ trace_hessian_cubed_noise_covariance = (
1199
+ torch.sum(hessian_diagonals**3 * noise_covariance) + LHOPT_CONSTANTS["ZERO_DIVISION_TOLERANCE"]
1200
+ )
1386
1201
 
1387
1202
  log_trace_hessian_cubed_noise_covariance = torch.log(trace_hessian_cubed_noise_covariance).item()
1388
1203
 
@@ -13,7 +13,7 @@ from pydantic import BaseModel
 #
 # ======================================================================================================================

-STRIP_SUFFIX = "_"
+FIELD_SUFFIX = "_"

 # ======================================================================================================================
 #
@@ -164,28 +164,38 @@ class TensorStatistics(BaseModel):
         return tensor[random_indices]

     @classmethod
-    def filter_skip_statistics(cls, skip_statistics: list[str] | None) -> list[str]:
+    def filter_include_statistics(cls, include_statistics: list[str]) -> list[str]:
         """
-        :param skip_statistics: Names of the fields in the model to not include in returned observations.
-        :return: Empty list if skip_statistics was None or skip_statistics filtered to include only the names of fields
-            present in this pydantic model.
+        :param include_statistics: Names of the fields in the model to include in returned observations.
+        :return: List of fields from the given include_statistics list that are present in this pydantic model.
+        :raises ValueError: If no statistics to include are given.
         """

-        return (
-            [skip_stat for skip_stat in skip_statistics if skip_stat in cls.model_fields.keys()]
-            if skip_statistics is not None
-            else []
-        )
+        filtered_include_statistics: list[str] = []
+
+        for include_stat in include_statistics:
+            with_suffix = include_stat + FIELD_SUFFIX if not include_stat.endswith(FIELD_SUFFIX) else include_stat
+
+            if with_suffix in cls.model_fields.keys():
+                filtered_include_statistics.append(with_suffix)
+
+        if not filtered_include_statistics:
+            raise ValueError(f"No statistics to include given to {cls.__name__}!")
+
+        return filtered_include_statistics

     @classmethod
     def build(
-        cls, tensor: torch.Tensor, sample_percentage: float = 0.01, skip_statistics: list[str] | None = None
+        cls,
+        tensor: torch.Tensor,
+        include_statistics: list[str],
+        sample_percentage: float = 0.01,
     ) -> "TensorStatistics":
         """
         :param tensor: Tensor to compute and store statistics of.
+        :param include_statistics: If the observation uses the TensorStatistic model to return observations, names of the
+            fields in the model to include in returned observations.
         :param sample_percentage: Percentage of the given tensor to randomly sample and compute statistics from.
-        :param skip_statistics: If the observation uses the TensorStatistic model to return observations, names of the
-            fields in the model to not include in returned observations.
         :return: Constructed tensor statistics.
         """

@@ -193,12 +203,10 @@ class TensorStatistics(BaseModel):
         downsampled_tensor = cls.downsample_tensor(tensor=tensor, sample_percentage=sample_percentage)

         for field, field_value in stats.model_dump().items():
-            name = field[:-1] if field.endswith("_") else field
-
-            if skip_statistics is not None and name in skip_statistics:
-                continue
+            name = field[:-1] if field.endswith(FIELD_SUFFIX) else field

-            setattr(stats, name, downsampled_tensor)
+            if name in include_statistics:
+                setattr(stats, name, downsampled_tensor)

         return stats

@@ -219,28 +227,21 @@ class TensorStatistics(BaseModel):
             inter_quartile_range_=tensor[6],
         )

-    def to_list(self, skip_statistics: list[str] | None) -> list[float]:
+    def to_list(self, include_statistics: list[str]) -> list[float]:
         """
-        :param skip_statistics: None or a list of field names to skip from adding to the returned list.
+        :param include_statistics: List of field names to include in the returned list.
         :return: List of field values.
         """

-        if skip_statistics is None:
-            skip_statistics = []
-
-        if not all(skip_stat in self.model_fields.keys() for skip_stat in skip_statistics):
-            raise ValueError(
-                f"One or more skip statistic keys do not exist in TensorStatistics. Valid Skip Keys: "
-                f"{list(self.model_fields.keys())} Given Skip Keys: {skip_statistics}"
-            )
+        filtered_includes = self.filter_include_statistics(include_statistics=include_statistics)

         as_list = []

         for field, field_value in self.model_dump().items():
-            if field in skip_statistics:
-                continue
+            without_suffix = field[:-1]

-            as_list.append(field_value)
+            if field in filtered_includes or without_suffix in filtered_includes:
+                as_list.append(field_value)

         return as_list

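Note the semantic shift here: the old to_list raised if any single skip key was unknown, while the new path delegates to filter_include_statistics, which drops unknown names silently and only raises when nothing matches. A brief sketch, assuming stats is a populated TensorStatistics instance:

stats.to_list(include_statistics=["inter_quartile_range", "made_up_field"])
# -> [<inter_quartile_range_ value>]; the unknown name is dropped because one name matched
stats.to_list(include_statistics=["made_up_field"])
# -> ValueError from filter_include_statistics: nothing matched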
@@ -267,6 +268,6 @@ class TensorStatistics(BaseModel):
         """

         return {
-            field[:-1] if field.endswith(STRIP_SUFFIX) else field: field_value
+            field[:-1] if field.endswith(FIELD_SUFFIX) else field: field_value
             for field, field_value in self.model_dump().items()
         }