libinephany 0.14.1__tar.gz → 0.15.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. libinephany-0.15.1/CODE_VERSION.cfg +1 -0
  2. {libinephany-0.14.1/libinephany.egg-info → libinephany-0.15.1}/PKG-INFO +1 -1
  3. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/global_observers.py +1 -1
  4. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/statistic_trackers.py +11 -14
  5. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/configs/hyperparameter_configs.py +25 -1
  6. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/inner_task_profile.py +34 -0
  7. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/states/hyperparameter_states.py +12 -0
  8. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/constants.py +5 -0
  9. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/enums.py +2 -0
  10. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/torch_distributed_utils.py +22 -1
  11. {libinephany-0.14.1 → libinephany-0.15.1/libinephany.egg-info}/PKG-INFO +1 -1
  12. libinephany-0.14.1/CODE_VERSION.cfg +0 -1
  13. {libinephany-0.14.1 → libinephany-0.15.1}/LICENSE +0 -0
  14. {libinephany-0.14.1 → libinephany-0.15.1}/MANIFEST.in +0 -0
  15. {libinephany-0.14.1 → libinephany-0.15.1}/README.md +0 -0
  16. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/__init__.py +0 -0
  17. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/aws/__init__.py +0 -0
  18. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/aws/s3_functions.py +0 -0
  19. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/__init__.py +0 -0
  20. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observation_utils.py +0 -0
  21. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observer_pipeline.py +0 -0
  22. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/__init__.py +0 -0
  23. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/base_observers.py +0 -0
  24. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/local_observers.py +0 -0
  25. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/observer_containers.py +0 -0
  26. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/pipeline_coordinator.py +0 -0
  27. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/post_processors/__init__.py +0 -0
  28. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/post_processors/postprocessors.py +0 -0
  29. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/statistic_manager.py +0 -0
  30. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/__init__.py +0 -0
  31. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/configs/__init__.py +0 -0
  32. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/configs/observer_config.py +0 -0
  33. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/configs/outer_model_config.py +0 -0
  34. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/__init__.py +0 -0
  35. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/agent_info.py +0 -0
  36. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/observation_models.py +0 -0
  37. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/request_schemas.py +0 -0
  38. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/response_schemas.py +0 -0
  39. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/tensor_statistics.py +0 -0
  40. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/states/__init__.py +0 -0
  41. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/__init__.py +0 -0
  42. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/agent_utils.py +0 -0
  43. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/asyncio_worker.py +0 -0
  44. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/backend_statuses.py +0 -0
  45. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/directory_utils.py +0 -0
  46. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/dropout_utils.py +0 -0
  47. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/error_severities.py +0 -0
  48. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/exceptions.py +0 -0
  49. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/import_utils.py +0 -0
  50. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/optim_utils.py +0 -0
  51. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/random_seeds.py +0 -0
  52. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/samplers.py +0 -0
  53. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/standardizers.py +0 -0
  54. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/torch_utils.py +0 -0
  55. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/transforms.py +0 -0
  56. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/typing.py +0 -0
  57. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/web_apps/__init__.py +0 -0
  58. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/web_apps/error_logger.py +0 -0
  59. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/web_apps/web_app_utils.py +0 -0
  60. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany.egg-info/SOURCES.txt +0 -0
  61. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany.egg-info/dependency_links.txt +0 -0
  62. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany.egg-info/requires.txt +0 -0
  63. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany.egg-info/top_level.txt +0 -0
  64. {libinephany-0.14.1 → libinephany-0.15.1}/pyproject.toml +0 -0
  65. {libinephany-0.14.1 → libinephany-0.15.1}/setup.cfg +0 -0

libinephany-0.15.1/CODE_VERSION.cfg
@@ -0,0 +1 @@
+ 0.15.1

{libinephany-0.14.1/libinephany.egg-info → libinephany-0.15.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: libinephany
- Version: 0.14.1
+ Version: 0.15.1
  Summary: Inephany library containing code commonly used by multiple subpackages.
  Author-email: Inephany <info@inephany.com>
  License: Apache 2.0

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/global_observers.py
@@ -36,7 +36,7 @@ class InitialHyperparameters(GlobalObserver):

          super().__init__(**kwargs)

-         force_skip = ["samples"]
+         force_skip = ["samples", "gradient_accumulation"]
          skip_hparams = force_skip if skip_hparams is None else skip_hparams + force_skip
          self.skip_hparams = [] if skip_hparams is None else skip_hparams
          self.pad_with = pad_with
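
The only behavioural change here is that gradient_accumulation joins samples in the observer's force-skip list, so it is always excluded from the initial-hyperparameter observation regardless of what the caller passes. A tiny illustration of the merge above, with hypothetical caller values:

    force_skip = ["samples", "gradient_accumulation"]

    caller_skip = None  # caller skips nothing explicitly
    merged = force_skip if caller_skip is None else caller_skip + force_skip
    print(merged)  # ['samples', 'gradient_accumulation']

    caller_skip = ["dropout"]  # caller-supplied skips are kept as well
    merged = force_skip if caller_skip is None else caller_skip + force_skip
    print(merged)  # ['dropout', 'samples', 'gradient_accumulation']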

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/statistic_trackers.py
@@ -213,10 +213,10 @@ class Statistic(ABC):

          if torch_distributed_utils.is_scheduler_master_rank():
              if isinstance(statistic, torch.Tensor):
-                 shape = statistic.shape
+                 shape = statistic.view(-1).shape

              elif isinstance(statistic, TensorStatistics):
-                 shape = statistic.to_tensor().shape
+                 shape = statistic.to_tensor().view(-1).shape

              elif statistic is not None:
                  shape = torch.tensor([statistic]).shape
@@ -239,23 +239,21 @@ class Statistic(ABC):
          if not torch_distributed_utils.is_distributed():
              return statistic

-         if statistic is None:
-             shape = self._determine_reduction_shape(statistic=statistic)
-
-             if shape is None:
-                 return statistic
+         shape = self._determine_reduction_shape(statistic=statistic)

-             to_reduce = torch.zeros(shape)
+         if statistic is None:
+             to_reduce = torch.zeros(shape, dtype=torch.float64)

          elif isinstance(statistic, torch.Tensor):
-             to_reduce = statistic.clone()
+             to_reduce = statistic.clone().to(torch.float64).view(-1)

          elif isinstance(statistic, TensorStatistics):
-             to_reduce = statistic.to_tensor()
+             to_reduce = statistic.to_tensor().to(torch.float64).view(-1)

          else:
-             to_reduce = torch.tensor([statistic])
+             to_reduce = torch.tensor([statistic], dtype=torch.float64)

+         to_reduce = to_reduce.to(torch_distributed_utils.get_local_device())
          dist.reduce(to_reduce, dst=MASTER_SCHEDULER_RANK, op=ReduceOp.SUM)

          if not torch_distributed_utils.is_scheduler_master_rank():
@@ -283,11 +281,13 @@ class Statistic(ABC):

          parameter_group = self._find_parameter_group(optimizer=optimizer)
          parameters = self._get_parameters(parameter_group=parameter_group)
+         self._sample_number += 1

          if self._sample_number % self.sample_frequency == 0:
              statistic = self._gather(
                  optimizer=optimizer, model=model, parameters=parameters, parameter_group=parameter_group
              )
+
              statistic = self._distributed_reduce(statistic=statistic)

          if not torch_distributed_utils.is_scheduler_master_rank():
@@ -303,9 +303,6 @@ class Statistic(ABC):
          elif statistic is not None:
              self._data.append(statistic)  # type: ignore

-         if torch_distributed_utils.is_scheduler_master_rank():
-             self._sample_number += 1
-
      @final
      def fetch(self) -> TensorStatistics | float | None:
          """
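
Taken together, these hunks make the cross-rank reduction shape- and dtype-stable: every payload is flattened to one dimension, cast to float64 and moved onto the local device before dist.reduce, and the sample counter now advances on every rank instead of only on the scheduler master. A minimal standalone sketch of that reduction path, using plain torch.distributed rather than the library's Statistic class (names are illustrative, and the placeholder shape for the None case is an assumption):

    import torch
    import torch.distributed as dist

    MASTER_RANK = 0  # stand-in for MASTER_SCHEDULER_RANK

    def reduce_statistic(statistic, device: torch.device) -> torch.Tensor | None:
        # Flatten and cast so every rank contributes a tensor of identical shape and dtype.
        if statistic is None:
            to_reduce = torch.zeros(1, dtype=torch.float64)  # the real code derives this shape from the master rank
        elif isinstance(statistic, torch.Tensor):
            to_reduce = statistic.clone().to(torch.float64).view(-1)
        else:
            to_reduce = torch.tensor([statistic], dtype=torch.float64)

        # Collective reductions expect the tensor to live on the rank's own device.
        to_reduce = to_reduce.to(device)
        dist.reduce(to_reduce, dst=MASTER_RANK, op=dist.ReduceOp.SUM)

        # Only the destination rank holds the meaningful sum after the reduce.
        return to_reduce if dist.get_rank() == MASTER_RANK else None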

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/configs/hyperparameter_configs.py
@@ -4,7 +4,7 @@
  #
  # ======================================================================================================================

- from typing import Any
+ from typing import Any, cast

  from pydantic import BaseModel, ConfigDict, ValidationError, field_serializer, field_validator, model_validator

@@ -232,6 +232,23 @@ class BatchSizeHParamConfig(HParamConfig):
      sample_discrete_values: list[float | int] | None = None


+ class GradientAccumulationHParamConfig(HParamConfig):
+     max_hparam_value: float | int = 64
+     min_hparam_value: float | int = 1
+     hparam_dtype: type[float | int] = int
+     initial_value: int = 1
+     initial_delta: float = 0.0
+     scale: float = 1.0
+
+     sampler: str = "DiscreteRangeSampler"
+     sample_initial_values: bool = False
+     sample_lower_bound: int = 1
+     sample_upper_bound: int = 64
+     sample_step: int = 1
+     sample_discrete_values: list[float | int] | None = None
+     force_limit: float | int = 64
+
+
  class EpochsHParamConfig(HParamConfig):
      max_hparam_value: float | int = 16
      min_hparam_value: float | int = 1
@@ -289,6 +306,7 @@ class HParamConfigs(BaseModel):
      sgd_momentum_config: HParamConfig = SGDMomentumHParamConfig()

      batch_size_config: HParamConfig = BatchSizeHParamConfig()
+     gradient_accumulation_config: GradientAccumulationHParamConfig = GradientAccumulationHParamConfig()
      epochs_config: HParamConfig = EpochsHParamConfig()
      token_config: HParamConfig = TokensHParamConfig()
      samples_config: HParamConfig = SamplesHParamConfig()
@@ -351,6 +369,9 @@ class HParamConfigs(BaseModel):
              case AgentTypes.BatchSize:
                  self.batch_size_config = hparam_config

+             case AgentTypes.GradientAccumulationAgent:
+                 self.gradient_accumulation_config = cast(GradientAccumulationHParamConfig, hparam_config)
+
              case AgentTypes.Epochs:
                  self.epochs_config = hparam_config

@@ -400,6 +421,9 @@ class HParamConfigs(BaseModel):
              case AgentTypes.BatchSize:
                  return self.batch_size_config

+             case AgentTypes.GradientAccumulationAgent:
+                 return self.gradient_accumulation_config
+
              case AgentTypes.Epochs:
                  return self.epochs_config

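
The new config mirrors BatchSizeHParamConfig but pins the hyperparameter to integer accumulation steps in [1, 64], sampled with a DiscreteRangeSampler and never sampled at initialisation. A hedged sketch of reading and overriding the config follows; the field names and the gradient_accumulation_config attribute come from the diff, while the import path and the ability to construct HParamConfigs purely from defaults are assumptions:

    from libinephany.pydantic_models.configs.hyperparameter_configs import (
        GradientAccumulationHParamConfig,
        HParamConfigs,
    )

    configs = HParamConfigs()  # gradient_accumulation_config uses the defaults shown above
    print(configs.gradient_accumulation_config.initial_value)     # 1
    print(configs.gradient_accumulation_config.max_hparam_value)  # 64

    # Capping accumulation at 8 steps, for example:
    configs.gradient_accumulation_config = GradientAccumulationHParamConfig(
        max_hparam_value=8,
        sample_upper_bound=8,
        force_limit=8,
    )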

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/inner_task_profile.py
@@ -140,6 +140,40 @@ class InnerTaskProfiles(BaseModel):

          return sum(self.compiled_action_sizes.values())

+     @property
+     def max_total_observation_size(self) -> int:
+         """
+         :return: The summed observation size of all agents with the task that has the most layers.
+         """
+
+         if not self.profiles:
+             raise ValueError(
+                 "No profiles to calculate max total observation size. Ensure profiles have been "
+                 "added before executing the training loop"
+             )
+
+         largest_task_name = max(self.profiles, key=lambda k: self.profiles[k].number_of_layers)
+         largest_task = self.profiles[largest_task_name]
+
+         return sum(largest_task.observation_space_sizes.values())
+
+     @property
+     def max_total_action_size(self) -> int:
+         """
+         :return: The summed action size of all agents with the task that has the most layers.
+         """
+
+         if not self.profiles:
+             raise ValueError(
+                 "No profiles to calculate max total action size. Ensure profiles have been "
+                 "added before executing the training loop"
+             )
+
+         largest_task_name = max(self.profiles, key=lambda k: self.profiles[k].number_of_layers)
+         largest_task = self.profiles[largest_task_name]
+
+         return sum(largest_task.action_space_sizes.values())
+
      @staticmethod
      def _compile_gym_space_sizes(spaces: dict[str, dict[str, int]]) -> dict[str, int]:
          """
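
Both new properties pick the profile with the most layers and sum that task's per-agent space sizes. A toy illustration of the selection logic, outside the pydantic model and with hypothetical profile data:

    profiles = {
        "small_task": {"number_of_layers": 4,  "observation_space_sizes": {"lr": 12, "dropout": 8}},
        "large_task": {"number_of_layers": 12, "observation_space_sizes": {"lr": 36, "dropout": 24}},
    }

    largest_task_name = max(profiles, key=lambda k: profiles[k]["number_of_layers"])
    largest_task = profiles[largest_task_name]

    max_total_observation_size = sum(largest_task["observation_space_sizes"].values())
    print(largest_task_name, max_total_observation_size)  # large_task 60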

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/states/hyperparameter_states.py
@@ -20,6 +20,7 @@ from libinephany.utils.constants import (
      DROPOUT,
      EPOCHS,
      GRAD_NORM_CLIP,
+     GRADIENT_ACCUMULATION,
      LEARNING_RATE,
      SAMPLES,
      SGD_MOMENTUM,
@@ -60,6 +61,7 @@ class UpdateCallbacks(BaseModel):
      sgd_momentum: Callable[..., None]

      batch_size: Callable[..., None] | None
+     gradient_accumulation: Callable[..., None] | None
      epochs: Callable[..., None] | None

      def __getitem__(self, item: str) -> Callable[..., None] | None:
@@ -457,6 +459,7 @@ class ParameterGroupHParams(HyperparameterContainer):
  class GlobalHParams(HyperparameterContainer):

      batch_size: Hyperparameter
+     gradient_accumulation: Hyperparameter
      epochs: Hyperparameter
      tokens: Hyperparameter
      samples: Hyperparameter
@@ -550,6 +553,14 @@ class HyperparameterStates(BaseModel):
          """
          return self.global_hparams.batch_size

+     @computed_field  # type: ignore[misc]
+     @property
+     def gradient_accumulation(self) -> Hyperparameter:
+         """
+         :return: The gradient accumulation steps of the inner model.
+         """
+         return self.global_hparams.gradient_accumulation
+
      @computed_field  # type: ignore[misc]
      @property
      def epochs(self) -> Hyperparameter:
@@ -676,6 +687,7 @@ class HyperparameterStates(BaseModel):

          return {
              BATCH_SIZE: hparam_configs.batch_size_config,
+             GRADIENT_ACCUMULATION: hparam_configs.gradient_accumulation_config,
              EPOCHS: hparam_configs.epochs_config,
              TOKENS: hparam_configs.token_config,
              SAMPLES: hparam_configs.samples_config,
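
These hunks thread the new hyperparameter through three layers: a GRADIENT_ACCUMULATION entry in the config mapping, a field plus update-callback slot on the global containers, and a computed_field property on HyperparameterStates that simply forwards to global_hparams. A minimal pydantic sketch of that forwarding pattern, using standalone toy classes rather than the library's models:

    from pydantic import BaseModel, computed_field

    class Globals(BaseModel):  # toy stand-in for GlobalHParams
        batch_size: int = 32
        gradient_accumulation: int = 1

    class States(BaseModel):  # toy stand-in for HyperparameterStates
        global_hparams: Globals = Globals()

        @computed_field  # type: ignore[misc]
        @property
        def gradient_accumulation(self) -> int:
            # Exposed (and serialised) at the top level while being stored on the global container.
            return self.global_hparams.gradient_accumulation

    print(States().model_dump())  # includes "gradient_accumulation": 1 at the top level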

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/constants.py
@@ -21,6 +21,7 @@ ADAM_BETA_TWO = "adam_beta_two"
  ADAM_EPS = "adam_eps"
  SGD_MOMENTUM = "sgd_momentum"
  BATCH_SIZE = "batch_size"
+ GRADIENT_ACCUMULATION = "gradient_accumulation"
  EPOCHS = "epochs"
  TOKENS = "tokens"
  SAMPLES = "samples"
@@ -41,6 +42,7 @@ AGENT_PREFIX_EPS = "adam-eps"
  AGENT_PREFIX_SGD_MOMENTUM = "sgd-momentum"

  AGENT_BATCH_SIZE = "batch-size"
+ AGENT_GRADIENT_ACCUMULATION = "gradient-accumulation"

  AGENT_BANDIT_SUFFIX = "bandit-agent"

@@ -53,6 +55,7 @@ AGENT_TYPES = [
      ADAM_BETA_TWO,
      ADAM_EPS,
      SGD_MOMENTUM,
+     GRADIENT_ACCUMULATION,
  ]
  SUFFIXES = [AGENT_BANDIT_SUFFIX]
  PREFIXES = [
@@ -64,6 +67,7 @@ PREFIXES = [
      AGENT_PREFIX_BETA_TWO,
      AGENT_PREFIX_EPS,
      AGENT_PREFIX_SGD_MOMENTUM,
+     AGENT_GRADIENT_ACCUMULATION,
  ]
  PREFIXES_TO_HPARAMS = {
      AGENT_PREFIX_LR: LEARNING_RATE,
@@ -74,4 +78,5 @@ PREFIXES_TO_HPARAMS = {
      AGENT_PREFIX_BETA_TWO: ADAM_BETA_TWO,
      AGENT_PREFIX_EPS: ADAM_EPS,
      AGENT_PREFIX_SGD_MOMENTUM: SGD_MOMENTUM,
+     AGENT_GRADIENT_ACCUMULATION: GRADIENT_ACCUMULATION,
  }

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/enums.py
@@ -14,6 +14,7 @@ from libinephany.utils.constants import (
      DROPOUT,
      EPOCHS,
      GRAD_NORM_CLIP,
+     GRADIENT_ACCUMULATION,
      LEARNING_RATE,
      SAMPLES,
      SGD_MOMENTUM,
@@ -69,6 +70,7 @@ class AgentTypes(EnumWithIndices):
      AdamBetaTwoAgent = ADAM_BETA_TWO
      AdamEpsAgent = ADAM_EPS
      SGDMomentumAgent = SGD_MOMENTUM
+     GradientAccumulationAgent = GRADIENT_ACCUMULATION

      # Deprecated or Non-Agent
      BatchSize = BATCH_SIZE

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/torch_distributed_utils.py
@@ -4,8 +4,10 @@
  #
  # ======================================================================================================================

+ import os
  from typing import Any

+ import torch
  import torch.distributed as dist

  # ======================================================================================================================
@@ -14,7 +16,11 @@ import torch.distributed as dist
  #
  # ======================================================================================================================

+ CUDA = "cuda"
+ CPU = "cpu"
+ CUDA_PREFIX = f"{CUDA}:"
  MASTER_SCHEDULER_RANK = 0
+ LOCAL_RANK = "LOCAL_RANK"

  # ======================================================================================================================
  #
@@ -48,7 +54,10 @@ def get_local_rank() -> int:
      :return: Distributed computing rank of this process.
      """

-     return dist.get_rank() if is_distributed() else MASTER_SCHEDULER_RANK
+     if not is_distributed():
+         return MASTER_SCHEDULER_RANK
+
+     return dist.get_rank()


  def is_scheduler_master_rank() -> bool:
@@ -83,3 +92,15 @@ def barrier() -> None:

      if is_distributed():
          dist.barrier()
+
+
+ def get_local_device() -> torch.device:
+     """
+     :return: Local device of the current rank.
+     """
+
+     if not is_distributed():
+         return torch.device(CUDA if torch.cuda.is_available() else CPU)
+
+     local_device_rank = os.environ.get(LOCAL_RANK, MASTER_SCHEDULER_RANK)
+     return torch.device(f"{CUDA_PREFIX}{local_device_rank}" if torch.cuda.is_available() else CPU)
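
get_local_device supplies the tensor placement used by the reduction change in statistic_trackers.py: outside a process group it falls back to plain cuda or cpu, and inside one it builds the device from the LOCAL_RANK environment variable that torchrun sets for each worker. A hedged usage sketch, assuming the module path shown in the file list and an already-initialised process group when running distributed:

    import torch
    import torch.distributed as dist

    from libinephany.utils import torch_distributed_utils

    payload = torch.zeros(4, dtype=torch.float64)
    payload = payload.to(torch_distributed_utils.get_local_device())  # e.g. cuda:0, cuda:1, ... or cpu

    if torch_distributed_utils.is_distributed():
        dist.reduce(
            payload,
            dst=torch_distributed_utils.MASTER_SCHEDULER_RANK,
            op=dist.ReduceOp.SUM,
        )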

{libinephany-0.14.1 → libinephany-0.15.1/libinephany.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: libinephany
- Version: 0.14.1
+ Version: 0.15.1
  Summary: Inephany library containing code commonly used by multiple subpackages.
  Author-email: Inephany <info@inephany.com>
  License: Apache 2.0

libinephany-0.14.1/CODE_VERSION.cfg
@@ -1 +0,0 @@
- 0.14.1