PyPI - agilerl - Versions diffs - 2.3.5.dev0__tar.gz → 2.3.5.dev1__tar.gz - Mend

agilerl 2.3.5.dev0__tar.gz → 2.3.5.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

{agilerl-2.3.5.dev0 → agilerl-2.3.5.dev1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: agilerl
-Version: 2.3.5.dev0
+Version: 2.3.5.dev1
 Summary: AgileRL is a deep reinforcement learning library focused on improving RL development through RLOps.
 License: Apache 2.0
 Author: Nick Ustaran-Anderegg

{agilerl-2.3.5.dev0 → agilerl-2.3.5.dev1}/agilerl/algorithms/bc_lm.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Callable, Optional, Tuple, Union
+from typing import Any, Callable, Optional, Union
 import numpy as np
 import torch
@@ -167,7 +167,7 @@ class BC_LM(nn.Module):
         temp: float = 1.0,
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
-    ) -> Tuple[torch.Tensor, Any]:
+    ) -> tuple[torch.Tensor, Any]:
         prepared_inputs = self.prepare_inputs(items)
         tokens = prepared_inputs["tokens"]
         scores, model_outputs = self.score(
@@ -189,7 +189,7 @@ class BC_LM(nn.Module):
         temp: float = 1.0,
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
-    ) -> Tuple[torch.Tensor, Any]:
+    ) -> tuple[torch.Tensor, Any]:
         scores, model_outputs = self.score(
             (
                 tokens.unsqueeze(1),

{agilerl-2.3.5.dev0 → agilerl-2.3.5.dev1}/agilerl/algorithms/core/base.py RENAMED Viewed

@@ -12,12 +12,8 @@ from importlib.metadata import version
 from typing import (
     Any,
     Callable,
-    Dict,
     Iterable,
-    List,
     Optional,
-    Tuple,
-    Type,
     TypeVar,
     Union,
     cast,
@@ -30,16 +26,13 @@ from accelerate import Accelerator
 from accelerate.utils import broadcast_object_list
 from accelerate.utils.deepspeed import DeepSpeedOptimizerWrapper
 from deepspeed.checkpoint.utils import clone_tensors_for_torch_save
-from deepspeed.runtime.engine import DeepSpeedEngine
 from gymnasium import spaces
-from numpy.typing import ArrayLike
 from peft import PeftModel, set_peft_model_state_dict
 from safetensors.torch import load_file
 from tensordict import TensorDict
 from torch._dynamo import OptimizedModule
 from torch.optim import AdamW
 from torch.optim.lr_scheduler import SequentialLR
-from vllm.distributed.parallel_state import destroy_model_parallel
 from agilerl.algorithms.core.optimizer_wrapper import OptimizerWrapper
 from agilerl.algorithms.core.registry import (
@@ -107,7 +100,7 @@ class _RegistryMeta(type):
     initializing with specified network groups and optimizers."""
     def __call__(
-        cls: Type[SelfEvolvableAlgorithm], *args, **kwargs
+        cls: type[SelfEvolvableAlgorithm], *args, **kwargs
     ) -> SelfEvolvableAlgorithm:
         # Create the instance
         instance: SelfEvolvableAlgorithm = super().__call__(*args, **kwargs)
@@ -124,7 +117,7 @@ class RegistryMeta(_RegistryMeta, ABCMeta): ...
 def get_checkpoint_dict(
     agent: SelfEvolvableAlgorithm, using_deepspeed: bool = False
-) -> Dict[str, Any]:
+) -> dict[str, Any]:
     """Returns a dictionary of the agent's attributes to save in a checkpoint.
     Note: Accelerator is always excluded from the checkpoint as it cannot be serialized.
@@ -152,7 +145,7 @@ def get_checkpoint_dict(
         attribute_dict.pop("rollout_buffer")
     # Get checkpoint dictionaries for evolvable modules and optimizers
-    network_info: Dict[str, Dict[str, Any]] = {"modules": {}, "optimizers": {}}
+    network_info: dict[str, dict[str, Any]] = {"modules": {}, "optimizers": {}}
     for attr in agent.evolvable_attributes():
         evolvable_obj: EvolvableAttributeType = getattr(agent, attr)
         if isinstance(evolvable_obj, OptimizerWrapper):
@@ -186,14 +179,14 @@ def get_checkpoint_dict(
 def get_optimizer_cls(
-    optimizer_cls: Union[str, Dict[str, str]],
-) -> Union[Type[torch.optim.Optimizer], Dict[str, Type[torch.optim.Optimizer]]]:
+    optimizer_cls: Union[str, dict[str, str]],
+) -> Union[type[torch.optim.Optimizer], dict[str, type[torch.optim.Optimizer]]]:
     """Returns the optimizer class from the string or dictionary of optimizer classes.
     :param optimizer_cls: The optimizer class or dictionary of optimizer classes.
-    :type optimizer_cls: Union[str, Dict[str, str]]
+    :type optimizer_cls: Union[str, dict[str, str]]
     :return: The optimizer class or dictionary of optimizer classes.
-    :rtype: Union[Type[torch.optim.Optimizer], Dict[str, Type[torch.optim.Optimizer]]]
+    :rtype: Union[type[torch.optim.Optimizer], dict[str, type[torch.optim.Optimizer]]]
     """
     if isinstance(optimizer_cls, dict):
         optimizer_cls = {
@@ -313,20 +306,20 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
         raise NotImplementedError
     @abstractmethod
-    def test(self, *args, **kwargs) -> ArrayLike:
+    def test(self, *args, **kwargs) -> np.ndarray:
         """Abstract method for testing the algorithm."""
         raise NotImplementedError
     @staticmethod
-    def get_state_dim(observation_space: GymSpaceType) -> Tuple[int, ...]:
+    def get_state_dim(observation_space: GymSpaceType) -> tuple[int, ...]:
         """Returns the dimension of the state space as it pertains to the underlying
         networks (i.e. the input size of the networks).
         :param observation_space: The observation space of the environment.
-        :type observation_space: spaces.Space or List[spaces.Space].
+        :type observation_space: spaces.Space or list[spaces.Space].
         :return: The dimension of the state space.
-        :rtype: Tuple[int, ...].
+        :rtype: tuple[int, ...].
         """
         warnings.warn(
             "This method is deprecated. Use get_input_size_from_space instead.",
@@ -335,12 +328,12 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
         return get_input_size_from_space(observation_space)
     @staticmethod
-    def get_action_dim(action_space: GymSpaceType) -> Tuple[int, ...]:
+    def get_action_dim(action_space: GymSpaceType) -> tuple[int, ...]:
         """Returns the dimension of the action space as it pertains to the underlying
         networks (i.e. the output size of the networks).
         :param action_space: The action space of the environment.
-        :type action_space: spaces.Space or List[spaces.Space].
+        :type action_space: spaces.Space or list[spaces.Space].
         :return: The dimension of the action space.
         :rtype: int.
@@ -354,7 +347,7 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
     @staticmethod
     def inspect_attributes(
         agent: SelfEvolvableAlgorithm, input_args_only: bool = False
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """
         Inspect and retrieve the attributes of the current object, excluding attributes related to the
         underlying evolvable networks (i.e. `EvolvableModule`, `torch.optim.Optimizer`) and with
@@ -451,21 +444,21 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
     @classmethod
     def population(
-        cls: Type[SelfEvolvableAlgorithm],
+        cls: type[SelfEvolvableAlgorithm],
         size: int,
         observation_space: GymSpaceType,
         action_space: GymSpaceType,
-        wrapper_cls: Optional[Type[SelfAgentWrapper]] = None,
-        wrapper_kwargs: Dict[str, Any] = {},
+        wrapper_cls: Optional[type[SelfAgentWrapper]] = None,
+        wrapper_kwargs: dict[str, Any] = {},
         **kwargs,
-    ) -> List[Union[SelfEvolvableAlgorithm, SelfAgentWrapper]]:
+    ) -> list[Union[SelfEvolvableAlgorithm, SelfAgentWrapper]]:
         """Creates a population of algorithms.
         :param size: The size of the population.
         :type size: int.
         :return: A list of algorithms.
-        :rtype: List[SelfEvolvableAlgorithm].
+        :rtype: list[SelfEvolvableAlgorithm].
         """
         if wrapper_cls is not None:
             return [
@@ -549,11 +542,12 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
                 hp_value = getattr(self, hp)
                 hp_spec = self.registry.hp_config[hp]
                 dtype = type(hp_value)
-                if dtype not in [int, float]:
+                if dtype not in [int, float, np.ndarray]:
                     raise TypeError(
                         f"Can't mutate hyperparameter {hp} of type {dtype}. AgileRL only supports "
-                        "mutating integer or float hyperparameters."
+                        "mutating integer, float, and numpy ndarray hyperparameters."
                     )
                 hp_spec.dtype = dtype
     def _wrap_attr(self, attr: EvolvableAttributeType) -> EvolvableAttributeType:
@@ -637,7 +631,7 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
         """
         self.training = training
-    def get_lr_names(self) -> List[str]:
+    def get_lr_names(self) -> list[str]:
         """Returns the learning rates of the algorithm."""
         return [opt.lr for opt in self.registry.optimizers]
@@ -695,14 +689,14 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
         for name, obj in self.evolvable_attributes(networks_only=True).items():
             setattr(self, name, compile_model(obj, self.torch_compiler))
-    def to_device(self, *experiences: TorchObsType) -> Tuple[TorchObsType, ...]:
+    def to_device(self, *experiences: TorchObsType) -> tuple[TorchObsType, ...]:
         """Moves experiences to the device.
         :param experiences: Experiences to move to device
-        :type experiences: Tuple[torch.Tensor[float], ...]
+        :type experiences: tuple[torch.Tensor[float], ...]
         :return: Experiences on the device
-        :rtype: Tuple[torch.Tensor[float], ...]
+        :rtype: tuple[torch.Tensor[float], ...]
         """
         device = self.device if self.accelerator is None else self.accelerator.device
         on_device = []
@@ -861,12 +855,12 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
         :param path: Location to load checkpoint from
         :type path: string
         """
-        checkpoint: Dict[str, Any] = torch.load(
+        checkpoint: dict[str, Any] = torch.load(
             path, map_location=self.device, pickle_module=dill, weights_only=False
         )
         # Recreate evolvable modules
-        network_info: Dict[str, Dict[str, Any]] = checkpoint["network_info"]
+        network_info: dict[str, dict[str, Any]] = checkpoint["network_info"]
         network_names = network_info["network_names"]
         for name in network_names:
             net_dict = {
@@ -967,7 +961,7 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
     @classmethod
     def load(
-        cls: Type[SelfEvolvableAlgorithm],
+        cls: type[SelfEvolvableAlgorithm],
         path: str,
         device: DeviceType = "cpu",
         accelerator: Optional[Accelerator] = None,
@@ -984,12 +978,12 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
         :return: An instance of the algorithm
         :rtype: RLAlgorithm
         """
-        checkpoint: Dict[str, Any] = torch.load(
+        checkpoint: dict[str, Any] = torch.load(
             path, map_location=device, pickle_module=dill, weights_only=False
         )
         # Reconstruct evolvable modules in algorithm
-        network_info: Optional[Dict[str, Dict[str, Any]]] = checkpoint.get(
+        network_info: Optional[dict[str, dict[str, Any]]] = checkpoint.get(
             "network_info"
         )
         if network_info is None:
@@ -1001,7 +995,7 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
             )
         network_names = network_info["network_names"]
-        loaded_modules: Dict[str, EvolvableAttributeType] = {}
+        loaded_modules: dict[str, EvolvableAttributeType] = {}
         for name in network_names:
             net_dict = {
                 k: v for k, v in network_info["modules"].items() if k.startswith(name)
@@ -1021,7 +1015,7 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
             # Reconstruct the modules
             module_cls: Union[
-                Type[EvolvableModule], Dict[str, Type[EvolvableModule]]
+                type[EvolvableModule], dict[str, type[EvolvableModule]]
             ] = net_dict[f"{name}_cls"]
             if isinstance(module_cls, dict):
                 for agent_id, mod_cls in module_cls.items():
@@ -1187,7 +1181,7 @@ class RLAlgorithm(EvolvableAlgorithm, ABC):
         :type observations: ObservationType
         :return: Preprocessed observations
-        :rtype: torch.Tensor[float] or dict[str, torch.Tensor[float]] or Tuple[torch.Tensor[float], ...]
+        :rtype: torch.Tensor[float] or dict[str, torch.Tensor[float]] or tuple[torch.Tensor[float], ...]
         """
         return preprocess_observation(
             self.observation_space,
@@ -1201,13 +1195,13 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
     """Base object for all multi-agent algorithms in the AgileRL framework.
     :param observation_spaces: The observation spaces of the agent environments.
-    :type observation_spaces: Union[List[spaces.Space], spaces.Dict]
+    :type observation_spaces: Union[list[spaces.Space], spaces.Dict]
     :param action_spaces: The action spaces of the agent environments.
-    :type action_spaces: Union[List[spaces.Space], spaces.Dict]
+    :type action_spaces: Union[list[spaces.Space], spaces.Dict]
     :param index: The index of the individual in the population.
     :type index: int.
     :param agent_ids: The agent IDs of the agents in the environment.
-    :type agent_ids: Optional[List[int]], optional
+    :type agent_ids: Optional[list[int]], optional
     :param learn_step: Learning frequency, defaults to 2048
     :type learn_step: int, optional
     :param device: Device to run the algorithm on, defaults to "cpu"
@@ -1224,13 +1218,13 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
     :type name: Optional[str], optional
     """
-    possible_observation_spaces: Dict[str, spaces.Space]
-    possible_action_spaces: Dict[str, spaces.Space]
+    possible_observation_spaces: dict[str, spaces.Space]
+    possible_action_spaces: dict[str, spaces.Space]
-    shared_agent_ids: List[str]
-    grouped_agents: Dict[str, List[str]]
-    unique_observation_spaces: Dict[str, spaces.Space]
-    unique_action_spaces: Dict[str, spaces.Space]
+    shared_agent_ids: list[str]
+    grouped_agents: dict[str, list[str]]
+    unique_observation_spaces: dict[str, spaces.Space]
+    unique_action_spaces: dict[str, spaces.Space]
     def __init__(
         self,
@@ -1396,14 +1390,14 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
     def preprocess_observation(
         self, observation: ObservationType
-    ) -> Dict[str, TorchObsType]:
+    ) -> dict[str, TorchObsType]:
         """Preprocesses observations for forward pass through neural network.
         :param observations: Observations of environment
         :type observations: numpy.ndarray[float] or dict[str, numpy.ndarray[float]]
         :return: Preprocessed observations
-        :rtype: torch.Tensor[float] or dict[str, torch.Tensor[float]] or Tuple[torch.Tensor[float], ...]
+        :rtype: torch.Tensor[float] or dict[str, torch.Tensor[float]] or tuple[torch.Tensor[float], ...]
         """
         preprocessed = {}
         for agent_id, agent_obs in observation.items():
@@ -1421,10 +1415,10 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
         """Extract action masks from info dictionary
         :param infos: Info dict
-        :type infos: Dict[str, Dict[...]]
+        :type infos: dict[str, dict[...]]
         :return: Action masks
-        :rtype: Dict[str, np.ndarray]
+        :rtype: dict[str, np.ndarray]
         """
         # Get dict of form {"agent_id" : [1, 0, 0, 0]...} etc
         action_masks = {
@@ -1437,14 +1431,14 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
     def extract_agent_masks(
         self, infos: Optional[InfosDict] = None
-    ) -> Tuple[ArrayDict, ArrayDict]:
+    ) -> tuple[ArrayDict, ArrayDict]:
         """Extract env_defined_actions from info dictionary and determine agent masks
         :param infos: Info dict
-        :type infos: Dict[str, Dict[...]]
+        :type infos: dict[str, dict[...]]
         :return: Env defined actions and agent masks
-        :rtype: Tuple[ArrayDict, ArrayDict]
+        :rtype: tuple[ArrayDict, ArrayDict]
         """
         # Deal with case of no env_defined_actions defined in the info dict
         # Deal with empty info dicts for each sub agent
@@ -1506,7 +1500,7 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
         net_config: Optional[NetConfigType] = None,
         flatten: bool = True,
         return_encoders: bool = False,
-    ) -> Union[NetConfigType, Tuple[NetConfigType, Dict[str, NetConfigType]]]:
+    ) -> Union[NetConfigType, tuple[NetConfigType, dict[str, NetConfigType]]]:
         """Extract an appropriate net config for each sub-agent from the passed net config dictionary. If
         grouped_agents is True, the net config will be built for the grouped agents i.e. through their
         common prefix in their agent_id, whenever the passed net config is None.
@@ -1539,7 +1533,7 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
         # Helper function to append unique configs to the unique_configs dictionary
         # -> Access to unique configs is relevant for algorithms with networks that process
         # multiple agents' observations (e.g. shared critic in MADDPG)
-        def _add_to_encoder_configs(config: Dict[str, Any], agent_id: str = "") -> None:
+        def _add_to_encoder_configs(config: dict[str, Any], agent_id: str = "") -> None:
             config = config_from_dict(config)
             config_key = "mlp_config" if isinstance(config, MlpNetConfig) else agent_id
@@ -1697,7 +1691,7 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
         self,
         group_outputs: ArrayDict,
         vect_dim: int,
-        grouped_agents: Dict[str, List[str]],
+        grouped_agents: dict[str, list[str]],
     ) -> ArrayDict:
         """Disassembles batched output by shared policies into their grouped agents' outputs.
@@ -1705,13 +1699,13 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
             i.e. any given agent will always terminate at the same timestep in different vectorized environments.
         :param group_outputs: Dictionary to be disassembled, has the form {'agent': [4, 7, 8]}
-        :type group_outputs: Dict[str, np.ndarray]
+        :type group_outputs: dict[str, np.ndarray]
         :param vect_dim: Vectorization dimension size, i.e. number of vect envs
         :type vect_dim: int
         :param grouped_agents: Dictionary of grouped agent IDs
-        :type grouped_agents: Dict[str, List[str]]
+        :type grouped_agents: dict[str, list[str]]
         :return: Assembled dictionary, e.g. {'agent_0': 4, 'agent_1': 7, 'agent_2': 8}
-        :rtype: Dict[str, np.ndarray]
+        :rtype: dict[str, np.ndarray]
         """
         output_dict = {}
         for group_id, agent_ids in grouped_agents.items():
@@ -1728,9 +1722,9 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
         """Sums the rewards for grouped agents
         :param rewards: Reward dictionary from environment
-        :type rewards: Dict[str, np.ndarray]
+        :type rewards: dict[str, np.ndarray]
         :return: Summed rewards dictionary
-        :rtype: Dict[str, np.ndarray]
+        :rtype: dict[str, np.ndarray]
         """
         reward_shape = list(rewards.values())[0]
         reward_shape = (
@@ -1751,11 +1745,11 @@ class MultiAgentRLAlgorithm(EvolvableAlgorithm, ABC):
         """Assembles individual agent outputs into batched outputs for shared policies.
         :param agent_outputs: Dictionary with individual agent outputs, e.g. {'agent_0': 4, 'agent_1': 7, 'agent_2': 8}
-        :type agent_outputs: Dict[str, np.ndarray]
+        :type agent_outputs: dict[str, np.ndarray]
         :param vect_dim: Vectorization dimension size, i.e. number of vect envs
         :type vect_dim: int
         :return: Assembled dictionary with the form {'agent': [4, 7, 8]}
-        :rtype: Dict[str, np.ndarray]
+        :rtype: dict[str, np.ndarray]
         """
         group_outputs = {}
         for group_id in self.shared_agent_ids:
@@ -1846,7 +1840,7 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
         :type observations: numpy.ndarray[float] or dict[str, numpy.ndarray[float]]
         :return: Preprocessed observations
-        :rtype: torch.Tensor[float] or dict[str, torch.Tensor[float]] or Tuple[torch.Tensor[float], ...]
+        :rtype: torch.Tensor[float] or dict[str, torch.Tensor[float]] or tuple[torch.Tensor[float], ...]
         """
         return cast(TorchObsType, observation)
@@ -1890,6 +1884,8 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
                 path + "/attributes.pt",
                 pickle_module=dill,
             )
+        if self.accelerator is not None:
+            self.accelerator.wait_for_everyone()
     # TODO: This could hopefully be abstracted into EvolvableAlgorithm with a decorator to
     # handle _load_distributed_actor if deepspeed is used.
@@ -1907,28 +1903,22 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
             if weights_only:
                 if self.use_separate_reference_adapter:
                     self._update_existing_adapter(
-                        self.accelerator,
-                        self.actor,
                         path,
                         "reference",
                     )
                 self._update_existing_adapter(
-                    self.accelerator,
-                    self.actor,
                     path,
                     "actor",
                 )
             else:
                 self._load_distributed_actor(path, tag="save_checkpoint")
-            checkpoint["accelerator"] = (
-                Accelerator() if self.accelerator is not None else None
-            )
-            self.accelerator = None
             for attr, value in checkpoint.items():
                 setattr(self, attr, value)
+            self.device = self.accelerator.device
             self.optimizer = None
             self.optimizer = OptimizerWrapper(
                 optimizer_cls=self._select_optim_class(),
@@ -1937,7 +1927,6 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
                 lr=self.lr,
                 lr_name="lr",
             )
-            self.wrap_models()
         else:
             super().load_checkpoint(path + "/attributes.pt")
@@ -1964,11 +1953,11 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
             """
         )
-    def _select_optim_class(self) -> Union[Type[OptimizerType], Type[DummyOptimizer]]:
+    def _select_optim_class(self) -> Union[type[OptimizerType], type[DummyOptimizer]]:
         """Select the optimizer class based on the accelerator and deepspeed config.
         :return: Optimizer class
-        :rtype: Union[Type[torch.optim.Optimizer], Type[DummyOptimizer]]
+        :rtype: Union[type[torch.optim.Optimizer], type[DummyOptimizer]]
         """
         if self.accelerator is None:
             return AdamW
@@ -2089,8 +2078,6 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
                 None,
             )
         if self.use_vllm:
-            destroy_model_parallel()
-            del self.llm.llm_engine.model_executor.driver_worker
             self.llm = None
         gc.collect()
         torch.cuda.empty_cache()
@@ -2201,7 +2188,7 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
         lr: float,
         accelerator: Optional[Accelerator] = None,
         scheduler_config: Optional[CosineLRScheduleConfig] = None,
-    ) -> Tuple[Optional[Accelerator], Optional[SequentialLR]]:
+    ) -> tuple[Optional[Accelerator], Optional[SequentialLR]]:
         """Update the learning rate of the optimizer
         :param optimizer: Optimizer
@@ -2268,20 +2255,14 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
             "Recompile method is not available for LLM finetuning algorithms."
         )
-    @staticmethod
     def _update_existing_adapter(
-        accelerator: Accelerator,
-        wrapped_model: DeepSpeedEngine,
+        self,
         checkpoint_dir: str,
         adapter_name: str,
     ) -> None:
         """
         Overwrite weights of an existing adapter in-place without creating new parameters.
-        :param accelerator: Accelerator
-        :type accelerator: Accelerator
-        :param wrapped_model: Wrapped model
-        :type wrapped_model: DeepSpeedEngine
         :param checkpoint_dir: Checkpoint directory
         :type checkpoint_dir: str
         :param adapter_name: Adapter name
@@ -2290,7 +2271,7 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
         :return: None
         :rtype: None
         """
-        base_model = accelerator.unwrap_model(wrapped_model)
+        base_model = self.accelerator.unwrap_model(self.actor)
         if hasattr(base_model, "module"):
             base_model = base_model.module

{agilerl-2.3.5.dev0 → agilerl-2.3.5.dev1}/agilerl/algorithms/core/optimizer_wrapper.py RENAMED Viewed

@@ -1,5 +1,5 @@
 import inspect
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Optional, Union
 import torch.nn as nn
 from peft import PeftModel
@@ -10,9 +10,9 @@ from agilerl.protocols import EvolvableAlgorithm
 from agilerl.typing import OptimizerType, StateDict
 from agilerl.utils.llm_utils import DummyOptimizer
-ModuleList = List[EvolvableModule]
+ModuleList = list[EvolvableModule]
 _Optimizer = Union[
-    Type[OptimizerType], Dict[str, Type[OptimizerType]], Type[DummyOptimizer]
+    type[OptimizerType], dict[str, type[OptimizerType]], type[DummyOptimizer]
 ]
 _Module = Union[EvolvableModule, ModuleDict, ModuleList, PeftModel]
@@ -21,7 +21,7 @@ def init_from_multiple(
     networks: ModuleList,
     optimizer_cls: OptimizerType,
     lr: float,
-    optimizer_kwargs: Dict[str, Any],
+    optimizer_kwargs: dict[str, Any],
 ) -> Optimizer:
     """
     Initialize an optimizer from a list of networks.
@@ -33,7 +33,7 @@ def init_from_multiple(
     :param lr: The learning rate of the optimizer.
     :type lr: float
     :param optimizer_kwargs: The keyword arguments to be passed to the optimizer.
-    :type optimizer_kwargs: Dict[str, Any]
+    :type optimizer_kwargs: dict[str, Any]
     """
     opt_args = []
     for i, net in enumerate(networks):
@@ -51,7 +51,7 @@ def init_from_single(
     network: EvolvableModule,
     optimizer_cls: OptimizerType,
     lr: float,
-    optimizer_kwargs: Dict[str, Any],
+    optimizer_kwargs: dict[str, Any],
 ) -> Optimizer:
     """
     Initialize an optimizer from a single network.
@@ -67,15 +67,15 @@ class OptimizerWrapper:
     to be able to reinitialize them after mutating an individual.
     :param optimizer_cls: The optimizer class to be initialized.
-    :type optimizer_cls: Type[torch.optim.Optimizer]
+    :type optimizer_cls: type[torch.optim.Optimizer]
     :param networks: The network/s that the optimizer will update.
     :type networks: EvolvableModule, ModuleDict
     :param lr: The learning rate of the optimizer.
     :type lr: float
     :param optimizer_kwargs: The keyword arguments to be passed to the optimizer.
-    :type optimizer_kwargs: Dict[str, Any]
+    :type optimizer_kwargs: dict[str, Any]
     :param network_names: The attribute names of the networks in the parent container.
-    :type network_names: List[str]
+    :type network_names: list[str]
     :param lr_name: The attribute name of the learning rate in the parent container.
     :type lr_name: str
     """
@@ -87,8 +87,8 @@ class OptimizerWrapper:
         optimizer_cls: _Optimizer,
         networks: _Module,
         lr: float,
-        optimizer_kwargs: Optional[Dict[str, Any]] = None,
-        network_names: Optional[List[str]] = None,
+        optimizer_kwargs: Optional[dict[str, Any]] = None,
+        network_names: Optional[list[str]] = None,
         lr_name: Optional[str] = None,
     ) -> None:
@@ -208,7 +208,7 @@ class OptimizerWrapper:
         current_frame = inspect.currentframe()
         return current_frame.f_back.f_back.f_locals["self"]
-    def _infer_network_attr_names(self, container: Any) -> List[str]:
+    def _infer_network_attr_names(self, container: Any) -> list[str]:
         """
         Infer attribute names of the networks being optimized.
@@ -263,7 +263,7 @@ class OptimizerWrapper:
         Load the state of the optimizer from the passed state dictionary.
         :param state_dict: State dictionary of the optimizer.
-        :type state_dict: Dict[str, Any]
+        :type state_dict: dict[str, Any]
         """
         if isinstance(self.networks[0], ModuleDict):
             assert (
@@ -293,7 +293,7 @@ class OptimizerWrapper:
         return self.optimizer.state_dict()
-    def optimizer_cls_names(self) -> Union[str, Dict[str, str]]:
+    def optimizer_cls_names(self) -> Union[str, dict[str, str]]:
         """
         Return the names of the optimizers.
         """
@@ -304,7 +304,7 @@ class OptimizerWrapper:
             }
         return self.optimizer_cls.__name__
-    def checkpoint_dict(self, name: str) -> Dict[str, Any]:
+    def checkpoint_dict(self, name: str) -> dict[str, Any]:
         """
         Return a dictionary of the optimizer's state and parameters.
@@ -312,7 +312,7 @@ class OptimizerWrapper:
         :type name: str
         :return: A dictionary of the optimizer's state and parameters.
-        :rtype: Dict[str, Any]
+        :rtype: dict[str, Any]
         """
         return {
             f"{name}_cls": self.optimizer_cls_names(),

agilerl 2.3.5.dev0__tar.gz → 2.3.5.dev1__tar.gz

agilerl 2.3.5.dev0__tar.gz → 2.3.5.dev1__tar.gz