agilerl 2.4.1.dev1__py3-none-any.whl → 2.4.1.dev3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- agilerl/__init__.py +18 -0
- agilerl/algorithms/core/base.py +53 -26
- agilerl/algorithms/core/optimizer_wrapper.py +11 -3
- agilerl/algorithms/core/registry.py +1 -1
- agilerl/algorithms/dpo.py +5 -6
- agilerl/algorithms/grpo.py +15 -16
- agilerl/algorithms/ilql.py +14 -0
- agilerl/protocols.py +131 -0
- agilerl/utils/algo_utils.py +51 -4
- agilerl/utils/llm_utils.py +15 -46
- agilerl/utils/utils.py +2 -2
- {agilerl-2.4.1.dev1.dist-info → agilerl-2.4.1.dev3.dist-info}/METADATA +21 -9
- {agilerl-2.4.1.dev1.dist-info → agilerl-2.4.1.dev3.dist-info}/RECORD +15 -15
- {agilerl-2.4.1.dev1.dist-info → agilerl-2.4.1.dev3.dist-info}/WHEEL +0 -0
- {agilerl-2.4.1.dev1.dist-info → agilerl-2.4.1.dev3.dist-info}/licenses/LICENSE +0 -0
agilerl/__init__.py
CHANGED
@@ -0,0 +1,18 @@
+from importlib.metadata import metadata
+from importlib.util import find_spec
+
+from packaging.requirements import Requirement
+
+
+def get_extra_dependencies(package: str, extra: str) -> list[str]:
+    requires = metadata(package).get_all("Requires-Dist") or []
+    deps = []
+    for req in requires:
+        r = Requirement(req)
+        if r.marker and r.marker.evaluate({"extra": extra}):
+            deps.append(r.name)
+    return deps
+
+
+LLM_PACKAGES = get_extra_dependencies("agilerl", "llm")
+HAS_LLM_DEPENDENCIES = all(find_spec(pkg) is not None for pkg in LLM_PACKAGES)
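For context: the new module reads the installed distribution's own `Requires-Dist` metadata and evaluates each requirement's marker against `extra == "llm"`, so the flag tracks whatever the wheel actually declares. A minimal sketch of consuming the two new module-level names (the printed values depend on your environment):

```python
# Hypothetical downstream check using the names added above.
from agilerl import HAS_LLM_DEPENDENCIES, LLM_PACKAGES

print(LLM_PACKAGES)  # distribution names whose markers match extra == "llm"
if not HAS_LLM_DEPENDENCIES:
    print("LLM extras missing; install with: pip install agilerl[llm]")
```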
agilerl/algorithms/core/base.py
CHANGED
@@ -27,20 +27,14 @@ import torch
 import torch.nn.functional as F
 from accelerate import Accelerator
 from accelerate.utils import broadcast_object_list, set_seed
-from accelerate.utils.deepspeed import DeepSpeedOptimizerWrapper
-from deepspeed.checkpoint.utils import clone_tensors_for_torch_save
 from gymnasium import spaces
-from peft import LoraConfig, PeftModel, get_peft_model, set_peft_model_state_dict
-from safetensors.torch import load_file
 from tensordict import TensorDict
 from torch._dynamo import OptimizedModule
 from torch.nn.utils import clip_grad_norm_
 from torch.optim import AdamW
 from torch.optim.lr_scheduler import SequentialLR
-from transformers import PretrainedConfig
-from transformers.modeling_utils import PreTrainedModel
-from vllm import LLM, SamplingParams
 
+from agilerl import HAS_LLM_DEPENDENCIES
 from agilerl.algorithms.core.optimizer_wrapper import OptimizerWrapper
 from agilerl.algorithms.core.registry import (
     HyperparameterConfig,
@@ -55,7 +49,11 @@ from agilerl.protocols import (
     EvolvableAttributeDict,
     EvolvableAttributeType,
     EvolvableModule,
+    LoraConfigProtocol,
     ModuleDict,
+    PeftModelProtocol,
+    PretrainedConfigProtocol,
+    PreTrainedModelProtocol,
 )
 from agilerl.typing import (
     ActionType,
@@ -74,6 +72,7 @@ from agilerl.typing import (
 )
 from agilerl.utils.algo_utils import (
     CosineLRScheduleConfig,
+    DummyOptimizer,
     VLLMConfig,
     check_supported_space,
     chkpt_attribute_to_device,
@@ -96,11 +95,18 @@ from agilerl.utils.evolvable_networks import (
     is_image_space,
     is_vector_space,
 )
-
-
-
-
-
+
+if HAS_LLM_DEPENDENCIES:
+    from accelerate.utils.deepspeed import DeepSpeedOptimizerWrapper
+    from deepspeed.checkpoint.utils import clone_tensors_for_torch_save
+    from peft import LoraConfig, get_peft_model, set_peft_model_state_dict
+    from safetensors.torch import load_file
+    from vllm import LLM, SamplingParams
+
+    from agilerl.utils.llm_utils import (
+        create_model_from_name_or_path,
+        gather_if_zero3,
+    )
 
 __all__ = ["EvolvableAlgorithm", "RLAlgorithm", "MultiAgentRLAlgorithm"]
 
@@ -1145,6 +1151,16 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
 
         return self
 
+    def clean_up(self) -> None:
+        """
+        Clean up the algorithm by deleting the networks and optimizers.
+
+        :return: None
+        :rtype: None
+        """
+        for evo_attr in self.evolvable_attributes().values():
+            del evo_attr
+
 
 class RLAlgorithm(EvolvableAlgorithm, ABC):
     """Base object for all single-agent algorithms in the AgileRL framework.
@@ -1801,6 +1817,10 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
     :type accelerator: Optional[Accelerator]
     :param name: The name of the algorithm.
     :type name: Optional[str]
+    :param model_config: The configuration for the model.
+    :type model_config: dict[str, Any] | PretrainedConfig | None
+    :param gradient_checkpointing: Whether to use gradient checkpointing.
+    :type gradient_checkpointing: bool
     """
 
     def __init__(
@@ -1815,10 +1835,10 @@
         seed: int,
         pad_token_id: int,
         pad_token: str,
-        lora_config: LoraConfig | None,
+        lora_config: LoraConfigProtocol | None,
         use_separate_reference_adapter: bool,
         model_name: str | None = None,
-        actor_network: PreTrainedModel | None = None,
+        actor_network: PreTrainedModelProtocol | None = None,
         micro_batch_size_per_gpu: int | None = None,
         cosine_lr_schedule_config: Optional[CosineLRScheduleConfig] = None,
         hp_config: Optional[HyperparameterConfig] = None,
@@ -1826,9 +1846,14 @@
         device: Union[str, torch.device] = "cpu",
         accelerator: Optional[Accelerator] = None,
         name: Optional[str] = None,
-        model_config: dict[str, Any] | PretrainedConfig | None = None,
+        model_config: dict[str, Any] | PretrainedConfigProtocol | None = None,
        gradient_checkpointing: bool = True,
     ):
+        if not HAS_LLM_DEPENDENCIES:
+            raise ImportError(
+                "LLM dependencies are not installed. Please install them using `pip install agilerl[llm]`."
+            )
+
         if model_name is None and actor_network is None:
             raise ValueError(
                 "At least one of model_name or actor_network must be provided."
@@ -1883,7 +1908,7 @@
         )
         lr = optim_lr
 
-        if lora_config is None and not isinstance(actor_network, PeftModel):
+        if lora_config is None and not isinstance(actor_network, PeftModelProtocol):
             warnings.warn(
                 "No LoRA config provided. AgileRL can only be used to finetune adapters at present. Using default LoRA configuration for RL finetuning."
             )
@@ -2049,7 +2074,7 @@
             device_map="auto"
         )
         tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B")
-        model = PeftModel.from_pretrained(base_model, path)
+        model = PeftModelProtocol.from_pretrained(base_model, path)
         """
         )
@@ -2161,6 +2186,11 @@
     def clean_up(self) -> None:
         """Clean up the algorithm."""
         if self.accelerator is not None:
+            # Free up GPU memory occupied by parameters
+            if hasattr(self.actor, "empty_partition_cache"):
+                self.actor.empty_partition_cache()
+            if hasattr(self.actor, "destroy"):
+                self.actor.destroy()
             (
                 self.actor,
                 self.optimizer,
@@ -2184,10 +2214,8 @@
         if hasattr(self, "llm"):
             del self.llm.llm_engine.model_executor
             del self.llm
-
         gc.collect()
         torch.cuda.empty_cache()
-        torch.cuda.reset_peak_memory_stats()
         torch.cuda.synchronize()
 
     def clone(self, index: Optional[int] = None, wrap: bool = True):
@@ -2222,8 +2250,8 @@
         input_args["wrap"] = False
         input_args["clone"] = True
 
-        actor: PeftModel = cast(
-            PeftModel,
+        actor: PeftModelProtocol = cast(
+            PeftModelProtocol,
             (
                 self.accelerator.unwrap_model(self.actor)
                 if self.accelerator is not None
@@ -2415,12 +2443,12 @@
         self.reference_update_tracker += 1
 
     def _initialize_actors(
-        self, base_model: PreTrainedModel | None, add_adapters: bool = True
+        self, base_model: PreTrainedModelProtocol | None, add_adapters: bool = True
     ):
         """Initialize the actor network.
 
         :param base_model: Base model
-        :type base_model: PreTrainedModel
+        :type base_model: PreTrainedModelProtocol
         :param add_adapters: Flag to indicate if adapters should be added to the model, defaults to True
         :type add_adapters: bool, optional
         """
@@ -2430,7 +2458,7 @@
             self.pretrained_model_name_or_path
         )
 
-        if isinstance(base_model, PeftModel) and add_adapters:
+        if isinstance(base_model, PeftModelProtocol) and add_adapters:
             # Handles backwards compatibility with user providing a peft model as the actor network
             if self.lora_config is None:
                 adapter_name = list(base_model.peft_config.keys())
@@ -2440,7 +2468,7 @@
             if "default" in list(base_model.peft_config.keys()):
                 base_model.peft_config.pop("default")
 
-        self.actor: PeftModel = (
+        self.actor: PeftModelProtocol = (
             get_peft_model(base_model, self.lora_config, adapter_name="actor")
             if add_adapters
             else base_model
@@ -2589,7 +2617,6 @@
     def _move_model_to_vllm(self) -> None:
         """Move the deepspeed model to vllm."""
 
-        # TODO: Add support for ZeRO Stage 3
        if self.accelerator is not None:
             self.accelerator.wait_for_everyone()
             model_ref = self.accelerator.unwrap_model(self.actor)
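The pattern introduced above (guard the optional imports behind `HAS_LLM_DEPENDENCIES`, then fail fast in `LLMAlgorithm.__init__`) is what lets the non-LLM algorithms import cleanly without deepspeed/peft/vllm installed. A stand-alone sketch of the same idea, using illustrative names rather than agilerl's own classes:

```python
# Stand-alone sketch of the guarded-import pattern (illustrative names only).
from importlib.util import find_spec

# True only when every optional package is importable.
HAS_LLM_DEPENDENCIES = all(
    find_spec(pkg) is not None
    for pkg in ("datasets", "deepspeed", "peft", "transformers", "vllm")
)

if HAS_LLM_DEPENDENCIES:
    from peft import LoraConfig  # heavy import, only attempted when present


class LLMFineTuner:  # hypothetical stand-in for LLMAlgorithm
    def __init__(self) -> None:
        if not HAS_LLM_DEPENDENCIES:
            # Fail fast with an actionable message instead of a bare
            # ImportError at module-import time.
            raise ImportError(
                "LLM dependencies are not installed. "
                "Please install them using `pip install agilerl[llm]`."
            )
```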
agilerl/algorithms/core/optimizer_wrapper.py
CHANGED
@@ -2,19 +2,27 @@ import inspect
 from typing import Any, Optional, Union
 
 import torch.nn as nn
-from peft import PeftModel
 from torch.optim import Optimizer
 
+from agilerl import HAS_LLM_DEPENDENCIES
 from agilerl.modules import EvolvableModule, ModuleDict
 from agilerl.protocols import EvolvableAlgorithm
 from agilerl.typing import OptimizerType, StateDict
-from agilerl.utils.llm_utils import DummyOptimizer
+from agilerl.utils.algo_utils import DummyOptimizer
+
+if HAS_LLM_DEPENDENCIES:
+    from peft import PeftModel
+
+    PeftModelType = PeftModel
+else:
+    PeftModelType = "PeftModel"
+
 
 ModuleList = list[EvolvableModule]
 _Optimizer = Union[
     type[OptimizerType], dict[str, type[OptimizerType]], type[DummyOptimizer]
 ]
-_Module = Union[EvolvableModule, ModuleDict, ModuleList, PeftModel]
+_Module = Union[EvolvableModule, ModuleDict, ModuleList, PeftModelType]
 
 
 def init_from_multiple(
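The `PeftModelType = PeftModel` / `PeftModelType = "PeftModel"` fallback works because `typing.Union` accepts a string as a forward reference, so the `_Module` alias still evaluates when peft is absent. A self-contained sketch with a fake stand-in module:

```python
# Self-contained sketch of the string-fallback alias pattern.
from typing import Union

try:
    from peft import PeftModel  # optional dependency
    PeftModelType = PeftModel
except ImportError:
    # A string inside Union becomes a ForwardRef, so the alias below
    # still evaluates without peft installed.
    PeftModelType = "PeftModel"


class EvolvableModule:  # illustrative stand-in, not agilerl's class
    pass


_Module = Union[EvolvableModule, PeftModelType]
print(_Module)  # resolves either way
```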
agilerl/algorithms/dpo.py
CHANGED
@@ -5,11 +5,10 @@ import numpy as np
 import torch
 import torch.nn.functional as F
 from accelerate import Accelerator
-from peft import LoraConfig
-from transformers import PreTrainedModel
 
 from agilerl.algorithms.core.base import LLMAlgorithm
 from agilerl.algorithms.core.registry import HyperparameterConfig, NetworkGroup
+from agilerl.protocols import LoraConfigProtocol, PreTrainedModelProtocol
 from agilerl.typing import ExperiencesType, LLMObsType
 from agilerl.utils.algo_utils import get_experiences_samples
 from agilerl.utils.llm_utils import PreferenceGym
@@ -25,7 +24,7 @@ class DPO(LLMAlgorithm):
     :param model_name: Model name
     :type model_name: str, optional
     :param actor_network: HuggingFace LLM
-    :type actor_network: PreTrainedModel
+    :type actor_network: PreTrainedModelProtocol
     :param model_config: Model configuration, to be used when creating the model from a name or path
     :param hp_config: RL hyperparameter mutation configuration, defaults to None, whereby algorithm mutations are disabled.
     :type hp_config: HyperparameterConfig, optional
@@ -50,7 +49,7 @@ class DPO(LLMAlgorithm):
     :param device: Device for accelerated computing, 'cpu' or 'cuda', defaults to 'cpu'
     :type device: str, optional
     :param lora_config: Config for LoRA, defaults to None
-    :type lora_config: LoraConfig, optional
+    :type lora_config: LoraConfigProtocol, optional
     :param accelerator: Accelerator for distributed computing, defaults to None
     :type accelerator: accelerate.Accelerator(), optional
     :param wrap: Wrap models for distributed training upon creation, defaults to True
@@ -70,7 +69,7 @@ class DPO(LLMAlgorithm):
         pad_token_id: int,
         pad_token: str,
         model_name: str | None = None,
-        actor_network: PreTrainedModel | None = None,
+        actor_network: PreTrainedModelProtocol | None = None,
         model_config: dict[str, Any] | None = None,
         hp_config: HyperparameterConfig | None = None,
         index: int = 0,
@@ -83,7 +82,7 @@ class DPO(LLMAlgorithm):
         micro_batch_size_per_gpu: int | None = None,
         reduce_memory_peak: bool = False,
         device: str = "cpu",
-        lora_config: LoraConfig | None = None,
+        lora_config: LoraConfigProtocol | None = None,
         accelerator: Accelerator | None = None,
         wrap: bool = True,
         clone: bool = False,
agilerl/algorithms/grpo.py
CHANGED
@@ -1,17 +1,18 @@
 import gc
-from typing import Any, Optional
+from typing import Any, Optional
 
 import numpy as np
 import torch
 from accelerate import Accelerator
-from deepspeed.runtime.zero.stage3 import DeepSpeedZeroOptimizer_Stage3
-from deepspeed.runtime.zero.stage_1_and_2 import DeepSpeedZeroOptimizer
-from peft import LoraConfig, PeftModel
-from transformers import GenerationConfig
-from transformers.modeling_utils import PreTrainedModel
 
+from agilerl import HAS_LLM_DEPENDENCIES
 from agilerl.algorithms.core import LLMAlgorithm
 from agilerl.algorithms.core.registry import HyperparameterConfig, NetworkGroup
+from agilerl.protocols import (
+    LoraConfigProtocol,
+    PeftModelProtocol,
+    PreTrainedModelProtocol,
+)
 from agilerl.typing import ExperiencesType, LLMObsType
 from agilerl.utils.algo_utils import (
     CosineLRScheduleConfig,
@@ -23,10 +24,8 @@ from agilerl.utils.llm_utils import (
     ReasoningGym,
 )
 
-
-
-    DeepSpeedZeroOptimizer_Stage3,  # ZeRO Stage 3 optimizer
-]
+if HAS_LLM_DEPENDENCIES:
+    from transformers import GenerationConfig
 
 
 class GRPO(LLMAlgorithm):
@@ -39,7 +38,7 @@ class GRPO(LLMAlgorithm):
     :param model_name: Model name
     :type model_name: str, optional
     :param actor_network: HuggingFace LLM
-    :type actor_network: PreTrainedModel
+    :type actor_network: PreTrainedModelProtocol
     :param model_config: Model configuration, to be used when creating the model from a name or path
     :type model_config: dict[str, Any], optional
     :param hp_config: RL hyperparameter mutation configuration, defaults to None, whereby algorithm mutations are disabled.
@@ -77,7 +76,7 @@ class GRPO(LLMAlgorithm):
     :param max_model_len: Maximum context window length, defaults to None
     :type max_model_len: int, optional
     :param lora_config: Config for LoRA, defaults to None
-    :type lora_config: LoraConfig, optional
+    :type lora_config: LoraConfigProtocol, optional
     :param cosine_lr_schedule_config: Config for cosine lr scheduling, defaults to None
     :type cosine_lr_schedule_config: CosineLRScheduleConfig, optional
     :param accelerator: Accelerator for distributed computing, defaults to None
@@ -105,7 +104,7 @@ class GRPO(LLMAlgorithm):
         pad_token_id: int,
         pad_token: str,
         model_name: str | None = None,
-        actor_network: PreTrainedModel | None = None,
+        actor_network: PreTrainedModelProtocol | None = None,
         model_config: dict[str, Any] | None = None,
         hp_config: Optional[HyperparameterConfig] = None,
         index: int = 0,
@@ -127,7 +126,7 @@ class GRPO(LLMAlgorithm):
         max_output_tokens: int | None = 1024,
         min_output_tokens: Optional[int] = None,
         max_model_len: Optional[int] = None,
-        lora_config: Optional[LoraConfig] = None,
+        lora_config: Optional[LoraConfigProtocol] = None,
         cosine_lr_schedule_config: Optional[CosineLRScheduleConfig] = None,
         accelerator: Optional[Accelerator] = None,
         device: str = "cpu",
@@ -188,8 +187,8 @@ class GRPO(LLMAlgorithm):
         ), "Policy update epochs must be greater than or equal to one."
         if actor_network is not None:
             assert isinstance(
-                actor_network, (PeftModel, PreTrainedModel)
-            ), "Actor network must be a PeftModel or PreTrainedModel"
+                actor_network, (PeftModelProtocol, PreTrainedModelProtocol)
+            ), "Actor network must be a PeftModelProtocol or PreTrainedModelProtocol"
 
         self.clip_coef = clip_coef
         self.update_epochs = update_epochs
agilerl/algorithms/ilql.py
CHANGED
@@ -1223,6 +1223,20 @@ class ILQL(nn.Module):
         self.fitness = checkpoint["fitness"]
         self.steps = checkpoint["steps"]
 
+    def clean_up(self) -> None:
+        """Clean up the networks"""
+        del self.model
+        del self.actor
+        del self.actor_target
+        del self.v
+        del self.q
+        del self.target_q
+        del self.pi
+        del self.optimizer
+        if self.double_q:
+            del self.q2
+            del self.target_q2
+
 
 class ILQL_Policy:
     def __init__(self, iql_model: ILQL, kind: str, **generation_kwargs) -> None:
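For readers unfamiliar with this teardown style: `del self.attr` removes the instance attribute so the underlying tensors lose a reference and become collectable. A toy illustration of the contract (names here are illustrative, not agilerl's):

```python
# Toy illustration: deleting an attribute drops the reference it held.
import gc


class Holder:  # illustrative stand-in
    def __init__(self) -> None:
        self.buf = bytearray(10**6)

    def clean_up(self) -> None:
        del self.buf  # attribute gone; the bytearray is now collectable


h = Holder()
h.clean_up()
gc.collect()
assert not hasattr(h, "buf")
```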
agilerl/protocols.py
CHANGED
@@ -299,3 +299,134 @@ class AgentWrapper(Protocol, Generic[T_EvolvableAlgorithm]):
     def learn(
         self, experiences: tuple[Iterable[ObservationType], ...], **kwargs
     ) -> None: ...
+
+
+@runtime_checkable
+class LoraConfigProtocol(Protocol):
+    """
+    Protocol for LoRA configuration.
+
+    LoRA configuration is used to configure the LoRA module.
+    """
+
+    r: int
+    lora_alpha: int
+    target_modules: str
+    task_type: str
+    lora_dropout: float
+
+
+@runtime_checkable
+class PretrainedConfigProtocol(Protocol):
+    """Protocol for HuggingFace pre-trained model configuration.
+
+    Defines the interface for model configuration objects from HuggingFace transformers.
+    These configs store model architecture parameters and can be converted to/from dictionaries.
+    """
+
+    # Common model architecture attributes (these are examples - actual configs may have more)
+    vocab_size: int
+    hidden_size: int
+    num_attention_heads: int
+    num_hidden_layers: int
+
+    def to_dict(self) -> dict[str, Any]: ...
+    def to_json_string(self) -> str: ...
+    def save_pretrained(self, save_directory: str, **kwargs: Any) -> None: ...
+
+    @classmethod
+    def from_pretrained(
+        cls, pretrained_model_name_or_path: str, **kwargs: Any
+    ) -> "PretrainedConfigProtocol": ...
+
+    @classmethod
+    def from_dict(
+        cls, config_dict: dict[str, Any], **kwargs: Any
+    ) -> "PretrainedConfigProtocol": ...
+
+    @classmethod
+    def from_json_file(cls, json_file: str) -> "PretrainedConfigProtocol": ...
+
+
+@runtime_checkable
+class GenerationConfigProtocol(Protocol):
+    """Protocol for text generation configuration.
+
+    Used to configure parameters for text generation in language models.
+    """
+
+    do_sample: bool
+    temperature: float
+    max_length: Optional[int]
+    max_new_tokens: Optional[int]
+    min_new_tokens: Optional[int]
+    pad_token_id: int
+    repetition_penalty: float
+    top_p: float
+    top_k: int
+    min_p: float
+
+
+@runtime_checkable
+class PreTrainedModelProtocol(Protocol):
+    """Protocol for HuggingFace pre-trained models.
+
+    Defines the interface for pre-trained transformer models from HuggingFace.
+    These models support text generation, state management, and device operations.
+    """
+
+    device: DeviceType
+    config: Any
+
+    def eval(self) -> "PreTrainedModelProtocol": ...
+    def train(self, mode: bool = True) -> "PreTrainedModelProtocol": ...
+    def generate(
+        self,
+        input_ids: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        generation_config: Optional["GenerationConfigProtocol"] = None,
+        **kwargs: Any
+    ) -> torch.Tensor: ...
+    def forward(self, *args: Any, **kwargs: Any) -> Any: ...
+    def parameters(self) -> Generator: ...
+    def state_dict(self) -> dict[str, Any]: ...
+    def load_state_dict(
+        self, state_dict: dict[str, Any], strict: bool = True
+    ) -> None: ...
+    def to(self, device: DeviceType) -> "PreTrainedModelProtocol": ...
+
+
+@runtime_checkable
+class PeftModelProtocol(Protocol):
+    """Protocol for PEFT (Parameter-Efficient Fine-Tuning) models.
+
+    PEFT models wrap pre-trained models with adapters for efficient fine-tuning.
+    They extend PreTrainedModel functionality with adapter-specific operations.
+    """
+
+    device: DeviceType
+    config: Any
+    peft_config: dict[str, Any]
+    base_model: PreTrainedModelProtocol
+
+    def eval(self) -> "PeftModelProtocol": ...
+    def train(self, mode: bool = True) -> "PeftModelProtocol": ...
+    def generate(
+        self,
+        input_ids: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        generation_config: Optional["GenerationConfigProtocol"] = None,
+        **kwargs: Any
+    ) -> torch.Tensor: ...
+    def forward(self, *args: Any, **kwargs: Any) -> Any: ...
+    def parameters(self) -> Generator: ...
+    def state_dict(self) -> dict[str, Any]: ...
+    def load_state_dict(
+        self, state_dict: dict[str, Any], strict: bool = True
+    ) -> None: ...
+    def to(self, device: DeviceType) -> "PeftModelProtocol": ...
+
+    @classmethod
+    def from_pretrained(
+        cls, base_model: PreTrainedModelProtocol, adapter_path: str, **kwargs: Any
+    ) -> "PeftModelProtocol": ...
agilerl/utils/algo_utils.py
CHANGED
@@ -13,14 +13,13 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from gymnasium import spaces
-from peft import PeftModel, get_peft_model
 from tensordict import TensorDict, from_module
 from tensordict.nn import CudaGraphModule
 from torch._dynamo import OptimizedModule
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR
-from transformers import PreTrainedModel
 
+from agilerl import HAS_LLM_DEPENDENCIES
 from agilerl.modules.dummy import DummyEvolvable
 from agilerl.protocols import (
     EvolvableAttributeType,
@@ -42,9 +41,16 @@ from agilerl.typing import (
     SupportedObsSpaces,
     TorchObsType,
 )
-from agilerl.utils.llm_utils import gather_if_zero3
 
-
+if HAS_LLM_DEPENDENCIES:
+    from peft import PeftModel, get_peft_model
+    from transformers import PreTrainedModel
+
+    from agilerl.utils.llm_utils import gather_if_zero3
+
+    PreTrainedModelType = Union[PeftModel, PreTrainedModel]
+else:
+    PreTrainedModelType = Union["PeftModel", "PreTrainedModel"]
 
 
 def check_supported_space(observation_space: GymSpaceType) -> None:
@@ -1629,3 +1635,44 @@ def clone_llm(
     if state_dict is not None:
         model.load_state_dict(state_dict, strict=False)
     return model
+
+
+class DummyOptimizer:
+    """
+    Placeholder optimizer class to pass to the OptimizerWrapper when the optimizer is defined in the deepspeed config.
+    """
+
+    def __init__(self, params: list[torch.Tensor], lr: float, **kwargs) -> None:
+        """
+        Sentinel class to use for the optimizer when the optimizer is defined in the deepspeed config.
+
+        :param params: Parameters to optimize.
+        :type params: list[torch.Tensor]
+        :param lr: Learning rate.
+        :type lr: float
+        """
+        pass
+
+    def step(self, closure=None):
+        raise RuntimeError(
+            "DummyOptimizer is a placeholder optimizer and should not be used."
+            "Please ensure you are calling accelerator.prepare() on the optimizer."
+        )
+
+    def zero_grad(self):
+        raise RuntimeError(
+            "DummyOptimizer is a placeholder optimizer and should not be used."
+            "Please ensure you are calling accelerator.prepare() on the optimizer."
+        )
+
+    def state_dict(self):
+        raise RuntimeError(
+            "DummyOptimizer is a placeholder optimizer and should not be used."
+            "Please ensure you are calling accelerator.prepare() on the optimizer."
+        )
+
+    def load_state_dict(self, state_dict):
+        raise RuntimeError(
+            "DummyOptimizer is a placeholder optimizer and should not be used."
+            "Please ensure you are calling accelerator.prepare() on the optimizer."
+        )
agilerl/utils/llm_utils.py
CHANGED
@@ -4,19 +4,29 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from typing import Any, Callable, Generator
 
-import deepspeed
 import gymnasium as gym
 import torch
 import torch.nn as nn
 from accelerate import Accelerator
-from datasets import Dataset
 from torch.utils.data import DataLoader
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from transformers.modeling_utils import PreTrainedModel
-from transformers.tokenization_utils_base import BatchEncoding
 
+from agilerl import HAS_LLM_DEPENDENCIES
 from agilerl.typing import PreferencePrompts, ReasoningPrompts
 
+if HAS_LLM_DEPENDENCIES:
+    import deepspeed
+    from datasets import Dataset
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    from transformers.modeling_utils import PreTrainedModel
+    from transformers.tokenization_utils_base import BatchEncoding
+
+    AutoTokenizer = AutoTokenizer
+else:
+    AutoTokenizer = Any
+    PreTrainedModel = Any
+    BatchEncoding = Any
+    Dataset = Any
+
 
 def apply_chat_template(
     conversation_template: list[dict[str, str]],
@@ -614,47 +624,6 @@ class PreferenceGym(HuggingFaceGym):
         return collate_fn
 
 
-class DummyOptimizer:
-    """
-    Placeholder optimizer class to pass to the OptimizerWrapper when the optimizer is defined in the deepspeed config.
-    """
-
-    def __init__(self, params: list[torch.Tensor], lr: float, **kwargs) -> None:
-        """
-        Sentinel class to use for the optimizer when the optimizer is defined in the deepspeed config.
-
-        :param params: Parameters to optimize.
-        :type params: list[torch.Tensor]
-        :param lr: Learning rate.
-        :type lr: float
-        """
-        pass
-
-    def step(self, closure=None):
-        raise RuntimeError(
-            "DummyOptimizer is a placeholder optimizer and should not be used."
-            "Please ensure you are calling accelerator.prepare() on the optimizer."
-        )
-
-    def zero_grad(self):
-        raise RuntimeError(
-            "DummyOptimizer is a placeholder optimizer and should not be used."
-            "Please ensure you are calling accelerator.prepare() on the optimizer."
-        )
-
-    def state_dict(self):
-        raise RuntimeError(
-            "DummyOptimizer is a placeholder optimizer and should not be used."
-            "Please ensure you are calling accelerator.prepare() on the optimizer."
-        )
-
-    def load_state_dict(self, state_dict):
-        raise RuntimeError(
-            "DummyOptimizer is a placeholder optimizer and should not be used."
-            "Please ensure you are calling accelerator.prepare() on the optimizer."
-        )
-
-
 @contextmanager
 def gather_if_zero3(
     zero_stage: int, params: list[torch.Tensor], modifier_rank: int | None = None
agilerl/utils/utils.py
CHANGED
@@ -36,8 +36,8 @@ from agilerl.hpo.mutation import Mutations
 from agilerl.hpo.tournament import TournamentSelection
 from agilerl.modules import EvolvableModule
 from agilerl.typing import BPTTSequenceType, GymSpaceType, PopulationType
-from agilerl.utils.algo_utils import CosineLRScheduleConfig, clone_llm
-from agilerl.utils.llm_utils import DummyOptimizer, get_state_dict
+from agilerl.utils.algo_utils import CosineLRScheduleConfig, DummyOptimizer, clone_llm
+from agilerl.utils.llm_utils import get_state_dict
 from agilerl.vector.pz_async_vec_env import AsyncPettingZooVecEnv
 
 SupportedObservationSpace = Union[
{agilerl-2.4.1.dev1.dist-info → agilerl-2.4.1.dev3.dist-info}/METADATA
CHANGED
@@ -1,22 +1,23 @@
 Metadata-Version: 2.4
 Name: agilerl
-Version: 2.4.1.dev1
+Version: 2.4.1.dev3
 Summary: AgileRL is a deep reinforcement learning library focused on improving RL development through RLOps.
 License: Apache 2.0
 License-File: LICENSE
 Author: Nick Ustaran-Anderegg
 Author-email: dev@agilerl.com
-Requires-Python: >=3.10,<
+Requires-Python: >=3.10,<3.13
 Classifier: License :: Other/Proprietary License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-
-
+Provides-Extra: all
+Provides-Extra: llm
 Requires-Dist: SuperSuit (>=3.9.0,<4.0.0)
 Requires-Dist: accelerate (>=1.7.0,<2.0.0)
-Requires-Dist: 
+Requires-Dist: datasets (==4.4.1) ; extra == "llm" or extra == "all"
+Requires-Dist: deepspeed (>=0.17.1,<0.18.0) ; extra == "llm" or extra == "all"
 Requires-Dist: dill (>=0.3.7,<0.4.0)
 Requires-Dist: fastrand (>=1.3.0,<2.0.0)
 Requires-Dist: flatten_dict (>=0.4.2,<0.5.0)
@@ -26,11 +27,12 @@ Requires-Dist: h5py (>=3.8.0,<4.0.0)
 Requires-Dist: hydra-core (>=1.3.2,<2.0.0)
 Requires-Dist: jax[cpu] (>=0.4.31,<0.5.0)
 Requires-Dist: matplotlib (>=3.9.4,<3.10.0)
-Requires-Dist: minari (
+Requires-Dist: minari[all] (==0.5.2)
 Requires-Dist: numpy (>=1.26.4,<2.0.0)
 Requires-Dist: omegaconf (>=2.3.0,<3.0.0)
+Requires-Dist: packaging (>=20.0)
 Requires-Dist: pandas (>=2.2.3,<3.0.0)
-Requires-Dist: peft (>=0.
+Requires-Dist: peft (>=0.18.0,<0.19.0) ; extra == "llm" or extra == "all"
 Requires-Dist: pettingzoo (>=1.23.1,<2.0.0)
 Requires-Dist: pre-commit (>=3.4.0,<4.0.0)
 Requires-Dist: pygame (>=2.6.0,<3.0.0)
@@ -41,9 +43,9 @@ Requires-Dist: tensordict (>=0.8,<0.9)
 Requires-Dist: termcolor (>=1.1.0,<2.0.0)
 Requires-Dist: torch (==2.7.1)
 Requires-Dist: tqdm (>=4.66.4,<5.0.0)
-Requires-Dist: transformers (>=4.
+Requires-Dist: transformers (>=4.57.1,<5.0.0) ; extra == "llm" or extra == "all"
 Requires-Dist: ucimlrepo (>=0.0.3,<0.0.4)
-Requires-Dist: vllm (==0.10.0)
+Requires-Dist: vllm (==0.10.0) ; extra == "llm" or extra == "all"
 Requires-Dist: wandb (>=0.17.6,<0.18.0)
 Description-Content-Type: text/markdown
@@ -97,6 +99,16 @@ git clone https://github.com/AgileRL/AgileRL.git && cd AgileRL
 pip install -e .
 ```
 
+If you wish to install all additional dependencies, specify `[all]`; if you want to install a specific family of dependencies, specify that family directly. At present we have just one family, `[llm]`, which contains the dependencies related to our LLM RFT algorithms (datasets, deepspeed, peft, transformers, vllm).
+
+```bash
+pip install agilerl[all]
+```
+Or in development mode:
+```bash
+pip install -e ".[all]"
+```
+
 To install the ``nightly`` version of AgileRL with the latest features, use:
 
 ```bash
{agilerl-2.4.1.dev1.dist-info → agilerl-2.4.1.dev3.dist-info}/RECORD
CHANGED
@@ -1,17 +1,17 @@
-agilerl/__init__.py,sha256=
+agilerl/__init__.py,sha256=0hZjnAULURFWpshG_mhNdaHhf8nlc7h2sR7CLEqup54,572
 agilerl/algorithms/__init__.py,sha256=5N4DqCEETuFBlhnzf7XEQzIClRXX9e-FxQqQHgLh3Es,661
 agilerl/algorithms/bc_lm.py,sha256=dDCN--Y49wJA_msVB_r8XYgLYXSYeJItYyhSD41bFFk,22946
 agilerl/algorithms/core/__init__.py,sha256=kKGnzj4TGRZKk2J6jcaKkK3s1LjCYu979o8u8OJUZjI,268
-agilerl/algorithms/core/base.py,sha256=
-agilerl/algorithms/core/optimizer_wrapper.py,sha256=
-agilerl/algorithms/core/registry.py,sha256=
+agilerl/algorithms/core/base.py,sha256=LeFN0l17oCUxp23zFayq8tr9RFbSw--68TPa1FwobuA,121970
+agilerl/algorithms/core/optimizer_wrapper.py,sha256=UQTlnv-mbNGlQ3RX9ocHtczXhTZq1MBKO6OdoQ879uM,13086
+agilerl/algorithms/core/registry.py,sha256=ndaw9U814tHrPBhEPO9kLIDNKmLStTwLXPsnu-nnj8c,19991
 agilerl/algorithms/cqn.py,sha256=3zE6LPWPV8ut5hLPllw3yhY_amonbiSmbBXJU0-7Zo4,12583
 agilerl/algorithms/ddpg.py,sha256=uau1E37D9SARlf_bTswfZQGQRobh9tOcB6hoRpszx_g,21365
-agilerl/algorithms/dpo.py,sha256=
+agilerl/algorithms/dpo.py,sha256=kN2wp2Ms_2sFiJcmqpVPxG4XHoJis6l6BQlSCsj07pk,15777
 agilerl/algorithms/dqn.py,sha256=P05AspMruXghyqWGzXj4t0x6m6Pl9MKt8EKh3RP2yBU,17105
 agilerl/algorithms/dqn_rainbow.py,sha256=HyP-jkiVOkBUJmvpUlrB6VHo8m-AO2Z84M3Zb_ZP6fQ,20483
-agilerl/algorithms/grpo.py,sha256=
-agilerl/algorithms/ilql.py,sha256=
+agilerl/algorithms/grpo.py,sha256=9VvRf4jQNDOfUlkKDZBNiiBACUybgeOxSQgnszjm2BM,19237
+agilerl/algorithms/ilql.py,sha256=vX070xfPFxNKWh6oEc_LERUJx80JQq8oMzZ8ESBOUgE,79844
 agilerl/algorithms/ippo.py,sha256=W9FDLf5bznG-RvfJs8Gqpa2ARGReqmPB9xW9mu2Mj-c,39085
 agilerl/algorithms/maddpg.py,sha256=qVXDyb_W51lZtvst4K3yiosSy58BEBYbck8wF8CViBA,33908
 agilerl/algorithms/matd3.py,sha256=n17y6PvM51r290Def_QeFT4p7TMo54MIDLN30XqlMk8,37926
@@ -55,7 +55,7 @@ agilerl/networks/distributions.py,sha256=mzntWgwoEdZKAspInbmvfc6_0rGuPdquqQyQkVS
 agilerl/networks/distributions_experimental.py,sha256=K6_EYflAlR6qRouRr6SJXnT19w7QhOA1bwN7kCl3DJ8,18890
 agilerl/networks/q_networks.py,sha256=a1Arze6GypKprxUQObbpJQbikmY5LtrvAAnEyoTrcLM,17284
 agilerl/networks/value_networks.py,sha256=ZLX5vQIxeV65uxOzv2r5QMxF_-fzFT8N1et3lHdQP7E,4630
-agilerl/protocols.py,sha256=
+agilerl/protocols.py,sha256=SQ8T79jmZAqlm2fJ1Qo0kefU5w2c4Mh_wUk9RtiPego,14052
 agilerl/rollouts/__init__.py,sha256=dGR9BnXliQI6yvXPwecV7g5TCtCEPbyIB-W1a5evBBY,130
 agilerl/rollouts/on_policy.py,sha256=VOxUjwzyYngzrTEW9asXsgz1O6lRTUn_PijmjqtzGwQ,8036
 agilerl/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -68,18 +68,18 @@ agilerl/training/train_offline.py,sha256=qAlr3lGQf7EfSSmTtmohi80rUN4HMha955q3pae
 agilerl/training/train_on_policy.py,sha256=iQEIHq_JgBIBH2GPJeLN6QmPRho-_beUdro1H9DPkUA,19360
 agilerl/typing.py,sha256=JtLhZMNyFzrnSeos6ltWyD_8yWFkc8Zx-OIC3d1CPQc,5442
 agilerl/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-agilerl/utils/algo_utils.py,sha256=
+agilerl/utils/algo_utils.py,sha256=Ue9uR5R_QywZbO7jvnQPTVAn6STLT9f-_nwrygs4Iz4,60376
 agilerl/utils/cache.py,sha256=8Q1SYbTxQYzIn40UMy32EWMvtgaduY1k5jqwPihxJ_Q,3418
 agilerl/utils/evolvable_networks.py,sha256=cIJHzadFOaK0aAqwn96HvnuH4atLBxrQ3cwpR1nxvUo,23265
 agilerl/utils/ilql_utils.py,sha256=dU_vbwOB6VsODGGu_hOyDN_xRtFKVhZbxMISFlAUM5s,2293
-agilerl/utils/llm_utils.py,sha256=
+agilerl/utils/llm_utils.py,sha256=rc4fnqw3z1RvKdDUisX4THbRTkAWeg84SPt7VTd_hJY,26594
 agilerl/utils/log_utils.py,sha256=OIhj86V97-ijlUENic2WKIWipB5ITJyBIGM_ZPZg5Vo,4401
 agilerl/utils/minari_utils.py,sha256=WNFzt9ZQuvWy3w84MFhhGkA0e9MAgc4KSI_cmPgFTBo,5109
 agilerl/utils/probe_envs.py,sha256=q2uyPQW7mbo9x4c_Yq9vi2Yu1X9qyLm43adET9SFf9Y,39796
 agilerl/utils/probe_envs_ma.py,sha256=vvUY6lUBJfKGOVZtiFBKQ7Nwmsoj8aFnXD2W8-7rw8A,75686
 agilerl/utils/sampling_utils.py,sha256=Sc2G178eB5_hQEPiMnrMUDt8WdmRI7CVbRZPVg0NDTE,2336
 agilerl/utils/torch_utils.py,sha256=V3W9q3Y8x_eTYk83JORutOalAcZryKrlzq1_-7VxxdU,3424
-agilerl/utils/utils.py,sha256=
+agilerl/utils/utils.py,sha256=bLCBDIEv4xBAC49yqWWoeiTFgYrFBAtcca6F6sFoD7c,39846
 agilerl/vector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agilerl/vector/pz_async_vec_env.py,sha256=uj9TyCn0SWksTUOW84RGspMkXqdGG-wjr86w08uCMb0,36742
 agilerl/vector/pz_vec_env.py,sha256=sFVqm8eecxVHahTpFZEE3fvyZrmp2vMu0GECik8el6M,5978
@@ -89,7 +89,7 @@ agilerl/wrappers/learning.py,sha256=nSVMg6eUBWn13NNdIFgCEHj31CaN_dGryQa13SmMvBw,
 agilerl/wrappers/make_evolvable.py,sha256=sb9oAorGAayrD_6lNbyvHhefA_RKO4bSSNjqS6u9UhI,51079
 agilerl/wrappers/pettingzoo_wrappers.py,sha256=Pw8VzabxfYCw5ad15y5J3rAH1teA6nVVo0RHCTTdOPQ,2063
 agilerl/wrappers/utils.py,sha256=pENFH2AxsXd22s8HGUeM-jRowC0tmjHLWjqDwIq12l8,2194
-agilerl-2.4.1.dev1.dist-info/METADATA,sha256=
-agilerl-2.4.1.dev1.dist-info/WHEEL,sha256=
-agilerl-2.4.1.dev1.dist-info/licenses/LICENSE,sha256=
-agilerl-2.4.1.dev1.dist-info/RECORD,,
+agilerl-2.4.1.dev3.dist-info/METADATA,sha256=ahIiSFnYkAUr_Dwia-i2KDuUcJm30WOGaAIaFrGNB30,20565
+agilerl-2.4.1.dev3.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+agilerl-2.4.1.dev3.dist-info/licenses/LICENSE,sha256=vPX_VnIseflXJ30mQvwbXZoe208EtIr9ZVrl6cfdQNs,11720
+agilerl-2.4.1.dev3.dist-info/RECORD,,
{agilerl-2.4.1.dev1.dist-info → agilerl-2.4.1.dev3.dist-info}/WHEEL
File without changes

{agilerl-2.4.1.dev1.dist-info → agilerl-2.4.1.dev3.dist-info}/licenses/LICENSE
File without changes
|