PyPI - agilerl - Versions diffs - 2.3.5.dev0__tar.gz → 2.4.0.dev0__tar.gz - Mend

agilerl 2.3.5.dev0__tar.gz → 2.4.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97) hide show

{agilerl-2.3.5.dev0 → agilerl-2.4.0.dev0}/LICENSE RENAMED Viewed

@@ -199,3 +199,16 @@
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
+---
+THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
+This project incorporates code from the following third-party software:
+TRL (Transformer Reinforcement Learning)
+Copyright 2020-2025 The HuggingFace Team
+Licensed under Apache License 2.0
+https://github.com/huggingface/trl
+Portions of the GRPO implementation use code patterns adapted from TRL's GRPOTrainer.

{agilerl-2.3.5.dev0 → agilerl-2.4.0.dev0}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,9 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: agilerl
-Version: 2.3.5.dev0
+Version: 2.4.0.dev0
 Summary: AgileRL is a deep reinforcement learning library focused on improving RL development through RLOps.
 License: Apache 2.0
+License-File: LICENSE
 Author: Nick Ustaran-Anderegg
 Author-email: dev@agilerl.com
 Requires-Python: >=3.10,<4.0
@@ -12,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Requires-Dist: SuperSuit (>=3.9.0,<4.0.0)
 Requires-Dist: accelerate (>=1.7.0,<2.0.0)
 Requires-Dist: deepspeed (>=0.17.1,<0.18.0)
@@ -275,7 +277,7 @@ agent_pop = create_population(
 Next, create the tournament, mutations and experience replay buffer objects that allow agents to share memory and efficiently perform evolutionary HPO.
 <details>
-<summary>Mutations and Tournament Seelection Example</summary>
+<summary>Mutations and Tournament Selection Example</summary>
 ```python
 from agilerl.components.replay_buffer import ReplayBuffer

{agilerl-2.3.5.dev0 → agilerl-2.4.0.dev0}/README.md RENAMED Viewed

@@ -228,7 +228,7 @@ agent_pop = create_population(
 Next, create the tournament, mutations and experience replay buffer objects that allow agents to share memory and efficiently perform evolutionary HPO.
 <details>
-<summary>Mutations and Tournament Seelection Example</summary>
+<summary>Mutations and Tournament Selection Example</summary>
 ```python
 from agilerl.components.replay_buffer import ReplayBuffer

{agilerl-2.3.5.dev0 → agilerl-2.4.0.dev0}/agilerl/algorithms/__init__.py RENAMED Viewed

@@ -1,6 +1,7 @@
 from .bc_lm import BC_LM, BC_Evaluator, BC_Policy
 from .cqn import CQN
 from .ddpg import DDPG
+from .dpo import DPO
 from .dqn import DQN
 from .dqn_rainbow import RainbowDQN
 from .grpo import GRPO
@@ -30,4 +31,5 @@ __all__ = [
     "PPO",
     "TD3",
     "GRPO",
+    "DPO",
 ]

{agilerl-2.3.5.dev0 → agilerl-2.4.0.dev0}/agilerl/algorithms/bc_lm.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Callable, Optional, Tuple, Union
+from typing import Any, Callable, Optional, Union
 import numpy as np
 import torch
@@ -167,7 +167,7 @@ class BC_LM(nn.Module):
         temp: float = 1.0,
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
-    ) -> Tuple[torch.Tensor, Any]:
+    ) -> tuple[torch.Tensor, Any]:
         prepared_inputs = self.prepare_inputs(items)
         tokens = prepared_inputs["tokens"]
         scores, model_outputs = self.score(
@@ -189,7 +189,7 @@ class BC_LM(nn.Module):
         temp: float = 1.0,
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
-    ) -> Tuple[torch.Tensor, Any]:
+    ) -> tuple[torch.Tensor, Any]:
         scores, model_outputs = self.score(
             (
                 tokens.unsqueeze(1),

agilerl 2.3.5.dev0__tar.gz → 2.4.0.dev0__tar.gz

agilerl 2.3.5.dev0__tar.gz → 2.4.0.dev0__tar.gz