PyPI - agilerl - Versions diffs - 2.4.1.dev0__tar.gz → 2.4.1.dev1__tar.gz - Mend

agilerl 2.4.1.dev0__tar.gz → 2.4.1.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,9 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: agilerl
-Version: 2.4.1.dev0
+Version: 2.4.1.dev1
 Summary: AgileRL is a deep reinforcement learning library focused on improving RL development through RLOps.
 License: Apache 2.0
+License-File: LICENSE
 Author: Nick Ustaran-Anderegg
 Author-email: dev@agilerl.com
 Requires-Python: >=3.10,<4.0
@@ -12,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Requires-Dist: SuperSuit (>=3.9.0,<4.0.0)
 Requires-Dist: accelerate (>=1.7.0,<2.0.0)
 Requires-Dist: deepspeed (>=0.17.1,<0.18.0)
@@ -153,11 +155,12 @@ We are constantly updating our tutorials to showcase the latest features of Agil
   | ---------- | --------- |
   | [Bandits](https://docs.agilerl.com/en/latest/bandits/index.html) | [Neural Contextual Bandits with UCB-based Exploration (NeuralUCB)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ucb.html) <br> [Neural Contextual Bandits with Thompson Sampling (NeuralTS)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ts.html) |
-  ### LLM Reasoning Algorithms
+  ### LLM Fine-tuning Algorithms
   | RL         | Algorithm |
   | ---------- | --------- |
   | [On-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Group Relative Policy Optimization (GRPO)](https://docs.agilerl.com/en/latest/api/algorithms/grpo.html)
+  | [Off-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Direct Preference Optimization (DPO)](https://docs.agilerl.com/en/latest/api/algorithms/dpo.html)
 ## Train an Agent to Beat a Gym Environment

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/README.md RENAMED Viewed

@@ -106,11 +106,12 @@ We are constantly updating our tutorials to showcase the latest features of Agil
   | ---------- | --------- |
   | [Bandits](https://docs.agilerl.com/en/latest/bandits/index.html) | [Neural Contextual Bandits with UCB-based Exploration (NeuralUCB)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ucb.html) <br> [Neural Contextual Bandits with Thompson Sampling (NeuralTS)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ts.html) |
-  ### LLM Reasoning Algorithms
+  ### LLM Fine-tuning Algorithms
   | RL         | Algorithm |
   | ---------- | --------- |
   | [On-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Group Relative Policy Optimization (GRPO)](https://docs.agilerl.com/en/latest/api/algorithms/grpo.html)
+  | [Off-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Direct Preference Optimization (DPO)](https://docs.agilerl.com/en/latest/api/algorithms/dpo.html)
 ## Train an Agent to Beat a Gym Environment

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/base.py RENAMED Viewed

@@ -601,14 +601,16 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
         )
         optimizer = opt.optimizer if hasattr(opt, "optimizer") else None
-        if isinstance(opt, DeepSpeedOptimizerWrapper):
-            if isinstance(opt.optimizer, DummyOptimizer):
-                opt = getattr(
+        if isinstance(self, LLMAlgorithm):
+            if hasattr(self.actor, "optimizer"):
+                optimizer = getattr(
                     getattr(self, "actor"), "optimizer"
                 )  # If the optimizer is defined in the deepspeed config, we do this
+            else:
+                optimizer = opt.optimizer
             self.accelerator, self.lr_scheduler = LLMAlgorithm.update_lr(
-                opt,
+                optimizer,
                 lr=getattr(self, config.lr),
                 accelerator=self.accelerator,
                 scheduler_config=self.cosine_lr_schedule_config,
@@ -1898,15 +1900,21 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
         self.use_separate_reference_adapter = use_separate_reference_adapter
         self.cosine_lr_schedule_config = cosine_lr_schedule_config
-        if max_grad_norm and (accelerator is not None) and accelerator.is_main_process:
-            warnings.warn(
-                "Argument 'max_grad_norm' will be overwritten by the 'gradient_clipping' value set in the deepspeed config."
-            )
-            self.max_grad_norm = None
-        else:
-            self.max_grad_norm = max_grad_norm
+        if max_grad_norm and (accelerator is not None):
+            if accelerator.is_main_process:
+                warnings.warn(
+                    "Argument 'max_grad_norm' will overwrite the equivalent value set for 'gradient_clipping' in the deepspeed config."
+                )
+            self.accelerator.state.deepspeed_plugin.deepspeed_config[
+                "gradient_clipping"
+            ] = max_grad_norm
+        self.max_grad_norm = max_grad_norm
         self.reduce_memory_peak = reduce_memory_peak
+        if self.accelerator is not None:
+            self.register_mutation_hook(self._sync_deepspeed_gradient_clipping)
         if self.accelerator is not None:
             self.zero_stage = self.accelerator.state.deepspeed_plugin.deepspeed_config[
                 "zero_optimization"
@@ -2949,3 +2957,28 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
         if self.accelerator is not None:
             self.accelerator.wait_for_everyone()
+    def _sync_deepspeed_gradient_clipping(self) -> None:
+        """Synchronizes max_grad_norm with DeepSpeed gradient_clipping config.
+        Registered as a mutation hook to ensure consistency after mutations.
+        """
+        if self.accelerator is None:
+            return
+        if (
+            "gradient_clipping"
+            not in self.accelerator.state.deepspeed_plugin.deepspeed_config
+        ):
+            return
+        ds_config = self.accelerator.state.deepspeed_plugin.deepspeed_config
+        if ds_config["gradient_clipping"] != self.max_grad_norm:
+            self.accelerator.state.deepspeed_plugin.deepspeed_config[
+                "gradient_clipping"
+            ] = self.max_grad_norm
+        if hasattr(self.actor, "optimizer"):
+            if hasattr(self.actor.optimizer, "grad_clip"):
+                self.actor.optimizer.grad_clip = self.max_grad_norm
+            if hasattr(self.actor.optimizer, "clip_grad"):
+                self.actor.optimizer.clip_grad = self.max_grad_norm

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agilerl"
-version = "2.4.1.dev0"
+version = "2.4.1.dev1"
 description = "AgileRL is a deep reinforcement learning library focused on improving RL development through RLOps."
 authors = ["Nick Ustaran-Anderegg <dev@agilerl.com>"]

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/LICENSE RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/bc_lm.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/optimizer_wrapper.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/registry.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/cqn.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ddpg.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/dpo.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/dqn.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/dqn_rainbow.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/grpo.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ilql.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ippo.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/maddpg.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/matd3.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/neural_ts_bandit.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/neural_ucb_bandit.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ppo.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/td3.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/data.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/multi_agent_replay_buffer.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/replay_buffer.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/rollout_buffer.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/sampler.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/segment_tree.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/language_environment.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/rl_data.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/tokenizer.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/torch_datasets.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/hpo/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/hpo/mutation.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/hpo/tournament.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/base.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/bert.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/cnn.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/configs.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/custom_components.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/dummy.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/gpt.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/lstm.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/mlp.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/multi_input.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/resnet.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/simba.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/actors.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/base.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/custom_modules.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/distributions.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/distributions_experimental.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/q_networks.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/value_networks.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/protocols.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/rollouts/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/rollouts/on_policy.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_bandits.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_llm.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_multi_agent_off_policy.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_multi_agent_on_policy.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_off_policy.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_offline.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_on_policy.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/typing.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/algo_utils.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/cache.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/evolvable_networks.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/ilql_utils.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/llm_utils.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/log_utils.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/minari_utils.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/probe_envs.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/probe_envs_ma.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/sampling_utils.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/torch_utils.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/utils.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/vector/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/vector/pz_async_vec_env.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/vector/pz_vec_env.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/__init__.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/agent.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/learning.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/make_evolvable.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/pettingzoo_wrappers.py RENAMED Viewed

File without changes

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/utils.py RENAMED Viewed

File without changes

agilerl 2.4.1.dev0__tar.gz → 2.4.1.dev1__tar.gz

agilerl 2.4.1.dev0__tar.gz → 2.4.1.dev1__tar.gz