agilerl 2.4.1.dev0__tar.gz → 2.4.1.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/PKG-INFO +6 -3
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/README.md +2 -1
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/base.py +44 -11
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/pyproject.toml +1 -1
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/LICENSE +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/bc_lm.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/optimizer_wrapper.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/registry.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/cqn.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ddpg.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/dpo.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/dqn.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/dqn_rainbow.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/grpo.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ilql.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ippo.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/maddpg.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/matd3.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/neural_ts_bandit.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/neural_ucb_bandit.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ppo.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/td3.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/data.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/multi_agent_replay_buffer.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/replay_buffer.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/rollout_buffer.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/sampler.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/segment_tree.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/language_environment.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/rl_data.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/tokenizer.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/torch_datasets.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/hpo/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/hpo/mutation.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/hpo/tournament.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/base.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/bert.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/cnn.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/configs.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/custom_components.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/dummy.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/gpt.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/lstm.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/mlp.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/multi_input.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/resnet.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/simba.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/actors.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/base.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/custom_modules.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/distributions.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/distributions_experimental.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/q_networks.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/value_networks.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/protocols.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/rollouts/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/rollouts/on_policy.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_bandits.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_llm.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_multi_agent_off_policy.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_multi_agent_on_policy.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_off_policy.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_offline.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_on_policy.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/typing.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/algo_utils.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/cache.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/evolvable_networks.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/ilql_utils.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/llm_utils.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/log_utils.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/minari_utils.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/probe_envs.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/probe_envs_ma.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/sampling_utils.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/torch_utils.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/utils.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/vector/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/vector/pz_async_vec_env.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/vector/pz_vec_env.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/__init__.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/agent.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/learning.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/make_evolvable.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/pettingzoo_wrappers.py +0 -0
- {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/utils.py +0 -0

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/PKG-INFO

@@ -1,8 +1,9 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: agilerl
-Version: 2.4.1.dev0
+Version: 2.4.1.dev1
 Summary: AgileRL is a deep reinforcement learning library focused on improving RL development through RLOps.
 License: Apache 2.0
+License-File: LICENSE
 Author: Nick Ustaran-Anderegg
 Author-email: dev@agilerl.com
 Requires-Python: >=3.10,<4.0

@@ -12,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Requires-Dist: SuperSuit (>=3.9.0,<4.0.0)
 Requires-Dist: accelerate (>=1.7.0,<2.0.0)
 Requires-Dist: deepspeed (>=0.17.1,<0.18.0)

@@ -153,11 +155,12 @@ We are constantly updating our tutorials to showcase the latest features of AgileRL
 | ---------- | --------- |
 | [Bandits](https://docs.agilerl.com/en/latest/bandits/index.html) | [Neural Contextual Bandits with UCB-based Exploration (NeuralUCB)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ucb.html) <br> [Neural Contextual Bandits with Thompson Sampling (NeuralTS)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ts.html) |
 
-### LLM
+### LLM Fine-tuning Algorithms
 
 | RL | Algorithm |
 | ---------- | --------- |
 | [On-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Group Relative Policy Optimization (GRPO)](https://docs.agilerl.com/en/latest/api/algorithms/grpo.html)
+| [Off-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Direct Preference Optimization (DPO)](https://docs.agilerl.com/en/latest/api/algorithms/dpo.html)
 
 
 ## Train an Agent to Beat a Gym Environment

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/README.md

@@ -106,11 +106,12 @@ We are constantly updating our tutorials to showcase the latest features of AgileRL
 | ---------- | --------- |
 | [Bandits](https://docs.agilerl.com/en/latest/bandits/index.html) | [Neural Contextual Bandits with UCB-based Exploration (NeuralUCB)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ucb.html) <br> [Neural Contextual Bandits with Thompson Sampling (NeuralTS)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ts.html) |
 
-### LLM
+### LLM Fine-tuning Algorithms
 
 | RL | Algorithm |
 | ---------- | --------- |
 | [On-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Group Relative Policy Optimization (GRPO)](https://docs.agilerl.com/en/latest/api/algorithms/grpo.html)
+| [Off-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Direct Preference Optimization (DPO)](https://docs.agilerl.com/en/latest/api/algorithms/dpo.html)
 
 
 ## Train an Agent to Beat a Gym Environment
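
Both the PKG-INFO long description and the README gain an Off-Policy row pointing at the Direct Preference Optimization (DPO) docs. For orientation only, the snippet below is a minimal sketch of the preference loss DPO optimizes, written in plain PyTorch; it is not AgileRL's API (the actual implementation lives in agilerl/algorithms/dpo.py, which is unchanged in this release).

```python
import torch
import torch.nn.functional as F

def dpo_loss(
    policy_chosen_logps: torch.Tensor,    # log-prob of preferred answers under the policy
    policy_rejected_logps: torch.Tensor,  # log-prob of rejected answers under the policy
    ref_chosen_logps: torch.Tensor,       # same quantities under the frozen reference model
    ref_rejected_logps: torch.Tensor,
    beta: float = 0.1,                    # controls how far the policy may drift from the reference
) -> torch.Tensor:
    """Standard DPO objective: maximise the preference margin relative to the reference."""
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()
```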

{agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/base.py

@@ -601,14 +601,16 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
             )
             optimizer = opt.optimizer if hasattr(opt, "optimizer") else None
 
-            if isinstance(
-            if
-
+            if isinstance(self, LLMAlgorithm):
+                if hasattr(self.actor, "optimizer"):
+                    optimizer = getattr(
                         getattr(self, "actor"), "optimizer"
                     )  # If the optimizer is defined in the deepspeed config, we do this
+                else:
+                    optimizer = opt.optimizer
 
             self.accelerator, self.lr_scheduler = LLMAlgorithm.update_lr(
-
+                optimizer,
                 lr=getattr(self, config.lr),
                 accelerator=self.accelerator,
                 scheduler_config=self.cosine_lr_schedule_config,
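
In base.py, the learning-rate mutation path now resolves the optimizer differently for LLM algorithms: if the actor exposes an `optimizer` attribute (the DeepSpeed-managed case), that optimizer is used; otherwise it falls back to the one held by the optimizer wrapper, and the result is passed explicitly to `LLMAlgorithm.update_lr`. A standalone sketch of that fallback, with hypothetical stand-in objects rather than AgileRL's real classes:

```python
# Hypothetical stand-ins: `actor` plays the role of a DeepSpeed engine that may
# own the optimizer, `opt` the role of an optimizer wrapper with an `.optimizer`.
def resolve_optimizer(actor, opt):
    if hasattr(actor, "optimizer"):
        # Optimizer was defined in the DeepSpeed config and attached to the engine.
        return actor.optimizer
    # Otherwise use the optimizer tracked by the wrapper.
    return getattr(opt, "optimizer", None)
```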

@@ -1898,15 +1900,21 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
         self.use_separate_reference_adapter = use_separate_reference_adapter
         self.cosine_lr_schedule_config = cosine_lr_schedule_config
 
-        if max_grad_norm and (accelerator is not None)
-
-
-
-
-
-
+        if max_grad_norm and (accelerator is not None):
+            if accelerator.is_main_process:
+                warnings.warn(
+                    "Argument 'max_grad_norm' will overwrite the equivalent value set for 'gradient_clipping' in the deepspeed config."
+                )
+            self.accelerator.state.deepspeed_plugin.deepspeed_config[
+                "gradient_clipping"
+            ] = max_grad_norm
+
+        self.max_grad_norm = max_grad_norm
         self.reduce_memory_peak = reduce_memory_peak
 
+        if self.accelerator is not None:
+            self.register_mutation_hook(self._sync_deepspeed_gradient_clipping)
+
         if self.accelerator is not None:
             self.zero_stage = self.accelerator.state.deepspeed_plugin.deepspeed_config[
                 "zero_optimization"
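
The constructor change means an explicit `max_grad_norm` now takes precedence over any `gradient_clipping` already present in the DeepSpeed config, with a warning emitted on the main process. A hedged sketch of that precedence, using a plain dict in place of a real Accelerate/DeepSpeed setup:

```python
import warnings

# Illustrative config fragment; the key names follow DeepSpeed's JSON schema.
deepspeed_config = {
    "zero_optimization": {"stage": 2},
    "gradient_clipping": 1.0,
}

max_grad_norm = 0.5  # value passed to the algorithm's constructor

if max_grad_norm:
    warnings.warn(
        "Argument 'max_grad_norm' will overwrite the equivalent value set for "
        "'gradient_clipping' in the deepspeed config."
    )
    deepspeed_config["gradient_clipping"] = max_grad_norm

print(deepspeed_config["gradient_clipping"])  # 0.5 -- max_grad_norm wins
```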

@@ -2949,3 +2957,28 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
 
         if self.accelerator is not None:
             self.accelerator.wait_for_everyone()
+
+    def _sync_deepspeed_gradient_clipping(self) -> None:
+        """Synchronizes max_grad_norm with DeepSpeed gradient_clipping config.
+        Registered as a mutation hook to ensure consistency after mutations.
+        """
+        if self.accelerator is None:
+            return
+
+        if (
+            "gradient_clipping"
+            not in self.accelerator.state.deepspeed_plugin.deepspeed_config
+        ):
+            return
+
+        ds_config = self.accelerator.state.deepspeed_plugin.deepspeed_config
+        if ds_config["gradient_clipping"] != self.max_grad_norm:
+            self.accelerator.state.deepspeed_plugin.deepspeed_config[
+                "gradient_clipping"
+            ] = self.max_grad_norm
+
+        if hasattr(self.actor, "optimizer"):
+            if hasattr(self.actor.optimizer, "grad_clip"):
+                self.actor.optimizer.grad_clip = self.max_grad_norm
+            if hasattr(self.actor.optimizer, "clip_grad"):
+                self.actor.optimizer.clip_grad = self.max_grad_norm
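
`_sync_deepspeed_gradient_clipping` is registered as a mutation hook so that, after evolutionary HPO mutates the agent, the DeepSpeed `gradient_clipping` entry and any clipping attribute on the actor's optimizer are re-aligned with `self.max_grad_norm`. The toy sketch below reproduces that synchronisation with `SimpleNamespace` stand-ins instead of a real Accelerator, purely to show the effect:

```python
from types import SimpleNamespace

# Toy objects mimicking just enough structure: an accelerator exposing
# state.deepspeed_plugin.deepspeed_config, and an actor whose optimizer
# carries a grad_clip attribute.
accelerator = SimpleNamespace(
    state=SimpleNamespace(
        deepspeed_plugin=SimpleNamespace(deepspeed_config={"gradient_clipping": 1.0})
    )
)
actor = SimpleNamespace(optimizer=SimpleNamespace(grad_clip=1.0))

max_grad_norm = 0.5  # pretend a mutation just changed this hyperparameter

# Same re-synchronisation the hook performs after each mutation.
ds_config = accelerator.state.deepspeed_plugin.deepspeed_config
if ds_config.get("gradient_clipping") != max_grad_norm:
    ds_config["gradient_clipping"] = max_grad_norm
if hasattr(actor.optimizer, "grad_clip"):
    actor.optimizer.grad_clip = max_grad_norm

print(ds_config["gradient_clipping"], actor.optimizer.grad_clip)  # 0.5 0.5
```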