agilerl 2.4.1.dev0__tar.gz → 2.4.1.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/PKG-INFO +6 -3
  2. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/README.md +2 -1
  3. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/base.py +44 -11
  4. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/pyproject.toml +1 -1
  5. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/LICENSE +0 -0
  6. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/__init__.py +0 -0
  7. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/__init__.py +0 -0
  8. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/bc_lm.py +0 -0
  9. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/__init__.py +0 -0
  10. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/optimizer_wrapper.py +0 -0
  11. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/core/registry.py +0 -0
  12. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/cqn.py +0 -0
  13. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ddpg.py +0 -0
  14. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/dpo.py +0 -0
  15. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/dqn.py +0 -0
  16. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/dqn_rainbow.py +0 -0
  17. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/grpo.py +0 -0
  18. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ilql.py +0 -0
  19. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ippo.py +0 -0
  20. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/maddpg.py +0 -0
  21. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/matd3.py +0 -0
  22. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/neural_ts_bandit.py +0 -0
  23. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/neural_ucb_bandit.py +0 -0
  24. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/ppo.py +0 -0
  25. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/algorithms/td3.py +0 -0
  26. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/__init__.py +0 -0
  27. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/data.py +0 -0
  28. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/multi_agent_replay_buffer.py +0 -0
  29. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/replay_buffer.py +0 -0
  30. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/rollout_buffer.py +0 -0
  31. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/sampler.py +0 -0
  32. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/components/segment_tree.py +0 -0
  33. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/__init__.py +0 -0
  34. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/language_environment.py +0 -0
  35. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/rl_data.py +0 -0
  36. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/tokenizer.py +0 -0
  37. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/data/torch_datasets.py +0 -0
  38. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/hpo/__init__.py +0 -0
  39. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/hpo/mutation.py +0 -0
  40. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/hpo/tournament.py +0 -0
  41. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/__init__.py +0 -0
  42. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/base.py +0 -0
  43. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/bert.py +0 -0
  44. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/cnn.py +0 -0
  45. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/configs.py +0 -0
  46. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/custom_components.py +0 -0
  47. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/dummy.py +0 -0
  48. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/gpt.py +0 -0
  49. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/lstm.py +0 -0
  50. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/mlp.py +0 -0
  51. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/multi_input.py +0 -0
  52. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/resnet.py +0 -0
  53. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/modules/simba.py +0 -0
  54. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/__init__.py +0 -0
  55. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/actors.py +0 -0
  56. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/base.py +0 -0
  57. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/custom_modules.py +0 -0
  58. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/distributions.py +0 -0
  59. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/distributions_experimental.py +0 -0
  60. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/q_networks.py +0 -0
  61. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/networks/value_networks.py +0 -0
  62. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/protocols.py +0 -0
  63. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/rollouts/__init__.py +0 -0
  64. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/rollouts/on_policy.py +0 -0
  65. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/__init__.py +0 -0
  66. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_bandits.py +0 -0
  67. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_llm.py +0 -0
  68. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_multi_agent_off_policy.py +0 -0
  69. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_multi_agent_on_policy.py +0 -0
  70. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_off_policy.py +0 -0
  71. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_offline.py +0 -0
  72. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/training/train_on_policy.py +0 -0
  73. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/typing.py +0 -0
  74. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/__init__.py +0 -0
  75. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/algo_utils.py +0 -0
  76. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/cache.py +0 -0
  77. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/evolvable_networks.py +0 -0
  78. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/ilql_utils.py +0 -0
  79. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/llm_utils.py +0 -0
  80. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/log_utils.py +0 -0
  81. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/minari_utils.py +0 -0
  82. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/probe_envs.py +0 -0
  83. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/probe_envs_ma.py +0 -0
  84. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/sampling_utils.py +0 -0
  85. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/torch_utils.py +0 -0
  86. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/utils/utils.py +0 -0
  87. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/vector/__init__.py +0 -0
  88. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/vector/pz_async_vec_env.py +0 -0
  89. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/vector/pz_vec_env.py +0 -0
  90. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/__init__.py +0 -0
  91. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/agent.py +0 -0
  92. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/learning.py +0 -0
  93. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/make_evolvable.py +0 -0
  94. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/pettingzoo_wrappers.py +0 -0
  95. {agilerl-2.4.1.dev0 → agilerl-2.4.1.dev1}/agilerl/wrappers/utils.py +0 -0
@@ -1,8 +1,9 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: agilerl
3
- Version: 2.4.1.dev0
3
+ Version: 2.4.1.dev1
4
4
  Summary: AgileRL is a deep reinforcement learning library focused on improving RL development through RLOps.
5
5
  License: Apache 2.0
6
+ License-File: LICENSE
6
7
  Author: Nick Ustaran-Anderegg
7
8
  Author-email: dev@agilerl.com
8
9
  Requires-Python: >=3.10,<4.0
@@ -12,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
12
13
  Classifier: Programming Language :: Python :: 3.11
13
14
  Classifier: Programming Language :: Python :: 3.12
14
15
  Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
15
17
  Requires-Dist: SuperSuit (>=3.9.0,<4.0.0)
16
18
  Requires-Dist: accelerate (>=1.7.0,<2.0.0)
17
19
  Requires-Dist: deepspeed (>=0.17.1,<0.18.0)
@@ -153,11 +155,12 @@ We are constantly updating our tutorials to showcase the latest features of Agil
153
155
  | ---------- | --------- |
154
156
  | [Bandits](https://docs.agilerl.com/en/latest/bandits/index.html) | [Neural Contextual Bandits with UCB-based Exploration (NeuralUCB)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ucb.html) <br> [Neural Contextual Bandits with Thompson Sampling (NeuralTS)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ts.html) |
155
157
 
156
- ### LLM Reasoning Algorithms
158
+ ### LLM Fine-tuning Algorithms
157
159
 
158
160
  | RL | Algorithm |
159
161
  | ---------- | --------- |
160
162
  | [On-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Group Relative Policy Optimization (GRPO)](https://docs.agilerl.com/en/latest/api/algorithms/grpo.html)
163
+ | [Off-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Direct Preference Optimization (DPO)](https://docs.agilerl.com/en/latest/api/algorithms/dpo.html)
161
164
 
162
165
 
163
166
  ## Train an Agent to Beat a Gym Environment
@@ -106,11 +106,12 @@ We are constantly updating our tutorials to showcase the latest features of Agil
106
106
  | ---------- | --------- |
107
107
  | [Bandits](https://docs.agilerl.com/en/latest/bandits/index.html) | [Neural Contextual Bandits with UCB-based Exploration (NeuralUCB)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ucb.html) <br> [Neural Contextual Bandits with Thompson Sampling (NeuralTS)](https://docs.agilerl.com/en/latest/api/algorithms/neural_ts.html) |
108
108
 
109
- ### LLM Reasoning Algorithms
109
+ ### LLM Fine-tuning Algorithms
110
110
 
111
111
  | RL | Algorithm |
112
112
  | ---------- | --------- |
113
113
  | [On-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Group Relative Policy Optimization (GRPO)](https://docs.agilerl.com/en/latest/api/algorithms/grpo.html)
114
+ | [Off-Policy](https://docs.agilerl.com/en/latest/llm_finetuning/index.html) | [Direct Preference Optimization (DPO)](https://docs.agilerl.com/en/latest/api/algorithms/dpo.html)
114
115
 
115
116
 
116
117
  ## Train an Agent to Beat a Gym Environment
@@ -601,14 +601,16 @@ class EvolvableAlgorithm(ABC, metaclass=RegistryMeta):
601
601
  )
602
602
  optimizer = opt.optimizer if hasattr(opt, "optimizer") else None
603
603
 
604
- if isinstance(opt, DeepSpeedOptimizerWrapper):
605
- if isinstance(opt.optimizer, DummyOptimizer):
606
- opt = getattr(
604
+ if isinstance(self, LLMAlgorithm):
605
+ if hasattr(self.actor, "optimizer"):
606
+ optimizer = getattr(
607
607
  getattr(self, "actor"), "optimizer"
608
608
  ) # If the optimizer is defined in the deepspeed config, we do this
609
+ else:
610
+ optimizer = opt.optimizer
609
611
 
610
612
  self.accelerator, self.lr_scheduler = LLMAlgorithm.update_lr(
611
- opt,
613
+ optimizer,
612
614
  lr=getattr(self, config.lr),
613
615
  accelerator=self.accelerator,
614
616
  scheduler_config=self.cosine_lr_schedule_config,
@@ -1898,15 +1900,21 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
1898
1900
  self.use_separate_reference_adapter = use_separate_reference_adapter
1899
1901
  self.cosine_lr_schedule_config = cosine_lr_schedule_config
1900
1902
 
1901
- if max_grad_norm and (accelerator is not None) and accelerator.is_main_process:
1902
- warnings.warn(
1903
- "Argument 'max_grad_norm' will be overwritten by the 'gradient_clipping' value set in the deepspeed config."
1904
- )
1905
- self.max_grad_norm = None
1906
- else:
1907
- self.max_grad_norm = max_grad_norm
1903
+ if max_grad_norm and (accelerator is not None):
1904
+ if accelerator.is_main_process:
1905
+ warnings.warn(
1906
+ "Argument 'max_grad_norm' will overwrite the equivalent value set for 'gradient_clipping' in the deepspeed config."
1907
+ )
1908
+ self.accelerator.state.deepspeed_plugin.deepspeed_config[
1909
+ "gradient_clipping"
1910
+ ] = max_grad_norm
1911
+
1912
+ self.max_grad_norm = max_grad_norm
1908
1913
  self.reduce_memory_peak = reduce_memory_peak
1909
1914
 
1915
+ if self.accelerator is not None:
1916
+ self.register_mutation_hook(self._sync_deepspeed_gradient_clipping)
1917
+
1910
1918
  if self.accelerator is not None:
1911
1919
  self.zero_stage = self.accelerator.state.deepspeed_plugin.deepspeed_config[
1912
1920
  "zero_optimization"
@@ -2949,3 +2957,28 @@ class LLMAlgorithm(EvolvableAlgorithm, ABC):
2949
2957
 
2950
2958
  if self.accelerator is not None:
2951
2959
  self.accelerator.wait_for_everyone()
2960
+
2961
+ def _sync_deepspeed_gradient_clipping(self) -> None:
2962
+ """Synchronizes max_grad_norm with DeepSpeed gradient_clipping config.
2963
+ Registered as a mutation hook to ensure consistency after mutations.
2964
+ """
2965
+ if self.accelerator is None:
2966
+ return
2967
+
2968
+ if (
2969
+ "gradient_clipping"
2970
+ not in self.accelerator.state.deepspeed_plugin.deepspeed_config
2971
+ ):
2972
+ return
2973
+
2974
+ ds_config = self.accelerator.state.deepspeed_plugin.deepspeed_config
2975
+ if ds_config["gradient_clipping"] != self.max_grad_norm:
2976
+ self.accelerator.state.deepspeed_plugin.deepspeed_config[
2977
+ "gradient_clipping"
2978
+ ] = self.max_grad_norm
2979
+
2980
+ if hasattr(self.actor, "optimizer"):
2981
+ if hasattr(self.actor.optimizer, "grad_clip"):
2982
+ self.actor.optimizer.grad_clip = self.max_grad_norm
2983
+ if hasattr(self.actor.optimizer, "clip_grad"):
2984
+ self.actor.optimizer.clip_grad = self.max_grad_norm
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "agilerl"
3
- version = "2.4.1.dev0"
3
+ version = "2.4.1.dev1"
4
4
 
5
5
  description = "AgileRL is a deep reinforcement learning library focused on improving RL development through RLOps."
6
6
  authors = ["Nick Ustaran-Anderegg <dev@agilerl.com>"]
File without changes