rxnn-0.2.21-py3-none-any.whl → rxnn-0.2.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rxnn/training/rl.py CHANGED
@@ -24,8 +24,6 @@ class RlAlgorithm(ABC):
         return self.critic_loss(rewards, values)
 
 class PPOConfig(TypedDict):
-    gae_gamma: float
-    gae_lambda: float
     clip_eps: float
 
 class PPOAlgorithm(RlAlgorithm):
@@ -33,8 +31,6 @@ class PPOAlgorithm(RlAlgorithm):
         super(PPOAlgorithm, self).__init__()
 
         # PPO Config
-        self.gae_gamma = config.get('gae_gamma', 0.99)
-        self.gae_lambda = config.get('gae_lambda', 0.95)
         self.clip_eps = config.get('clip_eps', 0.2)
 
     def policy_loss(self, query: TokenizedDict, answer: TokenizedDict, logits: torch.Tensor,
@@ -86,15 +82,6 @@ class PPOAlgorithm(RlAlgorithm):
 
         return policy_loss
 
-    # def _compute_gae(self, rewards: torch.Tensor, values: torch.Tensor, next_value: torch.Tensor) -> torch.Tensor:
-    #     advantages = torch.zeros_like(rewards, device=values.device)
-    #     last_advantage = 0
-    #     for t in reversed(range(rewards.size(0))):
-    #         delta = rewards[t] + self.gae_gamma * next_value - values[t]
-    #         advantages[t] = delta + self.gae_gamma * self.gae_lambda * last_advantage
-    #         last_advantage = advantages[t]
-    #     return advantages
-
     def calculate_advantages(self, rewards: torch.Tensor, values: torch.Tensor) -> torch.Tensor:
         advantages = rewards - values
         normalized_advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
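For context on this change: 0.2.22 drops the unused gae_gamma/gae_lambda config keys and the commented-out _compute_gae (Generalized Advantage Estimation) helper, so PPOAlgorithm keeps only the simple baseline advantage in calculate_advantages. A minimal standalone sketch of the two variants, based on the tensors shown in the diff (the free functions compute_gae and calculate_advantages below are illustrative, not part of the package API, and the return of the normalized advantages is an assumption since the diff is truncated there):

    import torch

    def compute_gae(rewards: torch.Tensor, values: torch.Tensor, next_value: torch.Tensor,
                    gamma: float = 0.99, lam: float = 0.95) -> torch.Tensor:
        # Mirrors the commented-out _compute_gae removed in this release
        # (gamma/lam correspond to the deleted gae_gamma/gae_lambda defaults).
        advantages = torch.zeros_like(rewards)
        last_advantage = 0.0
        for t in reversed(range(rewards.size(0))):
            delta = rewards[t] + gamma * next_value - values[t]
            advantages[t] = delta + gamma * lam * last_advantage
            last_advantage = advantages[t]
        return advantages

    def calculate_advantages(rewards: torch.Tensor, values: torch.Tensor) -> torch.Tensor:
        # What remains in 0.2.22: baseline-subtracted advantages, normalized to
        # zero mean and unit variance.
        advantages = rewards - values
        return (advantages - advantages.mean()) / (advantages.std() + 1e-8)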
rxnn-0.2.22.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rxnn
-Version: 0.2.21
+Version: 0.2.22
 Summary: RxNN: Reactive Neural Networks Platform
 License: Apache-2.0
 Keywords: deep-learning,ai,machine-learning
rxnn-0.2.22.dist-info/RECORD CHANGED
@@ -18,7 +18,7 @@ rxnn/training/dataset.py,sha256=7hTilFWPpqUEc6zNcMqBPjxFKxCfvTKKF3E8tVlwccQ,5125
 rxnn/training/models.py,sha256=wf98gYKKm9-ZY3zwdX9NIeJ-lvh7Ro1SoAijmQxYM28,5599
 rxnn/training/mrl.py,sha256=zk4m1JFuX0y82J0tG2XkY0Pz6Uy2did9cngOXqR9lMk,43326
 rxnn/training/reward.py,sha256=7MTVdNm5HnWmt6zFDi3TAYmnVSL_-24riOoY2F7z4x8,11290
-rxnn/training/rl.py,sha256=DHFwnPUlnq2JVj6CS6DwifnC_eMeBAUVp36UCAWNMis,3934
+rxnn/training/rl.py,sha256=j-KNLoZjhaEKasYNOc8DxHtwvknAgAJFwvXKot6otFA,3272
 rxnn/training/scheduler.py,sha256=LcjU35mEwz2U5x3U6tLfeeYlBqMxbFSxYzJYuXkWbSY,1408
 rxnn/training/tokenizer.py,sha256=umaLByMBx_NMrQElA45HLm9gkuzyKWDTFaKVd-CjXl0,8344
 rxnn/training/utils.py,sha256=Bw8nZLKIt7NQpUVCYkb_79kWKChVFOYgYXwODo4SvNc,5718
@@ -32,7 +32,7 @@ rxnn/transformers/moe.py,sha256=j6jEx6Ip0zttlUZKKn82azxo95lkLZs-H2GLSMD88hY,5859
 rxnn/transformers/positional.py,sha256=1PjcJybUzeQlIKJI4tahAGZcYgCRCL0otxs7mpsNuzM,4410
 rxnn/transformers/sampler.py,sha256=t6iiQTdLQ0TakUWnnhKkb5DKF2F_9-thXHBydDF3fxg,17389
 rxnn/utils.py,sha256=ihb6OTyDtPiocB_lOvnq7eOkjjpCkgs8wxvXUBNQ7mM,996
-rxnn-0.2.21.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
-rxnn-0.2.21.dist-info/METADATA,sha256=XXf_qBMs2dOwWyAN5oNEg1W1-oPVIAQPy0FkNcO7QZQ,25960
-rxnn-0.2.21.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-rxnn-0.2.21.dist-info/RECORD,,
+rxnn-0.2.22.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+rxnn-0.2.22.dist-info/METADATA,sha256=KYQSidSUXwKxzuQj77h-jT0DUimKZVW_XTl-7PcQG3o,25960
+rxnn-0.2.22.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+rxnn-0.2.22.dist-info/RECORD,,
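Each RECORD row is path,hash,size, where the hash field is "sha256=" followed by the urlsafe-base64 digest without padding, so the rl.py and METADATA entries above can be re-checked against the new wheel. A small sketch, assuming a locally downloaded wheel (the filename and the check_record_hashes helper are hypothetical):

    import base64
    import csv
    import hashlib
    import zipfile

    def check_record_hashes(wheel_path: str) -> None:
        # Recompute each file's digest and size and compare with its RECORD entry.
        with zipfile.ZipFile(wheel_path) as wheel:
            record_name = next(n for n in wheel.namelist() if n.endswith('.dist-info/RECORD'))
            rows = wheel.read(record_name).decode('utf-8').splitlines()
            for path, hash_spec, size in csv.reader(rows):
                if not hash_spec:  # RECORD lists itself with empty hash/size fields
                    continue
                algo, _, expected = hash_spec.partition('=')
                data = wheel.read(path)
                digest = base64.urlsafe_b64encode(hashlib.new(algo, data).digest()).rstrip(b'=').decode()
                status = 'ok' if digest == expected and int(size) == len(data) else 'MISMATCH'
                print(f'{status:8} {path}')

    check_record_hashes('rxnn-0.2.22-py3-none-any.whl')  # hypothetical local filename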