rxnn-0.2.20-py3-none-any.whl → rxnn-0.2.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rxnn/training/models.py CHANGED
@@ -129,7 +129,7 @@ class MrlActorModel(nn.Module):
         _, ed = self.encoder(x, attention_mask=attention_mask)
         return self.memory_attention(ed, attention_mask=attention_mask)

-class MrlCriticModel(nn.Module):
+class MrlCriticModel(nn.Module, PyTorchModelHubMixin, license="apache-2.0", pipeline_tag="text-classification"):
     def __init__(self, encoder: nn.Module, embed_dim: int, **kwargs):
         super(MrlCriticModel, self).__init__(**kwargs)
         self.encoder = encoder
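The only change to models.py is MrlCriticModel additionally subclassing PyTorchModelHubMixin from huggingface_hub, with Hub model-card metadata (license, pipeline_tag) supplied as class keyword arguments. A minimal sketch of what the mixin provides; ToyCritic and DummyEncoder below are hypothetical stand-ins, not rxnn code:

```python
# Sketch only: ToyCritic / DummyEncoder are illustrative placeholders
# showing what PyTorchModelHubMixin adds to a plain nn.Module.
import torch
import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin

class DummyEncoder(nn.Module):
    def __init__(self, embed_dim: int):
        super().__init__()
        self.proj = nn.Linear(embed_dim, embed_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.proj(x)

class ToyCritic(
    nn.Module,
    PyTorchModelHubMixin,
    license="apache-2.0",               # class-level kwargs become
    pipeline_tag="text-classification", # model-card metadata on the Hub
):
    def __init__(self, embed_dim: int = 16):
        super().__init__()
        self.encoder = DummyEncoder(embed_dim)
        self.value_head = nn.Linear(embed_dim, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.value_head(self.encoder(x))

model = ToyCritic()
model.save_pretrained("toy-critic")                  # config.json + weights
reloaded = ToyCritic.from_pretrained("toy-critic")   # rebuilt from saved config
```

The mixin serializes the __init__ arguments to config.json and restores them in from_pretrained, and push_to_hub generates a model card carrying the apache-2.0 / text-classification tags declared at class definition.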
rxnn/training/rl.py CHANGED
@@ -24,8 +24,6 @@ class RlAlgorithm(ABC):
         return self.critic_loss(rewards, values)

 class PPOConfig(TypedDict):
-    gae_gamma: float
-    gae_lambda: float
     clip_eps: float

 class PPOAlgorithm(RlAlgorithm):
@@ -33,8 +31,6 @@ class PPOAlgorithm(RlAlgorithm):
         super(PPOAlgorithm, self).__init__()

         # PPO Config
-        self.gae_gamma = config.get('gae_gamma', 0.99)
-        self.gae_lambda = config.get('gae_lambda', 0.95)
         self.clip_eps = config.get('clip_eps', 0.2)

     def policy_loss(self, query: TokenizedDict, answer: TokenizedDict, logits: torch.Tensor,
@@ -86,15 +82,6 @@ class PPOAlgorithm(RlAlgorithm):

         return policy_loss

-    # def _compute_gae(self, rewards: torch.Tensor, values: torch.Tensor, next_value: torch.Tensor) -> torch.Tensor:
-    #     advantages = torch.zeros_like(rewards, device=values.device)
-    #     last_advantage = 0
-    #     for t in reversed(range(rewards.size(0))):
-    #         delta = rewards[t] + self.gae_gamma * next_value - values[t]
-    #         advantages[t] = delta + self.gae_gamma * self.gae_lambda * last_advantage
-    #         last_advantage = advantages[t]
-    #     return advantages
-
     def calculate_advantages(self, rewards: torch.Tensor, values: torch.Tensor) -> torch.Tensor:
         advantages = rewards - values
         normalized_advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
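This release strips the unused GAE pieces from rl.py: the gae_gamma/gae_lambda config keys, their defaults in __init__, and the commented-out _compute_gae helper. What remains is the plain normalized one-step advantage in calculate_advantages. For comparison, both estimators as standalone functions; gae_advantages is reconstructed from the deleted comment block and is not released API (note the deleted sketch reused the same next_value at every step, where textbook GAE bootstraps from values[t + 1]):

```python
# Standalone comparison sketch, not the rxnn API.
# Shapes: rewards and values are (T,); next_value is a scalar tensor.
import torch

def simple_advantages(rewards: torch.Tensor, values: torch.Tensor) -> torch.Tensor:
    # What PPOAlgorithm.calculate_advantages keeps after this release:
    # one-step advantage (reward minus value baseline), normalized.
    advantages = rewards - values
    return (advantages - advantages.mean()) / (advantages.std() + 1e-8)

def gae_advantages(rewards: torch.Tensor, values: torch.Tensor,
                   next_value: torch.Tensor,
                   gamma: float = 0.99, lam: float = 0.95) -> torch.Tensor:
    # Reconstruction of the deleted _compute_gae comment block. It reuses
    # the same next_value for every step; standard GAE uses values[t + 1].
    advantages = torch.zeros_like(rewards, device=values.device)
    last_advantage = 0.0
    for t in reversed(range(rewards.size(0))):
        delta = rewards[t] + gamma * next_value - values[t]
        advantages[t] = delta + gamma * lam * last_advantage
        last_advantage = advantages[t]
    return advantages
```

With only the one-step advantage in use, neither gamma nor lambda is read anywhere in the class, so dropping them from PPOConfig keeps the config surface consistent with the implementation.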
{rxnn-0.2.20.dist-info → rxnn-0.2.22.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rxnn
-Version: 0.2.20
+Version: 0.2.22
 Summary: RxNN: Reactive Neural Networks Platform
 License: Apache-2.0
 Keywords: deep-learning,ai,machine-learning
{rxnn-0.2.20.dist-info → rxnn-0.2.22.dist-info}/RECORD RENAMED
@@ -15,10 +15,10 @@ rxnn/training/base.py,sha256=_xik1GXE4RJ_nxwqLQ1ccXA5pRtBCi-jL-jeRFBdHBU,11851
 rxnn/training/bml.py,sha256=FJszaQXOLx2ZHBa1CQpyMrG8i4Kj14E-gzDAEK_Ei5k,17272
 rxnn/training/callbacks.py,sha256=-N0MQPpZQaUWCINdTOsjul4bDGbGr2JgQBqOIXBLS6o,35053
 rxnn/training/dataset.py,sha256=7hTilFWPpqUEc6zNcMqBPjxFKxCfvTKKF3E8tVlwccQ,51250
-rxnn/training/models.py,sha256=qXfD3_97T9z724NN4myjzrpX6-jYA9Igl266ZwtJCtc,5519
+rxnn/training/models.py,sha256=wf98gYKKm9-ZY3zwdX9NIeJ-lvh7Ro1SoAijmQxYM28,5599
 rxnn/training/mrl.py,sha256=zk4m1JFuX0y82J0tG2XkY0Pz6Uy2did9cngOXqR9lMk,43326
 rxnn/training/reward.py,sha256=7MTVdNm5HnWmt6zFDi3TAYmnVSL_-24riOoY2F7z4x8,11290
-rxnn/training/rl.py,sha256=DHFwnPUlnq2JVj6CS6DwifnC_eMeBAUVp36UCAWNMis,3934
+rxnn/training/rl.py,sha256=j-KNLoZjhaEKasYNOc8DxHtwvknAgAJFwvXKot6otFA,3272
 rxnn/training/scheduler.py,sha256=LcjU35mEwz2U5x3U6tLfeeYlBqMxbFSxYzJYuXkWbSY,1408
 rxnn/training/tokenizer.py,sha256=umaLByMBx_NMrQElA45HLm9gkuzyKWDTFaKVd-CjXl0,8344
 rxnn/training/utils.py,sha256=Bw8nZLKIt7NQpUVCYkb_79kWKChVFOYgYXwODo4SvNc,5718
@@ -32,7 +32,7 @@ rxnn/transformers/moe.py,sha256=j6jEx6Ip0zttlUZKKn82azxo95lkLZs-H2GLSMD88hY,5859
 rxnn/transformers/positional.py,sha256=1PjcJybUzeQlIKJI4tahAGZcYgCRCL0otxs7mpsNuzM,4410
 rxnn/transformers/sampler.py,sha256=t6iiQTdLQ0TakUWnnhKkb5DKF2F_9-thXHBydDF3fxg,17389
 rxnn/utils.py,sha256=ihb6OTyDtPiocB_lOvnq7eOkjjpCkgs8wxvXUBNQ7mM,996
-rxnn-0.2.20.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
-rxnn-0.2.20.dist-info/METADATA,sha256=mgimK5GvI27RapfLjhlIdBwgfVdKoMA5Ig5yVxfeYIw,25960
-rxnn-0.2.20.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-rxnn-0.2.20.dist-info/RECORD,,
+rxnn-0.2.22.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+rxnn-0.2.22.dist-info/METADATA,sha256=KYQSidSUXwKxzuQj77h-jT0DUimKZVW_XTl-7PcQG3o,25960
+rxnn-0.2.22.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+rxnn-0.2.22.dist-info/RECORD,,
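Each RECORD line has the form path,sha256=<digest>,<size>, where the digest is the urlsafe-base64 SHA-256 of the file with trailing = padding stripped (PEP 376 / PEP 427); the size drop for rl.py (3934 → 3272 bytes) matches the GAE lines deleted above. A sketch for recomputing an entry from an unpacked wheel (the path argument is illustrative):

```python
# Recompute one RECORD entry for a file inside an unpacked wheel.
# Run from the wheel's root directory; the path below is illustrative.
import base64
import hashlib
import os

def record_entry(path: str) -> str:
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    # RECORD uses urlsafe base64 without '=' padding (PEP 376 / PEP 427).
    b64 = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
    return f"{path},sha256={b64},{os.path.getsize(path)}"

print(record_entry("rxnn/training/rl.py"))  # compare with the RECORD line above
```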
{rxnn-0.2.20.dist-info → rxnn-0.2.22.dist-info}/LICENSE RENAMED
File without changes
{rxnn-0.2.20.dist-info → rxnn-0.2.22.dist-info}/WHEEL RENAMED
File without changes