PyPI - rxnn - Versions diffs - 0.2.46__py3-none-any.whl → 0.2.47__py3-none-any.whl - Mend

rxnn 0.2.46__py3-none-any.whl → 0.2.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

rxnn/memory/attention.py +11 -8
rxnn/rxt/models.py +2 -2
rxnn/training/callbacks.py +18 -2
rxnn/training/models.py +1 -1
rxnn/training/mrl.py +2 -2
{rxnn-0.2.46.dist-info → rxnn-0.2.47.dist-info}/METADATA +1 -1
{rxnn-0.2.46.dist-info → rxnn-0.2.47.dist-info}/RECORD +9 -9
{rxnn-0.2.46.dist-info → rxnn-0.2.47.dist-info}/LICENSE +0 -0
{rxnn-0.2.46.dist-info → rxnn-0.2.47.dist-info}/WHEEL +0 -0

rxnn/memory/attention.py CHANGED Viewed

@@ -33,9 +33,16 @@ class StmMemoryAttention(nn.Module):
             if self.attention_layers[i].rope is not None:
                 self.attention_layers[i].rope.update_max_len(max_seq_len)
-    def forward(self, x: torch.Tensor, attention_mask: torch.Tensor = None) -> torch.Tensor:
-        mask = attention_mask.unsqueeze(1).unsqueeze(1).bool() if attention_mask is not None else None
+    def _residual_gate(self, gate: torch.Tensor, layer_stm: torch.Tensor, new_layer_stm: torch.Tensor) -> torch.Tensor:
+        if self.use_dynamic_gate:
+            mean_dim = -1 if self.per_slot_gate else [1, 2]
+            gate_input = gate * (new_layer_stm + layer_stm).mean(dim=mean_dim, keepdim=True)
+            layer_gate = torch.sigmoid(gate_input)
+        else:
+            layer_gate = torch.sigmoid(gate)
+        return layer_gate * new_layer_stm + (1 - layer_gate) * layer_stm
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         new_stm = torch.zeros_like(self.stm.memory)
         for i in range(self.num_layers):
             layer_stm = self.stm(i)
@@ -44,14 +51,10 @@ class StmMemoryAttention(nn.Module):
                 layer_stm = layer_stm.expand(x.size(0), -1, -1)
             encoded_layer_data = x[i]
             normalized_layer_stm = self.memory_norm_layers[i](layer_stm)
-            new_layer_stm = self.attention_layers[i](normalized_layer_stm, encoded_layer_data, encoded_layer_data, mask=mask)
+            new_layer_stm = self.attention_layers[i](normalized_layer_stm, encoded_layer_data, encoded_layer_data)
             if self.use_gated_residual:
-                # gated residual
-                gate_input = self.gate[i] * (new_layer_stm + layer_stm) if self.use_dynamic_gate else self.gate[i]
-                layer_gate = torch.sigmoid(gate_input)
-                new_stm[i] = layer_gate * new_layer_stm + (1 - layer_gate) * layer_stm
+                new_stm[i] = self._residual_gate(self.gate[i], layer_stm, new_layer_stm) # gated residual
             else:
                 new_stm[i] = new_layer_stm + layer_stm # residual
         self.stm.update_all(new_stm)
         return self.stm.memory

rxnn/rxt/models.py CHANGED Viewed

@@ -306,8 +306,8 @@ class RxTAlphaMemoryAttention(nn.Module, PyTorchModelHubMixin, license="apache-2
     def clone_reset_memory(self):
         self.model.stm.clone_detach_reset()
-    def forward(self, x: torch.Tensor, attention_mask: torch.Tensor = None) -> torch.Tensor:
-        return self.model(x, attention_mask=attention_mask)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.model(x)
 class RxTAlphaCriticEncoder(RxTAlphaComponentBase, pipeline_tag="text-classification", license="apache-2.0"):
     """RxT-Alpha (Reactive Transformer) encoder model"""

rxnn/training/callbacks.py CHANGED Viewed

@@ -560,6 +560,12 @@ class MrlTrainerCallback:
 class MrlPrintCallback(MrlTrainerCallback):
+    def __init__(self, update_steps_interval: int = 10) -> None:
+        super(MrlPrintCallback, self).__init__()
+        self.update_steps_interval = update_steps_interval
+        self.policy_losses = []
+        self.critic_losses = []
     def on_epoch_start(self, actor: nn.Module, epoch: int, stage_epochs: int, curriculum_config: dict,
                        global_epoch: int, global_epochs: int) -> None:
         print(
@@ -582,11 +588,21 @@ class MrlPrintCallback(MrlTrainerCallback):
         print(f'Epoch {global_epoch} | Starting update epoch {update_epoch}')
     def on_batch_updated(self, actor: nn.Module, epoch: int, step: int, policy_loss: float) -> None:
-        print(f'Epoch {epoch} | Step {step} - updated policy loss {policy_loss}')
+        if step != 0 and step % self.update_steps_interval == 0:
+            loss = sum(self.policy_losses) / len(self.policy_losses)
+            self.policy_losses = []
+            print(f'Epoch {epoch} | Steps {step - self.update_steps_interval} - {step} - mean policy loss {loss} | current policy loss {policy_loss}')
+        else:
+            self.policy_losses.append(policy_loss)
     def on_critic_updated(self, actor: nn.Module, critic: nn.Module, epoch: int, step: int,
                           critic_loss: float) -> None:
-        print(f'Epoch {epoch} | Step {step} - updated critic loss {critic_loss}')
+        if step != 0 and step % self.update_steps_interval == 0:
+            loss = sum(self.critic_losses) / len(self.critic_losses)
+            self.critic_losses = []
+            print(f'Epoch {epoch} | Steps {step - self.update_steps_interval} - {step} - mean critic loss {loss} | current critic loss {critic_loss}')
+        else:
+            self.critic_losses.append(critic_loss)
     def on_update_epoch_end(self, actor: nn.Module, critic: nn.Module, global_epoch: int, update_epoch: int, policy_loss: float, critic_loss: float) -> None:
         print(f'Epoch {global_epoch} | Update epoch {update_epoch} - mean policy loss {policy_loss} | mean critic loss {critic_loss}')

rxnn/training/models.py CHANGED Viewed

@@ -204,7 +204,7 @@ class MrlActorModel(nn.Module):
             return self.decoder(x, attention_mask=attention_mask)
         else:
             _, ed = self.encoder(x, attention_mask=attention_mask)
-            return self.memory_attention(ed, attention_mask=attention_mask)
+            return self.memory_attention(ed)
 class MrlCriticModel(nn.Module, PyTorchModelHubMixin, license="apache-2.0", pipeline_tag="text-classification"):

rxnn/training/mrl.py CHANGED Viewed

@@ -941,7 +941,7 @@ class MRLTrainer:
             ]
         elif mode == 'fetch':
             params = [
-                {'params': self.actor.embedding_parameters(), 'lr': unfreeze_lr},
+                {'params': self.actor.embedding_parameters(), 'lr': embedding_lr},
                 {'params': self.actor.encoder.not_memory_parameters(), 'lr': unfreeze_lr},
                 {'params': self.actor.encoder.memory_parameters(), 'lr': unfreeze_lr},
                 {'params': self.actor.memory_attention_parameters(), 'lr': unfreeze_lr},
@@ -950,7 +950,7 @@ class MRLTrainer:
             ]
         elif mode == 'joint':
             params = [
-                {'params': self.actor.embedding_parameters(), 'lr': unfreeze_lr},
+                {'params': self.actor.embedding_parameters(), 'lr': embedding_lr},
                 {'params': self.actor.encoder.not_memory_parameters(), 'lr': unfreeze_lr},
                 {'params': self.actor.encoder.memory_parameters(), 'lr': memory_lr},
                 {'params': self.actor.memory_attention_parameters(), 'lr': memory_lr},

{rxnn-0.2.46.dist-info → rxnn-0.2.47.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rxnn
-Version: 0.2.46
+Version: 0.2.47
 Summary: RxNN: Reactive Neural Networks Platform
 License: Apache-2.0
 Keywords: deep-learning,ai,machine-learning

{rxnn-0.2.46.dist-info → rxnn-0.2.47.dist-info}/RECORD RENAMED Viewed

@@ -5,19 +5,19 @@ rxnn/experimental/attention.py,sha256=46qwZLJuZMpIBrZ-r9DaQEPPmmZkO464C3Tkm_Mq-c
 rxnn/experimental/models.py,sha256=foBo0n0ufvBnfIdJomiEg3CuSOiWSt-q5ako7vzYxx4,4888
 rxnn/experimental/moe.py,sha256=jHZ1QhpWiVQOswVpFmuH7b2IUOPf0Uuf-I2Ddwsd7Us,6140
 rxnn/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rxnn/memory/attention.py,sha256=sXh6f_iOpEYCaqyG-QVp_C_A9IF0QcXTi3hW5G8FCwA,2630
+rxnn/memory/attention.py,sha256=kan6UNPTjLfO7zKNp92hGooldgWPi3li_2-_L5xiErs,2784
 rxnn/memory/norm.py,sha256=E98jOQEuIOFFhlkvS8s4fFN-D4tLO6vaOqnObv1oVmA,6592
 rxnn/memory/stm.py,sha256=jv57gsH9XW19sLbxpRDqsp1yfsii_4Ef4Ncr_ztk-i4,3937
 rxnn/rxt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rxnn/rxt/models.py,sha256=4MbCL4xGY3ceewZQmopjmwAyLQS92L6KLOPqaW7-Fho,14673
+rxnn/rxt/models.py,sha256=new_YXLe9vfIBPX-pmFRoV523d7yCjEgfTY06EaH3Ms,14605
 rxnn/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/training/base.py,sha256=CqaArEZYOdH64nmKfx28U3GI46TzO4oNkjf_hrF23Cw,11835
 rxnn/training/bml.py,sha256=hw6gLpLkGvqLzxIvBg4MvCc5r8cHpEm2RDyh7nH6CtE,16914
-rxnn/training/callbacks.py,sha256=RPW3Lisi31VJvoYyZeAF3dQzttrceDQDsZ6G5Xl09HM,35933
+rxnn/training/callbacks.py,sha256=rS8leuVFPVVfE5Zc8DMkUZhRIPN-vpPbUjowXE5TSBw,36779
 rxnn/training/dataset.py,sha256=7hTilFWPpqUEc6zNcMqBPjxFKxCfvTKKF3E8tVlwccQ,51250
 rxnn/training/ddp.py,sha256=VsNBjn3cY-uUj8hbsW7oKvb0_ZKnXnJ2KgObm-Mr9i4,836
-rxnn/training/models.py,sha256=tqABOt_xEcWbZNEW2I2Jt-3eyaGICK011zILwuTk6Zc,9082
-rxnn/training/mrl.py,sha256=L4G7xSPlxsymvNhvsSloCpaqYjOXxEm7GmKilM_Ojvc,59809
+rxnn/training/models.py,sha256=L2emJM06u7B9f9T1dFsGXzXX-rsV77ND7L1pAM9Z_Ow,9051
+rxnn/training/mrl.py,sha256=VXwRJ4wQtE0OoRsrsjYlWa2toTvHjoBJ_kril3EiK_A,59811
 rxnn/training/reward.py,sha256=B7nerPk9eNAv2i7umtNF88tVQVwijNNrchIrEITGHKk,11623
 rxnn/training/rl.py,sha256=q4NzIZAmXRHVToT13IHrPTtEikWQUvT0NO0IjApjAO8,6171
 rxnn/training/scheduler.py,sha256=LcjU35mEwz2U5x3U6tLfeeYlBqMxbFSxYzJYuXkWbSY,1408
@@ -33,7 +33,7 @@ rxnn/transformers/moe.py,sha256=j6jEx6Ip0zttlUZKKn82azxo95lkLZs-H2GLSMD88hY,5859
 rxnn/transformers/positional.py,sha256=1PjcJybUzeQlIKJI4tahAGZcYgCRCL0otxs7mpsNuzM,4410
 rxnn/transformers/sampler.py,sha256=t6iiQTdLQ0TakUWnnhKkb5DKF2F_9-thXHBydDF3fxg,17389
 rxnn/utils.py,sha256=ihb6OTyDtPiocB_lOvnq7eOkjjpCkgs8wxvXUBNQ7mM,996
-rxnn-0.2.46.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
-rxnn-0.2.46.dist-info/METADATA,sha256=hpTQT4p75cKrAaGOz_56gCBm1rT_y-Nr1TI9Mhv6wv0,25960
-rxnn-0.2.46.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-rxnn-0.2.46.dist-info/RECORD,,
+rxnn-0.2.47.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+rxnn-0.2.47.dist-info/METADATA,sha256=OqRYFY68bnqQXdXfBNboYLAmXRmojMmR1YFUVQa4Jgo,25960
+rxnn-0.2.47.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+rxnn-0.2.47.dist-info/RECORD,,

{rxnn-0.2.46.dist-info → rxnn-0.2.47.dist-info}/LICENSE RENAMED Viewed

File without changes

{rxnn-0.2.46.dist-info → rxnn-0.2.47.dist-info}/WHEEL RENAMED Viewed

File without changes

rxnn 0.2.46__py3-none-any.whl → 0.2.47__py3-none-any.whl

rxnn 0.2.46__py3-none-any.whl → 0.2.47__py3-none-any.whl