rxnn 0.2.31.tar.gz → 0.2.32.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rxnn-0.2.31 → rxnn-0.2.32}/PKG-INFO +1 -1
- {rxnn-0.2.31 → rxnn-0.2.32}/pyproject.toml +1 -1
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/memory/stm.py +0 -1
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/rxt/models.py +27 -1
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/transformers/models.py +33 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/LICENSE +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/README.md +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/.DS_Store +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/__init__.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/experimental/__init__.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/experimental/attention.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/experimental/models.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/experimental/moe.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/memory/__init__.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/memory/attention.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/memory/norm.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/rxt/__init__.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/__init__.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/base.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/bml.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/callbacks.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/dataset.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/ddp.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/models.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/mrl.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/reward.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/rl.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/scheduler.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/tokenizer.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/training/utils.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/transformers/__init__.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/transformers/attention.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/transformers/ff.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/transformers/layers.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/transformers/mask.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/transformers/moe.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/transformers/positional.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/transformers/sampler.py +0 -0
- {rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/utils.py +0 -0
{rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/rxt/models.py

@@ -5,7 +5,7 @@ from huggingface_hub import PyTorchModelHubMixin
 from ..transformers.positional import RotaryPositionalEmbedding
 from ..transformers.attention import init_attention
 from ..transformers.layers import ReactiveTransformerLayer
-from ..transformers.models import ReactiveTransformerBase, ReactiveTransformerEncoder, ReactiveTransformerDecoder
+from ..transformers.models import ReactiveTransformerBase, ReactiveTransformerEncoder, ReactiveTransformerDecoder, ReactiveTransformerEncoderDetachStm
 from ..transformers.ff import get_activation_layer
 from ..memory.stm import ShortTermMemory
 from ..memory.norm import init_memory_norm
@@ -293,3 +293,29 @@ class RxTAlphaMemoryAttention(nn.Module, PyTorchModelHubMixin, license="apache-2
 
     def forward(self, x: torch.Tensor, attention_mask: torch.Tensor = None) -> torch.Tensor:
         return self.model(x, attention_mask=attention_mask)
+
+class RxTAlphaCriticEncoder(RxTAlphaComponentBase, pipeline_tag="text-classification", license="apache-2.0"):
+    """RxT-Alpha (Reactive Transformer) encoder model"""
+
+    def __init__(self, **kwargs: RxTAlphaComponentConfig):
+        super(RxTAlphaCriticEncoder, self).__init__(False, **kwargs)
+
+    def _init_model(
+            self,
+            stm: ShortTermMemory,
+            layers: nn.ModuleList,
+            embedding: nn.Embedding,
+            use_flash_attention: bool,
+            embed_dim: int,
+            vocab_size: int
+    ) -> ReactiveTransformerEncoderDetachStm:
+        return ReactiveTransformerEncoderDetachStm(
+            stm=stm,
+            embedding=embedding,
+            own_layers=layers,
+            use_flash_attention=use_flash_attention,
+        )
+
+    def forward(self, x: torch.Tensor, attention_mask: torch.Tensor = None) -> tuple[torch.Tensor, torch.Tensor]:
+        return self.model(x, attention_mask=attention_mask)
+
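RxTAlphaCriticEncoder is a thin wrapper that builds the detached-STM encoder variant, so it can be trained as a memory-aware critic (value model) without backpropagating into the memory state. Below is a minimal usage sketch, not a definitive recipe: the checkpoint id and the mean-pool value head are hypothetical and not part of this release; only the class itself, `from_pretrained` (inherited via PyTorchModelHubMixin), and the `(last_hidden, per_layer_states)` return come from the package.

import torch
import torch.nn as nn
from rxnn.rxt.models import RxTAlphaCriticEncoder

# Hypothetical checkpoint id - substitute a real RxT-Alpha critic encoder repo.
encoder = RxTAlphaCriticEncoder.from_pretrained("ReactiveAI/rxt-alpha-critic-encoder")

input_ids = torch.randint(0, 1000, (2, 32))            # (batch, seq_len) token ids
attention_mask = torch.ones(2, 32, dtype=torch.long)   # 1 = real token, 0 = padding

# forward returns (last_hidden, stacked_per_layer_hidden_states)
last_hidden, per_layer_states = encoder(input_ids, attention_mask=attention_mask)

# Illustrative critic value head (not part of rxnn): mean-pool and project to a scalar.
value_head = nn.Linear(last_hidden.size(-1), 1)
values = value_head(last_hidden.mean(dim=1))           # (batch, 1) value estimates

# A loss built from `values` backpropagates through the encoder layers, but because the
# underlying ReactiveTransformerEncoderDetachStm detaches each STM slot, no gradient
# reaches the short-term memory state.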
{rxnn-0.2.31 → rxnn-0.2.32}/src/rxnn/transformers/models.py

@@ -126,6 +126,39 @@ class ReactiveTransformerEncoder(ReactiveTransformerBase):
         return x, torch.stack(hidden_states)
 
 
+class ReactiveTransformerEncoderDetachStm(ReactiveTransformerBase):
+    """
+    Reactive Transformer encoder DetachStm version - reactive transformer encoder that's detaching Short-Term Memory tensors,
+    before processing them in layers (memory cross-attention). Made for Memory-Aware Critic models, to not include memory
+    update gradients in Critic optimization.
+    """
+
+    def forward(self, x: torch.Tensor, attention_mask: torch.Tensor = None) -> tuple[torch.Tensor, torch.Tensor]:
+        x = super().forward(x)  # apply embeddings
+        if attention_mask is not None:
+            attention_mask = attention_mask.unsqueeze(1).unsqueeze(1).bool()
+
+        hidden_states = []
+        # Process shared layers
+        if self.shared_layers is not None:
+            for i in range(self.num_shared_layers):
+                layer_stm = self.stm(i).detach()  # <- Detach STM layer
+                # expand layer STM to batch size, if it's not in batch mode
+                if layer_stm.size(0) == 1:
+                    layer_stm = layer_stm.expand(x.size(0), -1, -1)
+                x = self.shared_layers[i](x, layer_stm, mask=attention_mask)
+                hidden_states.append(x)
+        # Process own layers
+        for i in range(self.num_own_layers):
+            layer_stm = self.stm(i).detach()  # <- Detach STM layer
+            # expand layer STM to batch size, if it's not in batch mode
+            if layer_stm.size(0) == 1:
+                layer_stm = layer_stm.expand(x.size(0), -1, -1)
+            x = self.layers[i](x, layer_stm, mask=attention_mask)
+            hidden_states.append(x)
+        return x, torch.stack(hidden_states)
+
+
 class ClassicTransformerBase(nn.Module):
     """Base class for Classic Transformer models - common logic for both decoders and encoders."""
 
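The only behavioral difference from ReactiveTransformerEncoder is the `.detach()` applied to each STM slot before memory cross-attention. The toy snippet below is not from the package; it uses a simplified dot-product cross-attention as a stand-in, only to show the intended gradient effect of that detach: a critic-style loss still trains the token path while leaving the memory tensor untouched.

import torch

# Toy stand-ins: x plays the role of token hidden states from the trainable encoder path,
# stm_layer the role of one Short-Term Memory slot that is updated elsewhere.
x = torch.randn(2, 4, 16, requires_grad=True)           # (batch, seq_len, dim)
stm_layer = torch.randn(1, 8, 16, requires_grad=True)   # (1, stm_size, dim)

layer_stm = stm_layer.detach()                          # the DetachStm step
layer_stm = layer_stm.expand(x.size(0), -1, -1)         # broadcast single STM to the batch

# Simplified memory cross-attention: queries from tokens, keys/values from memory.
scores = torch.softmax(x @ layer_stm.transpose(1, 2) / 16 ** 0.5, dim=-1)
out = scores @ layer_stm

out.mean().backward()                                   # pretend critic loss

print(x.grad is not None)       # True  - the token/encoder path still gets gradients
print(stm_layer.grad is None)   # True  - no gradient flows into the detached STM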