rxnn 0.1.76__py3-none-any.whl → 0.1.78__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxnn/rxt/models.py +2 -2
- rxnn/transformers/positional.py +5 -2
- {rxnn-0.1.76.dist-info → rxnn-0.1.78.dist-info}/METADATA +3 -3
- {rxnn-0.1.76.dist-info → rxnn-0.1.78.dist-info}/RECORD +6 -6
- {rxnn-0.1.76.dist-info → rxnn-0.1.78.dist-info}/LICENSE +0 -0
- {rxnn-0.1.76.dist-info → rxnn-0.1.78.dist-info}/WHEEL +0 -0
rxnn/rxt/models.py
CHANGED
@@ -97,13 +97,13 @@ class RxTAlphaComponentBase(nn.Module, PyTorchModelHubMixin):
|
|
97
97
|
if cross_att_type in ['mha', 'gqa', 'mqa']:
|
98
98
|
cross_att_init = lambda: init_attention(embed_dim, att_heads, cross_att_type, att_groups, rope=rope,
|
99
99
|
use_flash_attention=use_flash_attention, dropout=att_dropout,
|
100
|
-
max_seq_len=seq_len, is_causal=is_causal)
|
100
|
+
max_seq_len=seq_len, is_causal=is_causal, rope_only_for_query=True)
|
101
101
|
else:
|
102
102
|
cross_att_init = lambda: init_experimental_attention(embed_dim, att_heads, cross_att_type, cross_att_groups or att_groups, rope=rope,
|
103
103
|
use_flash_attention=use_flash_attention, dropout=att_dropout,
|
104
104
|
max_seq_len=seq_len, is_causal=is_causal, num_experts=att_experts,
|
105
105
|
num_query_experts=att_query_experts,
|
106
|
-
num_query_groups=cross_att_query_groups or att_query_groups)
|
106
|
+
num_query_groups=cross_att_query_groups or att_query_groups, rope_only_for_query=True)
|
107
107
|
|
108
108
|
layers = nn.ModuleList([
|
109
109
|
ReactiveTransformerLayer(
|
rxnn/transformers/positional.py
CHANGED
@@ -40,13 +40,16 @@ class RotaryPositionalEmbedding(nn.Module):
|
|
40
40
|
return q_embed
|
41
41
|
|
42
42
|
def _prepare_freqs(self, seq_len: int, device: torch.device) -> torch.Tensor:
|
43
|
-
|
43
|
+
cache_len = self.cache.size(1)
|
44
|
+
if self.cache is None or cache_len < seq_len:
|
44
45
|
t = torch.arange(seq_len, device=device).type_as(self.inv_freq)
|
45
46
|
freqs = torch.einsum('i,j->ij', t, self.inv_freq)
|
46
47
|
self.cache = freqs
|
47
48
|
return freqs[None, None, :, :]
|
48
|
-
|
49
|
+
elif cache_len == seq_len:
|
49
50
|
return self.cache[None, None, :, :]
|
51
|
+
else:
|
52
|
+
return self.cache[:seq_len][None, None, :, :]
|
50
53
|
|
51
54
|
def _rotate(self, x: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor:
|
52
55
|
x1 = x[..., 0::2]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: rxnn
|
3
|
-
Version: 0.1.76
|
3
|
+
Version: 0.1.78
|
4
4
|
Summary: RxNN: Reactive Neural Networks Platform
|
5
5
|
License: Apache-2.0
|
6
6
|
Keywords: deep-learning,ai,machine-learning
|
@@ -23,8 +23,8 @@ Project-URL: Homepage, https://rxai.dev/rxnn
|
|
23
23
|
Project-URL: Repository, https://github.com/RxAI-dev/rxnn/python
|
24
24
|
Description-Content-Type: text/markdown
|
25
25
|
|
26
|
-
<img src="https://raw.githubusercontent.com/RxAI-dev/RxNN/refs/heads/main/assets/logo_rxai.webp" width="300" />
|
27
|
-
<img src="https://raw.githubusercontent.com/RxAI-dev/RxNN/refs/heads/main/assets/logo_rxnn.webp" width="300" />
|
26
|
+
<img src="https://raw.githubusercontent.com/RxAI-dev/RxNN/refs/heads/main/assets/logo/logo_rxai.webp" width="300" />
|
27
|
+
<img src="https://raw.githubusercontent.com/RxAI-dev/RxNN/refs/heads/main/assets/logo/logo_rxnn.webp" width="300" />
|
28
28
|
|
29
29
|
# Reactive AI - RxNN
|
30
30
|
## Reactive Neural Networks Platform
|
@@ -7,7 +7,7 @@ rxnn/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
rxnn/memory/norm.py,sha256=Ofl8Q5NYEF9GQeO0bhM43tkTW91J0y6TSvTAOYMgloM,6278
|
8
8
|
rxnn/memory/stm.py,sha256=EsD8slSP4_9dLuq6aFPDmuFe8PWilxh90so5Z3nm-ig,2057
|
9
9
|
rxnn/rxt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
rxnn/rxt/models.py,sha256=
|
10
|
+
rxnn/rxt/models.py,sha256=iUlSvdXrD1NVzZFmdC55qp4_3xoJj31FC40BGgYlf4Q,8763
|
11
11
|
rxnn/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
rxnn/training/base.py,sha256=xPMA2Bg9-oUZvSZg67ls2p7Gk9pZ9IHUiIJwUzSe2K8,11766
|
13
13
|
rxnn/training/bml.py,sha256=S1ZaXTybzeJH7uVFamCr4TPl2bLyZ5xmn_lSsjThTiM,19162
|
@@ -22,10 +22,10 @@ rxnn/transformers/layers.py,sha256=OX8CsFY9A7uqH1SLwyexR_5BNlwheYrJHCGXjF8Q7HU,7
|
|
22
22
|
rxnn/transformers/mask.py,sha256=J0cfLVLt3SzS2ra3KcY4khrkhI975Dw4CjpUi3Sn25s,419
|
23
23
|
rxnn/transformers/models.py,sha256=xbnn3FTNZFhaqq9A0XEM12ie_WL_58pPeq0qFXIgve0,7656
|
24
24
|
rxnn/transformers/moe.py,sha256=j6jEx6Ip0zttlUZKKn82azxo95lkLZs-H2GLSMD88hY,5859
|
25
|
-
rxnn/transformers/positional.py,sha256=
|
25
|
+
rxnn/transformers/positional.py,sha256=DE1TP3D6ikBPg3Ym0sP9F666LHuE70H0w-JEH5DfKPw,4415
|
26
26
|
rxnn/transformers/sampler.py,sha256=poWBpxg1iuK5gEJtxHkk5VVfS9V48hs2Olqdhy_Gw8c,6548
|
27
27
|
rxnn/utils.py,sha256=d5U8i5ukovgDyqiycc2AoxObTz_eF_bgo2MKvdtJ98s,467
|
28
|
-
rxnn-0.1.76.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
|
29
|
-
rxnn-0.1.76.dist-info/METADATA,sha256=
|
30
|
-
rxnn-0.1.76.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
31
|
-
rxnn-0.1.76.dist-info/RECORD,,
|
28
|
+
rxnn-0.1.78.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
|
29
|
+
rxnn-0.1.78.dist-info/METADATA,sha256=559E3b22oEiu6vXNnsi7xLCw0GeuYQmcdmOgHkcdlL0,16589
|
30
|
+
rxnn-0.1.78.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
31
|
+
rxnn-0.1.78.dist-info/RECORD,,
|
File without changes
|
File without changes
|