rxnn 0.1.16__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -116,8 +116,13 @@ class GroupedMoeAttention(GroupedQueryAttention):
  q = self.q_proj(query).view(b, t, self.num_heads, -1).transpose(1, 2) if not skip_query_processing else query

  # Key/Value processing
- B, S, _ = key.shape
- weights_k, indices_k = self.router(key)
+ B, S, D = key.shape
+ key_flat = key.view(-1, D)
+ print('key_flat: ', key_flat.shape)
+ weights_k_flat, indices_k_flat = self.router(key_flat)
+ # Reshape back to original dimensions
+ weights_k = weights_k_flat.view(B, S, -1)
+ indices_k = indices_k_flat.view(B, S, -1)
  k = self._process_grouped_experts(key, self.wk, self.bk, weights_k, indices_k)
  v = self._process_grouped_experts(value, self.wv, self.bv, weights_k, indices_k)

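For context, this change flattens the `[B, S, D]` key tensor to `[B*S, D]` before calling the router, matching the `# Input shape: [batch*seq_len, embed_dim]` comment in `MoeRouter.forward`, and then reshapes the routing weights and indices back to per-token form. Below is a minimal, self-contained sketch of that flatten → route → reshape pattern; `TinyRouter` is a hypothetical stand-in for the package's `MoeRouter`, and all sizes are illustrative.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyRouter(nn.Module):
    """Hypothetical stand-in for MoeRouter: expects flat [batch*seq_len, embed_dim] input."""
    def __init__(self, embed_dim: int, num_experts: int, top_k: int):
        super().__init__()
        self.gate = nn.Linear(embed_dim, num_experts)
        self.top_k = top_k

    def forward(self, x: torch.Tensor):
        probs = F.softmax(self.gate(x), dim=-1)
        return probs.topk(self.top_k, dim=-1)   # (weights, indices), each [batch*seq_len, top_k]

B, S, D = 2, 4, 16                              # illustrative sizes
key = torch.randn(B, S, D)
router = TinyRouter(D, num_experts=8, top_k=2)

key_flat = key.view(-1, D)                      # [B*S, D] - the shape the router expects
weights_flat, indices_flat = router(key_flat)   # [B*S, top_k] each
weights_k = weights_flat.view(B, S, -1)         # back to per-token form: [B, S, top_k]
indices_k = indices_flat.view(B, S, -1)
```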
@@ -210,8 +215,13 @@ class DeepMoeAttention(GroupedMoeAttention):

  def _forward_qkv(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, b: int, t: int, d: int, skip_query_processing: bool = False):
  # Query processing
- B, T, _ = query.shape
- weights_q, indices_q = self.query_router(query)
+ B, T, D = query.shape
+ # Flatten for query routing
+ query_flat = query.view(B * T, D)
+ weights_q_flat, indices_q_flat = self.query_router(query_flat)
+ # Reshape back
+ weights_q = weights_q_flat.view(B, T, -1)
+ indices_q = indices_q_flat.view(B, T, -1)
  q = self._process_grouped_experts(query, self.wq, self.bq, weights_q, indices_q)
  q = q.permute(0, 2, 1, 3).reshape(B, self.num_query_groups, T, -1)

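The `DeepMoeAttention` hunk applies the same flatten → route → reshape pattern to queries via `query_router` (see the sketch above); the per-token weights and indices are then consumed by `_process_grouped_experts` before the grouped permute/reshape.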
@@ -7,7 +7,6 @@ from ..transformers.attention import init_attention
  from ..transformers.layers import ClassicTransformerLayer
  from ..transformers.models import ClassicTransformerDecoder
  from ..transformers.ff import get_activation_layer
- from ..memory.stm import ShortTermMemory
  from ..utils import get_model_size
  from .attention import init_moe_attention

rxnn/transformers/moe.py CHANGED
@@ -16,22 +16,26 @@ class MoeRouter(nn.Module):

  def calculate_aux_loss(self, top_k_indices: torch.Tensor, probs: torch.Tensor) -> torch.Tensor:
  expert_mask = F.one_hot(top_k_indices, self.num_experts).float()
+ print('expert mask: ', expert_mask.shape)
  expert_usage = expert_mask.sum(dim=0).mean(dim=0)
+ print('expert usage: ', expert_usage.shape)
  mean_probs = probs.mean(dim=0)
+ print('mean probs: ', mean_probs.shape)
  return (expert_usage * mean_probs).sum() * self.num_experts


  def forward(self, x: torch.Tensor):
  # Input shape: [batch*seq_len, embed_dim]
  logits = self.gate(x)
+ print('router logits: ', logits.shape)
  probs = F.softmax(logits, dim=-1)
-
+ print('router probs: ', probs.shape)
  # Get top-k experts for each token
  top_k_weights, top_k_indices = probs.topk(self.top_k, dim=-1)

  # Normalize weights (sum to 1 for each token)
  top_k_weights = top_k_weights / (top_k_weights.sum(dim=-1, keepdim=True) + 1e-9)
-
+ print('top k: ', top_k_weights.shape, top_k_indices.shape)
  # Load Balance Loss
  self.aux_loss = self.calculate_aux_loss(top_k_indices, probs)

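For reference, here is a standalone shape trace of the load-balancing auxiliary loss computed in `calculate_aux_loss` above. The tensor names mirror the diff; `N`, `num_experts`, and `top_k` are illustrative values, not the package's defaults.

```python
import torch
import torch.nn.functional as F

N, num_experts, top_k = 6, 4, 2                               # illustrative sizes
probs = torch.rand(N, num_experts)
probs = probs / probs.sum(dim=-1, keepdim=True)               # router probs: [N, num_experts]
top_k_weights, top_k_indices = probs.topk(top_k, dim=-1)      # [N, top_k] each

expert_mask = F.one_hot(top_k_indices, num_experts).float()   # [N, top_k, num_experts]
expert_usage = expert_mask.sum(dim=0).mean(dim=0)             # [num_experts]: selections per expert, averaged over top-k slots
mean_probs = probs.mean(dim=0)                                # [num_experts]: average routing probability per expert
aux_loss = (expert_usage * mean_probs).sum() * num_experts    # scalar; grows when a few experts dominate the routing
```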
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: rxnn
- Version: 0.1.16
+ Version: 0.1.18
  Summary: RxNN: Reactive Neural Networks Platform
  License: Apache-2.0
  Keywords: deep-learning,ai,machine-learning
@@ -53,6 +53,29 @@ that's generating Infinite Chain-of-Thoughts and is communicating in push-based
  Reactive communication patterns in RxNN models are adapted to handle the asynchronous nature of the model - after it finishes generating
  a sequence, it has to process it and save it in memory, but this can be done in the background.

+ ## Release plan
+ We are working on three new reactive architectures that progressively advance from language models to awareness models:
+ - Reactive Transformer: Reactive Language Model (RLM) with Short-Term Memory
+ - Preactor: extending Reactive Transformer with additional Long-Term Memory, providing theoretically infinite context (only
+ the length of a single message is limited) and the ability to learn from interactions (Live Learning)
+ - Reactor: AGI awareness model & Strong Reactive Neural Network that works in an infinite reasoning loop and doesn't require explicit human commands
+
+ Each new architecture is based on the previous one and adds new features/abilities. They will be progressively
+ released with the next versions of the **RxNN** framework:
+ - 0.1.x: Reactive Transformer base models, Base Model Learning (pre-training/fine-tuning) & Transformers extensions (MoE Attention, Short-Term Memory, etc.)
+ - 0.2.x: Memory Reinforcement Learning (MRL) for Short-Term Memory & Reactive Transformer, Attention-based Memory System details
+ - 0.3.x: Reinforcement Learning from Human Feedback for Reactive models (RxRLHF), basic Tensor Reactive
+ Extensions (TRX/Rust) for full Reactive Transformer, RxT-Alpha release (+following models - RxT-Beta, etc.)
+ - 0.4.x: Preactor base models, Tensor Database (TDB/Rust) for Long-Term Memory, mxRAG/revRAG subsystems
+ - 0.5.x: MRL for Long-Term Memory & Preactor, Live Learning for Preactor, PRx-Alpha release (+following models - PRx-Beta, etc.)
+ - 0.6.x: Reactor base models, TRX full implementation, Receptors & Effectors Reactive RNNs
+ - 0.7.x: Behavioral Reinforcement Learning (BRL) for Reactor's Infinite Chain-of-Thoughts, Continuous Live Learning for Reactor
+ - 0.8.x: Rx-Alpha release
+ - 0.9.x: Rx-Beta release
+ - 1.0.0: Reactor AGI official release (Expert, Assistant & Utility class models)
+ - 1.x.x: Multimodal reactive models (could be released earlier, depending on progress)
+ - 2.0.0: Real-Time Vision Reactor - Worker class models
+ - x.x.x: ...and more!
  Apache License
  Version 2.0, January 2004
  http://www.apache.org/licenses/
@@ -1,7 +1,7 @@
  rxnn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  rxnn/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- rxnn/experimental/attention.py,sha256=qly-Lf9UsYC9JB945JcLnt27ZbF0vFvfyS5iUm-Rsak,31644
- rxnn/experimental/models.py,sha256=ioYtbJDxJ4zASiKs9dFY4WvAJn7eVqFf7zid-65pbUU,4709
+ rxnn/experimental/attention.py,sha256=Ix47gusaRtaRDLf87TEEoS3Jfx4O_b6l_2AzKdiqdFo,32100
+ rxnn/experimental/models.py,sha256=-XkEHsyT8iNAjhZbgC7N_5nzP4ENVJLwxSoLHgMfA0I,4668
  rxnn/experimental/moe.py,sha256=PhiaNr3FwR2Zv2a0tfj6sfZ4iyhLo3Jyp2DwXq19qZQ,7935
  rxnn/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  rxnn/memory/norm.py,sha256=Ofl8Q5NYEF9GQeO0bhM43tkTW91J0y6TSvTAOYMgloM,6278
@@ -21,11 +21,11 @@ rxnn/transformers/ff.py,sha256=jJnuBDsnnX5uYC_WZH8cXAYrMnz0P-iX7MwcPivjRtI,2533
  rxnn/transformers/layers.py,sha256=HhIiykmrBgdsV4AbMQXr9t0cSo4gSIeN0dPtc8mDyOo,5629
  rxnn/transformers/mask.py,sha256=J0cfLVLt3SzS2ra3KcY4khrkhI975Dw4CjpUi3Sn25s,419
  rxnn/transformers/models.py,sha256=w-zB_8QB9-Fae-GkGgmVDNY-Ts_0gBeWcevpl9qzZVM,7169
- rxnn/transformers/moe.py,sha256=FeaQR7hTX1dE74YdMOcuyZHSkGiV_0JwF8fw-GnfNOQ,4741
+ rxnn/transformers/moe.py,sha256=gJ-jXKtc01xcBayaYchRZy7imFGnvwVfUflXvFiKjKU,5048
  rxnn/transformers/positional.py,sha256=2l38RS0Dini3f6Z3LUHr3XwWzg1UK7fO2C6wazWDAYU,4292
  rxnn/transformers/sampler.py,sha256=poWBpxg1iuK5gEJtxHkk5VVfS9V48hs2Olqdhy_Gw8c,6548
  rxnn/utils.py,sha256=d5U8i5ukovgDyqiycc2AoxObTz_eF_bgo2MKvdtJ98s,467
- rxnn-0.1.16.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
- rxnn-0.1.16.dist-info/METADATA,sha256=Cr_8OPHWlf2LHYlZEmc_NaUkIiE3ShJ01Z5B5ZhI6G8,14629
- rxnn-0.1.16.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
- rxnn-0.1.16.dist-info/RECORD,,
+ rxnn-0.1.18.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+ rxnn-0.1.18.dist-info/METADATA,sha256=vgk9Zbsmrg8PqnUNDSGTyNIbFWmSq3pRMoQqmR0k704,16627
+ rxnn-0.1.18.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+ rxnn-0.1.18.dist-info/RECORD,,