rxnn 0.1.16__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -116,8 +116,13 @@ class GroupedMoeAttention(GroupedQueryAttention):
  q = self.q_proj(query).view(b, t, self.num_heads, -1).transpose(1, 2) if not skip_query_processing else query

  # Key/Value processing
- B, S, _ = key.shape
- weights_k, indices_k = self.router(key)
+ B, S, D = key.shape
+ key_flat = key.view(-1, D)
+ print('key_flat: ', key_flat.shape)
+ weights_k_flat, indices_k_flat = self.router(key_flat)
+ # Reshape back to original dimensions
+ weights_k = weights_k_flat.view(B, S, -1)
+ indices_k = indices_k_flat.view(B, S, -1)
  k = self._process_grouped_experts(key, self.wk, self.bk, weights_k, indices_k)
  v = self._process_grouped_experts(value, self.wv, self.bv, weights_k, indices_k)

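For context, this change flattens the `[B, S, D]` key tensor to `[B*S, D]` before calling the router, matching the `# Input shape: [batch*seq_len, embed_dim]` comment in `MoeRouter.forward`, and then reshapes the routing weights and indices back to per-token form. Below is a minimal, self-contained sketch of that flatten → route → reshape pattern; `TinyRouter` is a hypothetical stand-in for the package's `MoeRouter`, and all sizes are illustrative.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyRouter(nn.Module):
    """Hypothetical stand-in for MoeRouter: expects flat [batch*seq_len, embed_dim] input."""
    def __init__(self, embed_dim: int, num_experts: int, top_k: int):
        super().__init__()
        self.gate = nn.Linear(embed_dim, num_experts)
        self.top_k = top_k

    def forward(self, x: torch.Tensor):
        probs = F.softmax(self.gate(x), dim=-1)
        return probs.topk(self.top_k, dim=-1)   # (weights, indices), each [batch*seq_len, top_k]

B, S, D = 2, 4, 16                              # illustrative sizes
key = torch.randn(B, S, D)
router = TinyRouter(D, num_experts=8, top_k=2)

key_flat = key.view(-1, D)                      # [B*S, D] - the shape the router expects
weights_flat, indices_flat = router(key_flat)   # [B*S, top_k] each
weights_k = weights_flat.view(B, S, -1)         # back to per-token form: [B, S, top_k]
indices_k = indices_flat.view(B, S, -1)
```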
@@ -210,8 +215,13 @@ class DeepMoeAttention(GroupedMoeAttention):

  def _forward_qkv(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, b: int, t: int, d: int, skip_query_processing: bool = False):
  # Query processing
- B, T, _ = query.shape
- weights_q, indices_q = self.query_router(query)
+ B, T, D = query.shape
+ # Flatten for query routing
+ query_flat = query.view(B * T, D)
+ weights_q_flat, indices_q_flat = self.query_router(query_flat)
+ # Reshape back
+ weights_q = weights_q_flat.view(B, T, -1)
+ indices_q = indices_q_flat.view(B, T, -1)
  q = self._process_grouped_experts(query, self.wq, self.bq, weights_q, indices_q)
  q = q.permute(0, 2, 1, 3).reshape(B, self.num_query_groups, T, -1)

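The `DeepMoeAttention` hunk applies the same flatten → route → reshape pattern to queries via `query_router` (see the sketch above); the per-token weights and indices are then consumed by `_process_grouped_experts` before the grouped permute/reshape.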
@@ -7,7 +7,6 @@ from ..transformers.attention import init_attention
  from ..transformers.layers import ClassicTransformerLayer
  from ..transformers.models import ClassicTransformerDecoder
  from ..transformers.ff import get_activation_layer
- from ..memory.stm import ShortTermMemory
  from ..utils import get_model_size
  from .attention import init_moe_attention

rxnn/transformers/moe.py CHANGED
@@ -16,22 +16,26 @@ class MoeRouter(nn.Module):

  def calculate_aux_loss(self, top_k_indices: torch.Tensor, probs: torch.Tensor) -> torch.Tensor:
  expert_mask = F.one_hot(top_k_indices, self.num_experts).float()
+ print('expert mask: ', expert_mask.shape)
  expert_usage = expert_mask.sum(dim=0).mean(dim=0)
+ print('expert usage: ', expert_usage.shape)
  mean_probs = probs.mean(dim=0)
+ print('mean probs: ', mean_probs.shape)
  return (expert_usage * mean_probs).sum() * self.num_experts


  def forward(self, x: torch.Tensor):
  # Input shape: [batch*seq_len, embed_dim]
  logits = self.gate(x)
+ print('router logits: ', logits.shape)
  probs = F.softmax(logits, dim=-1)
-
+ print('router probs: ', probs.shape)
  # Get top-k experts for each token
  top_k_weights, top_k_indices = probs.topk(self.top_k, dim=-1)

  # Normalize weights (sum to 1 for each token)
  top_k_weights = top_k_weights / (top_k_weights.sum(dim=-1, keepdim=True) + 1e-9)
-
+ print('top k: ', top_k_weights.shape, top_k_indices.shape)
  # Load Balance Loss
  self.aux_loss = self.calculate_aux_loss(top_k_indices, probs)

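For reference, here is a standalone shape trace of the load-balancing auxiliary loss computed in `calculate_aux_loss` above. The tensor names mirror the diff; `N`, `num_experts`, and `top_k` are illustrative values, not the package's defaults.

```python
import torch
import torch.nn.functional as F

N, num_experts, top_k = 6, 4, 2                               # illustrative sizes
probs = torch.rand(N, num_experts)
probs = probs / probs.sum(dim=-1, keepdim=True)               # router probs: [N, num_experts]
top_k_weights, top_k_indices = probs.topk(top_k, dim=-1)      # [N, top_k] each

expert_mask = F.one_hot(top_k_indices, num_experts).float()   # [N, top_k, num_experts]
expert_usage = expert_mask.sum(dim=0).mean(dim=0)             # [num_experts]: selections per expert, averaged over top-k slots
mean_probs = probs.mean(dim=0)                                # [num_experts]: average routing probability per expert
aux_loss = (expert_usage * mean_probs).sum() * num_experts    # scalar; grows when a few experts dominate the routing
```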
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: rxnn
- Version: 0.1.16
+ Version: 0.1.18
  Summary: RxNN: Reactive Neural Networks Platform
  License: Apache-2.0
  Keywords: deep-learning,ai,machine-learning
@@ -53,6 +53,29 @@ that's generating Infinite Chain-of-Thoughts and is communicating in push-based
  Reactive communication patterns in RxNN models are adapted to handle the asynchronous nature of the model - after it finishes generating
  a sequence, it has to process it and save it in memory, but this can be done in the background.

+ ## Release plan
+ We are working on three new reactive architectures that progressively advance from language models to awareness models:
+ - Reactive Transformer: Reactive Language Model (RLM) with Short-Term Memory
+ - Preactor: extending Reactive Transformer with additional Long-Term Memory, providing theoretically infinite context (only
+ the length of a single message is limited) and the ability to learn from interactions (Live Learning)
+ - Reactor: AGI awareness model & Strong Reactive Neural Network that works in an infinite reasoning loop and doesn't require explicit human commands
+
+ Each new architecture is based on the previous one and adds new features/abilities. They will be progressively
+ released with the next versions of the **RxNN** framework:
+ - 0.1.x: Reactive Transformer base models, Base Model Learning (pre-training/fine-tuning) & Transformers extensions (MoE Attention, Short-Term Memory, etc.)
+ - 0.2.x: Memory Reinforcement Learning (MRL) for Short-Term Memory & Reactive Transformer, Attention-based Memory System details
+ - 0.3.x: Reinforcement Learning from Human Feedback for Reactive models (RxRLHF), basic Tensor Reactive
+ Extensions (TRX/Rust) for full Reactive Transformer, RxT-Alpha release (+following models - RxT-Beta, etc.)
+ - 0.4.x: Preactor base models, Tensor Database (TDB/Rust) for Long-Term Memory, mxRAG/revRAG subsystems
+ - 0.5.x: MRL for Long-Term Memory & Preactor, Live Learning for Preactor, PRx-Alpha release (+following models - PRx-Beta, etc.)
+ - 0.6.x: Reactor base models, TRX full implementation, Receptors & Effectors Reactive RNNs
+ - 0.7.x: Behavioral Reinforcement Learning (BRL) for Reactor's Infinite Chain-of-Thoughts, Continuous Live Learning for Reactor
+ - 0.8.x: Rx-Alpha release
+ - 0.9.x: Rx-Beta release
+ - 1.0.0: Reactor AGI official release (Expert, Assistant & Utility class models)
+ - 1.x.x: Multimodal reactive models (could be released earlier, depending on progress)
+ - 2.0.0: Real-Time Vision Reactor - Worker class models
+ - x.x.x: ...and more!
  Apache License
  Version 2.0, January 2004
  http://www.apache.org/licenses/
@@ -1,7 +1,7 @@
  rxnn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  rxnn/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- rxnn/experimental/attention.py,sha256=qly-Lf9UsYC9JB945JcLnt27ZbF0vFvfyS5iUm-Rsak,31644
- rxnn/experimental/models.py,sha256=ioYtbJDxJ4zASiKs9dFY4WvAJn7eVqFf7zid-65pbUU,4709
+ rxnn/experimental/attention.py,sha256=Ix47gusaRtaRDLf87TEEoS3Jfx4O_b6l_2AzKdiqdFo,32100
+ rxnn/experimental/models.py,sha256=-XkEHsyT8iNAjhZbgC7N_5nzP4ENVJLwxSoLHgMfA0I,4668
  rxnn/experimental/moe.py,sha256=PhiaNr3FwR2Zv2a0tfj6sfZ4iyhLo3Jyp2DwXq19qZQ,7935
  rxnn/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  rxnn/memory/norm.py,sha256=Ofl8Q5NYEF9GQeO0bhM43tkTW91J0y6TSvTAOYMgloM,6278
@@ -21,11 +21,11 @@ rxnn/transformers/ff.py,sha256=jJnuBDsnnX5uYC_WZH8cXAYrMnz0P-iX7MwcPivjRtI,2533
  rxnn/transformers/layers.py,sha256=HhIiykmrBgdsV4AbMQXr9t0cSo4gSIeN0dPtc8mDyOo,5629
  rxnn/transformers/mask.py,sha256=J0cfLVLt3SzS2ra3KcY4khrkhI975Dw4CjpUi3Sn25s,419
  rxnn/transformers/models.py,sha256=w-zB_8QB9-Fae-GkGgmVDNY-Ts_0gBeWcevpl9qzZVM,7169
- rxnn/transformers/moe.py,sha256=FeaQR7hTX1dE74YdMOcuyZHSkGiV_0JwF8fw-GnfNOQ,4741
+ rxnn/transformers/moe.py,sha256=gJ-jXKtc01xcBayaYchRZy7imFGnvwVfUflXvFiKjKU,5048
  rxnn/transformers/positional.py,sha256=2l38RS0Dini3f6Z3LUHr3XwWzg1UK7fO2C6wazWDAYU,4292
  rxnn/transformers/sampler.py,sha256=poWBpxg1iuK5gEJtxHkk5VVfS9V48hs2Olqdhy_Gw8c,6548
  rxnn/utils.py,sha256=d5U8i5ukovgDyqiycc2AoxObTz_eF_bgo2MKvdtJ98s,467
- rxnn-0.1.16.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
- rxnn-0.1.16.dist-info/METADATA,sha256=Cr_8OPHWlf2LHYlZEmc_NaUkIiE3ShJ01Z5B5ZhI6G8,14629
- rxnn-0.1.16.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
- rxnn-0.1.16.dist-info/RECORD,,
+ rxnn-0.1.18.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+ rxnn-0.1.18.dist-info/METADATA,sha256=vgk9Zbsmrg8PqnUNDSGTyNIbFWmSq3pRMoQqmR0k704,16627
+ rxnn-0.1.18.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+ rxnn-0.1.18.dist-info/RECORD,,