rxnn 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -116,8 +116,12 @@ class GroupedMoeAttention(GroupedQueryAttention):
  q = self.q_proj(query).view(b, t, self.num_heads, -1).transpose(1, 2) if not skip_query_processing else query

  # Key/Value processing
- B, S, _ = key.shape
- weights_k, indices_k = self.router(key)
+ B, S, D = key.shape
+ key_flat = key.view(-1, D)
+ weights_k_flat, indices_k_flat = self.router(key_flat)
+ # Reshape back to original dimensions
+ weights_k = weights_k_flat.view(B, S, -1)
+ indices_k = indices_k_flat.view(B, S, -1)
  k = self._process_grouped_experts(key, self.wk, self.bk, weights_k, indices_k)
  v = self._process_grouped_experts(value, self.wv, self.bv, weights_k, indices_k)

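The key/value change above routes tokens through the MoE router on a flattened (B*S, D) view and then restores a (B, S, top_k) layout before the grouped-expert projections. A minimal, self-contained sketch of that flatten → route → reshape pattern (the `toy_router`, `gate`, and shape values below are illustrative assumptions, not the actual RxNN router):

```python
import torch
import torch.nn as nn

# Hypothetical router: a top-k gate that expects 2-D input (num_tokens, dim).
num_experts, top_k, dim = 8, 2, 64
gate = nn.Linear(dim, num_experts)

def toy_router(x_flat: torch.Tensor):
    # x_flat: (num_tokens, dim) -> per-token expert weights and indices
    probs = torch.softmax(gate(x_flat), dim=-1)   # (num_tokens, num_experts)
    weights, indices = torch.topk(probs, top_k, dim=-1)
    return weights, indices                       # each (num_tokens, top_k)

B, S = 2, 10
key = torch.randn(B, S, dim)                      # (batch, seq_len, dim)
key_flat = key.view(-1, dim)                      # flatten to (B*S, dim), as in the new code
weights_k_flat, indices_k_flat = toy_router(key_flat)
# Reshape back so downstream expert processing sees per-batch, per-token routing
weights_k = weights_k_flat.view(B, S, -1)         # (B, S, top_k)
indices_k = indices_k_flat.view(B, S, -1)         # (B, S, top_k)
print(weights_k.shape, indices_k.shape)           # torch.Size([2, 10, 2]) torch.Size([2, 10, 2])
```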
@@ -210,8 +214,13 @@ class DeepMoeAttention(GroupedMoeAttention):

  def _forward_qkv(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, b: int, t: int, d: int, skip_query_processing: bool = False):
  # Query processing
- B, T, _ = query.shape
- weights_q, indices_q = self.query_router(query)
+ B, T, D = query.shape
+ # Flatten for query routing
+ query_flat = query.view(B * T, D)
+ weights_q_flat, indices_q_flat = self.query_router(query_flat)
+ # Reshape back
+ weights_q = weights_q_flat.view(B, T, -1)
+ indices_q = indices_q_flat.view(B, T, -1)
  q = self._process_grouped_experts(query, self.wq, self.bq, weights_q, indices_q)
  q = q.permute(0, 2, 1, 3).reshape(B, self.num_query_groups, T, -1)

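The query path applies the same fix: queries are flattened to (B*T, D) for routing, the router outputs are reshaped to (B, T, -1), and the grouped-expert output is then rearranged for attention. A small sketch of that final permute/reshape step, assuming purely for illustration that `_process_grouped_experts` returns a (B, T, num_query_groups, head_dim) tensor:

```python
import torch

# Hypothetical shapes for illustration only.
B, T, G, head_dim = 2, 5, 4, 16
q = torch.randn(B, T, G, head_dim)       # assumed grouped-expert output layout
# Move the group axis in front of the sequence axis, as in the diff above.
q = q.permute(0, 2, 1, 3).reshape(B, G, T, -1)
print(q.shape)                            # torch.Size([2, 4, 5, 16])
```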
@@ -7,7 +7,6 @@ from ..transformers.attention import init_attention
  from ..transformers.layers import ClassicTransformerLayer
  from ..transformers.models import ClassicTransformerDecoder
  from ..transformers.ff import get_activation_layer
- from ..memory.stm import ShortTermMemory
  from ..utils import get_model_size
  from .attention import init_moe_attention

@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: rxnn
- Version: 0.1.16
+ Version: 0.1.17
  Summary: RxNN: Reactive Neural Networks Platform
  License: Apache-2.0
  Keywords: deep-learning,ai,machine-learning
@@ -53,6 +53,29 @@ that's generating Infinite Chain-of-Thoughts and is communicating in push-based
  Reactive communication patterns in RxNN models are adapted to handle asynchronous nature of model - after it finish generating
  sequence, it has to process it and save it in memory, but it could be done in background.

+ ## Release plan
+ We are working on three new reactive architectures, that progressively advance from language models to awareness models:
+ - Reactive Transformer: Reactive Language Model (RLM) with Short-Term Memory
+ - Preactor: extending Reactive Transformer with additional Long-Term Memory, providing theoretically infinite context (only
+ single message length is limited) and the ability to learn from interactions (Live Learning)
+ - Reactor: AGI awareness model & Strong Reactive Neural Network, that's working in infinite reasoning loop and doesn't require explicit human commands
+
+ Each new architecture is based on the previous one and adding new features/abilities. They will be progressively
+ released with next versions of **RxNN** framework:
+ - 0.1.x: Reactive Transformer base models, Base Model Learning (pre-training/fine-tuning) & Transformers extensions (MoE Attention, Short-Term Memory, etc.)
+ - 0.2.x: Memory Reinforcement Learning (MRL) for Short-Term Memory & Reactive Transformer, Attention-based Memory System details
+ - 0.3.x: Reinforcement Learning from Human Feedback for Reactive models (RxRLHF), basic Tensor Reactive
+ Extensions (TRX/Rust) for full Reactive Transformer, RxT-Alpha release (+following models - RxT-Beta, etc.)
+ - 0.4.x: Preactor base models, Tensor Database (TDB/Rust) for Long-Term Memory, mxRAG/revRAG subsystems
+ - 0.5.x: MRL for Long-Term Memory & Preactor, Live Learning for Preactor, PRx-Alpha release (+following models - PRx-Beta, etc.)
+ - 0.6.x: Reactor base models, TRX full implementation, Receptors & Effectors Reactive RNNs
+ - 0.7.x: Behavioral Reinforcement Learning (BRL) for Reactor's Infinite Chain-of-Thoughts, Continuous Live Learning for Reactor
+ - 0.8.x: Rx-Alpha release
+ - 0.9.x: Rx-Beta release
+ - 1.0.0: Reactor AGI official release (Expert, Assistant & Utility class models)
+ - 1.x.x: Multimodal reactive models (could be released earlier, depending on progress)
+ - 2.0.0: Real-Time Vision Reactor - Worker class models
+ - x.x.x: ...and more!
  Apache License
  Version 2.0, January 2004
  http://www.apache.org/licenses/
@@ -1,7 +1,7 @@
  rxnn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  rxnn/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- rxnn/experimental/attention.py,sha256=qly-Lf9UsYC9JB945JcLnt27ZbF0vFvfyS5iUm-Rsak,31644
- rxnn/experimental/models.py,sha256=ioYtbJDxJ4zASiKs9dFY4WvAJn7eVqFf7zid-65pbUU,4709
+ rxnn/experimental/attention.py,sha256=wjHrxfov3Ybg3iou8FlQtFvxNuHdcs_A7a6FTloosgA,32056
+ rxnn/experimental/models.py,sha256=-XkEHsyT8iNAjhZbgC7N_5nzP4ENVJLwxSoLHgMfA0I,4668
  rxnn/experimental/moe.py,sha256=PhiaNr3FwR2Zv2a0tfj6sfZ4iyhLo3Jyp2DwXq19qZQ,7935
  rxnn/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  rxnn/memory/norm.py,sha256=Ofl8Q5NYEF9GQeO0bhM43tkTW91J0y6TSvTAOYMgloM,6278
@@ -25,7 +25,7 @@ rxnn/transformers/moe.py,sha256=FeaQR7hTX1dE74YdMOcuyZHSkGiV_0JwF8fw-GnfNOQ,4741
  rxnn/transformers/positional.py,sha256=2l38RS0Dini3f6Z3LUHr3XwWzg1UK7fO2C6wazWDAYU,4292
  rxnn/transformers/sampler.py,sha256=poWBpxg1iuK5gEJtxHkk5VVfS9V48hs2Olqdhy_Gw8c,6548
  rxnn/utils.py,sha256=d5U8i5ukovgDyqiycc2AoxObTz_eF_bgo2MKvdtJ98s,467
- rxnn-0.1.16.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
- rxnn-0.1.16.dist-info/METADATA,sha256=Cr_8OPHWlf2LHYlZEmc_NaUkIiE3ShJ01Z5B5ZhI6G8,14629
- rxnn-0.1.16.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
- rxnn-0.1.16.dist-info/RECORD,,
+ rxnn-0.1.17.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+ rxnn-0.1.17.dist-info/METADATA,sha256=wId6o7JCcBjRD1plWzgJRmFAY5VlHN7-FIVySeVDqx8,16627
+ rxnn-0.1.17.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+ rxnn-0.1.17.dist-info/RECORD,,