rxnn 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxnn/experimental/attention.py +13 -4
- rxnn/experimental/models.py +0 -1
- {rxnn-0.1.16.dist-info → rxnn-0.1.17.dist-info}/METADATA +24 -1
- {rxnn-0.1.16.dist-info → rxnn-0.1.17.dist-info}/RECORD +6 -6
- {rxnn-0.1.16.dist-info → rxnn-0.1.17.dist-info}/LICENSE +0 -0
- {rxnn-0.1.16.dist-info → rxnn-0.1.17.dist-info}/WHEEL +0 -0
rxnn/experimental/attention.py
CHANGED
@@ -116,8 +116,12 @@ class GroupedMoeAttention(GroupedQueryAttention):
         q = self.q_proj(query).view(b, t, self.num_heads, -1).transpose(1, 2) if not skip_query_processing else query

         # Key/Value processing
-        B, S,
-
+        B, S, D = key.shape
+        key_flat = key.view(-1, D)
+        weights_k_flat, indices_k_flat = self.router(key_flat)
+        # Reshape back to original dimensions
+        weights_k = weights_k_flat.view(B, S, -1)
+        indices_k = indices_k_flat.view(B, S, -1)
         k = self._process_grouped_experts(key, self.wk, self.bk, weights_k, indices_k)
         v = self._process_grouped_experts(value, self.wv, self.bv, weights_k, indices_k)

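The added block is the standard per-token MoE routing pattern: flatten the (B, S, D) key tensor to (B*S, D), run the router once over all tokens, then reshape the routing weights and expert indices back to (B, S, top_k). A minimal, self-contained sketch of that pattern, assuming a hypothetical `TopKRouter` in place of the package's actual router class:

```python
import torch
import torch.nn as nn

class TopKRouter(nn.Module):
    """Hypothetical stand-in for the package's router: scores each token
    against num_experts gates and keeps the top_k experts per token."""
    def __init__(self, dim: int, num_experts: int, top_k: int):
        super().__init__()
        self.gate = nn.Linear(dim, num_experts)
        self.top_k = top_k

    def forward(self, x: torch.Tensor):
        # x: (num_tokens, dim) -> per-token expert probabilities
        probs = self.gate(x).softmax(dim=-1)
        weights, indices = torch.topk(probs, self.top_k, dim=-1)
        return weights, indices  # both: (num_tokens, top_k)

B, S, D = 2, 16, 64
key = torch.randn(B, S, D)
router = TopKRouter(D, num_experts=8, top_k=2)

# Same flatten -> route -> reshape sequence as in the diff above:
key_flat = key.view(-1, D)                 # (B*S, D)
weights_k_flat, indices_k_flat = router(key_flat)
weights_k = weights_k_flat.view(B, S, -1)  # (B, S, top_k)
indices_k = indices_k_flat.view(B, S, -1)  # (B, S, top_k)
```

Routing on the flattened tensor keeps the router a simple (num_tokens, dim) → (num_tokens, top_k) map, so the same module works for any batch and sequence shape.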
@@ -210,8 +214,13 @@ class DeepMoeAttention(GroupedMoeAttention):

     def _forward_qkv(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, b: int, t: int, d: int, skip_query_processing: bool = False):
         # Query processing
-        B, T,
-
+        B, T, D = query.shape
+        # Flatten for query routing
+        query_flat = query.view(B * T, D)
+        weights_q_flat, indices_q_flat = self.query_router(query_flat)
+        # Reshape back
+        weights_q = weights_q_flat.view(B, T, -1)
+        indices_q = indices_q_flat.view(B, T, -1)
         q = self._process_grouped_experts(query, self.wq, self.bq, weights_q, indices_q)
         q = q.permute(0, 2, 1, 3).reshape(B, self.num_query_groups, T, -1)

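The query-side change mirrors the key/value path: flatten to (B*T, D), route, reshape. The routed `weights_q`/`indices_q` then feed `_process_grouped_experts`, whose body is not shown in this diff. As an illustration only, here is a sketch of what such a routine typically does (gather each token's selected expert projection matrices and mix their outputs by the router weights); `apply_routed_experts`, `expert_w`, and all shapes below are assumptions for the sketch, not the package's actual API:

```python
import torch

def apply_routed_experts(x, expert_w, weights, indices):
    """Illustrative stand-in for _process_grouped_experts (assumed, not
    taken from the package). x: (B, T, D); expert_w: (E, D, H) expert
    projection matrices; weights/indices: (B, T, K) from the router."""
    # Gather each token's K selected expert matrices: (B, T, K, D, H)
    w = expert_w[indices]
    # Project every token with its K experts: (B, T, K, H)
    projected = torch.einsum('btd,btkdh->btkh', x, w)
    # Mix the K expert outputs by the router weights: (B, T, H)
    return (projected * weights.unsqueeze(-1)).sum(dim=2)

B, T, D, E, H, K = 2, 8, 64, 8, 32, 2
x = torch.randn(B, T, D)
expert_w = torch.randn(E, D, H)
weights = torch.softmax(torch.randn(B, T, K), dim=-1)
indices = torch.randint(0, E, (B, T, K))
out = apply_routed_experts(x, expert_w, weights, indices)  # (B, T, H)
```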
rxnn/experimental/models.py
CHANGED
@@ -7,7 +7,6 @@ from ..transformers.attention import init_attention
 from ..transformers.layers import ClassicTransformerLayer
 from ..transformers.models import ClassicTransformerDecoder
 from ..transformers.ff import get_activation_layer
-from ..memory.stm import ShortTermMemory
 from ..utils import get_model_size
 from .attention import init_moe_attention

{rxnn-0.1.16.dist-info → rxnn-0.1.17.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rxnn
-Version: 0.1.16
+Version: 0.1.17
 Summary: RxNN: Reactive Neural Networks Platform
 License: Apache-2.0
 Keywords: deep-learning,ai,machine-learning
@@ -53,6 +53,29 @@ that's generating Infinite Chain-of-Thoughts and is communicating in push-based
 Reactive communication patterns in RxNN models are adapted to handle the asynchronous nature of the model - after it finishes generating
 a sequence, it has to process it and save it in memory, but this can be done in the background.

+## Release plan
+We are working on three new reactive architectures that progressively advance from language models to awareness models:
+- Reactive Transformer: Reactive Language Model (RLM) with Short-Term Memory
+- Preactor: extends Reactive Transformer with an additional Long-Term Memory, providing theoretically infinite context (only
+  the single message length is limited) and the ability to learn from interactions (Live Learning)
+- Reactor: AGI awareness model & Strong Reactive Neural Network that works in an infinite reasoning loop and doesn't require explicit human commands
+
+Each new architecture builds on the previous one and adds new features/abilities. They will be progressively
+released with the next versions of the **RxNN** framework:
+- 0.1.x: Reactive Transformer base models, Base Model Learning (pre-training/fine-tuning) & Transformers extensions (MoE Attention, Short-Term Memory, etc.)
+- 0.2.x: Memory Reinforcement Learning (MRL) for Short-Term Memory & Reactive Transformer, Attention-based Memory System details
+- 0.3.x: Reinforcement Learning from Human Feedback for Reactive models (RxRLHF), basic Tensor Reactive
+  Extensions (TRX/Rust) for the full Reactive Transformer, RxT-Alpha release (+ following models - RxT-Beta, etc.)
+- 0.4.x: Preactor base models, Tensor Database (TDB/Rust) for Long-Term Memory, mxRAG/revRAG subsystems
+- 0.5.x: MRL for Long-Term Memory & Preactor, Live Learning for Preactor, PRx-Alpha release (+ following models - PRx-Beta, etc.)
+- 0.6.x: Reactor base models, TRX full implementation, Receptors & Effectors Reactive RNNs
+- 0.7.x: Behavioral Reinforcement Learning (BRL) for Reactor's Infinite Chain-of-Thoughts, Continuous Live Learning for Reactor
+- 0.8.x: Rx-Alpha release
+- 0.9.x: Rx-Beta release
+- 1.0.0: Reactor AGI official release (Expert, Assistant & Utility class models)
+- 1.x.x: Multimodal reactive models (could be released earlier, depending on progress)
+- 2.0.0: Real-Time Vision Reactor - Worker class models
+- x.x.x: ...and more!
 Apache License
 Version 2.0, January 2004
 http://www.apache.org/licenses/
{rxnn-0.1.16.dist-info → rxnn-0.1.17.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
 rxnn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rxnn/experimental/attention.py,sha256=
-rxnn/experimental/models.py,sha256
+rxnn/experimental/attention.py,sha256=wjHrxfov3Ybg3iou8FlQtFvxNuHdcs_A7a6FTloosgA,32056
+rxnn/experimental/models.py,sha256=-XkEHsyT8iNAjhZbgC7N_5nzP4ENVJLwxSoLHgMfA0I,4668
 rxnn/experimental/moe.py,sha256=PhiaNr3FwR2Zv2a0tfj6sfZ4iyhLo3Jyp2DwXq19qZQ,7935
 rxnn/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/memory/norm.py,sha256=Ofl8Q5NYEF9GQeO0bhM43tkTW91J0y6TSvTAOYMgloM,6278
@@ -25,7 +25,7 @@ rxnn/transformers/moe.py,sha256=FeaQR7hTX1dE74YdMOcuyZHSkGiV_0JwF8fw-GnfNOQ,4741
 rxnn/transformers/positional.py,sha256=2l38RS0Dini3f6Z3LUHr3XwWzg1UK7fO2C6wazWDAYU,4292
 rxnn/transformers/sampler.py,sha256=poWBpxg1iuK5gEJtxHkk5VVfS9V48hs2Olqdhy_Gw8c,6548
 rxnn/utils.py,sha256=d5U8i5ukovgDyqiycc2AoxObTz_eF_bgo2MKvdtJ98s,467
-rxnn-0.1.
-rxnn-0.1.
-rxnn-0.1.
-rxnn-0.1.
+rxnn-0.1.17.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+rxnn-0.1.17.dist-info/METADATA,sha256=wId6o7JCcBjRD1plWzgJRmFAY5VlHN7-FIVySeVDqx8,16627
+rxnn-0.1.17.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+rxnn-0.1.17.dist-info/RECORD,,
File without changes
|
File without changes
|