rxnn 0.1.20__py3-none-any.whl → 0.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -125,6 +125,9 @@ class GroupedMoeAttention(GroupedQueryAttention):
125
125
  k = self._process_grouped_experts(key, self.wk, self.bk, weights_k, indices_k)
126
126
  v = self._process_grouped_experts(value, self.wv, self.bv, weights_k, indices_k)
127
127
 
128
+ print('processed k', k.size())
129
+ print('processed v', v.size())
130
+
128
131
  # Expand to GQA format
129
132
  k = k.permute(0, 2, 1, 3).reshape(B, self.num_groups, S, -1)
130
133
  v = v.permute(0, 2, 1, 3).reshape(B, self.num_groups, S, -1)
@@ -138,6 +141,10 @@ class GroupedMoeAttention(GroupedQueryAttention):
138
141
  k = k.flatten(start_dim=1, end_dim=2) # (B, H, S, head_dim)
139
142
  v = v.flatten(start_dim=1, end_dim=2) # (B, H, S, head_dim)
140
143
 
144
+ print('q', q.size())
145
+ print('k', k.size())
146
+ print('v', v.size())
147
+
141
148
  return q, k, v
142
149
 
143
150
 
@@ -222,11 +229,12 @@ class DeepMoeAttention(GroupedMoeAttention):
222
229
  weights_q = weights_q_flat.view(B, T, -1)
223
230
  indices_q = indices_q_flat.view(B, T, -1)
224
231
  q = self._process_grouped_experts(query, self.wq, self.bq, weights_q, indices_q)
232
+ print('processed q', q.size())
225
233
  q = q.permute(0, 2, 1, 3).reshape(B, self.num_query_groups, T, -1)
226
234
 
227
235
  # Expand query groups to match head count
228
236
  group_heads = self.num_heads // self.num_query_groups
229
- q = q.unsqueeze(2).expand(-1, -1, group_heads, -1, -1).flatten(1, 2).transpose(1, 2)
237
+ q = q.unsqueeze(2).expand(-1, -1, group_heads, -1, -1).flatten(1, 2)
230
238
 
231
239
  # Key/Value processing
232
240
  return super()._forward_qkv(q, key, value, b, t, d, skip_query_processing=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: rxnn
3
- Version: 0.1.20
3
+ Version: 0.1.22
4
4
  Summary: RxNN: Reactive Neural Networks Platform
5
5
  License: Apache-2.0
6
6
  Keywords: deep-learning,ai,machine-learning
@@ -1,6 +1,6 @@
1
1
  rxnn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  rxnn/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- rxnn/experimental/attention.py,sha256=_8dgNPxZRmplZb_k86ejZsCxhUx60mOtB8M8ZcnrTpI,32173
3
+ rxnn/experimental/attention.py,sha256=mvTrDiXRTeJsubWYRAJQjmw9nDhfMkQkQoXaAjeUzDo,32363
4
4
  rxnn/experimental/models.py,sha256=-XkEHsyT8iNAjhZbgC7N_5nzP4ENVJLwxSoLHgMfA0I,4668
5
5
  rxnn/experimental/moe.py,sha256=PhiaNr3FwR2Zv2a0tfj6sfZ4iyhLo3Jyp2DwXq19qZQ,7935
6
6
  rxnn/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,7 +25,7 @@ rxnn/transformers/moe.py,sha256=msspVdefdt2ekIN8aT-V8DolK4taESQL_NVsSGOepIs,4739
25
25
  rxnn/transformers/positional.py,sha256=2l38RS0Dini3f6Z3LUHr3XwWzg1UK7fO2C6wazWDAYU,4292
26
26
  rxnn/transformers/sampler.py,sha256=poWBpxg1iuK5gEJtxHkk5VVfS9V48hs2Olqdhy_Gw8c,6548
27
27
  rxnn/utils.py,sha256=d5U8i5ukovgDyqiycc2AoxObTz_eF_bgo2MKvdtJ98s,467
28
- rxnn-0.1.20.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
29
- rxnn-0.1.20.dist-info/METADATA,sha256=lEI864O9VwZMOqxSmf2a4IPpiXIhq9SANZGrRlLJxYc,16627
30
- rxnn-0.1.20.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
31
- rxnn-0.1.20.dist-info/RECORD,,
28
+ rxnn-0.1.22.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
29
+ rxnn-0.1.22.dist-info/METADATA,sha256=7yZS7KjHOdh6FNOloKZIDPDWGYAassqQwZtMi1iL5mI,16627
30
+ rxnn-0.1.22.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
31
+ rxnn-0.1.22.dist-info/RECORD,,
File without changes
File without changes