rxnn 0.1.44__tar.gz → 0.1.46__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {rxnn-0.1.44 → rxnn-0.1.46}/PKG-INFO +1 -1
  2. {rxnn-0.1.44 → rxnn-0.1.46}/pyproject.toml +1 -1
  3. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/experimental/attention.py +5 -7
  4. {rxnn-0.1.44 → rxnn-0.1.46}/LICENSE +0 -0
  5. {rxnn-0.1.44 → rxnn-0.1.46}/README.md +0 -0
  6. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/__init__.py +0 -0
  7. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/experimental/__init__.py +0 -0
  8. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/experimental/models.py +0 -0
  9. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/experimental/moe.py +0 -0
  10. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/memory/__init__.py +0 -0
  11. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/memory/norm.py +0 -0
  12. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/memory/stm.py +0 -0
  13. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/rxt/__init__.py +0 -0
  14. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/rxt/models.py +0 -0
  15. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/training/__init__.py +0 -0
  16. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/training/base.py +0 -0
  17. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/training/bml.py +0 -0
  18. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/training/callbacks.py +0 -0
  19. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/training/dataset.py +0 -0
  20. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/training/scheduler.py +0 -0
  21. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/training/tokenizer.py +0 -0
  22. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/transformers/__init__.py +0 -0
  23. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/transformers/attention.py +0 -0
  24. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/transformers/ff.py +0 -0
  25. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/transformers/layers.py +0 -0
  26. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/transformers/mask.py +0 -0
  27. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/transformers/models.py +0 -0
  28. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/transformers/moe.py +0 -0
  29. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/transformers/positional.py +0 -0
  30. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/transformers/sampler.py +0 -0
  31. {rxnn-0.1.44 → rxnn-0.1.46}/src/rxnn/utils.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rxnn
-Version: 0.1.44
+Version: 0.1.46
 Summary: RxNN: Reactive Neural Networks Platform
 License: Apache-2.0
 Keywords: deep-learning,ai,machine-learning
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "rxnn"
-version = "0.1.44"
+version = "0.1.46"
 description = "RxNN: Reactive Neural Networks Platform"
 
 license = "Apache-2.0"
@@ -95,16 +95,15 @@ class GroupedMoeAttention(GroupedQueryAttention):
 key_flat = key.reshape(-1, D)
 weights, indices = self.router(key_flat) # (B*S, num_groups), (B*S, num_groups)
 weights = weights.view(B, S, self.num_groups, 1)
-indices = indices.view(B, S, self.num_groups)
+indices = indices.view(B, S, self.num_groups).unsqueeze(-1).transpose(1, 2).expand(-1, -1, S, -1)
 
 # Compute all experts' projections
 k_all = self.k_proj(key_flat).view(B, S, self.num_experts, -1).permute(0, 2, 1, 3) # [B, num_experts, S, head_dim]
 v_all = self.v_proj(value).view(B, S, self.num_experts, -1).permute(0, 2, 1, 3) # [B, num_experts, S, head_dim]
 
 # Gather top-k experts using expanded indices
-expanded_indices = indices.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, S, -1) # [B, num_groups, S, head_dim]
-selected_k = torch.gather(k_all, 1, expanded_indices) # [B, num_groups, S, head_dim]
-selected_v = torch.gather(v_all, 1, expanded_indices) # [B, num_groups, S, head_dim]
+selected_k = torch.gather(k_all, 1, indices) # [B, num_groups, S, head_dim]
+selected_v = torch.gather(v_all, 1, indices) # [B, num_groups, S, head_dim]
 
 # Weighted
 weighted_k = (selected_k * weights).to(selected_k.device, dtype=selected_k.dtype) # [B, S, num_groups, head_dim]
@@ -209,13 +208,12 @@ class DeepMoeAttention(GroupedMoeAttention):
 query_flat = query.reshape(-1, D)
 weights, indices = self.query_router(query_flat)
 weights = weights.view(B, T, self.num_query_groups, 1)
-indices = indices.view(B, T, self.num_query_groups)
+indices = indices.view(B, T, self.num_query_groups).unsqueeze(-1).transpose(1, 2).expand(-1, -1, T, -1) # [B, num_query_groups, T, head_dim]
 
 q_all = self.q_proj(query_flat).view(B, T, self.num_query_experts, -1).permute(0, 2, 1, 3) # [B, num_query_experts, T, head_dim]
 
 # Gather top-k experts using expanded indices
-expanded_indices = indices.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, T, -1) # [B, num_query_groups, T, head_dim]
-selected_q = torch.gather(q_all, 1, expanded_indices) # [B, num_query_groups, T, head_dim]
+selected_q = torch.gather(q_all, 1, indices) # [B, num_query_groups, T, head_dim]
 
 # Weighted sum
 q = (selected_q * weights).to(selected_q.device, dtype=selected_q.dtype) # [B, T, num_query_groups, head_dim]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes