rxnn 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rxnn/training/bml.py CHANGED
@@ -208,7 +208,7 @@ class AutoregressiveMoeTrainer(BaseTrainer):
208
208
 
209
209
  model = next(self.model.children()) if isinstance(self.model, DistributedDataParallel) else self.model
210
210
 
211
- router_loss = model.moe_router_loss()
211
+ router_loss = model.model.moe_router_loss()
212
212
  loss = main_loss + self.router_loss_scale * router_loss
213
213
 
214
214
  if self.writer is not None:
@@ -59,7 +59,7 @@ class ReactiveTransformerLayer(nn.Module):
59
59
  for param in self.memory_cross_attention.parameters():
60
60
  param.requires_grad_(is_trainable)
61
61
 
62
- def moe_router_loss_(self):
62
+ def moe_router_loss(self):
63
63
  return self.ff.router_loss() if self.use_moe else None
64
64
 
65
65
  def forward(self, x: torch.Tensor, stm: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
@@ -135,7 +135,7 @@ class ClassicTransformerLayer(nn.Module):
135
135
  self.use_post_norm = use_post_norm
136
136
  self.use_moe = use_moe
137
137
 
138
- def moe_router_loss_(self):
138
+ def moe_router_loss(self):
139
139
  return self.ff.router_loss() if self.use_moe else torch.tensor(0.0)
140
140
 
141
141
  def forward(self, x: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
@@ -37,7 +37,7 @@ class ReactiveTransformerBase(nn.Module):
37
37
  for i in range(self.num_own_layers):
38
38
  self.layers[i].trainable_cross_attention_(is_trainable)
39
39
 
40
- def moe_router_loss_(self):
40
+ def moe_router_loss(self):
41
41
  return torch.stack([self.layers[i].moe_router_loss() for i in range(self.num_own_layers) if self.layers[i].use_moe] + [
42
42
  self.shared_layers[i].moe_router_loss() for i in range(self.num_shared_layers) if self.shared_layers[i].use_moe]).mean()
43
43
 
@@ -123,7 +123,7 @@ class ClassicTransformerBase(nn.Module):
123
123
  self.layers = layers
124
124
  self.num_layers = len(layers) if layers else 0
125
125
 
126
- def moe_router_loss_(self):
126
+ def moe_router_loss(self):
127
127
  return torch.stack([self.layers[i].moe_router_loss() for i in range(self.num_layers) if self.layers[i].use_moe]).mean()
128
128
 
129
129
  def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: rxnn
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: RxNN: Reactive Neural Networks Platform
5
5
  License: Apache-2.0
6
6
  Keywords: deep-learning,ai,machine-learning
@@ -8,7 +8,7 @@ rxnn/rxt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  rxnn/rxt/models.py,sha256=INTFeNcqzAsjyWhNtbBHL4Tx7tYDsaQHgm72tf6u20M,6918
9
9
  rxnn/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  rxnn/training/base.py,sha256=QD8uS14jSyR5Y_8BgCaBQTKpsarerU3lyufsWsCq_6o,11227
11
- rxnn/training/bml.py,sha256=2kk9q3Buxq4wBHUQhyIAuHoBCninYX2K8hykWAJnxB0,18654
11
+ rxnn/training/bml.py,sha256=o_88ZL1YWd5gWXaBqYPK2UzSTbJaiTiw96E6z73LeOQ,18660
12
12
  rxnn/training/callbacks.py,sha256=_YfMKY_eFdc-tubhO9nYH2PXDZDQwlSI74FVOoCXpQg,22108
13
13
  rxnn/training/dataset.py,sha256=vQ5mDF3bA0HXya474n4D4iL8Mn3AEpJukgzFNVkxjGU,5106
14
14
  rxnn/training/scheduler.py,sha256=ow6oALzWjWQmHSpcJEjv6tg4g4CDMvr73TypxfcefMc,712
@@ -16,14 +16,14 @@ rxnn/training/tokenizer.py,sha256=4Y41f07uo2KPA_7bp3FCcwGKbXoS2hsckOoXUsXfQxY,80
16
16
  rxnn/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  rxnn/transformers/attention.py,sha256=FfEYE0THO73p_1eRupr2mcwfW4UbI_riIxkHfr8X_1c,14022
18
18
  rxnn/transformers/ff.py,sha256=jJnuBDsnnX5uYC_WZH8cXAYrMnz0P-iX7MwcPivjRtI,2533
19
- rxnn/transformers/layers.py,sha256=xMocHzdSu7hcC_mPE_aG3-LQg2RXgunKSxcgNXYnOeo,5631
19
+ rxnn/transformers/layers.py,sha256=HhIiykmrBgdsV4AbMQXr9t0cSo4gSIeN0dPtc8mDyOo,5629
20
20
  rxnn/transformers/mask.py,sha256=J0cfLVLt3SzS2ra3KcY4khrkhI975Dw4CjpUi3Sn25s,419
21
- rxnn/transformers/models.py,sha256=PVhiTTSQ7VTDVdOcyRf-xGNvj6oOa_2fUV2mfthcE0Y,7171
21
+ rxnn/transformers/models.py,sha256=w-zB_8QB9-Fae-GkGgmVDNY-Ts_0gBeWcevpl9qzZVM,7169
22
22
  rxnn/transformers/moe.py,sha256=v21HDEhkDr10--If0P-XBjT5C7IlQJo0wGQlpDnVWEA,5020
23
23
  rxnn/transformers/positional.py,sha256=2l38RS0Dini3f6Z3LUHr3XwWzg1UK7fO2C6wazWDAYU,4292
24
24
  rxnn/transformers/sampler.py,sha256=wSz_1wNloqtuiix5w2Mcsj5NhaO9QlY0j__TVG7wJnM,3938
25
25
  rxnn/utils.py,sha256=d5U8i5ukovgDyqiycc2AoxObTz_eF_bgo2MKvdtJ98s,467
26
- rxnn-0.1.11.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
27
- rxnn-0.1.11.dist-info/METADATA,sha256=WFoe6AqfJVI6wFZ23i3qGQ3babDlLtjIMU0htjOIikw,14629
28
- rxnn-0.1.11.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
29
- rxnn-0.1.11.dist-info/RECORD,,
26
+ rxnn-0.1.13.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
27
+ rxnn-0.1.13.dist-info/METADATA,sha256=i32JDhkCLYc2-Chhy_LMSWbuwN7gQK2LjKiNDIJCQ0U,14629
28
+ rxnn-0.1.13.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
29
+ rxnn-0.1.13.dist-info/RECORD,,
File without changes
File without changes