PyPI - rxnn - Versions diffs - 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl - Mend

rxnn 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

rxnn/training/bml.py +1 -1
rxnn/transformers/layers.py +2 -2
rxnn/transformers/models.py +2 -2
{rxnn-0.1.11.dist-info → rxnn-0.1.13.dist-info}/METADATA +1 -1
{rxnn-0.1.11.dist-info → rxnn-0.1.13.dist-info}/RECORD +7 -7
{rxnn-0.1.11.dist-info → rxnn-0.1.13.dist-info}/LICENSE +0 -0
{rxnn-0.1.11.dist-info → rxnn-0.1.13.dist-info}/WHEEL +0 -0

rxnn/training/bml.py CHANGED Viewed

@@ -208,7 +208,7 @@ class AutoregressiveMoeTrainer(BaseTrainer):
         model = next(self.model.children()) if isinstance(self.model, DistributedDataParallel) else self.model
-        router_loss = model.moe_router_loss()
+        router_loss = model.model.moe_router_loss()
         loss = main_loss + self.router_loss_scale * router_loss
         if self.writer is not None:

rxnn/transformers/layers.py CHANGED Viewed

@@ -59,7 +59,7 @@ class ReactiveTransformerLayer(nn.Module):
         for param in self.memory_cross_attention.parameters():
             param.requires_grad_(is_trainable)
-    def moe_router_loss_(self):
+    def moe_router_loss(self):
         return self.ff.router_loss() if self.use_moe else None
     def forward(self, x: torch.Tensor, stm: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
@@ -135,7 +135,7 @@ class ClassicTransformerLayer(nn.Module):
         self.use_post_norm = use_post_norm
         self.use_moe = use_moe
-    def moe_router_loss_(self):
+    def moe_router_loss(self):
         return self.ff.router_loss() if self.use_moe else torch.tensor(0.0)
     def forward(self, x: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:

rxnn/transformers/models.py CHANGED Viewed

@@ -37,7 +37,7 @@ class ReactiveTransformerBase(nn.Module):
         for i in range(self.num_own_layers):
             self.layers[i].trainable_cross_attention_(is_trainable)
-    def moe_router_loss_(self):
+    def moe_router_loss(self):
         return torch.stack([self.layers[i].moe_router_loss() for i in range(self.num_own_layers) if self.layers[i].use_moe] + [
             self.shared_layers[i].moe_router_loss() for i in range(self.num_shared_layers) if self.shared_layers[i].use_moe]).mean()
@@ -123,7 +123,7 @@ class ClassicTransformerBase(nn.Module):
         self.layers = layers
         self.num_layers = len(layers) if layers else 0
-    def moe_router_loss_(self):
+    def moe_router_loss(self):
         return torch.stack([self.layers[i].moe_router_loss() for i in range(self.num_layers) if self.layers[i].use_moe]).mean()
     def forward(self, x: torch.Tensor) -> torch.Tensor:

{rxnn-0.1.11.dist-info → rxnn-0.1.13.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rxnn
-Version: 0.1.11
+Version: 0.1.13
 Summary: RxNN: Reactive Neural Networks Platform
 License: Apache-2.0
 Keywords: deep-learning,ai,machine-learning

{rxnn-0.1.11.dist-info → rxnn-0.1.13.dist-info}/RECORD RENAMED Viewed

@@ -8,7 +8,7 @@ rxnn/rxt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/rxt/models.py,sha256=INTFeNcqzAsjyWhNtbBHL4Tx7tYDsaQHgm72tf6u20M,6918
 rxnn/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/training/base.py,sha256=QD8uS14jSyR5Y_8BgCaBQTKpsarerU3lyufsWsCq_6o,11227
-rxnn/training/bml.py,sha256=2kk9q3Buxq4wBHUQhyIAuHoBCninYX2K8hykWAJnxB0,18654
+rxnn/training/bml.py,sha256=o_88ZL1YWd5gWXaBqYPK2UzSTbJaiTiw96E6z73LeOQ,18660
 rxnn/training/callbacks.py,sha256=_YfMKY_eFdc-tubhO9nYH2PXDZDQwlSI74FVOoCXpQg,22108
 rxnn/training/dataset.py,sha256=vQ5mDF3bA0HXya474n4D4iL8Mn3AEpJukgzFNVkxjGU,5106
 rxnn/training/scheduler.py,sha256=ow6oALzWjWQmHSpcJEjv6tg4g4CDMvr73TypxfcefMc,712
@@ -16,14 +16,14 @@ rxnn/training/tokenizer.py,sha256=4Y41f07uo2KPA_7bp3FCcwGKbXoS2hsckOoXUsXfQxY,80
 rxnn/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/transformers/attention.py,sha256=FfEYE0THO73p_1eRupr2mcwfW4UbI_riIxkHfr8X_1c,14022
 rxnn/transformers/ff.py,sha256=jJnuBDsnnX5uYC_WZH8cXAYrMnz0P-iX7MwcPivjRtI,2533
-rxnn/transformers/layers.py,sha256=xMocHzdSu7hcC_mPE_aG3-LQg2RXgunKSxcgNXYnOeo,5631
+rxnn/transformers/layers.py,sha256=HhIiykmrBgdsV4AbMQXr9t0cSo4gSIeN0dPtc8mDyOo,5629
 rxnn/transformers/mask.py,sha256=J0cfLVLt3SzS2ra3KcY4khrkhI975Dw4CjpUi3Sn25s,419
-rxnn/transformers/models.py,sha256=PVhiTTSQ7VTDVdOcyRf-xGNvj6oOa_2fUV2mfthcE0Y,7171
+rxnn/transformers/models.py,sha256=w-zB_8QB9-Fae-GkGgmVDNY-Ts_0gBeWcevpl9qzZVM,7169
 rxnn/transformers/moe.py,sha256=v21HDEhkDr10--If0P-XBjT5C7IlQJo0wGQlpDnVWEA,5020
 rxnn/transformers/positional.py,sha256=2l38RS0Dini3f6Z3LUHr3XwWzg1UK7fO2C6wazWDAYU,4292
 rxnn/transformers/sampler.py,sha256=wSz_1wNloqtuiix5w2Mcsj5NhaO9QlY0j__TVG7wJnM,3938
 rxnn/utils.py,sha256=d5U8i5ukovgDyqiycc2AoxObTz_eF_bgo2MKvdtJ98s,467
-rxnn-0.1.11.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
-rxnn-0.1.11.dist-info/METADATA,sha256=WFoe6AqfJVI6wFZ23i3qGQ3babDlLtjIMU0htjOIikw,14629
-rxnn-0.1.11.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
-rxnn-0.1.11.dist-info/RECORD,,
+rxnn-0.1.13.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+rxnn-0.1.13.dist-info/METADATA,sha256=i32JDhkCLYc2-Chhy_LMSWbuwN7gQK2LjKiNDIJCQ0U,14629
+rxnn-0.1.13.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+rxnn-0.1.13.dist-info/RECORD,,

{rxnn-0.1.11.dist-info → rxnn-0.1.13.dist-info}/LICENSE RENAMED Viewed

File without changes

{rxnn-0.1.11.dist-info → rxnn-0.1.13.dist-info}/WHEEL RENAMED Viewed

File without changes

rxnn 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

rxnn 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl