PyPI - rxnn - Versions diffs - 0.2.22__py3-none-any.whl → 0.2.24__py3-none-any.whl - Mend

rxnn 0.2.22__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

rxnn/rxt/models.py +12 -4
rxnn/training/models.py +19 -10
rxnn/training/mrl.py +10 -12
rxnn/transformers/layers.py +4 -1
rxnn/transformers/models.py +3 -3
{rxnn-0.2.22.dist-info → rxnn-0.2.24.dist-info}/METADATA +1 -1
{rxnn-0.2.22.dist-info → rxnn-0.2.24.dist-info}/RECORD +9 -9
{rxnn-0.2.22.dist-info → rxnn-0.2.24.dist-info}/LICENSE +0 -0
{rxnn-0.2.22.dist-info → rxnn-0.2.24.dist-info}/WHEEL +0 -0

rxnn/rxt/models.py CHANGED Viewed

@@ -137,13 +137,13 @@ class RxTAlphaComponentBase(nn.Module, PyTorchModelHubMixin):
     def load_shared_memory(self, stm: ShortTermMemory):
         self.model.stm = stm
-    def freeze_without_memory(self):
+    def freeze_without_memory(self, unfreeze_norms: bool = True):
         for param in self.model.parameters():
             param.requires_grad_(False)
-        self.model.trainable_cross_attention_(True)
+        self.model.trainable_cross_attention_(True, with_norms=unfreeze_norms)
-    def freeze_memory(self):
-        self.model.trainable_cross_attention_(False)
+    def freeze_memory(self, with_norms: bool = True):
+        self.model.trainable_cross_attention_(False, with_norms=with_norms)
     def unfreeze_all(self):
         for param in self.model.parameters():
@@ -264,6 +264,14 @@ class RxTAlphaMemoryAttention(nn.Module, PyTorchModelHubMixin, license="apache-2
         attention_layers = nn.ModuleList([att_init() for _ in range(num_layers)])
         self.model = StmMemoryAttention(stm, attention_layers, memory_norm_layers)
+    def freeze(self):
+        for param in self.parameters():
+            param.requires_grad = False
+    def unfreeze(self):
+        for param in self.parameters():
+            param.requires_grad = True
     def load_shared_memory(self, stm: ShortTermMemory):
         self.model.stm = stm

rxnn/training/models.py CHANGED Viewed

@@ -78,24 +78,30 @@ class MrlActorModel(nn.Module):
     def freeze_components(self, stage: Literal['update', 'fetch', 'both'] = 'both'):
         """Freeze encoder/decoder except memory-related layers."""
         if self.encoder.freeze_without_memory is not None:
-            self.encoder.freeze_without_memory()
+            self.encoder.freeze_without_memory(unfreeze_norms=True)
             if stage == 'update':
-                self.encoder.freeze_memory()
+                self.encoder.freeze_memory(with_norms=True)
         else:
             for param in self.encoder.parameters():
                 param.requires_grad = False
-            self.encoder.model.trainable_cross_attention_(True if stage != 'update' else False)
+            self.encoder.model.trainable_cross_attention_(True if stage != 'update' else False, with_norms=True)
         if self.decoder.freeze_without_memory is not None:
-            self.decoder.freeze_without_memory()
+            self.decoder.freeze_without_memory(unfreeze_norms=True)
             if stage == 'update':
-                self.decoder.freeze_memory()
+                self.decoder.freeze_memory(with_norms=True)
         else:
             for param in self.decoder.parameters():
                 param.requires_grad = False
-            self.decoder.model.trainable_cross_attention_(True if stage != 'update' else False)
+            self.decoder.model.trainable_cross_attention_(True if stage != 'update' else False, with_norms=True)
         # Unfreeze memory attention
-        for param in self.memory_attention.parameters():
-            param.requires_grad = True if stage != 'fetch' else False
+        if self.memory_attention.freeze is not None:
+            if stage == 'fetch':
+                self.memory_attention.freeze()
+            else:
+                self.memory_attention.unfreeze()
+        else:
+            for param in self.memory_attention.parameters():
+                param.requires_grad = True if stage != 'fetch' else False
     def unfreeze_components(self):
         """Unfreeze all components after initial training."""
@@ -109,8 +115,11 @@ class MrlActorModel(nn.Module):
         else:
             for param in self.decoder.parameters():
                 param.requires_grad = True
-        for param in self.memory_attention.parameters():
-            param.requires_grad = True
+        if self.memory_attention.unfreeze is not None:
+            self.memory_attention.unfreeze()
+        else:
+            for param in self.memory_attention.parameters():
+                param.requires_grad = True
     def reset_memory(self):
         self.memory_attention.reset_memory()

rxnn/training/mrl.py CHANGED Viewed

@@ -37,7 +37,7 @@ class CurriculumConfig(TypedDict):
     eval_dataset: Optional[MrlCurriculumDataset]
     callbacks: Optional[list[MrlTrainerCallback]]
     strategy: MrlStrategy
-    unfreeze_epoch: Optional[Union[int, tuple[int, int, int]]]
+    unfreeze_epoch: Optional[Union[int, tuple[int, int, int, int]]]
     random_resets: Optional[bool]
     random_resets_from: Optional[int]
     random_resets_ratio: Optional[float]
@@ -724,10 +724,10 @@ class MRLTrainer:
         self.reward = config.get('reward_model', self.shared_reward_model)  # MRL Reward Model for curriculum stage
         if config['lr'] is not None or config['critic_lr'] is not None or config['weight_decay'] is not None or config['critic_weight_decay'] is not None:
             self.optimizer, self.critic_optimizer = self._init_optimizers(
-                lr=config['lr'] or self.base_optim_config['lr'],
-                critic_lr=config['critic_lr'] or self.base_optim_config['critic_lr'],
-                weight_decay=config['weight_decay'] or self.base_optim_config['weight_decay'],
-                critic_weight_decay=config['critic_weight_decay'] or self.base_optim_config['critic_weight_decay']
+                lr=config.get('lr', self.base_optim_config['lr']),
+                critic_lr=config.get('critic_lr', self.base_optim_config['critic_lr']),
+                weight_decay=config.get('weight_decay', self.base_optim_config['weight_decay']),
+                critic_weight_decay=config.get('critic_weight_decay', self.base_optim_config['critic_weight_decay'])
             )
         # 2. Get epochs and random resets configs
@@ -770,11 +770,7 @@ class MRLTrainer:
             # 4. Freeze all components except memory attention and memory cross-attention layers in decoder/encoder
             if unfreeze_epoch != 0:
-                is_staged_unfreeze = isinstance(unfreeze_epoch, tuple)
-                if is_staged_unfreeze:
-                    self.actor.freeze_components('update')
-                else:
-                    self.actor.freeze_components()
+                self.actor.freeze_components('both')
             # 5. Setup train DataLoader
             if self.use_ddp:
@@ -817,8 +813,10 @@ class MRLTrainer:
                 # 11. Unfreeze all components before selected epoch
                 is_staged_unfreeze = isinstance(unfreeze_epoch, tuple)
                 if is_staged_unfreeze:
-                    fetch_epoch, both_epoch, all_epoch = unfreeze_epoch
-                    if epoch == fetch_epoch:
+                    update_epoch, fetch_epoch, both_epoch, all_epoch = unfreeze_epoch
+                    if epoch == update_epoch:
+                        self.actor.freeze_components('update')
+                    elif epoch == fetch_epoch:
                         self.actor.freeze_components('fetch')
                     elif epoch == both_epoch:
                         self.actor.freeze_components('both')

rxnn/transformers/layers.py CHANGED Viewed

@@ -57,9 +57,12 @@ class ReactiveTransformerLayer(nn.Module):
         self.use_moe = use_moe
         self.use_moe_att = use_moe_att
-    def trainable_cross_attention_(self, is_trainable: bool):
+    def trainable_cross_attention_(self, is_trainable: bool, with_norms: bool = True):
         for param in self.memory_cross_attention.parameters():
             param.requires_grad_(is_trainable)
+        if with_norms:
+            for param in self.norm2.parameters():
+                param.requires_grad_(is_trainable)
     def update_max_len(self, max_seq_len: int):
         if self.attention.rope is not None:

rxnn/transformers/models.py CHANGED Viewed

@@ -33,11 +33,11 @@ class ReactiveTransformerBase(nn.Module):
         self.num_shared_layers = len(shared_layers) if shared_layers else 0
         self.num_own_layers = len(own_layers) if own_layers else 0
-    def trainable_cross_attention_(self, is_trainable: bool):
+    def trainable_cross_attention_(self, is_trainable: bool, with_norms: bool = True):
         for i in range(self.num_shared_layers):
-            self.shared_layers[i].trainable_cross_attention_(is_trainable)
+            self.shared_layers[i].trainable_cross_attention_(is_trainable, with_norms)
         for i in range(self.num_own_layers):
-            self.layers[i].trainable_cross_attention_(is_trainable)
+            self.layers[i].trainable_cross_attention_(is_trainable, with_norms)
     def moe_router_loss(self):
         return torch.stack([self.layers[i].moe_router_loss() for i in range(self.num_own_layers) if self.layers[i].use_moe or self.layers[i].use_moe_att] + [

{rxnn-0.2.22.dist-info → rxnn-0.2.24.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rxnn
-Version: 0.2.22
+Version: 0.2.24
 Summary: RxNN: Reactive Neural Networks Platform
 License: Apache-2.0
 Keywords: deep-learning,ai,machine-learning

{rxnn-0.2.22.dist-info → rxnn-0.2.24.dist-info}/RECORD RENAMED Viewed

@@ -9,14 +9,14 @@ rxnn/memory/attention.py,sha256=p-r8DK3iVhNn-JAESVzIXDCG8gk1R_-x5xHclZ5jgb0,1813
 rxnn/memory/norm.py,sha256=Ofl8Q5NYEF9GQeO0bhM43tkTW91J0y6TSvTAOYMgloM,6278
 rxnn/memory/stm.py,sha256=IH_3INw7FdI013t56ui3Zq9GPUq-k3HeZGjx6BerS4g,3888
 rxnn/rxt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rxnn/rxt/models.py,sha256=qlYgU002VE21ZOlcxEM9iv9tAvsbe4mngcMI2sw3j9k,12078
+rxnn/rxt/models.py,sha256=3gCYD_OXvQc8GaXQvRCSj1OcYOSHayWlpP5lsg9wMMk,12389
 rxnn/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/training/base.py,sha256=_xik1GXE4RJ_nxwqLQ1ccXA5pRtBCi-jL-jeRFBdHBU,11851
 rxnn/training/bml.py,sha256=FJszaQXOLx2ZHBa1CQpyMrG8i4Kj14E-gzDAEK_Ei5k,17272
 rxnn/training/callbacks.py,sha256=-N0MQPpZQaUWCINdTOsjul4bDGbGr2JgQBqOIXBLS6o,35053
 rxnn/training/dataset.py,sha256=7hTilFWPpqUEc6zNcMqBPjxFKxCfvTKKF3E8tVlwccQ,51250
-rxnn/training/models.py,sha256=wf98gYKKm9-ZY3zwdX9NIeJ-lvh7Ro1SoAijmQxYM28,5599
-rxnn/training/mrl.py,sha256=zk4m1JFuX0y82J0tG2XkY0Pz6Uy2did9cngOXqR9lMk,43326
+rxnn/training/models.py,sha256=5fl1hESVj2Hakqz5to8ZJzw5Q4_RKZAUq2bn6nRiPV8,6045
+rxnn/training/mrl.py,sha256=14wx3pVha15B7eRWPRgoxRtV5dPtBI0yadIHOYZjX6k,43275
 rxnn/training/reward.py,sha256=7MTVdNm5HnWmt6zFDi3TAYmnVSL_-24riOoY2F7z4x8,11290
 rxnn/training/rl.py,sha256=j-KNLoZjhaEKasYNOc8DxHtwvknAgAJFwvXKot6otFA,3272
 rxnn/training/scheduler.py,sha256=LcjU35mEwz2U5x3U6tLfeeYlBqMxbFSxYzJYuXkWbSY,1408
@@ -25,14 +25,14 @@ rxnn/training/utils.py,sha256=Bw8nZLKIt7NQpUVCYkb_79kWKChVFOYgYXwODo4SvNc,5718
 rxnn/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/transformers/attention.py,sha256=d0Igo1Nrn76BphbHrzekiKJfT3RCy4iSXSB6FLAOTgc,16247
 rxnn/transformers/ff.py,sha256=jJnuBDsnnX5uYC_WZH8cXAYrMnz0P-iX7MwcPivjRtI,2533
-rxnn/transformers/layers.py,sha256=MbOIX4PurbTbYxcXSavyFsNpTHCm26K_Ssk_VUCzKIE,7469
+rxnn/transformers/layers.py,sha256=UQZbrAg1UAttPASeqS7BP1a4JalktThmRMzX99Qghss,7618
 rxnn/transformers/mask.py,sha256=J0cfLVLt3SzS2ra3KcY4khrkhI975Dw4CjpUi3Sn25s,419
-rxnn/transformers/models.py,sha256=VvP7r7E6tj7OWsYKlJLCy2vsQ3xSSnlNez6QxR-jBAA,8276
+rxnn/transformers/models.py,sha256=_2qO1SASHtKvTW3dW-Dy9HEmAvoNVC1_addm2tM9Zbs,8325
 rxnn/transformers/moe.py,sha256=j6jEx6Ip0zttlUZKKn82azxo95lkLZs-H2GLSMD88hY,5859
 rxnn/transformers/positional.py,sha256=1PjcJybUzeQlIKJI4tahAGZcYgCRCL0otxs7mpsNuzM,4410
 rxnn/transformers/sampler.py,sha256=t6iiQTdLQ0TakUWnnhKkb5DKF2F_9-thXHBydDF3fxg,17389
 rxnn/utils.py,sha256=ihb6OTyDtPiocB_lOvnq7eOkjjpCkgs8wxvXUBNQ7mM,996
-rxnn-0.2.22.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
-rxnn-0.2.22.dist-info/METADATA,sha256=KYQSidSUXwKxzuQj77h-jT0DUimKZVW_XTl-7PcQG3o,25960
-rxnn-0.2.22.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-rxnn-0.2.22.dist-info/RECORD,,
+rxnn-0.2.24.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+rxnn-0.2.24.dist-info/METADATA,sha256=PrVfcCd8NBFtFnD8lAJqU7UW3lLEc-Tr7MQhK6obvuo,25960
+rxnn-0.2.24.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+rxnn-0.2.24.dist-info/RECORD,,

{rxnn-0.2.22.dist-info → rxnn-0.2.24.dist-info}/LICENSE RENAMED Viewed

File without changes

{rxnn-0.2.22.dist-info → rxnn-0.2.24.dist-info}/WHEEL RENAMED Viewed

File without changes

rxnn 0.2.22__py3-none-any.whl → 0.2.24__py3-none-any.whl

rxnn 0.2.22__py3-none-any.whl → 0.2.24__py3-none-any.whl