rxnn 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxnn/training/models.py +10 -5
- rxnn/training/mrl.py +53 -15
- {rxnn-0.2.19.dist-info → rxnn-0.2.21.dist-info}/METADATA +1 -1
- {rxnn-0.2.19.dist-info → rxnn-0.2.21.dist-info}/RECORD +6 -6
- {rxnn-0.2.19.dist-info → rxnn-0.2.21.dist-info}/LICENSE +0 -0
- {rxnn-0.2.19.dist-info → rxnn-0.2.21.dist-info}/WHEEL +0 -0
rxnn/training/models.py
CHANGED
```diff
@@ -1,6 +1,7 @@
 import torch
 import torch.nn as nn
 from enum import Enum
+from typing import Literal
 from huggingface_hub import PyTorchModelHubMixin
 from ..transformers.models import ReactiveTransformerEncoder, ReactiveTransformerDecoder
 
@@ -74,23 +75,27 @@ class MrlActorModel(nn.Module):
         self.decoder = decoder
         self.memory_attention = memory_attention
 
-    def freeze_components(self):
+    def freeze_components(self, stage: Literal['update', 'fetch', 'both'] = 'both'):
         """Freeze encoder/decoder except memory-related layers."""
         if self.encoder.freeze_without_memory is not None:
             self.encoder.freeze_without_memory()
+            if stage == 'update':
+                self.encoder.freeze_memory()
         else:
             for param in self.encoder.parameters():
                 param.requires_grad = False
-            self.encoder.model.trainable_cross_attention_(True)
+            self.encoder.model.trainable_cross_attention_(True if stage != 'update' else False)
         if self.decoder.freeze_without_memory is not None:
             self.decoder.freeze_without_memory()
+            if stage == 'update':
+                self.decoder.freeze_memory()
         else:
             for param in self.decoder.parameters():
                 param.requires_grad = False
-            self.decoder.model.trainable_cross_attention_(True)
+            self.decoder.model.trainable_cross_attention_(True if stage != 'update' else False)
         # Unfreeze memory attention
         for param in self.memory_attention.parameters():
-            param.requires_grad = True
+            param.requires_grad = True if stage != 'fetch' else False
 
     def unfreeze_components(self):
         """Unfreeze all components after initial training."""
```
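The new `stage` argument turns actor freezing into three selectable configurations. A minimal usage sketch, assuming an already constructed `MrlActorModel` instance named `actor` (the variable and call sites are illustrative, not part of this diff):

```python
# Hypothetical usage sketch of the staged freezing API added in 0.2.21.
# 'actor' is assumed to be an MrlActorModel built elsewhere.

# Phase 1: train only the memory-update path (memory attention);
# encoder/decoder memory cross-attention stays frozen.
actor.freeze_components('update')

# Phase 2: train only the memory-fetch path (memory cross-attention);
# the memory attention network stays frozen.
actor.freeze_components('fetch')

# Phase 3: train both memory paths, everything else still frozen
# (matches the old no-argument behaviour).
actor.freeze_components('both')

# Finally, unfreeze the whole actor for joint training.
actor.unfreeze_components()
```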
```diff
@@ -124,7 +129,7 @@ class MrlActorModel(nn.Module):
         _, ed = self.encoder(x, attention_mask=attention_mask)
         return self.memory_attention(ed, attention_mask=attention_mask)
 
-class MrlCriticModel(nn.Module):
+class MrlCriticModel(nn.Module, PyTorchModelHubMixin, license="apache-2.0", pipeline_tag="text-classification"):
     def __init__(self, encoder: nn.Module, embed_dim: int, **kwargs):
         super(MrlCriticModel, self).__init__(**kwargs)
         self.encoder = encoder
```
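With `PyTorchModelHubMixin` added to `MrlCriticModel`, a trained critic can be saved and shared through the standard mixin helpers from `huggingface_hub`. A hedged sketch, where `critic` is an already trained `MrlCriticModel` and the repository id is purely illustrative:

```python
# Sketch only: these helpers come from PyTorchModelHubMixin itself,
# not from rxnn-specific publishing code.
critic.save_pretrained("./mrl-critic")             # write weights + config locally
critic.push_to_hub("your-org/mrl-critic-example")  # illustrative repository id
```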
rxnn/training/mrl.py
CHANGED
```diff
@@ -3,7 +3,7 @@ from torch.utils.data import DataLoader, DistributedSampler
 from torch.utils.tensorboard import SummaryWriter
 import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel
-from typing import Optional, TypedDict
+from typing import Optional, TypedDict, Union
 from enum import Enum
 import random, os
 from ..transformers.sampler import BatchSampler
@@ -37,11 +37,15 @@ class CurriculumConfig(TypedDict):
     eval_dataset: Optional[MrlCurriculumDataset]
     callbacks: Optional[list[MrlTrainerCallback]]
     strategy: MrlStrategy
-    unfreeze_epoch: Optional[int]
+    unfreeze_epoch: Optional[Union[int, tuple[int, int, int]]]
     random_resets: Optional[bool]
     random_resets_from: Optional[int]
     random_resets_ratio: Optional[float]
     reward_model: Optional[MrlRewardModel]
+    lr: Optional[float]
+    critic_lr: Optional[float]
+    weight_decay: Optional[float]
+    critic_weight_decay: Optional[float]
 
 
 class SamplerConfig(TypedDict):
```
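`CurriculumConfig` now accepts a three-element tuple for `unfreeze_epoch` and optional per-stage optimizer settings. An illustrative stage fragment, assuming `MrlStrategy` is importable from `rxnn.training.mrl` (as used elsewhere in this file); the values are placeholders and the remaining stage keys (dataset, epochs, callbacks, ...) are omitted:

```python
from rxnn.training.mrl import MrlStrategy

# Illustrative curriculum-stage fragment using the new CurriculumConfig fields.
stage_config = {
    'strategy': MrlStrategy.MULTI_STEP_STRATEGY,
    'unfreeze_epoch': (2, 4, 6),   # staged unfreeze: (fetch_epoch, both_epoch, all_epoch)
    'lr': 5e-5,                    # per-stage actor learning rate override
    'critic_lr': None,             # None -> fall back to the trainer-level default
    'weight_decay': 0.01,
    'critic_weight_decay': None,
}
```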
```diff
@@ -119,17 +123,15 @@ class MRLTrainer:
         self.use_amp = use_amp
         self.dtype = dtype
 
+        self.base_optim_config = {
+            'lr': config.get('lr', 3e-4),
+            'critic_lr': config.get('critic_lr', 1e-4),
+            'weight_decay': config.get('weight_decay', 0.01),
+            'critic_weight_decay': config.get('critic_weight_decay', 0.01),
+        }
+
         # Optimizers
-        self.optimizer = torch.optim.AdamW(
-            self.actor.unique_parameters(),
-            lr=config.get("lr", 3e-4),
-            weight_decay=config.get("weight_decay", 0.01),
-        )
-        self.critic_optimizer = torch.optim.AdamW(
-            self.critic.parameters(),
-            lr=config.get("critic_lr", 1e-4),
-            weight_decay=config.get("critic_weight_decay", 0.01),
-        )
+        self.optimizer, self.critic_optimizer = self._init_optimizers(**self.base_optim_config)
 
         self.scaler = torch.amp.GradScaler() if self.use_amp else None
         self.critic_scaler = torch.amp.GradScaler() if self.use_amp else None
```
```diff
@@ -156,6 +158,21 @@ class MRLTrainer:
         self.global_epoch = 0
         self.global_epochs_count = 0
 
+    def _init_optimizers(self, lr: float, critic_lr: float, weight_decay: float, critic_weight_decay: float):
+        optimizer = torch.optim.AdamW(
+            self.actor.unique_parameters(),
+            lr=lr,
+            weight_decay=weight_decay,
+        )
+
+        critic_optimizer = torch.optim.AdamW(
+            self.critic.parameters(),
+            lr=critic_lr,
+            weight_decay=critic_weight_decay,
+        )
+        return optimizer, critic_optimizer
+
+
     def _init_steps(self):
         return {
             'collect': 0,
@@ -705,6 +722,13 @@ class MRLTrainer:
         self.strategy = config.get('strategy',
                                    MrlStrategy.MULTI_STEP_STRATEGY)  # MRL strategy for given curriculum stage
         self.reward = config.get('reward_model', self.shared_reward_model)  # MRL Reward Model for curriculum stage
+        if config['lr'] is not None or config['critic_lr'] is not None or config['weight_decay'] is not None or config['critic_weight_decay'] is not None:
+            self.optimizer, self.critic_optimizer = self._init_optimizers(
+                lr=config['lr'] or self.base_optim_config['lr'],
+                critic_lr=config['critic_lr'] or self.base_optim_config['critic_lr'],
+                weight_decay=config['weight_decay'] or self.base_optim_config['weight_decay'],
+                critic_weight_decay=config['critic_weight_decay'] or self.base_optim_config['critic_weight_decay']
+            )
 
         # 2. Get epochs and random resets configs
         epochs = config.get('epochs', 5)  # number of epochs for current stage
```
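The `or`-based fallback above means a stage value of `None` (or any other falsy value) defers to the trainer-level `base_optim_config`. A standalone sketch of that behaviour with illustrative numbers (not package code):

```python
# Illustrative values, mirroring the fallback expression used in the diff above.
base_optim_config = {'lr': 3e-4, 'critic_lr': 1e-4}
stage = {'lr': 5e-5, 'critic_lr': None}

effective_lr = stage['lr'] or base_optim_config['lr']                        # 5e-5 (stage override wins)
effective_critic_lr = stage['critic_lr'] or base_optim_config['critic_lr']   # 1e-4 (falls back to base)
```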
```diff
@@ -746,7 +770,11 @@ class MRLTrainer:
 
         # 4. Freeze all components except memory attention and memory cross-attention layers in decoder/encoder
         if unfreeze_epoch != 0:
-            self.actor.freeze_components()
+            is_staged_unfreeze = isinstance(unfreeze_epoch, tuple)
+            if is_staged_unfreeze:
+                self.actor.freeze_components('update')
+            else:
+                self.actor.freeze_components()
 
         # 5. Setup train DataLoader
         if self.use_ddp:
@@ -787,8 +815,18 @@ class MRLTrainer:
                 self.random_resets_ratio = 1.0
 
             # 11. Unfreeze all components before selected epoch
-            if epoch == unfreeze_epoch:
-                self.actor.unfreeze_components()
+            is_staged_unfreeze = isinstance(unfreeze_epoch, tuple)
+            if is_staged_unfreeze:
+                fetch_epoch, both_epoch, all_epoch = unfreeze_epoch
+                if epoch == fetch_epoch:
+                    self.actor.freeze_components('fetch')
+                elif epoch == both_epoch:
+                    self.actor.freeze_components('both')
+                elif epoch == all_epoch:
+                    self.actor.unfreeze_components()
+            else:
+                if epoch == unfreeze_epoch:
+                    self.actor.unfreeze_components()
 
             # 12. Set epoch for distributed sampler
             if train_sampler is not None:
```
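Taken together, a tuple-valued `unfreeze_epoch` schedules the actor through four training phases within one curriculum stage. A self-contained sketch that prints the phase per epoch for an illustrative `(2, 4, 6)` schedule (epoch counts are placeholders, but the mapping follows the logic in this diff):

```python
# Mirrors the staged-unfreeze logic from this diff; values are illustrative.
unfreeze_epoch = (2, 4, 6)  # (fetch_epoch, both_epoch, all_epoch)
fetch_epoch, both_epoch, all_epoch = unfreeze_epoch

phase = "update: only memory attention trains (set before epoch 0)"
for epoch in range(8):
    if epoch == fetch_epoch:
        phase = "fetch: only memory cross-attention trains"
    elif epoch == both_epoch:
        phase = "both: memory attention + cross-attention train"
    elif epoch == all_epoch:
        phase = "all: whole actor unfrozen"
    print(f"epoch {epoch}: {phase}")
```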
{rxnn-0.2.19.dist-info → rxnn-0.2.21.dist-info}/RECORD
CHANGED
```diff
@@ -15,8 +15,8 @@ rxnn/training/base.py,sha256=_xik1GXE4RJ_nxwqLQ1ccXA5pRtBCi-jL-jeRFBdHBU,11851
 rxnn/training/bml.py,sha256=FJszaQXOLx2ZHBa1CQpyMrG8i4Kj14E-gzDAEK_Ei5k,17272
 rxnn/training/callbacks.py,sha256=-N0MQPpZQaUWCINdTOsjul4bDGbGr2JgQBqOIXBLS6o,35053
 rxnn/training/dataset.py,sha256=7hTilFWPpqUEc6zNcMqBPjxFKxCfvTKKF3E8tVlwccQ,51250
-rxnn/training/models.py,sha256=…
-rxnn/training/mrl.py,sha256=…
+rxnn/training/models.py,sha256=wf98gYKKm9-ZY3zwdX9NIeJ-lvh7Ro1SoAijmQxYM28,5599
+rxnn/training/mrl.py,sha256=zk4m1JFuX0y82J0tG2XkY0Pz6Uy2did9cngOXqR9lMk,43326
 rxnn/training/reward.py,sha256=7MTVdNm5HnWmt6zFDi3TAYmnVSL_-24riOoY2F7z4x8,11290
 rxnn/training/rl.py,sha256=DHFwnPUlnq2JVj6CS6DwifnC_eMeBAUVp36UCAWNMis,3934
 rxnn/training/scheduler.py,sha256=LcjU35mEwz2U5x3U6tLfeeYlBqMxbFSxYzJYuXkWbSY,1408
@@ -32,7 +32,7 @@ rxnn/transformers/moe.py,sha256=j6jEx6Ip0zttlUZKKn82azxo95lkLZs-H2GLSMD88hY,5859
 rxnn/transformers/positional.py,sha256=1PjcJybUzeQlIKJI4tahAGZcYgCRCL0otxs7mpsNuzM,4410
 rxnn/transformers/sampler.py,sha256=t6iiQTdLQ0TakUWnnhKkb5DKF2F_9-thXHBydDF3fxg,17389
 rxnn/utils.py,sha256=ihb6OTyDtPiocB_lOvnq7eOkjjpCkgs8wxvXUBNQ7mM,996
-rxnn-0.2.19.dist-info/LICENSE,sha256=…
-rxnn-0.2.19.dist-info/METADATA,sha256=…
-rxnn-0.2.19.dist-info/WHEEL,sha256=…
-rxnn-0.2.19.dist-info/RECORD,,
+rxnn-0.2.21.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+rxnn-0.2.21.dist-info/METADATA,sha256=XXf_qBMs2dOwWyAN5oNEg1W1-oPVIAQPy0FkNcO7QZQ,25960
+rxnn-0.2.21.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+rxnn-0.2.21.dist-info/RECORD,,
```
{rxnn-0.2.19.dist-info → rxnn-0.2.21.dist-info}/LICENSE
File without changes
{rxnn-0.2.19.dist-info → rxnn-0.2.21.dist-info}/WHEEL
File without changes