rxnn 0.2.26__py3-none-any.whl → 0.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxnn/training/models.py +12 -2
- rxnn/training/mrl.py +125 -33
- {rxnn-0.2.26.dist-info → rxnn-0.2.27.dist-info}/METADATA +1 -1
- {rxnn-0.2.26.dist-info → rxnn-0.2.27.dist-info}/RECORD +6 -6
- {rxnn-0.2.26.dist-info → rxnn-0.2.27.dist-info}/LICENSE +0 -0
- {rxnn-0.2.26.dist-info → rxnn-0.2.27.dist-info}/WHEEL +0 -0
rxnn/training/models.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 from enum import Enum
-from typing import Literal
+from typing import Literal, Iterator
 from huggingface_hub import PyTorchModelHubMixin
 from ..transformers.models import ReactiveTransformerEncoder, ReactiveTransformerDecoder
 
@@ -75,7 +75,7 @@ class MrlActorModel(nn.Module):
         self.decoder = decoder
         self.memory_attention = memory_attention
 
-    def freeze_components(self, stage: Literal['update', 'fetch', '
+    def freeze_components(self, stage: Literal['update', 'fetch', 'joint'] = 'joint'):
         """Freeze encoder/decoder except memory-related layers."""
         if self.encoder.freeze_without_memory is not None:
             self.encoder.freeze_without_memory(unfreeze_norms=True)
@@ -131,6 +131,16 @@ class MrlActorModel(nn.Module):
             self.memory_attention.parameters()
         ))
 
+    def memory_cross_attention_parameters(self) -> list[nn.Parameter]:
+        return list(set(
+            self.encoder.memory_parameters() +
+            self.decoder.memory_parameters()
+        ))
+
+    def memory_attention_parameters(self) -> Iterator[nn.Parameter]:
+        return self.memory_attention.parameters()
+
+
     def not_memory_parameters(self) -> list[nn.Parameter]:
         return list(set(
             self.encoder.not_memory_parameters() +
```
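
The two new accessors expose the actor's memory system as separate parameter groups: `memory_attention_parameters()` covers the memory-attention network (memory update path), while `memory_cross_attention_parameters()` covers the memory cross-attention layers inside the encoder and decoder (memory fetch path). A minimal usage sketch of how these groups can receive different learning rates; the `actor` instance, the helper name `build_actor_optimizer`, and the concrete values are placeholders for illustration, not part of the package:

```python
import torch

# Sketch only: `actor` is assumed to be an already-constructed MrlActorModel;
# the learning rates below are illustrative, not package defaults.
def build_actor_optimizer(actor, lr=3e-4, memory_lr=5e-4, cross_att_lr=1e-4, weight_decay=0.01):
    param_groups = [
        # everything outside the memory system
        {'params': actor.not_memory_parameters(), 'lr': lr},
        # memory-attention network (memory update path)
        {'params': actor.memory_attention_parameters(), 'lr': memory_lr},
        # memory cross-attention layers in encoder/decoder (memory fetch path)
        {'params': actor.memory_cross_attention_parameters(), 'lr': cross_att_lr},
    ]
    return torch.optim.AdamW(param_groups, weight_decay=weight_decay)
```
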
rxnn/training/mrl.py
CHANGED
```diff
@@ -3,7 +3,7 @@ from torch.utils.data import DataLoader, DistributedSampler
 from torch.utils.tensorboard import SummaryWriter
 import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel
-from typing import Optional, TypedDict, Union
+from typing import Optional, TypedDict, Union, TypeAlias, Literal
 from enum import Enum
 import random, os
 from ..transformers.sampler import BatchSampler
@@ -31,6 +31,8 @@ class MrlStrategy(Enum):
     MULTI_STEP_STRATEGY = 2
     LONG_RANGE_STRATEGY = 3
 
+UnfreezeItem = Union[int, tuple[int, float]]
+UnfreezeEpochsStrategy: TypeAlias = Union[int, tuple[UnfreezeItem, UnfreezeItem, UnfreezeItem, int]]
 
 class CurriculumConfig(TypedDict):
     steps: int
@@ -39,7 +41,7 @@ class CurriculumConfig(TypedDict):
     eval_dataset: Optional[MrlCurriculumDataset]
     callbacks: Optional[list[MrlTrainerCallback]]
     strategy: MrlStrategy
-    unfreeze_epoch: Optional[
+    unfreeze_epoch: Optional[UnfreezeEpochsStrategy]
     random_resets: Optional[bool]
     random_resets_from: Optional[int]
     random_resets_ratio: Optional[float]
```
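
With the new `UnfreezeEpochsStrategy` alias, `unfreeze_epoch` can still be a single int (unfreeze everything at that epoch) or a four-item schedule of switch points for the 'update', 'fetch', 'joint' and final 'all' stages, where each of the first three items may also carry a custom learning rate. A few illustrative values; the alias definitions are copied from the diff, while the epoch numbers and learning rates are placeholders:

```python
from typing import Union, TypeAlias

# Aliases as introduced in rxnn/training/mrl.py
UnfreezeItem = Union[int, tuple[int, float]]
UnfreezeEpochsStrategy: TypeAlias = Union[int, tuple[UnfreezeItem, UnfreezeItem, UnfreezeItem, int]]

# Simple mode: unfreeze the whole model at epoch 2.
simple: UnfreezeEpochsStrategy = 2

# Staged mode: (update_epoch, fetch_epoch, joint_epoch, all_epoch).
staged: UnfreezeEpochsStrategy = (0, 2, 4, 6)

# Any of the first three items may be an (epoch, lr) pair; the trainer then
# rebuilds the optimizer with that custom LR for the group that would
# otherwise stay frozen in that stage.
staged_with_lrs: UnfreezeEpochsStrategy = ((0, 1e-5), (2, 1e-5), (4, 1e-4), 6)
```
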
```diff
@@ -132,7 +134,8 @@ class MRLTrainer:
 
         if self.separate_memory_lr:
             self.base_optim_config = {
-                'lr':
+                'lr': config.get('lr', 3e-4),
+                'memory_lr': config.get('memory_lr', 5e-4),
                 'critic_lr': config.get('critic_lr', 1e-4),
                 'weight_decay': config.get('weight_decay', 0.01),
                 'critic_weight_decay': config.get('critic_weight_decay', 0.01),
@@ -145,8 +148,9 @@ class MRLTrainer:
                 'critic_weight_decay': config.get('critic_weight_decay', 0.01),
             }
 
-
-
+        self.optim_config = self.base_optim_config
+
+        self.optimizer, self.critic_optimizer = self._init_optimizers(**self.optim_config)
 
         self.scaler = torch.amp.GradScaler() if self.use_amp else None
         self.critic_scaler = torch.amp.GradScaler() if self.use_amp else None
@@ -173,11 +177,17 @@ class MRLTrainer:
         self.global_epoch = 0
         self.global_epochs_count = 0
 
-    def _init_optimizers(
-
-
+    def _init_optimizers(
+            self,
+            lr: float,
+            critic_lr: float,
+            weight_decay: float,
+            critic_weight_decay: float,
+            memory_lr: Optional[float] = None,
+    ) -> tuple[torch.optim.Optimizer, torch.optim.Optimizer]:
+        if memory_lr is not None:
             optimizer = torch.optim.AdamW([
-                { 'params': self.actor.not_memory_parameters(), 'lr':
+                { 'params': self.actor.not_memory_parameters(), 'lr': lr },
                 { 'params': self.actor.memory_parameters(), 'lr': memory_lr },
             ],
                 weight_decay=weight_decay,
```
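
When `separate_memory_lr` is enabled, `base_optim_config` now carries both a base `lr` and a `memory_lr`, and `_init_optimizers` builds AdamW with two parameter groups (non-memory vs. memory parameters). A sketch of the default config in that mode, using the default values visible in this diff; how the surrounding trainer config is assembled and passed in is assumed rather than shown here:

```python
# Default optimizer settings with a separate memory LR
# (values are the defaults from the diff; the dict itself is just a sketch).
base_optim_config = {
    'lr': 3e-4,         # non-memory parameters
    'memory_lr': 5e-4,  # memory attention / cross-attention parameters
    'critic_lr': 1e-4,
    'weight_decay': 0.01,
    'critic_weight_decay': 0.01,
}
```
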
```diff
@@ -737,7 +747,7 @@ class MRLTrainer:
 
         return should_stop_stage
 
-    def _setup_curriculum_step(self, config: CurriculumConfig) -> tuple[tuple[int,
+    def _setup_curriculum_step(self, config: CurriculumConfig) -> tuple[tuple[int, UnfreezeEpochsStrategy], tuple[bool, int, float]]:
         # 1. Set common fields based on config
         self.curriculum_steps = config.get('steps', 1) # number of steps to run in episode
         self.train_dataset = config.get('dataset', None) # training dataset for current curriculum stage
@@ -748,13 +758,28 @@ class MRLTrainer:
                                      MrlStrategy.MULTI_STEP_STRATEGY) # MRL strategy for given curriculum stage
         self.reward = config.get('reward_model', self.shared_reward_model) # MRL Reward Model for curriculum stage
         if config['lr'] is not None or config['critic_lr'] is not None or config['weight_decay'] is not None or config['critic_weight_decay'] is not None or (config['separate_memory_lr'] and config['memory_lr'] is not None):
-
-
-
-
-
-
-
+            if config.get('separate_memory_lr', False):
+                self.optim_config = {
+                    'lr': config.get('lr', self.base_optim_config['lr']),
+                    'critic_lr': config.get('critic_lr', self.base_optim_config['critic_lr']),
+                    'weight_decay': config.get('weight_decay', self.base_optim_config['weight_decay']),
+                    'critic_weight_decay': config.get('critic_weight_decay', self.base_optim_config['critic_weight_decay']),
+                    'memory_lr': config.get('memory_lr', self.base_optim_config['memory_lr']),
+                }
+            else:
+                self.optim_config = {
+                    'lr': config.get('lr', self.base_optim_config['lr']),
+                    'critic_lr': config.get('critic_lr', self.base_optim_config['critic_lr']),
+                    'weight_decay': config.get('weight_decay', self.base_optim_config['weight_decay']),
+                    'critic_weight_decay': config.get('critic_weight_decay', self.base_optim_config['critic_weight_decay']),
+                }
+            self.optimizer, self.critic_optimizer = self._init_optimizers(**self.optim_config)
+        elif self.optim_config != self.base_optim_config:
+            self.optim_config = self.base_optim_config
+            self.optimizer, self.critic_optimizer = self._init_optimizers(**self.optim_config)
+
+
+
 
         # 2. Get epochs and random resets configs
         epochs = config.get('epochs', 5) # number of epochs for current stage
```
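
A curriculum stage can now override the optimizer settings for that stage only: `_setup_curriculum_step` merges the stage values over `base_optim_config`, rebuilds both optimizers, and restores the base config once a later stage has no overrides. A hedged sketch of such a stage entry; only the optimizer-related keys are shown, the numbers are placeholders, and the remaining `CurriculumConfig` fields (dataset, steps, strategy, and so on) are assumed to be provided as usual:

```python
# Hypothetical per-stage override (placeholder values, not recommendations).
stage_config = {
    # ... other CurriculumConfig fields (steps, epochs, dataset, strategy, ...)
    'separate_memory_lr': True,
    'lr': 1e-4,          # stage-specific base LR
    'memory_lr': 3e-4,   # stage-specific LR for memory parameters
    'critic_lr': None,   # left unset here, intended to fall back to base_optim_config
    'weight_decay': None,
    'critic_weight_decay': None,
    'unfreeze_epoch': ((0, 1e-5), 2, 4, 6),
}
```
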
```diff
@@ -771,6 +796,82 @@ class MRLTrainer:
 
         return (epochs, unfreeze_epoch), (random_resets, random_resets_from, random_resets_ratio)
 
+    def _apply_unfreeze_strategy(self, epoch: int, unfreeze_epoch: UnfreezeEpochsStrategy):
+        is_staged_unfreeze = isinstance(unfreeze_epoch, tuple)
+        if is_staged_unfreeze:
+            update_epoch, fetch_epoch, joint_epoch, all_epoch = unfreeze_epoch
+
+            if isinstance(update_epoch, tuple):
+                switch_epoch, cross_att_lr = update_epoch
+                if epoch == switch_epoch:
+                    self.actor.freeze_components('joint')
+                    self.optimizer = self._init_unfreeze_optimizer('update', cross_att_lr)
+                    print(f"Activating 'update' unfreeze strategy with custom cross_att_lr: {cross_att_lr}")
+            elif epoch == update_epoch:
+                self.actor.freeze_components('update')
+                print(f"Activating 'update' unfreeze strategy - mem-att trainable / cross-att frozen / rest model frozen")
+
+            if isinstance(fetch_epoch, tuple):
+                switch_epoch, mem_att_lr = fetch_epoch
+                if epoch == fetch_epoch:
+                    self.actor.freeze_components('joint')
+                    self.optimizer = self._init_unfreeze_optimizer('fetch', mem_att_lr)
+                    print(f"Activating 'fetch' unfreeze strategy with custom mem_att_lr: {mem_att_lr}")
+            elif epoch == fetch_epoch:
+                self.actor.freeze_components('fetch')
+                print(f"Activating 'fetch' unfreeze strategy - mem-att frozen / cross-att trainable / rest model frozen")
+
+            if isinstance(joint_epoch, tuple):
+                switch_epoch, model_lr = joint_epoch
+                if epoch == joint_epoch:
+                    self.actor.unfreeze_components()
+                    self.optimizer = self._init_unfreeze_optimizer('joint', model_lr)
+                    print(f"Activating 'joint' unfreeze strategy with custom model_lr: {model_lr}")
+            elif epoch == joint_epoch:
+                self.actor.freeze_components('joint')
+                print(f"Activating 'joint' unfreeze strategy - mem-att/cross-att trainable / rest model frozen")
+            if epoch == all_epoch:
+                self.actor.unfreeze_components()
+                self.optimizer = self._init_unfreeze_optimizer('all', 0.)
+                print(f"Switching to train 'all' strategy - unfreeze all components")
+        elif epoch == unfreeze_epoch:
+            self.actor.unfreeze_components()
+            print(f"Switching to train 'all' strategy - unfreeze all components")
+
+    def _init_unfreeze_optimizer(
+            self,
+            mode: Literal['update', 'fetch', 'joint', 'all'],
+            unfreeze_lr: float,
+    ) -> torch.optim.Optimizer:
+        memory_lr = self.optim_config['memory_lr'] if 'memory_lr' in self.optim_config else self.optim_config['lr']
+        model_lr = self.optim_config['lr']
+
+        if mode == 'update':
+            params = [
+                {'params': self.actor.not_memory_parameters(), 'lr': model_lr},
+                {'params': self.actor.memory_attention_parameters(), 'lr': memory_lr},
+                {'params': self.actor.memory_cross_attention_parameters(), 'lr': unfreeze_lr},
+            ]
+        elif mode == 'fetch':
+            params = [
+                {'params': self.actor.not_memory_parameters(), 'lr': model_lr},
+                {'params': self.actor.memory_cross_attention_parameters(), 'lr': memory_lr},
+                {'params': self.actor.memory_attention_parameters(), 'lr': unfreeze_lr},
+            ]
+        elif mode == 'joint':
+            params = [
+                {'params': self.actor.not_memory_parameters(), 'lr': unfreeze_lr},
+                {'params': self.actor.memory_parameters(), 'lr': memory_lr},
+            ]
+        else:
+            params = [
+                {'params': self.actor.not_memory_parameters(), 'lr': model_lr},
+                {'params': self.actor.memory_parameters(), 'lr': memory_lr},
+            ]
+
+        return torch.optim.AdamW(params, weight_decay=self.optim_config['weight_decay'])
+
+
     def __call__(self, curriculum_config: list[CurriculumConfig], batch_size: int):
         """Start Memory Reinforcement Learning Curriculum."""
 
```
```diff
@@ -796,7 +897,11 @@ class MRLTrainer:
 
         # 4. Freeze all components except memory attention and memory cross-attention layers in decoder/encoder
         if unfreeze_epoch != 0:
-            self.actor.freeze_components('
+            self.actor.freeze_components('joint')
+            if isinstance(unfreeze_epoch, tuple):
+                print(f"Starting training with unfreeze strategies - 'warmup' - mem-att/cross-att trainable / rest model frozen")
+            else:
+                print(f"Starting training with simple unfreeze - 'joint' - mem-att/cross-att trainable / rest model frozen")
 
         # 5. Setup train DataLoader
         if self.use_ddp:
```
```diff
@@ -836,21 +941,8 @@ class MRLTrainer:
             else:
                 self.random_resets_ratio = 1.0
 
-            # 11.
-
-            if is_staged_unfreeze:
-                update_epoch, fetch_epoch, both_epoch, all_epoch = unfreeze_epoch
-                if epoch == update_epoch:
-                    self.actor.freeze_components('update')
-                elif epoch == fetch_epoch:
-                    self.actor.freeze_components('fetch')
-                elif epoch == both_epoch:
-                    self.actor.freeze_components('both')
-                elif epoch == all_epoch:
-                    self.actor.unfreeze_components()
-            else:
-                if epoch == unfreeze_epoch:
-                    self.actor.unfreeze_components()
+            # 11. Apply the unfreeze strategy
+            self._apply_unfreeze_strategy(epoch, unfreeze_epoch)
 
             # 12. Set epoch for distributed sampler
             if train_sampler is not None:
```
{rxnn-0.2.26.dist-info → rxnn-0.2.27.dist-info}/RECORD
CHANGED
```diff
@@ -15,8 +15,8 @@ rxnn/training/base.py,sha256=_xik1GXE4RJ_nxwqLQ1ccXA5pRtBCi-jL-jeRFBdHBU,11851
 rxnn/training/bml.py,sha256=FJszaQXOLx2ZHBa1CQpyMrG8i4Kj14E-gzDAEK_Ei5k,17272
 rxnn/training/callbacks.py,sha256=-N0MQPpZQaUWCINdTOsjul4bDGbGr2JgQBqOIXBLS6o,35053
 rxnn/training/dataset.py,sha256=7hTilFWPpqUEc6zNcMqBPjxFKxCfvTKKF3E8tVlwccQ,51250
-rxnn/training/models.py,sha256=
-rxnn/training/mrl.py,sha256=
+rxnn/training/models.py,sha256=bY6yZoXYJEsrcymtb5Ep41vmFVHplCGWlrw1dI0oFRc,6807
+rxnn/training/mrl.py,sha256=MnLaYWxblc5cF261R5PNjIvddVQVNxyjAkEYtchBn9E,49299
 rxnn/training/reward.py,sha256=7MTVdNm5HnWmt6zFDi3TAYmnVSL_-24riOoY2F7z4x8,11290
 rxnn/training/rl.py,sha256=j-KNLoZjhaEKasYNOc8DxHtwvknAgAJFwvXKot6otFA,3272
 rxnn/training/scheduler.py,sha256=LcjU35mEwz2U5x3U6tLfeeYlBqMxbFSxYzJYuXkWbSY,1408
@@ -32,7 +32,7 @@ rxnn/transformers/moe.py,sha256=j6jEx6Ip0zttlUZKKn82azxo95lkLZs-H2GLSMD88hY,5859
 rxnn/transformers/positional.py,sha256=1PjcJybUzeQlIKJI4tahAGZcYgCRCL0otxs7mpsNuzM,4410
 rxnn/transformers/sampler.py,sha256=t6iiQTdLQ0TakUWnnhKkb5DKF2F_9-thXHBydDF3fxg,17389
 rxnn/utils.py,sha256=ihb6OTyDtPiocB_lOvnq7eOkjjpCkgs8wxvXUBNQ7mM,996
-rxnn-0.2.
-rxnn-0.2.
-rxnn-0.2.
-rxnn-0.2.
+rxnn-0.2.27.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+rxnn-0.2.27.dist-info/METADATA,sha256=woZT3PVGgtEJP7DIAJv1-Mdfd4XvKoCRHANQgoTXoXk,25960
+rxnn-0.2.27.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+rxnn-0.2.27.dist-info/RECORD,,
```
{rxnn-0.2.26.dist-info → rxnn-0.2.27.dist-info}/LICENSE
File without changes
{rxnn-0.2.26.dist-info → rxnn-0.2.27.dist-info}/WHEEL
File without changes