rxnn 0.1.83__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,30 +2,31 @@ import os, traceback, shutil
 import numpy as np
 import torch
 import torch.nn as nn
-from typing import Union
+from typing import Union, Optional
 from torch.nn.parallel import DistributedDataParallel
 from huggingface_hub import PyTorchModelHubMixin
 from ..utils import human_format

+
 class TrainerCallback:
-    def on_epoch_start(self, model: torch.nn.Module, epoch: int) -> None:
+    def on_epoch_start(self, model: nn.Module, epoch: int) -> None:
         pass

-    def on_epoch_end(self, model: torch.nn.Module, epoch: int) -> Union[bool, None]:
+    def on_epoch_end(self, model: nn.Module, epoch: int) -> Union[bool, None]:
         pass

-    def on_batch_start(self, model: torch.nn.Module, batch_idx: int, batch: dict[str, torch.Tensor]) -> None:
+    def on_batch_start(self, model: nn.Module, batch_idx: int, batch: dict[str, torch.Tensor]) -> None:
         pass

-    def on_batch_end(self, model: torch.nn.Module, batch_idx: int, loss: float, batch: dict[str, torch.Tensor]) -> \
-            Union[
-        bool, None]:
+    def on_batch_end(self, model: nn.Module, batch_idx: int, loss: float, batch: dict[str, torch.Tensor]) -> \
+            Union[
+        bool, None]:
         pass

-    def on_training_end(self, model: torch.nn.Module) -> None:
+    def on_training_end(self, model: nn.Module) -> None:
         pass

-    def on_validation_end(self, model: torch.nn.Module, epoch: int, val_loss: float, val_metrics: dict) -> Union[
+    def on_validation_end(self, model: nn.Module, epoch: int, val_loss: float, val_metrics: dict) -> Union[
         bool, None]:
         pass

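The changes in this first hunk are typing cleanups: the hooks now annotate models with the already-imported `nn` alias instead of `torch.nn.Module`, and `Optional` joins the typing imports for the MRL callbacks added later in the file. As a hedged sketch of how these hooks are used (the trainer-side wiring, e.g. a `callbacks=[...]` argument, is an assumption and not shown in this diff), a custom callback only overrides the hooks it needs and can return True from a validation hook to request early stopping:

# Minimal sketch of a custom callback against the hooks above.
# ThresholdStopCallback and max_val_loss are hypothetical names; TrainerCallback
# comes from the callbacks module shown in this diff, and the trainer that
# invokes these hooks is assumed, not shown here.
import torch
import torch.nn as nn
from typing import Union

class ThresholdStopCallback(TrainerCallback):
    def __init__(self, max_val_loss: float = 5.0) -> None:
        self.max_val_loss = max_val_loss

    def on_batch_end(self, model: nn.Module, batch_idx: int, loss: float, batch: dict[str, torch.Tensor]) -> Union[bool, None]:
        if batch_idx % 100 == 0:
            print(f'batch {batch_idx}: loss {loss:.4f}')
        return None

    def on_validation_end(self, model: nn.Module, epoch: int, val_loss: float, val_metrics: dict) -> Union[bool, None]:
        # A truthy return asks the trainer to stop, mirroring EarlyStoppageCallback below.
        return True if val_loss > self.max_val_loss else None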
@@ -111,7 +112,7 @@ class TokenCounterCallback(TrainerCallback):
             print(f'Reached a limit of {human_format(self.limit)} processed tokens - stopping training')
         return should_stop_training

-    def on_training_end(self, model: torch.nn.Module) -> None:
+    def on_training_end(self, model: nn.Module) -> None:
         print(f'Total training tokens: {human_format(self.total_tokens)}')

     def get_total_tokens(self):
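TokenCounterCallback keeps the same behaviour, with only the type annotation switched to the `nn` alias: it prints a notice and returns a stop flag once the configured token limit is crossed, and reports the grand total when training ends. A hedged usage sketch follows; the constructor signature (a single token limit) is inferred from `self.limit` and the stop message above, not shown in this hunk:

# Sketch only: the limit argument is an assumption inferred from self.limit above.
# TokenCounterCallback and human_format come from the module shown in this diff.
token_counter = TokenCounterCallback(10_000_000_000)  # stop once ~10B tokens were processed
# ... after training ...
print(human_format(token_counter.get_total_tokens()))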
@@ -122,7 +123,6 @@ class ModelSaveCallback(TrainerCallback):
     def __init__(
             self,
             save_dir: str,
-            save_best_only: bool = True,
             max_keep: int = 3,
             push_to_hub: bool = False,
             hub_model_id: str = None,
@@ -136,7 +136,6 @@ class ModelSaveCallback(TrainerCallback):
             use_ddp: bool = False,
     ):
         self.save_dir = save_dir
-        self.save_best_only = save_best_only
         self.max_keep = max_keep
         self.best_loss = float('inf')
         self.ckpt_paths = []
@@ -152,7 +151,7 @@ class ModelSaveCallback(TrainerCallback):
         self.display_exc_trace = display_exc_trace
         self.rank = int(os.environ['RANK']) if use_ddp else 0

-    def on_batch_end(self, model: torch.nn.Module, batch_idx: int, loss: int, batch: dict[str, torch.Tensor]) -> Union[
+    def on_batch_end(self, model: nn.Module, batch_idx: int, loss: int, batch: dict[str, torch.Tensor]) -> Union[
         bool, None]:
         if self.rank == 0 and self.save_checkpoint_after_n_batches is not None and batch_idx != 0 and batch_idx % self.save_checkpoint_after_n_batches == 0:
             if isinstance(model, DistributedDataParallel):
@@ -195,7 +194,7 @@ class ModelSaveCallback(TrainerCallback):

     def on_validation_end(
             self,
-            model: Union[torch.nn.Module, PyTorchModelHubMixin],
+            model: Union[nn.Module, PyTorchModelHubMixin],
             epoch: int,
             val_loss: float,
             val_metrics: dict
@@ -252,7 +251,7 @@ class ModelSaveCallback(TrainerCallback):
             if self.display_exc_trace:
                 traceback.print_exc()

-    def on_training_end(self, model: Union[torch.nn.Module, PyTorchModelHubMixin]):
+    def on_training_end(self, model: Union[nn.Module, PyTorchModelHubMixin]):
         if self.rank == 0:
             if isinstance(model, DistributedDataParallel):
                 model = next(model.children())
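Across both save callbacks, 0.2.0 removes the `save_best_only` flag from the constructor; checkpoint rotation is governed by `max_keep` and the tracked `best_loss` alone. A hedged construction sketch using only parameters visible in these hunks (other keyword arguments keep their defaults; the repo id is a placeholder):

# Sketch: constructing ModelSaveCallback without the removed save_best_only flag.
# Only arguments visible in this diff are passed; 'your-org/your-model' is a placeholder.
save_callback = ModelSaveCallback(
    './checkpoints',
    max_keep=3,
    push_to_hub=True,
    hub_model_id='your-org/your-model',
    use_ddp=False,
)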
@@ -291,7 +290,6 @@ class JointModelSaveCallback(TrainerCallback):
     def __init__(
             self,
             save_dir: str,
-            save_best_only: bool = True,
             max_keep: int = 3,
             push_to_hub: bool = False,
             hub_model_decoder: str = None,
@@ -308,7 +306,6 @@ class JointModelSaveCallback(TrainerCallback):
             use_ddp: bool = False,
     ):
         self.save_dir = save_dir
-        self.save_best_only = save_best_only
         self.max_keep = max_keep
         self.best_loss = float('inf')
         self.ckpt_paths = []
@@ -369,7 +366,7 @@ class JointModelSaveCallback(TrainerCallback):
             if self.display_exc_trace:
                 traceback.print_exc()

-    def on_batch_end(self, model: torch.nn.Module, batch_idx: int, loss: int, batch: dict[str, torch.Tensor]) -> Union[
+    def on_batch_end(self, model: nn.Module, batch_idx: int, loss: int, batch: dict[str, torch.Tensor]) -> Union[
         bool, None]:
         if self.rank == 0 and self.save_checkpoint_after_n_batches is not None and batch_idx != 0 and batch_idx % self.save_checkpoint_after_n_batches == 0:
             if isinstance(model, DistributedDataParallel):
@@ -434,7 +431,7 @@ class JointModelSaveCallback(TrainerCallback):

     def on_validation_end(
             self,
-            model: Union[torch.nn.Module, PyTorchModelHubMixin],
+            model: Union[nn.Module, PyTorchModelHubMixin],
             epoch: int,
             val_loss: float,
             val_metrics: dict
@@ -491,7 +488,7 @@ class JointModelSaveCallback(TrainerCallback):
             if self.display_exc_trace:
                 traceback.print_exc()

-    def on_training_end(self, model: Union[torch.nn.Module, PyTorchModelHubMixin]):
+    def on_training_end(self, model: Union[nn.Module, PyTorchModelHubMixin]):
         if self.rank == 0:
             if isinstance(model, DistributedDataParallel):
                 model = next(model.children())
@@ -500,23 +497,289 @@ class JointModelSaveCallback(TrainerCallback):
             self._save_final(model.decoder, 'decoder', hub_id=self.hub_model_decoder)
             self._save_final(model.mlm_head, 'head', hub_id=self.hub_model_head)

+
 class EarlyStoppageCallback(TrainerCallback):
-    def __init__(self, num_plateau_epochs: int = 3) -> None:
-        super().__init__()
-        self.num_plateau_epochs = num_plateau_epochs
-        self.best_loss = 9999.0
-        self.best_loss_epoch = 0
-
-    def on_validation_end(
-            self,
-            model: torch.nn.Module,
-            epoch: int,
-            val_loss: float,
-            val_metrics: dict
-    ):
-        if val_loss < self.best_loss:
-            self.best_loss = val_loss
-            self.best_loss_epoch = epoch
-        elif epoch - self.best_loss_epoch > self.num_plateau_epochs:
-            return True
-        return None
+    def __init__(self, num_plateau_epochs: int = 3) -> None:
+        super().__init__()
+        self.num_plateau_epochs = num_plateau_epochs
+        self.best_loss = 9999.0
+        self.best_loss_epoch = 0
+
+    def on_validation_end(
+            self,
+            model: nn.Module,
+            epoch: int,
+            val_loss: float,
+            val_metrics: dict
+    ):
+        if val_loss < self.best_loss:
+            self.best_loss = val_loss
+            self.best_loss_epoch = epoch
+        elif epoch - self.best_loss_epoch >= self.num_plateau_epochs:
+            return True
+        return None
+
+
+class MrlTrainerCallback:
+    def on_epoch_start(self, actor: nn.Module, epoch: int, stage_epochs: int, global_epoch: int,
+                       global_epochs: int, curriculum_config: dict) -> None:
+        pass
+
+    def on_epoch_end(self, actor: nn.Module, epoch: int, stage_epochs: int, policy_loss: float,
+                     critic_loss: float, global_epoch: int, global_epochs: int) -> None:
+        pass
+
+    def on_episode_collected(self, actor: nn.Module, batch_idx: int, episode_trajectories: list[dict],
+                             reward: float) -> None:
+        pass
+
+    def on_reward(self, actor: nn.Module, reward: float, generated: str, reference: str, saved_data: str, eval_mode: bool) -> None:
+        pass
+
+    def on_batch_updated(self, actor: nn.Module, epoch: int, step: int, policy_loss: float) -> None:
+        pass
+
+    def on_critic_updated(self, actor: nn.Module, critic: nn.Module, epoch: int, step: int,
+                          critic_loss: float) -> None:
+        pass
+
+    def on_training_end(self, actor: nn.Module, critic: nn.Module, curriculum_config: dict) -> None:
+        pass
+
+    def on_eval_end(self, actor: nn.Module, critic: nn.Module, epoch: int, eval_mean_reward: float) -> Union[bool, None]:
+        pass
+
+    def on_eval_episode_end(self, actor: nn.Module, epoch: int, batch_idx: int, reward: float) -> None:
+        pass
+
+
+class MrlPrintCallback(MrlTrainerCallback):
+    def on_epoch_start(self, actor: nn.Module, epoch: int, stage_epochs: int, curriculum_config: dict,
+                       global_epoch: int, global_epochs: int) -> None:
+        print(
+            f'Starting epoch {epoch}/{stage_epochs} (stage) | {global_epoch}/{global_epochs} (global) for {curriculum_config['steps']} steps in {curriculum_config['strategy']} strategy.')
+
+    def on_epoch_end(self, actor: nn.Module, epoch: int, stage_epochs: int, policy_loss: float,
+                     critic_loss: float, global_epoch: int, global_epochs: int) -> None:
+        print(f'Finished epoch {epoch}/{stage_epochs} (stage) | {global_epoch}/{global_epochs} (global)')
+        print(f'Policy mean loss: {policy_loss} | Critic mean loss: {critic_loss}')
+
+    def on_episode_collected(self, actor: nn.Module, batch_idx: int, episode_trajectories: list[dict],
+                             reward: float) -> None:
+        print(f'Collected {batch_idx} episode | mean reward {reward}')
+
+    def on_reward(self, actor: nn.Module, reward: float, generated: dict[str, torch.Tensor],
+                  reference: dict[str, torch.Tensor], saved_data: dict[str, torch.Tensor], eval_mode: bool) -> None:
+        print(f"{'Eval' if eval_mode else 'Train'} | Collected reward {reward}")
+
+    def on_batch_updated(self, actor: nn.Module, epoch: int, step: int, policy_loss: float) -> None:
+        print(f'Epoch {epoch} | Step {step} - updated policy loss {policy_loss}')
+
+    def on_critic_updated(self, actor: nn.Module, critic: nn.Module, epoch: int, step: int,
+                          critic_loss: float) -> None:
+        print(f'Epoch {epoch} | Step {step} - updated policy loss {critic_loss}')
+
+    def on_training_end(self, actor: nn.Module, critic: nn.Module, curriculum_config: dict) -> None:
+        print(f'Finished training for {curriculum_config['steps']} steps in {curriculum_config['strategy']} strategy.')
+
+    def on_eval_end(self, actor: nn.Module, critic: nn.Module, epoch: int, eval_mean_reward: float) -> None:
+        print(f'Eval epoch {epoch} - mean reward {eval_mean_reward}')
+
+    def on_eval_episode_end(self, actor: nn.Module, epoch: int, batch_idx: int, reward: float) -> None:
+        print(f'Eval epoch {epoch} / Episode {batch_idx} - mean reward {reward}')
+
+
+class MrlEarlyStoppageCallback(MrlTrainerCallback):
+    def __init__(self, num_plateau_epochs: int = 2, threshold: Optional[float] = None) -> None:
+        super().__init__()
+        self.num_plateau_epochs = num_plateau_epochs
+        self.best_reward = -9999.0
+        self.best_reward_epoch = 0
+        self.threshold = threshold
+
+    def on_eval_end(self, _actor: nn.Module, _critic: nn.Module, epoch: int, eval_mean_reward: float) -> Union[bool, None]:
+        if self.threshold is not None:
+            if eval_mean_reward > self.threshold:
+                return True
+
+        if eval_mean_reward > self.best_reward:
+            self.best_reward = eval_mean_reward
+            self.best_reward_epoch = epoch
+        elif epoch - self.best_reward_epoch >= self.num_plateau_epochs:
+            return True
+        return None
+
+class MrlModelSaveCallback(MrlTrainerCallback):
+    def __init__(
+            self,
+            save_dir: str,
+            max_keep: int = 3,
+            push_to_hub: bool = False,
+            hub_model_decoder: str = None,
+            hub_model_encoder: str = None,
+            hub_model_memory_attention: str = None,
+            hub_model_critic: str = None,
+            private_repo: bool = False,
+            hf_token: str = None,
+            push_checkpoint_weights: bool = True,
+            final_commit_message: str = None,
+            display_exc_trace: bool = False,
+            use_ddp: bool = False,
+    ):
+        self.save_dir = save_dir
+        self.max_keep = max_keep
+        self.best_reward = float('-inf')
+        self.ckpt_paths = []
+        self.push_to_hub = push_to_hub
+        self.hub_model_decoder = hub_model_decoder
+        self.hub_model_encoder = hub_model_encoder
+        self.hub_model_memory_attention = hub_model_memory_attention
+        self.hub_model_critic = hub_model_critic
+        self.private_repo = private_repo
+        self.hf_token = hf_token
+        self.push_checkpoint_weights = push_checkpoint_weights
+        self.final_commit_message = final_commit_message
+        self.finished_epochs = 0
+        self.display_exc_trace = display_exc_trace
+        self.rank = int(os.environ['RANK']) if use_ddp else 0
+
+    def _save_eval(self, model: Union[nn.Module, PyTorchModelHubMixin], component: str, epoch: int,
+                   reward: float, hub_id: str = None):
+        try:
+            if model.save_pretrained is not None:
+                ckpt_path = os.path.join(
+                    self.save_dir,
+                    component,
+                    f'epoch_{epoch}_eval_reward_{reward:.4f}'
+                )
+                path_exists = os.path.exists(ckpt_path)
+                if not path_exists:
+                    os.makedirs(ckpt_path)
+                model.save_pretrained(save_directory=ckpt_path)
+            else:
+                comp_path = os.path.join(
+                    self.save_dir,
+                    component
+                )
+                path_exists = os.path.exists(comp_path)
+                if not path_exists:
+                    os.makedirs(comp_path)
+                ckpt_path = os.path.join(
+                    comp_path,
+                    f'epoch_{epoch}_eval_reward_{reward:.4f}.pt'
+                )
+                torch.save(model.state_dict(), ckpt_path)
+            self.ckpt_paths.append(ckpt_path)
+
+            # Keep only N best checkpoints
+            if len(self.ckpt_paths) > self.max_keep:
+                oldest_path = self.ckpt_paths.pop(0)
+                if model.save_pretrained is not None:
+                    shutil.rmtree(oldest_path)
+                else:
+                    os.remove(oldest_path)
+        except Exception as e:
+            print(f"Error saving epoch checkpoint: {str(e)}")
+            if self.display_exc_trace:
+                traceback.print_exc()
+
+        try:
+            if self.push_to_hub and self.push_checkpoint_weights and model.push_to_hub is not None and hub_id:
+                model.push_to_hub(
+                    repo_id=hub_id,
+                    commit_message=f'Epoch {epoch} - Eval reward {reward:.4f}',
+                    token=self.hf_token,
+                    private=self.private_repo,
+                )
+        except Exception as e:
+            print(f"Error pushing epoch checkpoint: {str(e)}")
+            if self.display_exc_trace:
+                traceback.print_exc()
+
+    def on_eval_end(self, actor: nn.Module, critic: nn.Module, epoch: int, eval_mean_reward: float) -> None:
+        if self.rank == 0:
+            self.finished_epochs += 1
+            if eval_mean_reward > self.best_reward:
+                self.best_reward = eval_mean_reward
+                if isinstance(actor, DistributedDataParallel):
+                    actor = next(actor.children())
+                self._save_eval(actor.encoder, 'encoder', epoch, eval_mean_reward, hub_id=self.hub_model_encoder)
+                self._save_eval(actor.decoder, 'decoder', epoch, eval_mean_reward, hub_id=self.hub_model_decoder)
+                self._save_eval(actor.memory_attention, 'memory_attention', epoch, eval_mean_reward, hub_id=self.hub_model_memory_attention)
+                if isinstance(critic, DistributedDataParallel):
+                    critic = next(critic.children())
+                self._save_eval(critic, 'critic', epoch, eval_mean_reward, hub_id=self.hub_model_critic)
+
+    def _save_final(self, model: Union[nn.Module, PyTorchModelHubMixin], component: str, hub_id: str = None):
+        try:
+            # Save final model
+            if model.save_pretrained is not None:
+                ckpt_path = os.path.join(
+                    self.save_dir,
+                    component,
+                    'final_model'
+                )
+                path_exists = os.path.exists(ckpt_path)
+                if not path_exists:
+                    os.makedirs(ckpt_path)
+                model.save_pretrained(save_directory=ckpt_path)
+            else:
+                comp_path = os.path.join(
+                    self.save_dir,
+                    component
+                )
+                path_exists = os.path.exists(comp_path)
+                if not path_exists:
+                    os.makedirs(comp_path)
+                ckpt_path = os.path.join(comp_path, 'final_model.pt')
+                torch.save(model.state_dict(), ckpt_path)
+            print(f"Final model saved to {ckpt_path}")
+        except Exception as e:
+            print(f"Error saving final model: {str(e)}")
+            if self.display_exc_trace:
+                traceback.print_exc()
+        try:
+            if self.push_to_hub and model.push_to_hub is not None and hub_id:
+                model.push_to_hub(
+                    repo_id=hub_id,
+                    commit_message=self.final_commit_message or f'Model after full curriculum stage, after {self.finished_epochs} epochs',
+                    token=self.hf_token,
+                    private=self.private_repo,
+                )
+        except Exception as e:
+            print(f"Error pushing final model: {str(e)}")
+            if self.display_exc_trace:
+                traceback.print_exc()
+
+    def on_training_end(self, actor: nn.Module, critic: nn.Module, curriculum_config: dict) -> None:
+        if self.rank == 0:
+            if isinstance(actor, DistributedDataParallel):
+                actor = next(actor.children())
+            self._save_final(actor.encoder, 'encoder', hub_id=self.hub_model_encoder)
+            self._save_final(actor.decoder, 'decoder', hub_id=self.hub_model_decoder)
+            self._save_final(actor.memory_attention, 'memory_attention', hub_id=self.hub_model_memory_attention)
+            if isinstance(critic, DistributedDataParallel):
+                critic = next(critic.children())
+            self._save_final(critic, 'critic', hub_id=self.hub_model_critic)
+
+class MrlGeneratedTokensCallback(MrlTrainerCallback):
+    def __init__(self, steps_log_interval: int = 100):
+        self.total_tokens = 0
+        self.steps_log_interval = steps_log_interval
+        self.step = 0
+
+    def on_reward(self, actor: nn.Module, reward: float, generated: dict[str, torch.Tensor],
+                  reference: dict[str, torch.Tensor], saved_data: dict[str, torch.Tensor], eval_mode: bool) -> None:
+        self.step += 1
+        attention_mask = generated['attention_mask']
+        batch_tokens = attention_mask.sum().item()
+        self.total_tokens += batch_tokens
+        if self.step != 0 and self.step % self.steps_log_interval == 0:
+            print(f'Total processed tokens: {human_format(self.total_tokens)}')
+
+    def on_training_end(self, actor: nn.Module, critic: nn.Module, curriculum_config: dict) -> None:
+        print(f'Total training tokens: {human_format(self.total_tokens)}')
+
+    def get_total_tokens(self):
+        return self.total_tokens
+
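The bulk of the 0.2.0 addition is the `MrlTrainerCallback` family: hooks for curriculum-driven reinforcement learning with an actor and a critic, plus ready-made callbacks for logging, early stopping on evaluation reward, checkpointing the actor components (encoder, decoder, memory attention) and the critic, and counting generated tokens. A hedged wiring sketch follows, using only constructor parameters visible above; the trainer object and its `callbacks` argument are assumptions, not part of this diff:

# Sketch: combining the new MRL callbacks. Constructor arguments come from the
# __init__ signatures added above; the MRL trainer wiring itself is assumed.
mrl_callbacks = [
    MrlPrintCallback(),
    MrlGeneratedTokensCallback(steps_log_interval=100),
    MrlEarlyStoppageCallback(num_plateau_epochs=2, threshold=4.5),
    MrlModelSaveCallback(
        './mrl_checkpoints',
        max_keep=3,
        push_to_hub=False,
        display_exc_trace=True,
        use_ddp=False,
    ),
]
# trainer = MRLTrainer(..., callbacks=mrl_callbacks)  # hypothetical wiring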