rxnn 0.2.44__py3-none-any.whl → 0.2.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxnn/memory/attention.py +6 -2
- rxnn/rxt/models.py +3 -1
- rxnn/training/callbacks.py +6 -6
- rxnn/training/mrl.py +14 -2
- {rxnn-0.2.44.dist-info → rxnn-0.2.46.dist-info}/METADATA +1 -1
- {rxnn-0.2.44.dist-info → rxnn-0.2.46.dist-info}/RECORD +8 -8
- {rxnn-0.2.44.dist-info → rxnn-0.2.46.dist-info}/LICENSE +0 -0
- {rxnn-0.2.44.dist-info → rxnn-0.2.46.dist-info}/WHEEL +0 -0
rxnn/memory/attention.py
CHANGED
@@ -11,6 +11,7 @@ class StmMemoryAttention(nn.Module):
             use_gated_residual: bool = False,
             per_slot_gate: bool = False,
             init_gate: float = 0.0,
+            use_dynamic_gate: bool = False,
             *args,
             **kwargs
     ):
@@ -22,8 +23,10 @@ class StmMemoryAttention(nn.Module):
         self.num_layers = len(attention_layers)
         self.use_gated_residual = use_gated_residual
         self.per_slot_gate = per_slot_gate
+        self.use_dynamic_gate = use_dynamic_gate
         if self.use_gated_residual:
-
+            gate_shape = (self.num_layers, self.stm.stm_size, 1) if self.per_slot_gate else (self.num_layers,)
+            self.gate = nn.Parameter(torch.full(gate_shape, init_gate))
 
     def update_max_len(self, max_seq_len: int):
         for i in range(self.num_layers):
@@ -44,7 +47,8 @@ class StmMemoryAttention(nn.Module):
             new_layer_stm = self.attention_layers[i](normalized_layer_stm, encoded_layer_data, encoded_layer_data, mask=mask)
             if self.use_gated_residual:
                 # gated residual
-
+                gate_input = self.gate[i] * (new_layer_stm + layer_stm) if self.use_dynamic_gate else self.gate[i]
+                layer_gate = torch.sigmoid(gate_input)
                 new_stm[i] = layer_gate * new_layer_stm + (1 - layer_gate) * layer_stm
             else:
                 new_stm[i] = new_layer_stm + layer_stm  # residual
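The new `use_dynamic_gate` flag switches the gated residual from a purely learned gate to one conditioned on the current memory update. Below is a minimal, self-contained sketch of both modes with illustrative shapes; the actual module applies this per layer across the STM slots.

```python
import torch

# Illustrative shapes; the real StmMemoryAttention uses its own STM slot layout.
num_slots, dim = 8, 64
gate = torch.zeros(num_slots, 1)             # per-slot gate, init_gate = 0.0 -> sigmoid(0) = 0.5
layer_stm = torch.randn(num_slots, dim)      # previous short-term memory for one layer
new_layer_stm = torch.randn(num_slots, dim)  # memory-attention output for the same layer

use_dynamic_gate = True
if use_dynamic_gate:
    # dynamic gate: modulated by the actual content of this update
    gate_input = gate * (new_layer_stm + layer_stm)
else:
    # static gate: a learned constant (per layer, and per slot if per_slot_gate=True)
    gate_input = gate

layer_gate = torch.sigmoid(gate_input)
updated_stm = layer_gate * new_layer_stm + (1 - layer_gate) * layer_stm
```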
rxnn/rxt/models.py
CHANGED
@@ -253,6 +253,7 @@ class RxTAlphaMemoryAttention(nn.Module, PyTorchModelHubMixin, license="apache-2
             use_gated_residual: bool = False,
             residual_per_slot_gate: bool = False,
             residual_init_gate: float = 0.0,
+            use_dynamic_residual_gate: bool = False,
             **kwargs,
     ):
         super(RxTAlphaMemoryAttention, self).__init__(**kwargs)
@@ -281,7 +282,8 @@ class RxTAlphaMemoryAttention(nn.Module, PyTorchModelHubMixin, license="apache-2
         attention_layers = nn.ModuleList([att_init() for _ in range(num_layers)])
         self.model = StmMemoryAttention(
             stm, attention_layers, memory_norm_layers,
-            use_gated_residual=use_gated_residual, per_slot_gate=residual_per_slot_gate,
+            use_gated_residual=use_gated_residual, per_slot_gate=residual_per_slot_gate,
+            init_gate=residual_init_gate, use_dynamic_gate=use_dynamic_residual_gate,
         )
 
     def freeze(self):
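For reference, the wrapper-level keyword arguments map onto `StmMemoryAttention` as sketched below; only the residual-gate kwargs from this diff are shown, with other required constructor arguments omitted and values chosen for illustration.

```python
# Hypothetical values; only the residual-gate kwargs from this diff are shown.
rxt_kwargs = dict(
    use_gated_residual=True,
    residual_per_slot_gate=True,
    residual_init_gate=0.0,
    use_dynamic_residual_gate=True,  # new in 0.2.46
)

# How RxTAlphaMemoryAttention forwards them to StmMemoryAttention:
stm_attention_kwargs = dict(
    use_gated_residual=rxt_kwargs['use_gated_residual'],
    per_slot_gate=rxt_kwargs['residual_per_slot_gate'],
    init_gate=rxt_kwargs['residual_init_gate'],
    use_dynamic_gate=rxt_kwargs['use_dynamic_residual_gate'],
)
```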
rxnn/training/callbacks.py
CHANGED
@@ -533,7 +533,7 @@ class MrlTrainerCallback:
                   reward: float) -> None:
         pass
 
-    def on_reward(self, actor: nn.Module,
+    def on_reward(self, actor: nn.Module, rewards: list[float], generated: str, reference: str, saved_data: str, eval_mode: bool) -> None:
         pass
 
     def on_update_epoch_start(self, actor: nn.Module, critic: nn.Module, global_epoch: int, update_epoch: int) -> None:
@@ -563,20 +563,20 @@ class MrlPrintCallback(MrlTrainerCallback):
     def on_epoch_start(self, actor: nn.Module, epoch: int, stage_epochs: int, curriculum_config: dict,
                        global_epoch: int, global_epochs: int) -> None:
         print(
-            f'Starting epoch {epoch}/{stage_epochs} (stage) | {global_epoch}/{global_epochs} (global) for {curriculum_config["steps"]} steps in {curriculum_config["strategy"]} strategy.')
+            f'Starting epoch {epoch}/{stage_epochs - 1} (stage) | {global_epoch}/{global_epochs} (global) for {curriculum_config["steps"]} steps in {curriculum_config["strategy"]} strategy.')
 
     def on_epoch_end(self, actor: nn.Module, epoch: int, stage_epochs: int, policy_loss: float,
                      critic_loss: float, global_epoch: int, global_epochs: int) -> None:
-        print(f'Finished epoch {epoch}/{stage_epochs} (stage) | {global_epoch}/{global_epochs} (global)')
+        print(f'Finished epoch {epoch}/{stage_epochs - 1} (stage) | {global_epoch}/{global_epochs} (global)')
         print(f'Policy mean loss: {policy_loss} | Critic mean loss: {critic_loss}')
 
     def on_episode_collected(self, actor: nn.Module, batch_idx: int, episode_trajectories: list[dict],
                              reward: float) -> None:
         print(f'Collected {batch_idx} episode | mean reward {reward}')
 
-    def on_reward(self, actor: nn.Module,
+    def on_reward(self, actor: nn.Module, rewards: list[float], generated: dict[str, torch.Tensor],
                   reference: dict[str, torch.Tensor], saved_data: dict[str, torch.Tensor], eval_mode: bool) -> None:
-        print(f"{'Eval' if eval_mode else 'Train'} |
+        print(f"{'Eval' if eval_mode else 'Train'} | Mean reward: {sum(rewards) / len(rewards)} | All collected rewards: {rewards}")
 
     def on_update_epoch_start(self, actor: nn.Module, critic: nn.Module, global_epoch: int, update_epoch: int) -> None:
         print(f'Epoch {global_epoch} | Starting update epoch {update_epoch}')
@@ -780,7 +780,7 @@ class MrlGeneratedTokensCallback(MrlTrainerCallback):
         self.steps_log_interval = steps_log_interval
         self.step = 0
 
-    def on_reward(self, actor: nn.Module,
+    def on_reward(self, actor: nn.Module, rewards: list[float], generated: dict[str, torch.Tensor],
                   reference: dict[str, torch.Tensor], saved_data: dict[str, torch.Tensor], eval_mode: bool) -> None:
         self.step += 1
         attention_mask = generated['attention_mask']
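Custom callbacks that override `on_reward` need the expanded signature shown above. Below is a minimal sketch of a subclass using it; `RewardLoggingCallback` is a hypothetical example, assuming the base class import from `rxnn.training.callbacks`.

```python
import torch
import torch.nn as nn

from rxnn.training.callbacks import MrlTrainerCallback


class RewardLoggingCallback(MrlTrainerCallback):
    """Sketch: collect per-step rewards with the 0.2.46 on_reward signature."""

    def __init__(self):
        self.train_rewards: list[float] = []
        self.eval_rewards: list[float] = []

    def on_reward(self, actor: nn.Module, rewards: list[float], generated: dict[str, torch.Tensor],
                  reference: dict[str, torch.Tensor], saved_data: dict[str, torch.Tensor],
                  eval_mode: bool) -> None:
        # keep train and eval rewards separate, mirroring the eval_mode flag
        (self.eval_rewards if eval_mode else self.train_rewards).extend(rewards)
```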
rxnn/training/mrl.py
CHANGED
@@ -91,6 +91,7 @@ class MrlTrajectoryEpisode(TypedDict):
     reset_stm: bool
     steps: list[MrlTrajectoryStep]
 
+OptimField: TypeAlias = Literal['lr', 'critic_lr', 'weight_decay', 'critic_weight_decay', 'separate_memory_lr', 'memory_lr']
 
 class MRLTrainer:
     def __init__(
@@ -981,8 +982,19 @@ class MRLTrainer:
         self.reward = config.get('reward_model', self.shared_reward_model)  # MRL Reward Model for curriculum stage
         self.update_epochs = config.get('update_epochs', self.shared_update_epochs)  # Internal update epochs
         self.freeze_embeddings = config.get('freeze_embeddings', self.shared_freeze_embeddings)
-
-
+
+
+
+        def has_param(field: OptimField) -> bool:
+            return field in config and config[field] is not None
+
+        optim_params: list[OptimField] = ['lr', 'critic_lr', 'weight_decay', 'critic_weight_decay']
+
+        has_any_optim_param = any(
+            has_param(field) for field in optim_params
+        ) or (has_param('separate_memory_lr') and config['separate_memory_lr'] and has_param('memory_lr'))
+
+        if has_any_optim_param:
             if config.get('separate_memory_lr', False):
                 self.optim_config = {
                     'lr': config.get('lr', self.base_optim_config['lr']),
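The added block only rebuilds the per-stage optimizer config when the curriculum stage actually overrides one of the optimizer fields, or sets `separate_memory_lr` together with `memory_lr`. A standalone sketch of that check follows, run against an illustrative stage config dict (the field values are assumptions, not taken from the package).

```python
from typing import Literal, TypeAlias

OptimField: TypeAlias = Literal['lr', 'critic_lr', 'weight_decay', 'critic_weight_decay',
                                'separate_memory_lr', 'memory_lr']

# Illustrative curriculum stage config; only 'lr' is overridden here.
config = {'steps': 4, 'strategy': 'multi_step', 'lr': 2e-4}

def has_param(field: OptimField) -> bool:
    return field in config and config[field] is not None

optim_params: list[OptimField] = ['lr', 'critic_lr', 'weight_decay', 'critic_weight_decay']

has_any_optim_param = any(
    has_param(field) for field in optim_params
) or (has_param('separate_memory_lr') and config['separate_memory_lr'] and has_param('memory_lr'))

print(has_any_optim_param)  # True -> the trainer rebuilds its optimizer config for this stage
```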
{rxnn-0.2.44.dist-info → rxnn-0.2.46.dist-info}/RECORD
CHANGED
@@ -5,19 +5,19 @@ rxnn/experimental/attention.py,sha256=46qwZLJuZMpIBrZ-r9DaQEPPmmZkO464C3Tkm_Mq-c
 rxnn/experimental/models.py,sha256=foBo0n0ufvBnfIdJomiEg3CuSOiWSt-q5ako7vzYxx4,4888
 rxnn/experimental/moe.py,sha256=jHZ1QhpWiVQOswVpFmuH7b2IUOPf0Uuf-I2Ddwsd7Us,6140
 rxnn/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rxnn/memory/attention.py,sha256=
+rxnn/memory/attention.py,sha256=sXh6f_iOpEYCaqyG-QVp_C_A9IF0QcXTi3hW5G8FCwA,2630
 rxnn/memory/norm.py,sha256=E98jOQEuIOFFhlkvS8s4fFN-D4tLO6vaOqnObv1oVmA,6592
 rxnn/memory/stm.py,sha256=jv57gsH9XW19sLbxpRDqsp1yfsii_4Ef4Ncr_ztk-i4,3937
 rxnn/rxt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rxnn/rxt/models.py,sha256=
+rxnn/rxt/models.py,sha256=4MbCL4xGY3ceewZQmopjmwAyLQS92L6KLOPqaW7-Fho,14673
 rxnn/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rxnn/training/base.py,sha256=CqaArEZYOdH64nmKfx28U3GI46TzO4oNkjf_hrF23Cw,11835
 rxnn/training/bml.py,sha256=hw6gLpLkGvqLzxIvBg4MvCc5r8cHpEm2RDyh7nH6CtE,16914
-rxnn/training/callbacks.py,sha256=
+rxnn/training/callbacks.py,sha256=RPW3Lisi31VJvoYyZeAF3dQzttrceDQDsZ6G5Xl09HM,35933
 rxnn/training/dataset.py,sha256=7hTilFWPpqUEc6zNcMqBPjxFKxCfvTKKF3E8tVlwccQ,51250
 rxnn/training/ddp.py,sha256=VsNBjn3cY-uUj8hbsW7oKvb0_ZKnXnJ2KgObm-Mr9i4,836
 rxnn/training/models.py,sha256=tqABOt_xEcWbZNEW2I2Jt-3eyaGICK011zILwuTk6Zc,9082
-rxnn/training/mrl.py,sha256=
+rxnn/training/mrl.py,sha256=L4G7xSPlxsymvNhvsSloCpaqYjOXxEm7GmKilM_Ojvc,59809
 rxnn/training/reward.py,sha256=B7nerPk9eNAv2i7umtNF88tVQVwijNNrchIrEITGHKk,11623
 rxnn/training/rl.py,sha256=q4NzIZAmXRHVToT13IHrPTtEikWQUvT0NO0IjApjAO8,6171
 rxnn/training/scheduler.py,sha256=LcjU35mEwz2U5x3U6tLfeeYlBqMxbFSxYzJYuXkWbSY,1408
@@ -33,7 +33,7 @@ rxnn/transformers/moe.py,sha256=j6jEx6Ip0zttlUZKKn82azxo95lkLZs-H2GLSMD88hY,5859
 rxnn/transformers/positional.py,sha256=1PjcJybUzeQlIKJI4tahAGZcYgCRCL0otxs7mpsNuzM,4410
 rxnn/transformers/sampler.py,sha256=t6iiQTdLQ0TakUWnnhKkb5DKF2F_9-thXHBydDF3fxg,17389
 rxnn/utils.py,sha256=ihb6OTyDtPiocB_lOvnq7eOkjjpCkgs8wxvXUBNQ7mM,996
-rxnn-0.2.
-rxnn-0.2.
-rxnn-0.2.
-rxnn-0.2.
+rxnn-0.2.46.dist-info/LICENSE,sha256=C8coDFIUYuOcke4JLPwTqahQUCyXyGq6WOaigOkx8tY,11275
+rxnn-0.2.46.dist-info/METADATA,sha256=hpTQT4p75cKrAaGOz_56gCBm1rT_y-Nr1TI9Mhv6wv0,25960
+rxnn-0.2.46.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+rxnn-0.2.46.dist-info/RECORD,,
{rxnn-0.2.44.dist-info → rxnn-0.2.46.dist-info}/LICENSE
File without changes

{rxnn-0.2.44.dist-info → rxnn-0.2.46.dist-info}/WHEEL
File without changes