rxnn 0.2.36__tar.gz → 0.2.38__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {rxnn-0.2.36 → rxnn-0.2.38}/PKG-INFO +1 -1
  2. {rxnn-0.2.36 → rxnn-0.2.38}/pyproject.toml +1 -1
  3. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/base.py +2 -1
  4. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/bml.py +8 -0
  5. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/rl.py +7 -8
  6. {rxnn-0.2.36 → rxnn-0.2.38}/LICENSE +0 -0
  7. {rxnn-0.2.36 → rxnn-0.2.38}/README.md +0 -0
  8. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/.DS_Store +0 -0
  9. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/__init__.py +0 -0
  10. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/experimental/__init__.py +0 -0
  11. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/experimental/attention.py +0 -0
  12. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/experimental/models.py +0 -0
  13. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/experimental/moe.py +0 -0
  14. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/memory/__init__.py +0 -0
  15. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/memory/attention.py +0 -0
  16. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/memory/norm.py +0 -0
  17. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/memory/stm.py +0 -0
  18. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/rxt/__init__.py +0 -0
  19. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/rxt/models.py +0 -0
  20. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/__init__.py +0 -0
  21. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/callbacks.py +0 -0
  22. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/dataset.py +0 -0
  23. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/ddp.py +0 -0
  24. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/models.py +0 -0
  25. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/mrl.py +0 -0
  26. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/reward.py +0 -0
  27. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/scheduler.py +0 -0
  28. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/tokenizer.py +0 -0
  29. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/utils.py +0 -0
  30. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/transformers/__init__.py +0 -0
  31. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/transformers/attention.py +0 -0
  32. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/transformers/ff.py +0 -0
  33. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/transformers/layers.py +0 -0
  34. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/transformers/mask.py +0 -0
  35. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/transformers/models.py +0 -0
  36. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/transformers/moe.py +0 -0
  37. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/transformers/positional.py +0 -0
  38. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/transformers/sampler.py +0 -0
  39. {rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/utils.py +0 -0
{rxnn-0.2.36 → rxnn-0.2.38}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rxnn
-Version: 0.2.36
+Version: 0.2.38
 Summary: RxNN: Reactive Neural Networks Platform
 License: Apache-2.0
 Keywords: deep-learning,ai,machine-learning
{rxnn-0.2.36 → rxnn-0.2.38}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "rxnn"
-version = "0.2.36"
+version = "0.2.38"
 description = "RxNN: Reactive Neural Networks Platform"
 
 license = "Apache-2.0"
{rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/base.py
@@ -82,6 +82,7 @@ class BaseTrainer(ABC):
             dataset: torch.utils.data.Dataset = None,
             optimizer: torch.optim.Optimizer = None,
             scheduler: torch.optim.lr_scheduler.LRScheduler = None,
+            ddp_find_unused_parameters: bool = False,
     ) -> None:
         self.is_running = True
         if dataset is None:
@@ -94,7 +95,7 @@ class BaseTrainer(ABC):
         if self.use_ddp:
             rank, world_size = get_os_ddp_config()
             dist.init_process_group(backend='nccl', rank=rank, world_size=world_size)
-            self.model = DistributedDataParallel(self.model, device_ids=[self.device.index])
+            self.model = DistributedDataParallel(self.model, device_ids=[self.device.index], find_unused_parameters=ddp_find_unused_parameters)
         train_sampler = torch.utils.data.DistributedSampler(dataset, shuffle=True)
         dataloader = torch.utils.data.DataLoader(
             dataset,
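
Note: the base.py change threads a new ddp_find_unused_parameters flag from the BaseTrainer constructor into PyTorch's DistributedDataParallel wrapper. Below is a minimal usage sketch of that PyTorch option, not code from the package; the wrap_for_ddp helper name is hypothetical. Setting find_unused_parameters=True lets DDP tolerate parameters that receive no gradient in a given step (for example, experts a MoE router did not select), at the cost of an extra graph traversal per iteration.

import torch
from torch.nn.parallel import DistributedDataParallel

def wrap_for_ddp(model: torch.nn.Module, device: torch.device,
                 ddp_find_unused_parameters: bool = False) -> DistributedDataParallel:
    # Assumes torch.distributed.init_process_group() has already been called,
    # as BaseTrainer does with the 'nccl' backend before wrapping the model.
    return DistributedDataParallel(
        model.to(device),
        device_ids=[device.index],
        find_unused_parameters=ddp_find_unused_parameters,
    )
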
{rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/bml.py
@@ -51,6 +51,10 @@ class MLMTrainer(BaseTrainer):
         model = next(self.model.children()) if isinstance(self.model, DistributedDataParallel) else self.model
 
         router_loss = model.encoder.model.moe_router_loss()
+
+        if self.use_ddp:
+            router_loss = distributed_mean(router_loss)
+
         loss = main_loss + self.moe_aux_loss_scale * router_loss
 
         if self.writer is not None:
@@ -152,6 +156,10 @@ class AutoregressiveTrainer(BaseTrainer):
         model = next(self.model.children()) if isinstance(self.model, DistributedDataParallel) else self.model
 
         router_loss = model.model.moe_router_loss()
+
+        if self.use_ddp:
+            router_loss = distributed_mean(router_loss)
+
         loss = main_loss + self.moe_aux_loss_scale * router_loss
 
         if self.writer is not None:
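
Note: both bml.py hunks average the MoE router auxiliary loss across ranks before folding it into the total loss, so every DDP replica applies the same auxiliary term. The distributed_mean helper comes from the package itself and its implementation is not shown in this diff; the sketch below is only an assumption of what such a helper typically looks like, built on an all-reduce.

import torch
import torch.distributed as dist

def distributed_mean(value: torch.Tensor) -> torch.Tensor:
    # Assumed sketch: average a (scalar) loss tensor across all DDP ranks.
    if not (dist.is_available() and dist.is_initialized()):
        return value  # single-process fallback
    value = value.clone()
    dist.all_reduce(value, op=dist.ReduceOp.SUM)  # sum the value over ranks
    return value / dist.get_world_size()          # divide to get the mean
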
{rxnn-0.2.36 → rxnn-0.2.38}/src/rxnn/training/rl.py
@@ -112,21 +112,20 @@ class PPOAlgorithm(RlAlgorithm):
         advantages = torch.zeros_like(rewards, device=rewards.device)
         last_advantage = 0
         next_value = last_value
-        next_done = last_done
+        next_done = last_done.float()
         dones = dones.float()
 
         for t in reversed(range(trajectory_len)):
-            if t == trajectory_len - 1:
-                # For the last step, use the provided last_value
-                delta = rewards[t] + self.gae_gamma * next_value * (1 - next_done) - values[t]
-            else:
-                # For other steps, use the next value in the trajectory
-                delta = rewards[t] + self.gae_gamma * values[t + 1] * (1 - dones[t + 1]) - values[t]
-
+            # Calculate delta from rewards, stored next_value, masked by stored next_done, and values
+            delta = rewards[t] + self.gae_gamma * next_value * (1 - next_done) - values[t]
+            # Calculate advantages based on delta, gamma/lambda factors and last advantage, masked by current done flags
             advantages[t] = delta + self.gae_gamma * self.gae_lambda * (1 - dones[t]) * last_advantage
+            # Store current step data as last_advantage, next_done and next_value, for the next iteration step
             last_advantage = advantages[t]
             next_done = dones[t]
+            next_value = values[t]
 
+        # Calculate reference returns, based on advantages and values, and return them with advantages for critic update
         returns = advantages + values
         return advantages, returns
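
Note: the rl.py hunk simplifies PPO's generalized advantage estimation (GAE): every step now bootstraps from the carried next_value and next_done (updated at the end of each iteration) instead of branching on the final step, and last_done is cast to float so the masking matches dones. A self-contained sketch of the same recursion, with illustrative argument names and defaults that are not taken from rxnn:

import torch

def compute_gae(rewards, values, dones, last_value, last_done,
                gamma: float = 0.99, lam: float = 0.95):
    # rewards, values, dones: tensors of shape [trajectory_len, ...]
    # last_value, last_done: bootstrap value and done flag for the step after the trajectory
    trajectory_len = rewards.size(0)
    advantages = torch.zeros_like(rewards)
    last_advantage = 0.0
    next_value, next_done = last_value, last_done.float()
    dones = dones.float()
    for t in reversed(range(trajectory_len)):
        # TD error bootstrapped from the next step's value, masked if that step ended an episode
        delta = rewards[t] + gamma * next_value * (1 - next_done) - values[t]
        advantages[t] = delta + gamma * lam * (1 - dones[t]) * last_advantage
        # Carry this step's advantage, done flag and value into the next (earlier) iteration
        last_advantage = advantages[t]
        next_done = dones[t]
        next_value = values[t]
    returns = advantages + values  # critic regression targets
    return advantages, returns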