rxnn 0.2.10__tar.gz → 0.2.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {rxnn-0.2.10 → rxnn-0.2.12}/PKG-INFO +1 -1
  2. {rxnn-0.2.10 → rxnn-0.2.12}/pyproject.toml +1 -1
  3. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/memory/stm.py +2 -1
  4. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/callbacks.py +1 -1
  5. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/dataset.py +1 -1
  6. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/mrl.py +3 -2
  7. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/rl.py +9 -9
  8. {rxnn-0.2.10 → rxnn-0.2.12}/LICENSE +0 -0
  9. {rxnn-0.2.10 → rxnn-0.2.12}/README.md +0 -0
  10. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/.DS_Store +0 -0
  11. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/__init__.py +0 -0
  12. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/experimental/__init__.py +0 -0
  13. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/experimental/attention.py +0 -0
  14. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/experimental/models.py +0 -0
  15. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/experimental/moe.py +0 -0
  16. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/memory/__init__.py +0 -0
  17. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/memory/attention.py +0 -0
  18. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/memory/norm.py +0 -0
  19. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/rxt/__init__.py +0 -0
  20. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/rxt/models.py +0 -0
  21. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/__init__.py +0 -0
  22. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/base.py +0 -0
  23. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/bml.py +0 -0
  24. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/models.py +0 -0
  25. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/reward.py +0 -0
  26. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/scheduler.py +0 -0
  27. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/tokenizer.py +0 -0
  28. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/utils.py +0 -0
  29. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/transformers/__init__.py +0 -0
  30. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/transformers/attention.py +0 -0
  31. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/transformers/ff.py +0 -0
  32. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/transformers/layers.py +0 -0
  33. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/transformers/mask.py +0 -0
  34. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/transformers/models.py +0 -0
  35. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/transformers/moe.py +0 -0
  36. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/transformers/positional.py +0 -0
  37. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/transformers/sampler.py +0 -0
  38. {rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/utils.py +0 -0
{rxnn-0.2.10 → rxnn-0.2.12}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rxnn
-Version: 0.2.10
+Version: 0.2.12
 Summary: RxNN: Reactive Neural Networks Platform
 License: Apache-2.0
 Keywords: deep-learning,ai,machine-learning
{rxnn-0.2.10 → rxnn-0.2.12}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "rxnn"
-version = "0.2.10"
+version = "0.2.12"
 description = "RxNN: Reactive Neural Networks Platform"
 
 license = "Apache-2.0"
{rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/memory/stm.py
@@ -42,7 +42,8 @@ class ShortTermMemory(nn.Module):
         self.memory[layer] = new_stm
 
     def update_all(self, new_stm: torch.Tensor):
-        self.memory.copy_(new_stm)
+        self.memory = new_stm
+        # self.memory.copy_(new_stm)
 
     def make_trainable(self):
         if not self.is_trainable:
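
Note on the ShortTermMemory.update_all change: 0.2.12 rebinds self.memory to the new tensor instead of copying values into the existing buffer in place (the old copy_ call is kept as a comment). A minimal sketch of the behavioural difference, using stand-in tensors rather than the actual rxnn class:

import torch

# Stand-in for the STM buffer; not the actual rxnn ShortTermMemory module.
memory = torch.zeros(2, 4)
new_stm = torch.randn(2, 4)

# 0.2.10 style: in-place copy_ writes values into the existing tensor object.
in_place = memory.clone()
in_place.copy_(new_stm)

# 0.2.12 style: plain reassignment rebinds the attribute to the new tensor,
# so the old buffer object is simply dropped.
memory = new_stm

print(torch.allclose(memory, in_place))  # True: same values, different objects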
{rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/callbacks.py
@@ -577,7 +577,7 @@ class MrlPrintCallback(MrlTrainerCallback):
 
     def on_critic_updated(self, actor: nn.Module, critic: nn.Module, epoch: int, step: int,
                           critic_loss: float) -> None:
-        print(f'Epoch {epoch} | Step {step} - updated policy loss {critic_loss}')
+        print(f'Epoch {epoch} | Step {step} - updated critic loss {critic_loss}')
 
     def on_training_end(self, actor: nn.Module, critic: nn.Module, curriculum_config: dict) -> None:
        print(f'Finished training for {curriculum_config["steps"]} steps in {curriculum_config["strategy"]} strategy.')
{rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/dataset.py
@@ -936,7 +936,7 @@ class MrlCurriculumDataset(Dataset):
         else:
             subset = self.episodes[split_point:-1] if not from_start else self.episodes[0:split_point]
             self.episodes = self.episodes[0:split_point] if not from_start else self.episodes[split_point:-1]
-        return self.__class__(subset, query_field=self.query_field, answer_field=self.answer_field, interactions_field=self.interactions_field, **kwargs)
+        return self.__class__(subset, tokenizer=self.tokenizer, query_field=self.query_field, answer_field=self.answer_field, interactions_field=self.interactions_field, **kwargs)
 
     def pre_tokenize(self, verbose: bool = False, log_interval: int = 10_000, keep_order: bool = False):
         """
{rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/mrl.py
@@ -328,7 +328,8 @@ class MRLTrainer:
 
             # 10. Update STM with generated response (except last interaction, it's not needed)
             if not is_last_interaction:
-                self.encode_and_update_stm(next_query, generated_answer)  # update with generated_answer on GPU
+                self.encode_and_update_stm(next_query,
+                                           generated_answer)  # update with generated_answer on GPU
 
             # 11. Store trajectory step
             trajectory: MrlTrajectoryStep = {
@@ -438,7 +439,7 @@ class MRLTrainer:
             critic_losses.append(critic_loss)
 
         # 7. Calculate mean loss for epoch callbacks
-        critic_mean_loss = torch.stack(critic_losses).mean().item()
+        critic_mean_loss = torch.tensor(critic_losses).mean().item()
 
         return critic_mean_loss
 
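Note on the critic mean-loss change: assuming critic_losses holds plain Python floats (e.g. values already extracted with .item()), torch.stack cannot be applied to them, while torch.tensor can. A minimal sketch:

import torch

# Assumed: the accumulated critic losses are plain floats, not tensors.
critic_losses = [0.52, 0.47, 0.44]

# torch.stack expects a sequence of tensors and raises TypeError on floats:
# torch.stack(critic_losses)  # TypeError: expected Tensor as element 0 ...

# torch.tensor builds a 1-D tensor from the float list instead:
critic_mean_loss = torch.tensor(critic_losses).mean().item()
print(critic_mean_loss)  # ~0.4767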
{rxnn-0.2.10 → rxnn-0.2.12}/src/rxnn/training/rl.py
@@ -54,16 +54,16 @@ class PPOAlgorithm(RlAlgorithm):
 
         return policy_loss
 
-    def _compute_gae(self, rewards: torch.Tensor, values: torch.Tensor, next_value: torch.Tensor) -> torch.Tensor:
-        advantages = torch.zeros_like(rewards, device=values.device)
-        last_advantage = 0
-        for t in reversed(range(rewards.size(0))):
-            delta = rewards[t] + self.gae_gamma * next_value - values[t]
-            advantages[t] = delta + self.gae_gamma * self.gae_lambda * last_advantage
-            last_advantage = advantages[t]
-        return advantages
+    # def _compute_gae(self, rewards: torch.Tensor, values: torch.Tensor, next_value: torch.Tensor) -> torch.Tensor:
+    #     advantages = torch.zeros_like(rewards, device=values.device)
+    #     last_advantage = 0
+    #     for t in reversed(range(rewards.size(0))):
+    #         delta = rewards[t] + self.gae_gamma * next_value - values[t]
+    #         advantages[t] = delta + self.gae_gamma * self.gae_lambda * last_advantage
+    #         last_advantage = advantages[t]
+    #     return advantages
 
     def calculate_advantages(self, rewards: torch.Tensor, values: torch.Tensor) -> torch.Tensor:
-        advantages = self._compute_gae(rewards, values[:-1], values[-1])
+        advantages = rewards - values
        normalized_advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
        return normalized_advantages
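
Note on the advantage change: 0.2.12 comments out the GAE helper and estimates advantages as rewards - values before normalization. A minimal sketch contrasting the two estimators on toy data (not the rxnn PPOAlgorithm class itself):

import torch

# Toy per-step rewards and value estimates; gae_gamma/gae_lambda are assumed
# hyperparameters matching the commented-out helper above.
gae_gamma, gae_lambda = 0.99, 0.95
rewards = torch.tensor([1.0, 0.5, 0.2])
values = torch.tensor([0.8, 0.6, 0.3])   # V(s_t) for each step
next_value = torch.tensor(0.0)           # bootstrap value after the last step

def compute_gae(rewards, values, next_value):
    # Same recursion as the commented-out _compute_gae (note that it uses the
    # single next_value at every step, as written in the original helper).
    advantages = torch.zeros_like(rewards)
    last_advantage = 0.0
    for t in reversed(range(rewards.size(0))):
        delta = rewards[t] + gae_gamma * next_value - values[t]
        advantages[t] = delta + gae_gamma * gae_lambda * last_advantage
        last_advantage = advantages[t]
    return advantages

gae_adv = compute_gae(rewards, values, next_value)
simple_adv = rewards - values            # estimator used by 0.2.12

for adv in (gae_adv, simple_adv):
    normalized = (adv - adv.mean()) / (adv.std() + 1e-8)
    print(normalized)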