rxnn 0.2.9__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {rxnn-0.2.9 → rxnn-0.2.11}/PKG-INFO +1 -1
  2. {rxnn-0.2.9 → rxnn-0.2.11}/pyproject.toml +1 -1
  3. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/memory/attention.py +3 -2
  4. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/callbacks.py +1 -1
  5. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/dataset.py +1 -1
  6. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/mrl.py +3 -2
  7. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/reward.py +2 -2
  8. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/rl.py +9 -9
  9. {rxnn-0.2.9 → rxnn-0.2.11}/LICENSE +0 -0
  10. {rxnn-0.2.9 → rxnn-0.2.11}/README.md +0 -0
  11. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/.DS_Store +0 -0
  12. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/__init__.py +0 -0
  13. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/experimental/__init__.py +0 -0
  14. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/experimental/attention.py +0 -0
  15. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/experimental/models.py +0 -0
  16. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/experimental/moe.py +0 -0
  17. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/memory/__init__.py +0 -0
  18. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/memory/norm.py +0 -0
  19. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/memory/stm.py +0 -0
  20. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/rxt/__init__.py +0 -0
  21. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/rxt/models.py +0 -0
  22. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/__init__.py +0 -0
  23. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/base.py +0 -0
  24. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/bml.py +0 -0
  25. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/models.py +0 -0
  26. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/scheduler.py +0 -0
  27. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/tokenizer.py +0 -0
  28. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/training/utils.py +0 -0
  29. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/transformers/__init__.py +0 -0
  30. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/transformers/attention.py +0 -0
  31. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/transformers/ff.py +0 -0
  32. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/transformers/layers.py +0 -0
  33. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/transformers/mask.py +0 -0
  34. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/transformers/models.py +0 -0
  35. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/transformers/moe.py +0 -0
  36. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/transformers/positional.py +0 -0
  37. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/transformers/sampler.py +0 -0
  38. {rxnn-0.2.9 → rxnn-0.2.11}/src/rxnn/utils.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rxnn
-Version: 0.2.9
+Version: 0.2.11
 Summary: RxNN: Reactive Neural Networks Platform
 License: Apache-2.0
 Keywords: deep-learning,ai,machine-learning
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "rxnn"
-version = "0.2.9"
+version = "0.2.11"
 description = "RxNN: Reactive Neural Networks Platform"
 
 license = "Apache-2.0"
@@ -36,7 +36,8 @@ class StmMemoryAttention(nn.Module):
 normalized_layer_stm = self.memory_norm_layers[i](layer_stm)
 new_layer_stm = self.attention_layers[i](normalized_layer_stm, encoded_layer_data, encoded_layer_data, mask=mask)
 # self.stm.update_layer(i, new_layer_stm + layer_stm)
-new_stm[i] = new_layer_stm + layer_stm # residual
-self.stm.update_all(new_stm)
+final_layer_stm = new_layer_stm + layer_stm # residual
+self.stm.update_layer(i, final_layer_stm)
+# self.stm.update_all(new_stm)
 return self.stm.memory
 
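The hunk above (src/rxnn/memory/attention.py) switches StmMemoryAttention from buffering every layer's result and calling update_all once to writing each layer back immediately with stm.update_layer. A minimal sketch of that per-layer pattern, using toy stand-ins (the shapes, the LayerNorm/MultiheadAttention modules and the update_layer helper below are illustrative assumptions, not the real RxNN classes):

import torch

# Toy stand-ins for the short-term memory update loop (illustration only).
num_layers, stm_size, dim = 2, 4, 8
memory = torch.zeros(num_layers, stm_size, dim)    # one STM slot per layer
encoded = torch.randn(num_layers, stm_size, dim)   # plays the role of encoded_layer_data
norms = [torch.nn.LayerNorm(dim) for _ in range(num_layers)]
attns = [torch.nn.MultiheadAttention(dim, num_heads=1, batch_first=True) for _ in range(num_layers)]

def update_layer(i: int, new_layer: torch.Tensor) -> None:
    # Per-layer write-back, mirroring self.stm.update_layer(i, ...) in 0.2.11.
    memory[i] = new_layer.detach()

for i in range(num_layers):
    layer_stm = memory[i].unsqueeze(0)             # (1, stm_size, dim)
    normalized = norms[i](layer_stm)
    kv = encoded[i].unsqueeze(0)
    new_layer_stm, _ = attns[i](normalized, kv, kv)
    # Residual connection as in the diff, then update this single layer right away
    # instead of collecting all layers in a buffer and calling update_all at the end.
    update_layer(i, (new_layer_stm + layer_stm).squeeze(0))

The residual add (new_layer_stm + layer_stm) is unchanged between versions; only where the result is stored differs.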
@@ -577,7 +577,7 @@ class MrlPrintCallback(MrlTrainerCallback):
 
 def on_critic_updated(self, actor: nn.Module, critic: nn.Module, epoch: int, step: int,
 critic_loss: float) -> None:
-print(f'Epoch {epoch} | Step {step} - updated policy loss {critic_loss}')
+print(f'Epoch {epoch} | Step {step} - updated critic loss {critic_loss}')
 
 def on_training_end(self, actor: nn.Module, critic: nn.Module, curriculum_config: dict) -> None:
 print(f'Finished training for {curriculum_config["steps"]} steps in {curriculum_config["strategy"]} strategy.')
@@ -936,7 +936,7 @@ class MrlCurriculumDataset(Dataset):
 else:
 subset = self.episodes[split_point:-1] if not from_start else self.episodes[0:split_point]
 self.episodes = self.episodes[0:split_point] if not from_start else self.episodes[split_point:-1]
-return self.__class__(subset, query_field=self.query_field, answer_field=self.answer_field, interactions_field=self.interactions_field, **kwargs)
+return self.__class__(subset, tokenizer=self.tokenizer, query_field=self.query_field, answer_field=self.answer_field, interactions_field=self.interactions_field, **kwargs)
 
 def pre_tokenize(self, verbose: bool = False, log_interval: int = 10_000, keep_order: bool = False):
 """
@@ -328,7 +328,8 @@ class MRLTrainer:
 
 # 10. Update STM with generated response (except last interaction, it's not needed)
 if not is_last_interaction:
-self.encode_and_update_stm(next_query, generated_answer) # update with generated_answer on GPU
+self.encode_and_update_stm(next_query,
+generated_answer) # update with generated_answer on GPU
 
 # 11. Store trajectory step
 trajectory: MrlTrajectoryStep = {
@@ -438,7 +439,7 @@ class MRLTrainer:
 critic_losses.append(critic_loss)
 
 # 7. Calculate mean loss for epoch callbacks
-critic_mean_loss = torch.stack(critic_losses).mean().item()
+critic_mean_loss = torch.tensor(critic_losses).mean().item()
 
 return critic_mean_loss
 
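The epoch summary above (src/rxnn/training/mrl.py) now averages with torch.tensor(critic_losses) instead of torch.stack(critic_losses). A short sketch of the difference, assuming the trainer appends plain Python floats (e.g. collected via loss.item()) to critic_losses:

import torch

critic_losses = [0.91, 0.87, 0.84]   # plain floats, not tensors

# torch.stack expects a sequence of tensors and fails on a list of floats;
# torch.tensor builds a 1-D float tensor from the list directly.
critic_mean_loss = torch.tensor(critic_losses).mean().item()
print(critic_mean_loss)  # ~0.873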
@@ -103,9 +103,9 @@ class MrlRewardModel:
 if mode == MrlRewardMode.STANDARD or mode == MrlRewardMode.LONG_RANGE:
 bleu = self.batch_bleu(generated['input_ids'], reference['input_ids'], saved_data['input_ids'])
 cosine = self.batch_cosine(generated['input_ids'], reference['input_ids'], saved_data['input_ids'])
-return (self.bleu_factor * torch.tensor(bleu) + self.cos_factor * cosine).tolist()
+return (self.bleu_factor * torch.tensor(bleu, device=self.device) + self.cos_factor * cosine).tolist()
 else:
 bleu = self.batch_bleu(generated['input_ids'], reference['input_ids'], saved_data['input_ids'])
 cosine = self.negative_cosine(generated['input_ids'], reference['input_ids'], saved_data['input_ids'])
-return (self.neg_bleu_factor * torch.tensor(bleu) + self.neg_cos_factor * cosine).tolist()
+return (self.neg_bleu_factor * torch.tensor(bleu, device=self.device) + self.neg_cos_factor * cosine).tolist()
 
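Both return statements above (src/rxnn/training/reward.py) now build the BLEU tensor directly on self.device. A minimal sketch of the failure mode this avoids, assuming the BLEU scores arrive as plain Python floats while the cosine scores already live on the model device (the 0.5 factors below are placeholders for the real bleu/cos factors):

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

bleu = [0.42, 0.37]                                  # Python floats, CPU-only by default
cosine = torch.tensor([0.81, 0.76], device=device)   # already on the model device

# Without device=..., torch.tensor(bleu) is created on the CPU; when `device` is a
# GPU, adding it to `cosine` raises a device-mismatch RuntimeError.
reward = (0.5 * torch.tensor(bleu, device=device) + 0.5 * cosine).tolist()
print(reward)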
@@ -54,16 +54,16 @@ class PPOAlgorithm(RlAlgorithm):
 
 return policy_loss
 
-def _compute_gae(self, rewards: torch.Tensor, values: torch.Tensor, next_value: torch.Tensor) -> torch.Tensor:
-advantages = torch.zeros_like(rewards, device=values.device)
-last_advantage = 0
-for t in reversed(range(rewards.size(0))):
-delta = rewards[t] + self.gae_gamma * next_value - values[t]
-advantages[t] = delta + self.gae_gamma * self.gae_lambda * last_advantage
-last_advantage = advantages[t]
-return advantages
+# def _compute_gae(self, rewards: torch.Tensor, values: torch.Tensor, next_value: torch.Tensor) -> torch.Tensor:
+# advantages = torch.zeros_like(rewards, device=values.device)
+# last_advantage = 0
+# for t in reversed(range(rewards.size(0))):
+# delta = rewards[t] + self.gae_gamma * next_value - values[t]
+# advantages[t] = delta + self.gae_gamma * self.gae_lambda * last_advantage
+# last_advantage = advantages[t]
+# return advantages
 
 def calculate_advantages(self, rewards: torch.Tensor, values: torch.Tensor) -> torch.Tensor:
-advantages = self._compute_gae(rewards, values[:-1], values[-1])
+advantages = rewards - values
 normalized_advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
 return normalized_advantages
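In the hunk above (src/rxnn/training/rl.py), PPOAlgorithm's GAE helper is commented out and calculate_advantages falls back to the plain difference rewards - values, followed by the same normalization. A minimal standalone sketch of the new computation; note that rewards and values must now have matching shapes, since the extra bootstrap value values[-1] is no longer consumed:

import torch

def calculate_advantages(rewards: torch.Tensor, values: torch.Tensor) -> torch.Tensor:
    # 0.2.11 behaviour: simple one-step advantage estimate instead of GAE.
    advantages = rewards - values
    return (advantages - advantages.mean()) / (advantages.std() + 1e-8)

rewards = torch.tensor([1.0, 0.5, 0.0])
values = torch.tensor([0.8, 0.6, 0.1])
print(calculate_advantages(rewards, values))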