homa 0.2.9__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. homa/activations/learnable/AOAF.py +1 -1
  2. homa/activations/learnable/AReLU.py +6 -3
  3. homa/activations/learnable/PiLU.py +1 -1
  4. homa/activations/learnable/__init__.py +2 -2
  5. homa/activations/learnable/concerns/ChannelBased.py +2 -0
  6. homa/core/__init__.py +0 -0
  7. homa/core/concerns/MovesNetworkToDevice.py +13 -0
  8. homa/core/concerns/TracksTime.py +7 -0
  9. homa/core/concerns/__init__.py +2 -0
  10. homa/device.py +5 -0
  11. homa/ensemble/Ensemble.py +4 -2
  12. homa/ensemble/concerns/CalculatesMetricNecessities.py +2 -2
  13. homa/ensemble/concerns/PredictsProbabilities.py +2 -2
  14. homa/ensemble/concerns/ReportsClassificationMetrics.py +2 -1
  15. homa/ensemble/concerns/ReportsEnsembleAccuracy.py +2 -2
  16. homa/ensemble/concerns/ReportsEnsembleF1.py +2 -2
  17. homa/ensemble/concerns/ReportsEnsembleKappa.py +2 -2
  18. homa/ensemble/concerns/ReportsEnsembleSize.py +11 -0
  19. homa/ensemble/concerns/ReportsLogits.py +26 -5
  20. homa/ensemble/concerns/SavesEnsembleModels.py +13 -0
  21. homa/ensemble/concerns/StoresModels.py +11 -8
  22. homa/ensemble/concerns/__init__.py +2 -1
  23. homa/ensemble/utils.py +9 -0
  24. homa/graph/GraphAttention.py +13 -0
  25. homa/graph/__init__.py +1 -0
  26. homa/graph/modules/GraphAttentionHeadModule.py +37 -0
  27. homa/graph/modules/MultiHeadGraphAttentionModule.py +22 -0
  28. homa/graph/modules/__init__.py +2 -0
  29. homa/loss/Loss.py +4 -1
  30. homa/rl/DQN.py +2 -0
  31. homa/rl/DRQN.py +5 -0
  32. homa/rl/DiversityIsAllYouNeed.py +97 -0
  33. homa/rl/SoftActorCritic.py +67 -0
  34. homa/rl/__init__.py +4 -0
  35. homa/rl/buffers/Buffer.py +13 -0
  36. homa/rl/buffers/DiversityIsAllYouNeedBuffer.py +50 -0
  37. homa/rl/buffers/ImageBuffer.py +5 -0
  38. homa/rl/buffers/SoftActorCriticBuffer.py +64 -0
  39. homa/rl/buffers/__init__.py +4 -0
  40. homa/rl/buffers/concerns/HasRecordAlternatives.py +12 -0
  41. homa/rl/buffers/concerns/ResetsCollection.py +9 -0
  42. homa/rl/buffers/concerns/__init__.py +2 -0
  43. homa/rl/diayn/Actor.py +54 -0
  44. homa/rl/diayn/Critic.py +41 -0
  45. homa/rl/diayn/Discriminator.py +45 -0
  46. homa/rl/diayn/__init__.py +3 -0
  47. homa/rl/diayn/modules/ContinuousActorModule.py +42 -0
  48. homa/rl/diayn/modules/CriticModule.py +28 -0
  49. homa/rl/diayn/modules/DiscriminatorModule.py +24 -0
  50. homa/rl/diayn/modules/__init__.py +3 -0
  51. homa/rl/sac/SoftActor.py +70 -0
  52. homa/rl/sac/SoftCritic.py +98 -0
  53. homa/rl/sac/__init__.py +2 -0
  54. homa/rl/sac/modules/DualSoftCriticModule.py +22 -0
  55. homa/rl/sac/modules/SoftActorModule.py +35 -0
  56. homa/rl/sac/modules/SoftCriticModule.py +30 -0
  57. homa/rl/sac/modules/__init__.py +3 -0
  58. homa/rl/utils.py +7 -0
  59. homa/vision/Resnet.py +3 -3
  60. homa/vision/Swin.py +17 -5
  61. homa/vision/modules/SwinModule.py +17 -9
  62. {homa-0.2.9.dist-info → homa-0.3.2.dist-info}/METADATA +1 -1
  63. {homa-0.2.9.dist-info → homa-0.3.2.dist-info}/RECORD +66 -28
  64. homa/ensemble/concerns/ReportsSize.py +0 -11
  65. homa/torch/__init__.py +0 -1
  66. homa/torch/helpers.py +0 -6
  67. {homa-0.2.9.dist-info → homa-0.3.2.dist-info}/WHEEL +0 -0
  68. {homa-0.2.9.dist-info → homa-0.3.2.dist-info}/entry_points.txt +0 -0
  69. {homa-0.2.9.dist-info → homa-0.3.2.dist-info}/top_level.txt +0 -0
homa/activations/learnable/AOAF.py CHANGED
@@ -12,5 +12,5 @@ class AOAF(AdaptiveActivationFunction, ChannelBased):
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         self.initialize(x, "a")
-        a = self.a.view(self.parameter_view(x))
+        a = self.a.view(self.parameter_shape(x))
         return torch.relu(x - self.b * a) + self.c * a
homa/activations/learnable/AReLU.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 from ..AdaptiveActivationFunction import AdaptiveActivationFunction
+from ...device import get_device
 
 
 class AReLU(AdaptiveActivationFunction):
@@ -7,10 +8,12 @@ class AReLU(AdaptiveActivationFunction):
         super(AReLU, self).__init__()
         self.a = torch.nn.Parameter(torch.tensor(0.9, requires_grad=True))
         self.b = torch.nn.Parameter(torch.tensor(2.0, requires_grad=True))
+        self.a.to(get_device())
+        self.b.to(get_device())
 
-    def forward(self, z):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         negative_slope = torch.clamp(self.a, 0.01, 0.99)
         positive_slope = 1 + torch.sigmoid(self.b)
-        positive = positive_slope * torch.relu(z)
-        negative = negative_slope * (-torch.relu(-z))
+        positive = positive_slope * torch.relu(x)
+        negative = negative_slope * (-torch.relu(-x))
         return positive + negative
homa/activations/learnable/PiLU.py CHANGED
@@ -3,7 +3,7 @@ from ..AdaptiveActivationFunction import AdaptiveActivationFunction
 from .concerns import ChannelBased
 
 
-class DualLine(AdaptiveActivationFunction, ChannelBased):
+class PiLU(AdaptiveActivationFunction, ChannelBased):
    def __init__(self):
        super().__init__()
        self.a = None
homa/activations/learnable/__init__.py CHANGED
@@ -1,10 +1,10 @@
-from .StarReLU import StarReLU
 from .DualLine import DualLine
 from .LeLeLU import LeLeLU
 from .AReLU import AReLU
 from .PERU import PERU
 from .ShiLU import ShiLU
+from .StarReLU import StarReLU
 from .DPReLU import DPReLU
-from .PiLU import DualLine
+from .PiLU import PiLU
 from .FReLU import FReLU
 from .AOAF import AOAF
homa/activations/learnable/concerns/ChannelBased.py CHANGED
@@ -21,12 +21,14 @@ class ChannelBased:
             attrs = [attrs]
 
         self.num_channels = x.shape[1]
+        device = x.device
         for index, attr in enumerate(attrs):
             if index < len(values) and values[index] is not None:
                 default_value = float(values[index])
             else:
                 default_value = 1.0
             param = torch.nn.Parameter(torch.full((self.num_channels,), default_value))
+            param = param.to(device)
             setattr(self, attr, param)
         self._initialized = True
 
homa/core/__init__.py ADDED
File without changes
homa/core/concerns/MovesNetworkToDevice.py ADDED
@@ -0,0 +1,13 @@
+from ...device import move
+
+
+class MovesNetworkToDevice:
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        if not hasattr(self, "network"):
+            raise RuntimeError(
+                "MovesNetworkToDevice assumes the underlying class has a network property."
+            )
+
+        move(self.network)
homa/core/concerns/TracksTime.py ADDED
@@ -0,0 +1,7 @@
+class TracksTime:
+    def __init__(self):
+        super().__init__()
+        self.t = 0
+
+    def tick(self):
+        self.t += 1
homa/core/concerns/__init__.py ADDED
@@ -0,0 +1,2 @@
+from .MovesNetworkToDevice import MovesNetworkToDevice
+from .TracksTime import TracksTime
homa/device.py CHANGED
@@ -23,3 +23,8 @@ def mps():
 
 def device():
     return get_device()
+
+
+def move(*modules):
+    for module in modules:
+        module.to(get_device())
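The new move helper centralizes device placement; Module.to moves parameters in place, so callers can ignore the return value. A minimal usage sketch (the two Linear modules are hypothetical):

import torch
from homa.device import move

encoder = torch.nn.Linear(16, 32)
decoder = torch.nn.Linear(32, 16)
move(encoder, decoder)  # both modules now live on get_device()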
homa/ensemble/Ensemble.py CHANGED
@@ -1,16 +1,18 @@
 from .concerns import (
-    ReportsSize,
+    ReportsEnsembleSize,
     StoresModels,
     ReportsClassificationMetrics,
     PredictsProbabilities,
+    SavesEnsembleModels,
 )
 
 
 class Ensemble(
-    ReportsSize,
+    ReportsEnsembleSize,
     ReportsClassificationMetrics,
     PredictsProbabilities,
     StoresModels,
+    SavesEnsembleModels,
 ):
     def __init__(self):
         super().__init__()
homa/ensemble/concerns/CalculatesMetricNecessities.py CHANGED
@@ -3,8 +3,8 @@ from ...device import get_device
 
 
 class CalculatesMetricNecessities:
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self):
+        super().__init__()
 
     @torch.no_grad()
     def metric_necessities(self, dataloader):
homa/ensemble/concerns/PredictsProbabilities.py CHANGED
@@ -3,8 +3,8 @@ from .ReportsLogits import ReportsLogits
 
 
 class PredictsProbabilities(ReportsLogits):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self):
+        super().__init__()
 
     def predict(self, x: torch.Tensor) -> torch.Tensor:
         logits = self.logits(x)
homa/ensemble/concerns/ReportsClassificationMetrics.py CHANGED
@@ -10,4 +10,5 @@ class ReportsClassificationMetrics(
     ReportsEnsembleF1,
     ReportsEnsembleKappa,
 ):
-    pass
+    def __init__(self):
+        super().__init__()
homa/ensemble/concerns/ReportsEnsembleAccuracy.py CHANGED
@@ -3,8 +3,8 @@ from torch.utils.data import DataLoader
 
 
 class ReportsEnsembleAccuracy:
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self):
+        super().__init__()
 
     def accuracy(self, dataloader: DataLoader) -> float:
         predictions, labels = self.metric_necessities(dataloader)
homa/ensemble/concerns/ReportsEnsembleF1.py CHANGED
@@ -2,8 +2,8 @@ from sklearn.metrics import f1_score as f1
 
 
 class ReportsEnsembleF1:
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self):
+        super().__init__()
 
     def f1(self) -> float:
         predictions, labels = self.metric_necessities()
homa/ensemble/concerns/ReportsEnsembleKappa.py CHANGED
@@ -2,8 +2,8 @@ from sklearn.metrics import cohen_kappa_score as kappa
 
 
 class ReportsEnsembleKappa:
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self):
+        super().__init__()
 
     def accuracy(self) -> float:
         predictions, labels = self.metric_necessities()
homa/ensemble/concerns/ReportsEnsembleSize.py ADDED
@@ -0,0 +1,11 @@
+class ReportsEnsembleSize:
+    def __init__(self):
+        super().__init__()
+
+    @property
+    def size(self):
+        return len(self.weights)
+
+    @property
+    def length(self):
+        return self.size
homa/ensemble/concerns/ReportsLogits.py CHANGED
@@ -2,16 +2,37 @@ import torch
 
 
 class ReportsLogits:
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self):
+        super().__init__()
 
-    def logits(self, x: torch.Tensor) -> torch.Tensor:
-        batch_size = x.shape[0]
+    def logits_average(self, x: torch.Tensor) -> torch.Tensor:
+        return self.logits_sum(x) / len(self.factories)
+
+    def logits_sum(self, x: torch.Tensor) -> torch.Tensor:
+        batch_size = x.size(0)
         logits = torch.zeros((batch_size, self.num_classes))
-        for model in self.models:
+        for factory, weight in zip(self.factories, self.weights):
+            model = factory(num_classes=self.num_classes)
+            model.load_state_dict(weight)
             logits += model(x)
         return logits
 
+    def check_aggregation_strategy(self, aggregation: str):
+        if aggregation not in ["mean", "average", "sum"]:
+            raise ValueError(
+                f"Ensemble aggregation strategy must be in [mean, average, sum], but found {aggregation}."
+            )
+
+    def logits(self, x: torch.Tensor, aggregation: str = "mean") -> torch.Tensor:
+        self.check_aggregation_strategy(aggregation=aggregation)
+        logits_handlers = {
+            "mean": self.logits_average,
+            "average": self.logits_average,
+            "sum": self.logits_sum,
+        }
+        handler = logits_handlers.get(aggregation)
+        return handler(x)
+
     @torch.no_grad()
     def logits_(self, *args, **kwargs):
         return self.logits(*args, **kwargs)
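logits now rebuilds every ensemble member from its stored factory and weights, then dispatches on the aggregation strategy ("mean"/"average" divide the summed logits by the member count). A hedged usage sketch, assuming models were recorded via Ensemble.record and that num_classes is set elsewhere on the ensemble:

ensemble = Ensemble()
ensemble.record(model_a)  # model_a, model_b: trained torch.nn.Module instances
ensemble.record(model_b)

x = torch.randn(8, 3, 224, 224)                     # hypothetical input batch
mean_logits = ensemble.logits(x)                    # default aggregation: "mean"
sum_logits = ensemble.logits(x, aggregation="sum")
# ensemble.logits(x, aggregation="median") would raise ValueError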
homa/ensemble/concerns/SavesEnsembleModels.py ADDED
@@ -0,0 +1,13 @@
+class SavesEnsembleModels:
+    def __init__(self):
+        super().__init__()
+
+    def save(self):
+        self.save_factories()
+        self.save_weights()
+
+    def save_factories(self):
+        pass
+
+    def save_weights(self):
+        pass
homa/ensemble/concerns/StoresModels.py CHANGED
@@ -1,23 +1,26 @@
 import torch
-from copy import deepcopy
-from typing import List
+from typing import List, Type
+from collections import OrderedDict
 from ...vision import Model
 
 
 class StoresModels:
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.models: List[torch.nn.Module] = []
+    def __init__(self):
+        super().__init__()
+        self.factories: List[Type[torch.nn.Module]] = []
+        self.weights: List[OrderedDict] = []
 
     def record(self, model: Model | torch.nn.Module):
         model_: torch.nn.Module | None = None
         if isinstance(model, Model):
-            model_ = deepcopy(model.network)
+            model_ = model.network
         elif isinstance(model, torch.nn.Module):
-            model_ = deepcopy(model)
+            model_ = model
         else:
             raise TypeError("Wrong input to ensemble record")
-        self.models.append(model_)
+
+        self.factories.append(model_.__class__)
+        self.weights.append(model_.state_dict())
 
     def push(self, *args, **kwargs):
         self.record(*args, **kwargs)
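record now stores each model's class and its state_dict instead of a deepcopied module. Worth noting: state_dict() returns references to the live parameter tensors, so weights recorded here keep changing if the source model trains further. A defensive variant (a sketch, not what the package does) would snapshot inside record:

from copy import deepcopy

# hypothetical variant of the tail of record(), snapshotting instead of aliasing:
self.factories.append(model_.__class__)
self.weights.append(deepcopy(model_.state_dict()))  # frozen copy, not a live view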
homa/ensemble/concerns/__init__.py CHANGED
@@ -5,5 +5,6 @@ from .ReportsEnsembleAccuracy import ReportsEnsembleAccuracy
 from .ReportsEnsembleF1 import ReportsEnsembleF1
 from .ReportsEnsembleKappa import ReportsEnsembleKappa
 from .ReportsLogits import ReportsLogits
-from .ReportsSize import ReportsSize
+from .ReportsEnsembleSize import ReportsEnsembleSize
 from .StoresModels import StoresModels
+from .SavesEnsembleModels import SavesEnsembleModels
homa/ensemble/utils.py ADDED
@@ -0,0 +1,9 @@
+import torch
+
+
+def get_model_device(model: torch.nn.Module):
+    try:
+        device = next(model.parameters()).device
+    except StopIteration:
+        device = torch.device("cpu")
+    return device
homa/graph/GraphAttention.py ADDED
@@ -0,0 +1,13 @@
+import torch
+from .modules import GraphAttentionModule
+from ..core.concerns import MovesNetworkToDevice
+
+
+class GraphAttention(MovesNetworkToDevice):
+    def __init__(self, lr: float = 0.005, decay: float = 5e-4, dropout: float = 0.6):
+        super().__init__()
+        self.network = GraphAttentionModule()
+        self.optimizer = torch.optim.AdamW(
+            self.network.parameters(), lr=lr, weight_decay=decay
+        )
+        self.criterion = torch.nn.CrossEntropyLoss()
homa/graph/__init__.py ADDED
@@ -0,0 +1 @@
+from .GraphAttention import GraphAttention
homa/graph/modules/GraphAttentionHeadModule.py ADDED
@@ -0,0 +1,37 @@
+import torch
+
+
+class GraphAttentionHeadModule(torch.nn.Module):
+    def __init__(self, in_features: int, out_features: int, alpha=0.2):
+        super().__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.alpha = alpha
+
+        self.W = torch.nn.Linear(in_features, out_features, bias=False)
+        self.a_1 = torch.nn.Parameter(torch.randn(out_features, 1))
+        self.a_2 = torch.nn.Parameter(torch.randn(out_features, 1))
+
+        self.leaky_relu = torch.nn.LeakyReLU(self.alpha)
+        self.elu = torch.nn.ELU()
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        torch.nn.init.xavier_uniform_(self.W.weight, gain=1.414)
+        torch.nn.init.xavier_uniform_(self.a_1, gain=1.414)
+        torch.nn.init.xavier_uniform_(self.a_2, gain=1.414)
+
+    def forward(self, node_features, adj_matrix):
+        N = node_features.size(0)
+        h_prime = self.W(node_features)
+        s1 = torch.matmul(h_prime, self.a_1)
+        s2 = torch.matmul(h_prime, self.a_2)
+        e = s1 + s2.T
+        e = self.leaky_relu(e)
+        zero_vec = -9e15 * torch.ones_like(e)
+        attention_mask = torch.where(
+            adj_matrix > 0, e, zero_vec.to(node_features.device)
+        )
+        attention_weights = torch.nn.functional.softmax(attention_mask, dim=1)
+        h_new = torch.matmul(attention_weights, h_prime)
+        return self.elu(h_new)
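The broadcast s1 + s2.T is the standard GAT factorization: with s1 = H'a_1 and s2 = H'a_2 both of shape (N, 1), the sum yields the full (N, N) score matrix e[i][j] = a_1·h'_i + a_2·h'_j, equivalent to a single concatenated attention vector a = [a_1; a_2] over [h'_i ‖ h'_j]. A quick shape check with toy dimensions:

import torch

h_prime = torch.randn(5, 8)                    # N=5 nodes, out_features=8
a_1, a_2 = torch.randn(8, 1), torch.randn(8, 1)
s1, s2 = h_prime @ a_1, h_prime @ a_2          # each of shape (5, 1)
e = s1 + s2.T                                  # (5, 5) pairwise scores
assert torch.allclose(e[1, 3], (h_prime[1] @ a_1 + h_prime[3] @ a_2).squeeze())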
homa/graph/modules/MultiHeadGraphAttentionModule.py ADDED
@@ -0,0 +1,22 @@
+import torch
+from .GraphAttentionHeadModule import GraphAttentionHeadModule
+
+
+class MultiHeadGraphAttentionModule(torch.nn.Module):
+    def __init__(self, num_heads: int, in_features: int, out_features: int, alpha=0.2):
+        super().__init__()
+        self.num_heads = num_heads
+        self.head_out_features = out_features
+        self.heads = torch.nn.ModuleList(
+            [
+                GraphAttentionHeadModule(in_features, out_features, alpha=alpha)
+                for _ in range(num_heads)
+            ]
+        )
+
+    def forward(
+        self, node_features: torch.Tensor, adj_matrix: torch.Tensor
+    ) -> torch.Tensor:
+        outputs = [head(node_features, adj_matrix) for head in self.heads]
+        h_new_concat = torch.cat(outputs, dim=1)
+        return h_new_concat
homa/graph/modules/__init__.py ADDED
@@ -0,0 +1,2 @@
+from .GraphAttentionHeadModule import GraphAttentionHeadModule
+from .MultiHeadGraphAttentionModule import MultiHeadGraphAttentionModule
homa/loss/Loss.py CHANGED
@@ -1,2 +1,5 @@
-class Loss:
+import torch
+
+
+class Loss(torch.nn.Module):
     pass
homa/rl/DQN.py ADDED
@@ -0,0 +1,2 @@
+class DQN:
+    pass
homa/rl/DRQN.py ADDED
@@ -0,0 +1,5 @@
+from .DQN import DQN
+
+
+class DRQN(DQN):
+    pass
homa/rl/DiversityIsAllYouNeed.py ADDED
@@ -0,0 +1,97 @@
+import torch
+from .diayn.Actor import Actor
+from .diayn.Critic import Critic
+from .diayn.Discriminator import Discriminator
+from .buffers import DiversityIsAllYouNeedBuffer, Buffer
+
+
+class DiversityIsAllYouNeed:
+    def __init__(
+        self,
+        state_dimension: int,
+        action_dimension: int,
+        hidden_dimension: int = 256,
+        num_skills: int = 10,
+        critic_decay: float = 0.0,
+        actor_decay: float = 0.0,
+        discriminator_decay: float = 0.0,
+        actor_lr: float = 0.0001,
+        critic_lr: float = 0.001,
+        discriminator_lr: float = 0.001,
+        buffer_capacity: int = 1_000_000,
+        actor_epsilon: float = 1e-6,
+        gamma: float = 0.99,
+        min_std: float = -20.0,
+        max_std: float = 2.0,
+    ):
+        self.buffer: Buffer = DiversityIsAllYouNeedBuffer(capacity=buffer_capacity)
+        self.num_skills: int = num_skills
+        self.gamma: float = gamma
+        self.actor = Actor(
+            state_dimension=state_dimension,
+            action_dimension=action_dimension,
+            hidden_dimension=hidden_dimension,
+            num_skills=num_skills,
+            lr=actor_lr,
+            decay=actor_decay,
+            epsilon=actor_epsilon,
+            min_std=min_std,
+            max_std=max_std,
+        )
+        self.critic = Critic(
+            state_dimension=state_dimension,
+            hidden_dimension=hidden_dimension,
+            num_skills=num_skills,
+            lr=critic_lr,
+            decay=critic_decay,
+            gamma=gamma,
+        )
+        self.discriminator = Discriminator(
+            state_dimension=state_dimension,
+            hidden_dimension=hidden_dimension,
+            num_skills=num_skills,
+            lr=discriminator_lr,
+            decay=discriminator_decay,
+        )
+
+    def one_hot(self, indices, max_index) -> torch.Tensor:
+        one_hot = torch.zeros(indices.size(0), max_index)
+        one_hot.scatter_(1, indices.unsqueeze(1), 1)
+        return one_hot
+
+    def skill_index(self) -> torch.Tensor:
+        return torch.randint(0, self.num_skills, (1,))
+
+    def skill(self) -> torch.Tensor:
+        return self.one_hot(self.skill_index(), self.num_skills)
+
+    def advantages(
+        self,
+        states: torch.Tensor,
+        skills: torch.Tensor,
+        rewards: torch.Tensor,
+        terminations: torch.Tensor,
+        next_states: torch.Tensor,
+    ) -> torch.Tensor:
+        values = self.critic.values(states=states, skills=skills)
+        termination_mask = 1 - terminations
+        next_values = self.critic.values_(states=next_states, skills=skills)
+        update = self.gamma * next_values * termination_mask
+        return rewards + update - values
+
+    def train(self, skill: torch.Tensor):
+        data = self.buffer.all_tensor()
+        skill_indices = skill.repeat(data.states.size(0), 1).long()
+        skills_indices_one_hot = self.one_hot(skill_indices, self.num_skills)
+        self.discriminator.train(
+            states=data.states, skills_indices=skills_indices_one_hot
+        )
+        advantages = self.advantages(
+            states=data.states,
+            rewards=data.rewards,
+            terminations=data.terminations,
+            next_states=data.next_states,
+            skills=skills_indices_one_hot,
+        )
+        self.critic.train(advantages=advantages)
+        self.actor.train(advantages=advantages)
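A hedged sketch of the intended loop (environment interaction is assumed; the buffer's record signature appears in DiversityIsAllYouNeedBuffer later in this diff):

agent = DiversityIsAllYouNeed(state_dimension=8, action_dimension=2)
skill = agent.skill()  # uniformly sampled one-hot skill, shape (1, num_skills)
# ... roll out an episode conditioned on skill, calling
# agent.buffer.record(state, action, reward, next_state, termination, probability)
agent.train(skill=skill)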
homa/rl/SoftActorCritic.py ADDED
@@ -0,0 +1,67 @@
+from .sac import SoftActor, SoftCritic
+from .buffers import SoftActorCriticBuffer
+from ..core.concerns import TracksTime
+
+
+class SoftActorCritic(TracksTime):
+    def __init__(
+        self,
+        state_dimension: int,
+        action_dimension: int,
+        hidden_dimension: int = 256,
+        buffer_capacity: int = 100_000,
+        batch_size: int = 256,
+        actor_lr: float = 0.0002,
+        critic_lr: float = 0.0003,
+        actor_decay: float = 0.0,
+        critic_decay: float = 0.0,
+        tau: float = 0.005,
+        alpha: float = 0.2,
+        gamma: float = 0.99,
+        min_std: float = -20,
+        max_std: float = 2,
+        warmup: int = 20_000,
+    ):
+        super().__init__()
+
+        self.batch_size: int = batch_size
+        self.warmup: int = warmup
+        self.tau: float = tau
+
+        self.actor = SoftActor(
+            state_dimension=state_dimension,
+            action_dimension=action_dimension,
+            hidden_dimension=hidden_dimension,
+            lr=actor_lr,
+            weight_decay=actor_decay,
+            alpha=alpha,
+            min_std=min_std,
+            max_std=max_std,
+        )
+        self.critic = SoftCritic(
+            state_dimension=state_dimension,
+            action_dimension=action_dimension,
+            hidden_dimension=hidden_dimension,
+            lr=critic_lr,
+            weight_decay=critic_decay,
+            gamma=gamma,
+            alpha=alpha,
+        )
+        self.buffer = SoftActorCriticBuffer(capacity=buffer_capacity)
+
+    def train(self):
+        # don't train before warmup
+        if self.buffer.size < self.warmup:
+            return
+
+        data = self.buffer.sample_torch(self.batch_size)
+        self.critic.train(
+            states=data.states,
+            actions=data.actions,
+            rewards=data.rewards,
+            terminations=data.terminations,
+            next_states=data.next_states,
+            actor=self.actor,
+        )
+        self.actor.train(states=data.states, critic=self.critic)
+        self.critic.update(tau=self.tau)
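train is a no-op until the buffer holds warmup transitions; after that, each call runs one critic step, one actor step, and a Polyak update of the target critics with rate tau. Usage sketch (environment stepping and the buffer's record call are assumed, since SoftActorCriticBuffer is not shown above):

agent = SoftActorCritic(state_dimension=8, action_dimension=2, warmup=1_000)
for step in range(100_000):
    # ... act in the environment and record the transition into agent.buffer
    agent.train()  # silently returns until agent.buffer.size >= warmup
    agent.tick()   # from TracksTime: increments agent.t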
homa/rl/__init__.py ADDED
@@ -0,0 +1,4 @@
+from .DiversityIsAllYouNeed import DiversityIsAllYouNeed
+from .SoftActorCritic import SoftActorCritic
+from .DQN import DQN
+from .DRQN import DRQN
homa/rl/buffers/Buffer.py ADDED
@@ -0,0 +1,13 @@
+from collections import deque
+from typing import Type
+from .concerns import ResetsCollection, HasRecordAlternatives
+
+
+class Buffer(ResetsCollection, HasRecordAlternatives):
+    def __init__(self, capacity: int):
+        self.capacity: int = capacity
+        self.collection: Type[deque] = deque(maxlen=self.capacity)
+
+    @property
+    def size(self):
+        return len(self.collection)
homa/rl/buffers/DiversityIsAllYouNeedBuffer.py ADDED
@@ -0,0 +1,50 @@
+import torch
+import numpy
+from types import SimpleNamespace
+from .Buffer import Buffer
+from .concerns import HasRecordAlternatives
+
+
+class DiversityIsAllYouNeedBuffer(Buffer, HasRecordAlternatives):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def all_tensor(self) -> SimpleNamespace:
+        return self.all(tensor=True)
+
+    def all(self, tensor: bool = False) -> SimpleNamespace:
+        states, actions, rewards, next_states, terminations, probabilities = zip(
+            *self.collection
+        )
+
+        if tensor:
+            states = torch.from_numpy(numpy.array(states))
+            actions = torch.from_numpy(numpy.array(actions))
+            rewards = torch.from_numpy(numpy.array(rewards))
+            next_states = torch.from_numpy(numpy.array(next_states))
+            terminations = torch.from_numpy(numpy.array(terminations))
+            probabilities = torch.from_numpy(numpy.array(probabilities))
+
+        return SimpleNamespace(
+            **{
+                "states": states,
+                "actions": actions,
+                "rewards": rewards,
+                "next_states": next_states,
+                "terminations": terminations,
+                "probabilities": probabilities,
+            }
+        )
+
+    def record(
+        self,
+        state: numpy.ndarray,
+        action: int,
+        reward: float,
+        next_state: numpy.ndarray,
+        termination: bool,
+        probability: numpy.ndarray,
+    ) -> None:
+        self.collection.append(
+            (state, action, reward, next_state, termination, probability)
+        )
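Round-trip sketch for the buffer (shapes illustrative): record appends a plain tuple, and all_tensor stacks each field through numpy before converting to torch:

import numpy

buffer = DiversityIsAllYouNeedBuffer(capacity=1_000)
buffer.record(
    state=numpy.zeros(8),
    action=1,
    reward=0.5,
    next_state=numpy.ones(8),
    termination=False,
    probability=numpy.array([0.25, 0.75]),
)
data = buffer.all_tensor()
print(data.states.shape)  # torch.Size([1, 8])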