PyPI - homa - Versions diffs - 0.3.11__py3-none-any.whl → 0.3.15__py3-none-any.whl - Mend

homa 0.3.11__py3-none-any.whl → 0.3.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

homa/rl/buffers/Buffer.py +3 -1
homa/rl/buffers/SoftActorCriticBuffer.py +1 -1
homa/rl/sac/SoftActor.py +3 -2
homa/rl/sac/SoftCritic.py +13 -14
homa/rl/sac/modules/SoftActorModule.py +1 -1
homa/rl/utils.py +7 -0
{homa-0.3.11.dist-info → homa-0.3.15.dist-info}/METADATA +1 -1
{homa-0.3.11.dist-info → homa-0.3.15.dist-info}/RECORD +11 -10
{homa-0.3.11.dist-info → homa-0.3.15.dist-info}/WHEEL +0 -0
{homa-0.3.11.dist-info → homa-0.3.15.dist-info}/entry_points.txt +0 -0
{homa-0.3.11.dist-info → homa-0.3.15.dist-info}/top_level.txt +0 -0

homa/rl/buffers/Buffer.py CHANGED Viewed

@@ -1,10 +1,12 @@
+from collections import deque
+from typing import Type
 from .concerns import ResetsCollection, HasRecordAlternatives
 class Buffer(ResetsCollection, HasRecordAlternatives):
     def __init__(self, capacity: int):
         self.capacity: int = capacity
-        self.reset()
+        self.collection: Type[deque] = deque(maxlen=self.capacity)
     @property
     def size(self):

homa/rl/buffers/SoftActorCriticBuffer.py CHANGED Viewed

@@ -35,7 +35,7 @@ class SoftActorCriticBuffer(Buffer):
         if as_tensor:
             states = torch.from_numpy(states).float()
-            actions = torch.from_numpy(actions).long()
+            actions = torch.from_numpy(actions).float()
             rewards = torch.from_numpy(rewards).float()
             next_states = torch.from_numpy(next_states).float()
             terminations = torch.from_numpy(terminations).float()

homa/rl/sac/SoftActor.py CHANGED Viewed

@@ -29,6 +29,7 @@ class SoftActor:
         )
     def train(self, states: torch.Tensor, critic_network: torch.nn.Module):
+        self.network.train()
         self.optimizer.zero_grad()
         loss = self.loss(states=states, critic_network=critic_network)
         loss.backward()
@@ -64,6 +65,6 @@ class SoftActor:
         action = torch.tanh(pre_tanh)
         probabilities = distribution.log_prob(pre_tanh).sum(dim=1, keepdim=True)
-        correction = torch.log(1 - action.pow(2) + 1e-6).sum(dim=1, keepdim=True)
+        probabilities -= torch.log(1 - action.pow(2) + 1e-6).sum(dim=1, keepdim=True)
-        return action, probabilities - correction
+        return action, probabilities

homa/rl/sac/SoftCritic.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import torch
 from torch.nn.functional import mse_loss as mse
-from typing import Type
 from .modules import DualSoftCriticModule
 from .SoftActor import SoftActor
+from ..utils import soft_update
 class SoftCritic:
@@ -31,6 +31,10 @@ class SoftCritic:
             hidden_dimension=hidden_dimension,
             action_dimension=action_dimension,
         )
+        # copy source to target when initiated
+        self.target.load_state_dict(self.network.state_dict())
         self.optimizer = torch.optim.AdamW(
             self.network.parameters(), lr=lr, weight_decay=weight_decay
         )
@@ -42,8 +46,9 @@ class SoftCritic:
         rewards: torch.Tensor,
         terminations: torch.Tensor,
         next_states: torch.Tensor,
-        actor: torch.nn.Module,
+        actor: SoftActor,
     ):
+        self.network.train()
         self.optimizer.zero_grad()
         loss = self.loss(
             states=states,
@@ -65,7 +70,7 @@ class SoftCritic:
         next_states: torch.Tensor,
         actor: torch.nn.Module,
     ):
-        q_alpha, q_beta = self.target(states, actions)
+        q_alpha, q_beta = self.network(states, actions)
         target = self.calculate_target(
             rewards=rewards,
             terminations=terminations,
@@ -82,19 +87,13 @@ class SoftCritic:
         next_states: torch.Tensor,
         actor: SoftActor,
     ):
+        termination_mask = 1 - terminations
         next_actions, next_probabilities = actor.sample(next_states)
         q_alpha, q_beta = self.target(next_states, next_actions)
         q = torch.min(q_alpha, q_beta)
-        termination_mask = 1 - terminations
-        entropy_q = q - self.alpha * next_probabilities * termination_mask
-        return rewards + self.gamma * entropy_q
-    def soft_update(
-        self, network: Type[torch.nn.Module], target: Type[torch.nn.Module]
-    ):
-        for s, t in zip(network.parameters(), target.parameters()):
-            t.data.copy_(self.tau * s.data + (1 - self.tau) * t.data)
+        entropy_q = q - self.alpha * next_probabilities
+        return rewards + self.gamma * termination_mask * entropy_q
     def update(self):
-        self.soft_update(self.network.alpha, self.target.alpha)
-        self.soft_update(self.network.beta, self.target.beta)
+        soft_update(network=self.network.alpha, target=self.target.alpha, tau=self.tau)
+        soft_update(network=self.network.beta, target=self.target.beta, tau=self.tau)

homa/rl/sac/modules/SoftActorModule.py CHANGED Viewed

@@ -30,6 +30,6 @@ class SoftActorModule(torch.nn.Module):
     def forward(self, state: torch.Tensor):
         features = self.phi(state)
         mean = self.mu(features)
-        std = self.mu(features)
+        std = self.xi(features)
         std = std.clamp(self.min_std, self.max_std)
         return mean, std

homa/rl/utils.py ADDED Viewed

@@ -0,0 +1,7 @@
+import torch
+@torch.no_grad()
+def soft_update(network: torch.nn.Module, target: torch.nn.Module, tau: float):
+    for s, t in zip(network.parameters(), target.parameters()):
+        t.data.copy_(tau * s.data + (1 - tau) * t.data)

{homa-0.3.11.dist-info → homa-0.3.15.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: homa
-Version: 0.3.11
+Version: 0.3.15
 Summary: A curated list of machine learning and deep learning helpers.
 Author-email: Taha Shieenavaz <tahashieenavaz@gmail.com>
 Requires-Python: >=3.7

{homa-0.3.11.dist-info → homa-0.3.15.dist-info}/RECORD RENAMED Viewed

@@ -101,10 +101,11 @@ homa/rl/DRQN.py,sha256=zooojji9aeeubOP7cRPSHg31u2Assxk-qjXyGUWIO3A,49
 homa/rl/DiversityIsAllYouNeed.py,sha256=8yKzlVdLisForGyXqxaXUAWG_dozq7dNY8MBasCvniE,3322
 homa/rl/SoftActorCritic.py,sha256=N8EsiYbsLH-dpT2EmqdYFG9KvHNfO3JX8SG2LPTy94s,1962
 homa/rl/__init__.py,sha256=EaNDkIzLH1Oy0Wc0aAyyVs4HVMcZS1tdHDh631LKSXs,146
-homa/rl/buffers/Buffer.py,sha256=wOk8MH0Wf0cpvavpHIK2O7PrbGP6MwHTH5YFkq2Ints,288
+homa/rl/utils.py,sha256=IqbN5aDLwovocpPbxgywuetjz7GQwh9aJ4WFIOtLP3g,232
+homa/rl/buffers/Buffer.py,sha256=YCESh9tFxgWOLzGQj_IA0zLJoZWDmz6gCNu1iYsGp1s,388
 homa/rl/buffers/DiversityIsAllYouNeedBuffer.py,sha256=Nwcqs3Q10x6OKZ-zWug4IcBc6RR1TwEIybuFQOtmftA,1612
 homa/rl/buffers/ImageBuffer.py,sha256=HSmMt82hmkL3ooBYo7c6YUtTsMz9TAA8CvPh3y8z3yg,65
-homa/rl/buffers/SoftActorCriticBuffer.py,sha256=iDC2C5XFvONT3f7YX_gYXQJGU9wz2usvPOVGbQUd22M,1796
+homa/rl/buffers/SoftActorCriticBuffer.py,sha256=JQ9Y6KeeQS5naO_JPONiks-HYXw7hiZZAbqpoWDZlNI,1797
 homa/rl/buffers/__init__.py,sha256=h1AkCHs6isXbNtxpaZfLp6YudHj1KlnOvURE64vhRa4,190
 homa/rl/buffers/concerns/HasRecordAlternatives.py,sha256=D5aVlPZlnGm0GyGtikKb4wZqyO6zpyqR1IOETmAgLx4,362
 homa/rl/buffers/concerns/ResetsCollection.py,sha256=bZ8q4czYXo1jMtVCnnlG69OgiJ0AqSGY6CiKzJC6xtQ,215
@@ -117,11 +118,11 @@ homa/rl/diayn/modules/ContinuousActorModule.py,sha256=yeC117I5gkXZSidQhjwakjiY7G
 homa/rl/diayn/modules/CriticModule.py,sha256=OUenwCG0dG4PnK7Iq-jy7oCTv_Cn9s7bXRpro6Pvb40,956
 homa/rl/diayn/modules/DiscriminatorModule.py,sha256=D58dKBv4f6gtrpqMKLK8XAZpiMqKfS4sG6s3QcF8iGE,891
 homa/rl/diayn/modules/__init__.py,sha256=1Pgjr4FT5WG-AMh26NPEfbf5pK6I02B1x8HYsgyUCJ4,149
-homa/rl/sac/SoftActor.py,sha256=CxR58IFrZ6xlmBj_gq_abZfgdzlVD71c6wA6wQiVL2c,2142
-homa/rl/sac/SoftCritic.py,sha256=wFIunTgKGBy64Igu7zuvE2BvGz2e-DTplviLyq4tQ7M,3031
+homa/rl/sac/SoftActor.py,sha256=NSTqnv_BZzTqfgEEOIEtOgYV2_VycicIF0alD1O5Nk8,2162
+homa/rl/sac/SoftCritic.py,sha256=rOgPR8zRUtjEwF9W4q5nZQaGXFmf_9tmXqaRWzUkAm8,2980
 homa/rl/sac/__init__.py,sha256=8EIkOcVvxN94gGzcZoX2XTnvTsHqW6yBaZ2RdFwIveM,68
 homa/rl/sac/modules/DualSoftCriticModule.py,sha256=Ax28i7U-KnP4QJig-AeeCfpPYNvTT3DfvRMJI-f-TGY,749
-homa/rl/sac/modules/SoftActorModule.py,sha256=AiWnsWkmQONjOAWAp06eO-lLWEYNJDmx8FSjPKTcjI0,1152
+homa/rl/sac/modules/SoftActorModule.py,sha256=LQ4z7s8mE3wwb1JgxPs0QvnriZULK3_ULdhkt60Ffpw,1152
 homa/rl/sac/modules/SoftCriticModule.py,sha256=aOfhDZTB5og-BLTsmdBdIcRufygCJUas7P-ikBvWQ34,928
 homa/rl/sac/modules/__init__.py,sha256=h-22B5CAK1xhn75tolI5J5sQMxl--kOXbQ6r_JfHIOA,147
 homa/vision/Classifier.py,sha256=bAypqREQVuPamnc8hpbLCwmW9Uly3T1rvrlbMxXp1eA,61
@@ -142,8 +143,8 @@ homa/vision/concerns/__init__.py,sha256=mrw1YvN-GpQPvMwDF00KxnFkksPKo23RWM4KRioU
 homa/vision/modules/ResnetModule.py,sha256=eFudBnILD6OmgQtcW_CQQ8aZ62NEa4HyZ15-lobTtt0,712
 homa/vision/modules/SwinModule.py,sha256=3ZtUcfyJt0NMGmIlGpN35MIJG9QsgcLdFniZH7NxZQo,1227
 homa/vision/modules/__init__.py,sha256=zVMYB9IAO_xZylC1-N3p8ymHgEkAE2sBbuVz8K5Y1kk,74
-homa-0.3.11.dist-info/METADATA,sha256=SvSxNXB1IsX3N5IfhOsnWYtvhjpfzauJPanVH7i5cRs,1760
-homa-0.3.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-homa-0.3.11.dist-info/entry_points.txt,sha256=tJZzjs-f2QvFe3ES8Qta8IE5sAbeE8-cyZ_UtbgqG4s,51
-homa-0.3.11.dist-info/top_level.txt,sha256=tmOfy2tuaAwc3W5-i6j61_vYJsXgR4ivBWkhJ3ZtJDc,5
-homa-0.3.11.dist-info/RECORD,,
+homa-0.3.15.dist-info/METADATA,sha256=jo-jsI9A6KvK95nGmxrh_IExb1WeNp_3DWTZgMDSxcI,1760
+homa-0.3.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+homa-0.3.15.dist-info/entry_points.txt,sha256=tJZzjs-f2QvFe3ES8Qta8IE5sAbeE8-cyZ_UtbgqG4s,51
+homa-0.3.15.dist-info/top_level.txt,sha256=tmOfy2tuaAwc3W5-i6j61_vYJsXgR4ivBWkhJ3ZtJDc,5
+homa-0.3.15.dist-info/RECORD,,

{homa-0.3.11.dist-info → homa-0.3.15.dist-info}/WHEEL RENAMED Viewed

File without changes

{homa-0.3.11.dist-info → homa-0.3.15.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{homa-0.3.11.dist-info → homa-0.3.15.dist-info}/top_level.txt RENAMED Viewed

File without changes

homa 0.3.11__py3-none-any.whl → 0.3.15__py3-none-any.whl

homa 0.3.11__py3-none-any.whl → 0.3.15__py3-none-any.whl