gym-examples 3.0.31__py3-none-any.whl → 3.0.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +42 -18
- {gym_examples-3.0.31.dist-info → gym_examples-3.0.33.dist-info}/METADATA +1 -1
- gym_examples-3.0.33.dist-info/RECORD +7 -0
- gym_examples-3.0.31.dist-info/RECORD +0 -7
- {gym_examples-3.0.31.dist-info → gym_examples-3.0.33.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.31.dist-info → gym_examples-3.0.33.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -10,7 +10,7 @@ import torch.nn as nn
 import torch.nn.functional as F

 # Define the network parameters for the final reward function
-input_dim = 4 #
+input_dim = 4 # lenght of the individual rewards vector
 output_dim = 1 # final reward

 Eelec = 50e-9 # energy consumption per bit in joules
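For reference, Eelec above is the per-bit electronics energy of the first-order radio model commonly assumed in WSN simulations; a minimal sketch of that model follows (the amplifier constant Eamp and both helper functions are illustrative assumptions, not taken from this package):

    # First-order radio energy model (sketch); only Eelec appears in the diff above.
    Eelec = 50e-9    # transceiver electronics energy, J/bit (from wsn_env.py)
    Eamp = 100e-12   # assumed amplifier energy, J/bit/m^2 (not from this package)

    def transmission_energy(k_bits, distance):
        # Energy to send k_bits over `distance` metres: electronics plus amplifier term
        return Eelec * k_bits + Eamp * k_bits * distance ** 2

    def reception_energy(k_bits):
        # Energy to receive k_bits: electronics only
        return Eelec * k_bits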
@@ -40,11 +40,6 @@ class Attention(nn.Module):
         x = self.linear2(x) # Pass the result through another linear layer
         return x

-# Calculate the reward
-net = Attention(input_dim, output_dim)
-net = net.double() # Convert the weights to Double
-
-
 class WSNRoutingEnv(gym.Env):
     def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4):

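The removed lines drop the module-level Attention instance that used to score the reward vector; only the constructor signature and self.linear2 are visible in this diff, so the following is a minimal sketch of what such a module could look like (linear1, the hidden width, and the ReLU are assumptions):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class Attention(nn.Module):
        # Sketch only: the real class in wsn_env.py may differ beyond linear2.
        def __init__(self, input_dim, output_dim):
            super().__init__()
            self.linear1 = nn.Linear(input_dim, 64)  # hidden width assumed
            self.linear2 = nn.Linear(64, output_dim)

        def forward(self, x):
            x = F.relu(self.linear1(x))  # assumed non-linearity
            x = self.linear2(x)          # Pass the result through another linear layer
            return x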
@@ -56,7 +51,6 @@ class WSNRoutingEnv(gym.Env):
         self.episode_count = 0
         self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
         self.epsilon = 1e-10 # small value to avoid division by zero
-        # self.rewards_individual = [0] * self.n_sensors
         # Initialize the position of the sensors randomly

         # Define observation space
@@ -129,7 +123,6 @@ class WSNRoutingEnv(gym.Env):
                     self.total_latency += self.packet_latency[i] + latency_per_hop
                     self.packet_latency[i] = 0

-                    # rewards[i] = self.compute_individual_rewards(i, action)
                     rewards[i] = np.ones(input_dim) # input_dim should be equal to the number of individual rewards
                     dones[i] = True
                 else:
@@ -157,13 +150,7 @@ class WSNRoutingEnv(gym.Env):
                 self.number_of_packets[action] += self.number_of_packets[i]
             self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
-
-            final_reward = net(rewards_individual)
-            # final_reward = np.sum(rewards[i])
-            # weights = np.ones(self.n_sensors, dtype=int)
-            # final_reward = np.sum(reward * weight for reward, weight in zip(rewards[i], weights))
-            rewards[i] = final_reward
-            # rewards = np.mean(rewards)
+            rewards[i] = self.compute_attention_rewards(rewards[i])
         for i in range(self.n_sensors):
             if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
                 dones[i] = True
@@ -178,12 +165,16 @@ class WSNRoutingEnv(gym.Env):

         self.get_metrics()

+        reward_packet_delivery_ratio = self.compute_reward_packet_delivery_ratio()
+        reward_latency = self.compute_reward_latency()
+        rewards_metrics = [reward_packet_delivery_ratio, reward_latency]
+        rewards_metrics = self.compute_attention_rewards(rewards_metrics)
+        rewards = np.array([r + rewards_metrics.detach().numpy() for r in rewards])
+
         return self._get_obs(), rewards, dones, {}

     def _get_obs(self):

-        performance = self.get_metrics()
-
         return [{'remaining_energy': np.array([e]),
                  'consumption_energy': np.array([initial_energy - e]),
                  'sensor_positions': p,
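The added block folds a shared metrics reward (a torch tensor returned by compute_attention_rewards) into every sensor's reward, which is why it is detached and converted to NumPy first; here is a standalone sketch of that combination step with stand-in values (shapes and numbers are assumptions):

    import numpy as np
    import torch

    # Stand-in per-sensor rewards and a stand-in metrics reward
    per_sensor = [torch.tensor([0.4], dtype=torch.double) for _ in range(3)]
    metrics_reward = torch.tensor([0.7], dtype=torch.double)  # e.g. attention over [PDR, latency]

    # The tensor may carry autograd history, so detach it before mixing with NumPy
    combined = np.array([r.detach().numpy() + metrics_reward.detach().numpy()
                         for r in per_sensor])
    print(combined)  # three combined rewards of 1.1 each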
@@ -301,7 +292,7 @@ class WSNRoutingEnv(gym.Env):
         Compute the individual rewards
         '''
         reward_angle = self.compute_reward_angle(i, action)
-        reward_distance = self.compute_reward_distance(i, action)
+        # reward_distance = self.compute_reward_distance(i, action)
         reward_consumption_energy = self.compute_reward_consumption_energy(i, action)
         reward_dispersion_remaining_energy = self.compute_reward_dispersion_remaining_energy()
         reward_number_of_packets = self.compute_reward_number_of_packets(action)
@@ -329,6 +320,39 @@ class WSNRoutingEnv(gym.Env):
         normalized_total_energy = total_energy / max_total_energy

         return np.clip(1 - normalized_total_energy, 0, 1)
+
+    def compute_reward_packet_delivery_ratio(self):
+        '''
+        Compute the reward based on the packet delivery ratio
+        '''
+        return np.clip(self.packet_delivery_ratio, 0, 1)
+
+    def compute_reward_latency(self):
+        '''
+        Compute the reward based on the average latency
+        '''
+        # Normalize the average latency
+        max_latency = self.n_sensors * self.steps
+        normalized_latency = self.total_latency / max_latency
+
+        return np.clip(1 - normalized_latency, 0, 1)
+
+
+    def compute_sum_rewards(self, rewards):
+        '''
+        Compute the sum of the rewards
+        '''
+        return np.sum(rewards)
+
+    def compute_attention_rewards(self, rewards):
+        '''
+        Compute the attention-based rewards
+        '''
+        rewards = torch.tensor(rewards, dtype=torch.double)
+        net = Attention(len(rewards), output_dim)
+        net = net.double() # Convert the weights to Double
+        final_reward = net(rewards)
+        return final_reward

     def integrate_mobility(self):
         '''
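The two metric rewards added above simply normalize into [0, 1] and clip; restated as standalone functions so the normalization is easy to check (the function names and sample numbers below are illustrative, not part of the package):

    import numpy as np

    def reward_packet_delivery_ratio(pdr):
        # The delivery ratio is already in [0, 1]; clipping only guards against drift
        return float(np.clip(pdr, 0.0, 1.0))

    def reward_latency(total_latency, n_sensors, steps):
        # Worst case assumed to be n_sensors * steps hops, as in the method above
        max_latency = n_sensors * steps
        return float(np.clip(1.0 - total_latency / max_latency, 0.0, 1.0))

    print(reward_packet_delivery_ratio(0.95))                           # 0.95
    print(reward_latency(total_latency=120.0, n_sensors=20, steps=30))  # 0.8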
gym_examples-3.0.33.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=wE0UbTYKFzfps8WaV0sJVNYVtrIOFHjFfECrlT0DlmE,193
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=uV5c1RRIyTkA6KMmKuZTLsDWjFNVfALOUoUMLDuwdz0,18917
+gym_examples-3.0.33.dist-info/METADATA,sha256=ebgEj1_GpRVmSw5xzyC88RYQNQ7H47LBmLkdQa__jtA,411
+gym_examples-3.0.33.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.33.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.33.dist-info/RECORD,,
gym_examples-3.0.31.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=augD2S6JxvSYGCFGUz5j2KohuhHRrqFqekAH7LtUdZ4,193
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=HDLGNMvbhGuweGA0IcJkhw1VuzdSiMTN0Ip-6rdvXUQ,18132
-gym_examples-3.0.31.dist-info/METADATA,sha256=IQDb4-6MmoLEdaGk9c2paIu5VrOp31WLclbjjW9M2mM,411
-gym_examples-3.0.31.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.31.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.31.dist-info/RECORD,,
{gym_examples-3.0.31.dist-info → gym_examples-3.0.33.dist-info}/WHEEL
File without changes
{gym_examples-3.0.31.dist-info → gym_examples-3.0.33.dist-info}/top_level.txt
File without changes