gym-examples 3.0.282__py3-none-any.whl → 3.0.283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +81 -32
- {gym_examples-3.0.282.dist-info → gym_examples-3.0.283.dist-info}/METADATA +1 -1
- gym_examples-3.0.283.dist-info/RECORD +7 -0
- gym_examples-3.0.282.dist-info/RECORD +0 -7
- {gym_examples-3.0.282.dist-info → gym_examples-3.0.283.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.282.dist-info → gym_examples-3.0.283.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -12,7 +12,7 @@ import os
 from collections import OrderedDict

 # Define the network parameters for the final reward function
-input_dim =
+input_dim = 4 # length of the individual rewards vector
 output_dim = 1 # final reward

 Eelec = 50e-9 # energy consumption per bit in joules
@@ -26,7 +26,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
 latency_per_hop = 1 # latency per hop in seconds

 base_back_up_dir = "results/data/"
-max_reward =
+max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action

 # Define the final reward function using an attention mechanism
 class Attention(nn.Module):
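The constants above feed an attention-based reward head; the body of the `Attention` class is not touched by this hunk and is not shown in the diff. A minimal sketch of how such a combiner could be wired, assuming a simple softmax-weighted linear head (the layer names and the weighting scheme are assumptions, not the package's actual code):

```python
# Hypothetical sketch -- the Attention class body is not part of this diff.
# It assumes a softmax-weighted combiner that maps the input_dim=4 individual
# rewards to a single scalar (output_dim=1).
import torch
import torch.nn as nn

input_dim = 4   # length of the individual rewards vector
output_dim = 1  # final reward

class Attention(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.scores = nn.Linear(input_dim, input_dim)    # per-component attention scores (assumed)
        self.combine = nn.Linear(input_dim, output_dim)  # weighted rewards -> scalar reward (assumed)

    def forward(self, rewards):
        weights = torch.softmax(self.scores(rewards), dim=-1)
        return self.combine(weights * rewards)

net = Attention(input_dim, output_dim)
net = net.double()  # as in the module: weights converted to double precision
```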
@@ -49,7 +49,7 @@ net = net.double() # Convert the weights to Double

 class WSNRoutingEnv(gym.Env):

-
+    PRINT_STATS = "False" # Global flag to control printing of statistics

     def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4, num_timesteps = None, version = None):

@@ -74,7 +74,7 @@ class WSNRoutingEnv(gym.Env):
         self.episode_count = 0
         self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
         self.epsilon = 1e-10 # small value to avoid division by zero
-
+        self.current_sensor = 0 # Index of the current sensor

         # Define observation space
         self.observation_space = Tuple(
@@ -82,7 +82,9 @@ class WSNRoutingEnv(gym.Env):
         )

         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
+        self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station

         self.reset()

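This hunk replaces the joint MultiDiscrete action (one next-hop choice per sensor) with a single Discrete choice that applies to the sensor tracked in `self.current_sensor`. A small sketch of the two sampling patterns, assuming the default n_sensors = 20 from `__init__`:

```python
# Illustrative only: compares the old and new action interfaces.
from gym.spaces import Discrete, MultiDiscrete  # the package targets gym, not gymnasium

n_sensors = 20  # default from WSNRoutingEnv.__init__

# Before: one next-hop choice per sensor, sampled jointly.
old_space = MultiDiscrete([n_sensors + 1] * n_sensors)
joint_action = old_space.sample()    # array of length n_sensors

# After: a single next-hop choice (index n_sensors means "base station")
# that applies to the sensor the environment currently designates.
new_space = Discrete(n_sensors + 1)
single_action = new_space.sample()   # one integer in [0, n_sensors]
```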
@@ -113,13 +115,14 @@ class WSNRoutingEnv(gym.Env):

         return self._get_obs()

+
     def step(self, actions):
-        actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
-        rewards =
-        # rewards =
-        dones =
-
+        rewards = - max_reward
+        # rewards = 0
+        dones = False
+        actions = np.array([self.current_sensor, actions])
+        for i, action in [actions]:
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue # Skip if sensor has no energy left or no packets to transmit

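`step` now receives one scalar action and pairs it with `self.current_sensor`; wrapping the pair in a one-element list lets the old per-agent loop body run exactly once. An illustrative sketch of that unpacking, with made-up index values:

```python
# Illustrative only: shows why the loop still runs exactly once after the change.
import numpy as np

current_sensor = 3   # whichever sensor the env is currently routing for (made up)
action = 7           # next hop chosen by the agent (made up)

actions = np.array([current_sensor, action])
for i, action in [actions]:   # a one-element list, so a single iteration
    # i == 3, action == 7: the old per-agent loop body is reused unchanged
    print(i, action)
```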
@@ -134,6 +137,11 @@ class WSNRoutingEnv(gym.Env):
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
                 if self.remaining_energy[i] < transmission_energy:
                     self.remaining_energy[i] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the sensor does not have enough energy to transmit data to the base station

                 self.update_sensor_energies(i, transmission_energy)
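The five added lines (pick the next live sensor or end the episode) recur in two more failure branches below. A hedged refactor sketch that names the pattern; `_advance_or_finish` is a hypothetical helper, not part of the package:

```python
# Hypothetical helper -- not in the package; it only names the repeated pattern.
def _advance_or_finish(self):
    """Move on to the next sensor that still has energy and packets,
    or report that the episode is over when none is left."""
    next_sensor = self.find_next_sensor()
    if next_sensor is None:
        return True                    # corresponds to dones = True in the original branches
    self.current_sensor = next_sensor
    return False
```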
@@ -145,10 +153,12 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-
-
-
-
+                rewards = max_reward # Reward for transmitting data to the base station
+                next_sensor = self.find_next_sensor()
+                if next_sensor is None:
+                    dones = True
+                else:
+                    self.current_sensor = next_sensor
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
@@ -157,11 +167,21 @@ class WSNRoutingEnv(gym.Env):
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
                 reception_energy = self.reception_energy(self.number_of_packets[i])
                 if self.remaining_energy[i] < transmission_energy:
-                    self.remaining_energy[i] = 0
+                    self.remaining_energy[i] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the sensor does not have enough energy to transmit data to the next hop
                 if self.remaining_energy[action] < reception_energy:
                     self.number_of_packets[i] = 0
                     self.remaining_energy[action] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the next hop does not have enough energy to receive data

                 self.update_sensor_energies(i, transmission_energy)
@@ -173,18 +193,17 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                rewards
+                rewards = self.compute_individual_rewards(i, action)

             # Update the number of packets
             self.number_of_packets[action] += self.number_of_packets[i]
+
+            self.current_sensor = action
             self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
-            rewards[i] = self.compute_attention_rewards(rewards[i])
-
-
-            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
-                dones[i] = True
-
+            # rewards[i] = self.compute_attention_rewards(rewards[i])
+            rewards = np.mean(rewards)
+
         # Integrate the mobility of the sensors
         # self.integrate_mobility()

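With this hunk the per-step reward becomes the plain mean of the individual reward vector instead of the attention-net output, while the base-station and failure cases overwrite it with plus or minus max_reward. A sketch of the three cases with made-up component values:

```python
# Illustrative only: the component values below are made up.
import numpy as np

max_reward = 3
individual_rewards = np.array([0.9, 0.4, 0.7, 0.6])  # 4 reward terms, each clipped to [0, 1]

step_reward = np.mean(individual_rewards)   # 0.65, replaces the attention-net scalar
delivery_reward = max_reward                # overwrite when the packet reaches the base station
penalty = -max_reward                       # default value when the chosen action is invalid
```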
@@ -195,47 +214,54 @@ class WSNRoutingEnv(gym.Env):

         self.get_metrics()

-        rewards =
-
-
-        # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
-        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
-        dones = all(dones) # Done if all agents are done
+        rewards = rewards.item() if isinstance(rewards, torch.Tensor) else rewards # Convert the reward to a float
+        if not dones:
+            dones = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))

         return self._get_obs(), rewards, dones, self.get_metrics()

+
     def _get_obs(self):
         return [{'remaining_energy': np.array([e]),
                  'consumption_energy': np.array([initial_energy - e]),
                  'sensor_positions': p,
-                 'number_of_packets': np.array([d])
+                 'number_of_packets': np.array([d]),
+                 'curent_sensor': np.array([self.current_sensor])
                  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]

+
     def _get_observation_space(self):
         return Dict(OrderedDict([
             ('remaining_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
             ('consumption_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
             ('sensor_positions', Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64)),
-            ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int))
+            ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)),
+            ('current_sensor', Box(low=0, high=self.n_sensors - 1, shape=(1,), dtype=int))
         ]))

+
     def get_state(self):
         return self._get_obs()

+
     def get_avail_actions(self):
         return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]

+
     def update_sensor_energies(self, i, delta_energy):
         self.remaining_energy[i] -= delta_energy

+
     def transmission_energy(self, number_of_packets, distance):
         # energy consumption for transmitting data on a distance
         return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

+
     def reception_energy(self, number_of_packets):
         # energy consumption for receiving data
         return number_of_packets * info_amount * Eelec

+
     def compute_angle_vectors(self, i, action):
         '''
         Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
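`transmission_energy` and `reception_energy` implement a first-order radio model: a fixed electronics cost per bit plus an amplifier term that grows with the square of the distance. A worked example; Eelec comes from the module constants, while Eamp and info_amount are not visible in these hunks, so the values below are assumptions:

```python
# Worked example of the first-order radio model used above.
# Eelec comes from the diff; Eamp and info_amount are NOT shown in these hunks,
# so the values below are assumptions for illustration only.
Eelec = 50e-9          # J per bit (from the module constants)
Eamp = 100e-12         # J per bit per m^2  (assumed)
info_amount = 3072     # bits per packet    (assumed)

def transmission_energy(number_of_packets, distance):
    return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

def reception_energy(number_of_packets):
    return number_of_packets * info_amount * Eelec

print(transmission_energy(1, 30.0))  # 3072 * (50e-9 + 100e-12 * 900), about 4.3e-4 J
print(reception_energy(1))           # 3072 * 50e-9, about 1.5e-4 J
```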
@@ -249,6 +275,7 @@ class WSNRoutingEnv(gym.Env):

         return np.arccos(np.clip(cosine_angle, -1, 1))

+
     def compute_reward_angle(self, i, action):
         '''
         Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -261,6 +288,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_angle, 0, 1)
         # return np.clip(- normalized_angle, -1, 1)

+
     def compute_reward_distance(self, i, action):
         '''
         Compute the reward based on the distance to the next hop
@@ -275,6 +303,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
         # return np.clip(-normalized_distance_to_next_hop, -1, 1)

+
     def compute_reward_consumption_energy(self, i, action):
         '''
         Compute the reward based on the total energy consumption (transmission, reception)
@@ -297,6 +326,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)

+
     def compute_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy
@@ -309,6 +339,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
     def compute_reward_number_of_packets(self, action):
         '''
         Compute the reward based on the number of packets of the receiver
@@ -322,6 +353,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_number_of_packets, 0, 1)
         # return np.clip(- normalized_number_of_packets, -1, 1)

+
     def compute_individual_rewards(self, i, action):
         '''
         Compute the individual rewards
@@ -343,8 +375,9 @@ class WSNRoutingEnv(gym.Env):

         rewards_performance = np.array([reward_latency, reward_network_throughput, reward_packet_delivery_ratio])

-        return np.concatenate((rewards_energy, rewards_performance))
+        # return np.concatenate((rewards_energy, rewards_performance))
         # return np.array([reward_consumption_energy, reward_dispersion_remaining_energy])
+        return rewards_energy


     def compute_network_rewards(self):
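Returning only rewards_energy lines up with input_dim = 4 at the top of the module; the three performance terms are still computed but no longer fed into the reward vector. A shape sketch with placeholder values (the exact composition of rewards_energy is not shown in this hunk):

```python
# Illustrative only: rewards_energy's exact composition is not shown in this hunk;
# four placeholder components are used to match input_dim = 4.
import numpy as np

rewards_energy = np.array([0.8, 0.5, 0.9, 0.7])   # placeholder energy-related terms
rewards_performance = np.array([0.6, 0.4, 1.0])   # latency, throughput, packet delivery ratio

old_vector = np.concatenate((rewards_energy, rewards_performance))  # what used to be returned
new_vector = rewards_energy                                         # what is returned now, shape (4,)
```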
@@ -360,6 +393,7 @@ class WSNRoutingEnv(gym.Env):

         return np.concatenate((rewards_energy, rewards_performance))

+
     def network_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -372,6 +406,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
     def network_reward_consumption_energy(self):
         '''
         Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -384,6 +419,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)

+
     def compute_reward_packet_delivery_ratio(self):
         '''
         Compute the reward based on the packet delivery ratio
@@ -391,6 +427,7 @@ class WSNRoutingEnv(gym.Env):
         packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
         return np.clip(packet_delivery_ratio, 0, 1)

+
     def compute_reward_latency(self):
         '''
         Compute the reward based on the average latency
@@ -402,6 +439,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_latency, 0, 1)
         # return np.clip(- normalized_latency, -1, 1)

+
     def compute_reward_network_throughput(self):
         '''
         Compute the reward based on the network throughput
@@ -411,6 +449,7 @@ class WSNRoutingEnv(gym.Env):
         normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
         return np.clip(normalized_throughput, 0, 1)

+
     def compute_attention_rewards(self, rewards):
         '''
         Compute the attention-based rewards
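`compute_attention_rewards` still passes the reward vector through the double-precision net, even though its call in `step` is now commented out. A hedged sketch of the call path; the tensor conversion is an assumption, since the diff only shows `final_reward = net(rewards)`:

```python
# Hypothetical sketch of the call path; only `final_reward = net(rewards)` is visible in the diff.
import numpy as np
import torch

individual_rewards = np.array([0.8, 0.5, 0.9, 0.7])                        # length input_dim = 4
rewards_tensor = torch.as_tensor(individual_rewards, dtype=torch.float64)  # matches net.double()
# final_reward = net(rewards_tensor)   # a 1-element tensor (output_dim = 1)
# reward = final_reward.item()         # mirrors the rewards.item() conversion in step()
```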
@@ -419,6 +458,7 @@ class WSNRoutingEnv(gym.Env):
         final_reward = net(rewards)
         return final_reward

+
     def integrate_mobility(self):
         '''
         Integrate the mobility of the sensors after each step
@@ -431,6 +471,7 @@ class WSNRoutingEnv(gym.Env):
             if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
                 self.sensor_positions[i] -= displacement[i]

+
     def get_metrics(self):
         # Calculate network throughput
         self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
@@ -449,4 +490,12 @@ class WSNRoutingEnv(gym.Env):
             "packet_delivery_ratio": self.packet_delivery_ratio,
             "network_lifetime": self.network_lifetime,
             "average_latency": self.average_latency
-        }
+        }
+
+
+    def find_next_sensor(self):
+        for offset in range(1, self.n_sensors):
+            next_index = (self.current_sensor + offset) % self.n_sensors
+            if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
+                return next_index
+        return None # If no such sensor is found
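`find_next_sensor` scans forward from the current index, wrapping around, and returns the first sensor that still has both remaining energy and packets, or None when no sensor qualifies. A standalone illustration of the same scan with made-up arrays:

```python
# Standalone illustration of the round-robin scan; the arrays are made up.
import numpy as np

n_sensors = 5
current_sensor = 2
remaining_energy = np.array([0.0, 0.2, 0.0, 0.0, 0.5])
number_of_packets = np.array([0, 3, 1, 0, 2])

def find_next_sensor(current_sensor):
    for offset in range(1, n_sensors):
        next_index = (current_sensor + offset) % n_sensors
        if remaining_energy[next_index] > 0 and number_of_packets[next_index] > 0:
            return next_index
    return None

print(find_next_sensor(current_sensor))  # 4: sensor 3 has no energy, sensor 4 is the first that qualifies
```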
gym_examples-3.0.283.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=B6nFhjmZ3o9wglL3vYZps18eP8W7b436z2-pBFs_-2w,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=pi4-ErjIpxM1jrWjMr8vbUHmp0YSY9YaGSTXsL2k50I,24121
+gym_examples-3.0.283.dist-info/METADATA,sha256=0J7nhDUnmHi_7HwHNYycQqRikH_nlbSNNGmVVf6ujm0,412
+gym_examples-3.0.283.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.283.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.283.dist-info/RECORD,,
gym_examples-3.0.282.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=fBEwdHLyLAy9RQVdFO5HIxVvlyPKD2yEcyS6eJvwA80,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=bwXjZ9uS41ChZ7DvgeIwhPkLVUfkRzWCwPQjFDs34ho,23218
-gym_examples-3.0.282.dist-info/METADATA,sha256=PgGDOKmwo0s0Vx5X8CEgJntslyORoybP5IDEE-4CI7M,412
-gym_examples-3.0.282.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.282.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.282.dist-info/RECORD,,
{gym_examples-3.0.282.dist-info → gym_examples-3.0.283.dist-info}/WHEEL
File without changes
{gym_examples-3.0.282.dist-info → gym_examples-3.0.283.dist-info}/top_level.txt
File without changes