gym-examples 3.0.266__py3-none-any.whl → 3.0.268__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +54 -52
- {gym_examples-3.0.266.dist-info → gym_examples-3.0.268.dist-info}/METADATA +1 -1
- gym_examples-3.0.268.dist-info/RECORD +7 -0
- gym_examples-3.0.266.dist-info/RECORD +0 -7
- {gym_examples-3.0.266.dist-info → gym_examples-3.0.268.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.266.dist-info → gym_examples-3.0.268.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -25,7 +25,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
 latency_per_hop = 1 # latency per hop in seconds

 base_back_up_dir = "results/data/"
-max_reward =
+max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action

 # Define the final reward function using an attention mechanism
 class Attention(nn.Module):
@@ -73,7 +73,7 @@ class WSNRoutingEnv(gym.Env):
         self.episode_count = 0
         self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
         self.epsilon = 1e-10 # small value to avoid division by zero
-
+        self.current_sensor = 0 # Index of the current sensor

         # Define observation space
         self.observation_space = Tuple(
@@ -81,28 +81,13 @@ class WSNRoutingEnv(gym.Env):
         )

         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
         # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
-
+        self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station

         self.reset()

     def reset(self):
-
-        # if self.episode_count > 1 and os.getenv('PRINT_STATS') == 'True': # Statistics for the PPO algorithm during the training phase
-        #     self.episode_returns.append(self.episode_return)
-        #     self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
-        #     self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
-        #     self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
-        #     self.episode_network_throughput.append(self.network_throughput)
-        #     self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
-        #     self.episode_network_lifetime.append(self.network_lifetime)
-        #     self.episode_average_latency.append(self.average_latency)
-
-        # if self.episode_count > 1 and os.getenv('PRINT_STATS') == 'True-False': # Statistics for the PPO algorithm during the evaluation phase
-        #     print(f"Episode: {self.episode_count}")
-        #     print(self.get_metrics())
-
         self.episode_return = 0
         self.sensor_positions = np.random.rand(self.n_sensors, 2) * (upper_bound - lower_bound) + lower_bound
         self.distance_to_base = np.linalg.norm(self.sensor_positions - base_station_position, axis=1)
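The central change in this hunk is the switch from a per-agent MultiDiscrete action space to a single Discrete action space, with the environment itself tracking which sensor acts via self.current_sensor. As a rough illustration of what that means for a caller, here is a minimal sketch; the n_sensors and n_agents values are hypothetical and not taken from the package:

from gym.spaces import Discrete, MultiDiscrete

n_sensors, n_agents = 5, 5  # hypothetical sizes for illustration

# 3.0.266: one next-hop choice per agent, sampled as a vector
old_space = MultiDiscrete([n_sensors + 1] * n_agents)
print(old_space.sample())   # e.g. array([3, 0, 5, 1, 2])

# 3.0.268: a single next-hop choice for the sensor tracked by self.current_sensor
new_space = Discrete(n_sensors + 1)  # index n_sensors means "send to the base station"
print(new_space.sample())   # e.g. 4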
@@ -131,18 +116,13 @@ class WSNRoutingEnv(gym.Env):


     def step(self, actions):
-        actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
-
-        rewards =
-        dones =
-
-
-
-
-            if i >= self.n_sensors:
-                raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
-
+        rewards = - max_reward
+        # rewards = 0
+        dones = False
+        actions = np.array([self.current_sensor, actions])
+        print(f"\nactions from step in WSNRoutingEnv: {actions}\n")
+        for i, action in [actions]:
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue # Skip if sensor has no energy left or no packets to transmit

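Because the environment now tracks self.current_sensor itself, step() receives a single scalar action and pairs it with that index before entering the loop. A small standalone sketch of how that pairing and the single-element loop behave; the concrete values are made up for illustration:

import numpy as np

current_sensor = 2      # index the environment is currently routing for (hypothetical)
action = 5              # next hop chosen by the agent; with n_sensors = 5 this is the base station

pair = np.array([current_sensor, action])
for i, a in [pair]:     # a one-element list, so the body runs exactly once
    print(i, a)         # -> 2 5: sensor 2 forwards its packets toward index 5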
@@ -157,6 +137,11 @@ class WSNRoutingEnv(gym.Env):
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
                 if self.remaining_energy[i] < transmission_energy:
                     self.remaining_energy[i] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the sensor does not have enough energy to transmit data to the base station

                 self.update_sensor_energies(i, transmission_energy)
@@ -168,10 +153,12 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-
-
-
-
+                rewards = max_reward # Reward for transmitting data to the base station
+                next_sensor = self.find_next_sensor()
+                if next_sensor is None:
+                    dones = True
+                else:
+                    self.current_sensor = next_sensor
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
@@ -180,11 +167,21 @@ class WSNRoutingEnv(gym.Env):
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
                 reception_energy = self.reception_energy(self.number_of_packets[i])
                 if self.remaining_energy[i] < transmission_energy:
-                    self.remaining_energy[i] = 0
+                    self.remaining_energy[i] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the sensor does not have enough energy to transmit data to the next hop
                 if self.remaining_energy[action] < reception_energy:
                     self.number_of_packets[i] = 0
                     self.remaining_energy[action] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the next hop does not have enough energy to receive data

                 self.update_sensor_energies(i, transmission_energy)
@@ -196,19 +193,17 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                rewards
+                rewards = self.compute_individual_rewards(i, action)

             # Update the number of packets
             self.number_of_packets[action] += self.number_of_packets[i]
+
+            self.current_sensor = action
             self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
             # rewards[i] = self.compute_attention_rewards(rewards[i])
-            rewards
-
-        for i in range(self.n_sensors):
-            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
-                dones[i] = True
-
+            rewards = np.mean(rewards)
+
         # Integrate the mobility of the sensors
         # self.integrate_mobility()

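Taken together with the earlier hunks, the per-step reward is now a single scalar: it starts at -max_reward, becomes +max_reward when the current sensor delivers to the base station, and otherwise becomes the mean of compute_individual_rewards(i, action) when packets are forwarded to another sensor. The following is only a hedged sketch of that implied scheme, not the package's actual method; scalar_reward and its arguments are invented for illustration:

import numpy as np

max_reward = 3  # module-level constant from the diff

def scalar_reward(delivered_to_base, individual_rewards=None):
    # Sketch of the reward scheme implied by this hunk, not the package's own code
    if delivered_to_base:
        return max_reward                           # data reached the base station
    if individual_rewards is not None:
        return float(np.mean(individual_rewards))   # forwarded to another sensor
    return -max_reward                              # default / failed or unauthorized action

print(scalar_reward(True))                    # 3
print(scalar_reward(False, [0.2, 0.4, 0.6]))  # 0.4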
@@ -219,13 +214,10 @@ class WSNRoutingEnv(gym.Env):

         self.get_metrics()

-        rewards =
-
-
-
-        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
-        dones = all(dones) # Done if all agents are done
-
+        rewards = rewards.item() if isinstance(rewards, torch.Tensor) else rewards # Convert the reward to a float
+        if not dones:
+            dones = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
+
         return self._get_obs(), rewards, dones, self.get_metrics()


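The termination check also changes: instead of reducing a per-agent dones list with all(dones), a single boolean is set once every sensor is either out of energy or out of packets. A minimal standalone check of that condition, with made-up arrays:

import numpy as np

# Hypothetical network state: sensors 0 and 2 are drained or empty, sensor 1 still has work to do
remaining_energy  = np.array([0.0, 0.4, 0.0])
number_of_packets = np.array([2,   3,   0  ])
n_sensors = 3

done = all(remaining_energy[i] <= 0 or number_of_packets[i] == 0 for i in range(n_sensors))
print(done)  # False: sensor 1 can still transmit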
@@ -233,7 +225,8 @@ class WSNRoutingEnv(gym.Env):
         return [{'remaining_energy': np.array([e]),
                  'consumption_energy': np.array([initial_energy - e]),
                  'sensor_positions': p,
-                 'number_of_packets': np.array([d])
+                 'number_of_packets': np.array([d]),
+                 'curent_sensor': np.array([self.current_sensor])
                  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]


@@ -242,7 +235,8 @@ class WSNRoutingEnv(gym.Env):
             'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
             'consumption_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
             'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
-            'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
+            'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int),
+            'current_sensor': Box(low=0, high=self.n_sensors - 1, shape=(1,), dtype=int)
             })


@@ -496,4 +490,12 @@ class WSNRoutingEnv(gym.Env):
             "packet_delivery_ratio": self.packet_delivery_ratio,
             "network_lifetime": self.network_lifetime,
             "average_latency": self.average_latency
-        }
+        }
+
+
+    def find_next_sensor(self):
+        for offset in range(1, self.n_sensors):
+            next_index = (self.current_sensor + offset) % self.n_sensors
+            if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
+                return next_index
+        return None # If no such sensor is found
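find_next_sensor() scans the other sensors in round-robin order, starting just after current_sensor, and returns the first one that still has both energy and packets, or None when the network is exhausted. A quick behavioural check under assumed state; the _Probe class and its arrays are invented for illustration and carry only the attributes the method reads:

import numpy as np

class _Probe:
    # Minimal stand-in with only the attributes find_next_sensor uses
    def __init__(self):
        self.n_sensors = 4
        self.current_sensor = 1
        self.remaining_energy = np.array([0.5, 0.0, 0.0, 0.8])
        self.number_of_packets = np.array([1, 0, 2, 0])

    def find_next_sensor(self):
        for offset in range(1, self.n_sensors):
            next_index = (self.current_sensor + offset) % self.n_sensors
            if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
                return next_index
        return None  # If no such sensor is found

print(_Probe().find_next_sensor())  # -> 0: sensor 2 has no energy, sensor 3 has no packets, sensor 0 has both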
gym_examples-3.0.268.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=HpffWs56Jd1UDNQGyBLzk4OLzrDNZMUx_SN58BZaJA8,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=MTP_6sqJT4T2IBGedoLOGaKVZras8gT2NXvWHfggOXA,24154
+gym_examples-3.0.268.dist-info/METADATA,sha256=NvjGw3Ag9m0cr-a5mu_DQn2IkoEiiCUmZjeFAQye0wI,412
+gym_examples-3.0.268.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.268.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.268.dist-info/RECORD,,
gym_examples-3.0.266.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=KqEqczXOEfVmbkVd7dsDXk4gyzc5VC_HrycnPdSH_8M,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=4CXrSu-8epidbKW7JgPIzbXPS-WPtCC82FIsdPPAlMQ,24863
-gym_examples-3.0.266.dist-info/METADATA,sha256=KdqFD13218m6mSDcVJ5fpru17gAS0IuYts1m4uMPUTE,412
-gym_examples-3.0.266.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.266.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.266.dist-info/RECORD,,
{gym_examples-3.0.266.dist-info → gym_examples-3.0.268.dist-info}/WHEEL
File without changes
{gym_examples-3.0.266.dist-info → gym_examples-3.0.268.dist-info}/top_level.txt
File without changes