gym-examples 3.0.247-py3-none-any.whl → 3.0.249-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
     entry_point="gym_examples.envs:WSNRoutingEnv",
 )
 
-__version__ = "3.0.247"
+__version__ = "3.0.249"
gym_examples/envs/wsn_env.py CHANGED
@@ -48,7 +48,7 @@ net = net.double() # Convert the weights to Double
 
 class WSNRoutingEnv(gym.Env):
 
-    print_stats = False # Global flag to control printing of statistics
+    PRINT_STATS = "False" # Global flag to control printing of statistics
 
     def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4, num_timesteps = None, version = None):
 
@@ -130,18 +130,13 @@ class WSNRoutingEnv(gym.Env):
 
     def step(self, actions):
         self.steps += 1
-        rewards = [-max_reward] * self.n_sensors
-        # rewards = [0] * self.n_sensors
-        dones = [False] * self.n_sensors
+        # rewards = [-max_reward] * self.n_sensors
+        reward = -max_reward
+        # dones = [False] * self.n_sensors
+        done = False
         # actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         # for i, action in enumerate(actions):
-        for i, action in [actions]:
-            if action not in range(self.n_sensors + 1):
-                raise ValueError("Invalid action!")
-
-            if i >= self.n_sensors:
-                raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
-
+        for i, action in [actions]: # This loop is for the PPO algorithm: actions is a numpy array of shape (1, 2)
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue # Skip if sensor has no energy left or no packets to transmit
 
@@ -167,10 +162,10 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0
 
-                # rewards[i] = self.compute_individual_rewards(i, action)
-                rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
-                # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
-                dones[i] = True
+                # rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
+                reward = max_reward
+                # dones[i] = True
+                # done = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
@@ -195,18 +190,19 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0
 
-                rewards[i] = self.compute_individual_rewards(i, action)
+                # rewards[i] = self.compute_individual_rewards(i, action)
+                reward = self.compute_individual_rewards(i, action)
 
             # Update the number of packets
             self.number_of_packets[action] += self.number_of_packets[i]
+
             self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
             # rewards[i] = self.compute_attention_rewards(rewards[i])
-            rewards[i] = np.mean(rewards[i])
-            # rewards[i] = self.compute_weighted_sum_rewards(rewards[i])
-        for i in range(self.n_sensors):
-            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
-                dones[i] = True
+            # rewards[i] = np.mean(rewards[i])
+            # for i in range(self.n_sensors):
+            #     if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
+            #         dones[i] = True
 
         # Integrate the mobility of the sensors
         # self.integrate_mobility()
@@ -218,41 +214,45 @@ class WSNRoutingEnv(gym.Env):
 
         self.get_metrics()
 
-        rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
-        # rewards = np.sum(rewards) # Sum the rewards of all agents
-        rewards = np.mean(rewards) # Average the rewards of all agents
-        # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
-        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
-        dones = all(dones) # Done if all agents are done
+        # rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
+        reward = reward.item() if isinstance(reward, torch.Tensor) else reward # Convert the reward to a float
+        # rewards = np.mean(rewards) # Average the rewards of all agents
+        # dones = all(dones) # Done if all agents are done
+        # Check if done condition is met
+        done = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
 
-        if os.getenv('PRINT_STATS') == 'True': # We are trying to extract only the statistics for the PPO algorithm
-            self.number_of_steps += 1
-            self.episode_return += rewards
-            if self.number_of_steps >= self.num_timesteps:
-                self.episode_returns.append(self.episode_return)
-                self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
-                self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
-                self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
-                self.episode_network_throughput.append(self.network_throughput)
-                self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
-                self.episode_network_lifetime.append(self.network_lifetime)
-                self.episode_average_latency.append(self.average_latency)
-
-                metrics = {
-                    "returns_PPO": self.episode_returns,
-                    "std_remaining_energy_PPO": self.episode_std_remaining_energy,
-                    "total_consumption_energy_PPO": self.episode_total_consumption_energy,
-                    "mean_remaining_energy_PPO": self.episode_mean_remaining_energy,
-                    "network_throughput_PPO": self.episode_network_throughput,
-                    "packet_delivery_ratio_PPO": self.episode_packet_delivery_ratio,
-                    "network_lifetime_PPO": self.episode_network_lifetime,
-                    "average_latency_PPO": self.episode_average_latency
-                }
-
-                for metric_name, metric_value in metrics.items():
-                    np.save(f"{base_back_up_dir}{metric_name}_{self.version}.npy", np.array(metric_value))
-
-        return self._get_obs(), rewards, dones, {}
+        # if os.getenv('PRINT_STATS') == 'True': # We are trying to extract only the statistics for the PPO algorithm
+        #     self.number_of_steps += 1
+
+
+
+        #     self.episode_return += reward
+        #     if self.number_of_steps >= self.num_timesteps:
+        #         self.episode_returns.append(self.episode_return)
+        #         self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
+        #         self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
+        #         self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
+        #         self.episode_network_throughput.append(self.network_throughput)
+        #         self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
+        #         self.episode_network_lifetime.append(self.network_lifetime)
+        #         self.episode_average_latency.append(self.average_latency)
+
+        #         metrics = {
+        #             "returns_PPO": self.episode_returns,
+        #             "std_remaining_energy_PPO": self.episode_std_remaining_energy,
+        #             "total_consumption_energy_PPO": self.episode_total_consumption_energy,
+        #             "mean_remaining_energy_PPO": self.episode_mean_remaining_energy,
+        #             "network_throughput_PPO": self.episode_network_throughput,
+        #             "packet_delivery_ratio_PPO": self.episode_packet_delivery_ratio,
+        #             "network_lifetime_PPO": self.episode_network_lifetime,
+        #             "average_latency_PPO": self.episode_average_latency
+        #         }
+
+        #         for metric_name, metric_value in metrics.items():
+        #             np.save(f"{base_back_up_dir}{metric_name}_{self.version}.npy", np.array(metric_value))
+
+        #     return self._get_obs(), rewards, dones, {}
+        return self._get_obs(), reward, done, self.get_metrics()
 
     def _get_obs(self):
         return [{'remaining_energy': np.array([e]),
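Taken together, the step() hunks replace the per-sensor rewards and dones lists with a single scalar reward, a single done flag, and the value of self.get_metrics() as the info payload, which is the shape a single-agent PPO loop expects. A minimal rollout sketch against that signature (illustrative only; the (sensor index, next hop) action pair and the constructor arguments are assumptions read off the code above, not documented package behaviour):

import numpy as np
from gym_examples.envs import WSNRoutingEnv

env = WSNRoutingEnv(n_sensors=20, num_timesteps=1_000, version="demo")  # "demo" is a placeholder tag
obs = env.reset()
episode_return = 0.0
for _ in range(1_000):
    action = np.array([0, 1])                   # assumed layout: (sensor index, chosen next hop)
    obs, reward, done, info = env.step(action)  # scalar reward, single done flag, get_metrics() output as info
    episode_return += np.mean(reward)           # np.mean is a no-op for scalars and guards a vector reward
    if done:
        break
print(episode_return, info)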
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-examples
-Version: 3.0.247
+Version: 3.0.249
 Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
 Home-page: https://github.com/gedji/CODES.git
 Author: Georges Djimefo
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=G2DLzOhUNAdwD7RkznOfc878KsdCCKd4YRmcvwcOf1g,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=xanAlM0cT_9dXLm4HRVPHgyDuBRtMELKDdORnSgLyYI,26384
+gym_examples-3.0.249.dist-info/METADATA,sha256=JF6kr6DB9NP3By_vj9tgw7AXbCV3v4Q5RbM5r4yE0AQ,412
+gym_examples-3.0.249.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.249.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.249.dist-info/RECORD,,
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=4YqbEiyWGUkSMfKMEm3-P_iG4WOE9cGwRiS4uYcUirY,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=qNvcWbmU5fv6t-CRHNWzWfPU_Z3qp_Qu9m6IPe-EuGg,26505
-gym_examples-3.0.247.dist-info/METADATA,sha256=BZj5MGtQrmRwuaB6F2XyUwJjhrMJrw37qkH7CouJ85s,412
-gym_examples-3.0.247.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.247.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.247.dist-info/RECORD,,