gym-examples 3.0.263__py3-none-any.whl → 3.0.265__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
     entry_point="gym_examples.envs:WSNRoutingEnv",
 )
 
-__version__ = "3.0.263"
+__version__ = "3.0.265"
gym_examples/envs/wsn_env.py CHANGED
@@ -81,9 +81,9 @@ class WSNRoutingEnv(gym.Env):
         )
 
         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-        # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
         # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
-        self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))
+        # self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))
 
         self.reset()
 
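Note on this hunk: 3.0.265 switches the environment from a single flattened Discrete action space back to a per-agent MultiDiscrete. The sketch below is not code from the package; it assumes gym's spaces API and the illustrative sizes n_agents = 3, n_sensors = 5, and shows what an action looks like under each encoding plus the decode step the old space forced on step().

# Illustrative sketch only -- not part of gym-examples.
from gym.spaces import Discrete, MultiDiscrete

n_agents, n_sensors = 3, 5

# 3.0.265: one sub-action per agent, each value in {0, ..., n_sensors}.
new_space = MultiDiscrete([n_sensors + 1] * n_agents)
per_agent_targets = new_space.sample()            # e.g. array([2, 5, 0])

# 3.0.263: a single integer that step() had to decode into (sensor, target).
old_space = Discrete(n_agents * (n_sensors + 1))
flat = old_space.sample()
selected_sensor, target = divmod(flat, n_sensors + 1)

The MultiDiscrete form is what the rewritten step() below expects: it indexes the action per agent instead of dividing and taking the remainder.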
@@ -131,16 +131,17 @@ class WSNRoutingEnv(gym.Env):
 
 
     def step(self, actions):
+        actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
-        # rewards = [-max_reward] * self.n_sensors
-        reward = -max_reward
-        # dones = [False] * self.n_sensors
-        done = False
-        # actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
-        # for i, action in enumerate(actions):
-        selected_sensor = actions // (self.n_sensors + 1)
-        target = actions % (self.n_sensors + 1)
-        for i, action in [np.array([selected_sensor, target])]: # This loop is for the PPO algorithm: actions is a numpy array of shape (1, 2)
+        rewards = [-max_reward] * self.n_sensors
+        dones = [False] * self.n_sensors
+        for i, action in enumerate(actions):
+            if action not in range(self.n_sensors + 1):
+                raise ValueError("Invalid action!")
+
+            if i >= self.n_sensors:
+                raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
+
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue # Skip if sensor has no energy left or no packets to transmit
 
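The rewritten step() above no longer decodes a flat integer; it expects one next-hop index per agent and validates every entry. A hypothetical caller-side sketch follows; the sensor count and the convention that index n_sensors addresses the base station are assumptions drawn from the action range and the base-station branch further down, not from code visible in this hunk.

# Hypothetical usage sketch -- values are illustrative.
import numpy as np

n_sensors = 5
action = np.array([5, 2, 0, 5, 1])   # one next hop per agent; 5 presumably = base station

# What the new step() does first: unpack per agent, then validate.
per_agent = [action[i] for i in range(len(action))]
assert all(0 <= a <= n_sensors for a in per_agent)   # otherwise step() raises ValueError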
@@ -166,10 +167,10 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0
 
-                # rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
-                reward = max_reward
-                # dones[i] = True
-                # done = True
+                # rewards[i] = self.compute_individual_rewards(i, action)
+                rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
+                # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
+                dones[i] = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
@@ -194,20 +195,18 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0
 
-                # rewards[i] = self.compute_individual_rewards(i, action)
-                reward = self.compute_individual_rewards(i, action)
+                rewards[i] = self.compute_individual_rewards(i, action)
 
                 # Update the number of packets
                 self.number_of_packets[action] += self.number_of_packets[i]
-
             self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
             # rewards[i] = self.compute_attention_rewards(rewards[i])
-            # rewards[i] = np.mean(rewards[i])
-            reward = np.mean(reward)
-            # for i in range(self.n_sensors):
-            #     if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
-            #         dones[i] = True
+            rewards[i] = np.mean(rewards[i])
+            # rewards[i] = self.compute_weighted_sum_rewards(rewards[i])
+        for i in range(self.n_sensors):
+            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
+                dones[i] = True
 
         # Integrate the mobility of the sensors
         # self.integrate_mobility()
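After this hunk each sensor's reward is again a vector (the energy and performance terms concatenated by compute_individual_rewards) that is collapsed with np.mean, and sensors with no energy or no packets left are flagged done. A numeric illustration with invented component values (the real ones come from the reward functions further down in the file):

# Invented numbers, for illustration of the aggregation only.
import numpy as np

rewards_energy      = np.array([0.8, 0.6, 0.9, 0.7])   # e.g. angle, distance, consumption, dispersion
rewards_performance = np.array([0.5, 0.4, 0.3])        # e.g. delivery ratio, latency, throughput
reward_vector = np.concatenate((rewards_energy, rewards_performance))
reward_i = np.mean(reward_vector)                       # -> 0.6, stored in rewards[i]

remaining_energy  = np.array([0.0, 2.5, 1.0])
number_of_packets = np.array([3, 0, 2])
dones = [(e <= 0) or (p <= 0) for e, p in zip(remaining_energy, number_of_packets)]
# -> [True, True, False]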
@@ -219,45 +218,15 @@ class WSNRoutingEnv(gym.Env):
 
         self.get_metrics()
 
-        # rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
-        reward = reward.item() if isinstance(reward, torch.Tensor) else reward # Convert the reward to a float
-        # rewards = np.mean(rewards) # Average the rewards of all agents
-        # dones = all(dones) # Done if all agents are done
-        # Check if done condition is met
-        done = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
+        rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
+        # rewards = np.sum(rewards) # Sum the rewards of all agents
+        rewards = np.mean(rewards) # Average the rewards of all agents
+        # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
+        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
+        dones = all(dones) # Done if all agents are done
 
-        # if os.getenv('PRINT_STATS') == 'True': # We are trying to extract only the statistics for the PPO algorithm
-        #     self.number_of_steps += 1
+        return self._get_obs(), rewards, dones, self.get_metrics()
 
-
-
-        #     self.episode_return += reward
-        #     if self.number_of_steps >= self.num_timesteps:
-        #         self.episode_returns.append(self.episode_return)
-        #         self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
-        #         self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
-        #         self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
-        #         self.episode_network_throughput.append(self.network_throughput)
-        #         self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
-        #         self.episode_network_lifetime.append(self.network_lifetime)
-        #         self.episode_average_latency.append(self.average_latency)
-
-        #         metrics = {
-        #             "returns_PPO": self.episode_returns,
-        #             "std_remaining_energy_PPO": self.episode_std_remaining_energy,
-        #             "total_consumption_energy_PPO": self.episode_total_consumption_energy,
-        #             "mean_remaining_energy_PPO": self.episode_mean_remaining_energy,
-        #             "network_throughput_PPO": self.episode_network_throughput,
-        #             "packet_delivery_ratio_PPO": self.episode_packet_delivery_ratio,
-        #             "network_lifetime_PPO": self.episode_network_lifetime,
-        #             "average_latency_PPO": self.episode_average_latency
-        #         }
-
-        #         for metric_name, metric_value in metrics.items():
-        #             np.save(f"{base_back_up_dir}{metric_name}_{self.version}.npy", np.array(metric_value))
-
-        # return self._get_obs(), rewards, dones, {}
-        return self._get_obs(), reward, done, self.get_metrics()
 
     def _get_obs(self):
         return [{'remaining_energy': np.array([e]),
@@ -266,6 +235,7 @@ class WSNRoutingEnv(gym.Env):
                 'number_of_packets': np.array([d])
                 } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
+
     def _get_observation_space(self):
         return Dict({
             'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
@@ -274,23 +244,29 @@ class WSNRoutingEnv(gym.Env):
             'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
         })
 
+
     def get_state(self):
         return self._get_obs()
 
+
     def get_avail_actions(self):
         return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]
 
+
     def update_sensor_energies(self, i, delta_energy):
         self.remaining_energy[i] -= delta_energy
 
+
     def transmission_energy(self, number_of_packets, distance):
         # energy consumption for transmitting data on a distance
         return number_of_packets * info_amount * (Eelec + Eamp * distance**2)
 
+
     def reception_energy(self, number_of_packets):
         # energy consumption for receiving data
         return number_of_packets * info_amount * Eelec
 
+
     def compute_angle_vectors(self, i, action):
         '''
         Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
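transmission_energy and reception_energy above follow the usual first-order radio model. A worked sketch with assumed constants (Eelec, Eamp and info_amount are defined at module level in wsn_env.py and are not visible in this diff, so the numbers below are placeholders):

# Assumed constants -- placeholders, not the values used by wsn_env.py.
Eelec = 50e-9         # J per bit for the radio electronics
Eamp = 100e-12        # J per bit per m^2 for the amplifier
info_amount = 3072    # bits per packet

def transmission_energy(number_of_packets, distance):
    return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

def reception_energy(number_of_packets):
    return number_of_packets * info_amount * Eelec

tx = transmission_energy(2, 30)   # 2 * 3072 * (50e-9 + 100e-12 * 900) ~ 8.6e-4 J
rx = reception_energy(2)          # 2 * 3072 * 50e-9 ~ 3.1e-4 J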
@@ -304,6 +280,7 @@ class WSNRoutingEnv(gym.Env):
 
         return np.arccos(np.clip(cosine_angle, -1, 1))
 
+
     def compute_reward_angle(self, i, action):
         '''
         Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -316,6 +293,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_angle, 0, 1)
         # return np.clip(- normalized_angle, -1, 1)
 
+
     def compute_reward_distance(self, i, action):
         '''
         Compute the reward based on the distance to the next hop
@@ -330,6 +308,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
         # return np.clip(-normalized_distance_to_next_hop, -1, 1)
 
+
     def compute_reward_consumption_energy(self, i, action):
         '''
         Compute the reward based on the total energy consumption (transmission, reception)
@@ -352,6 +331,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)
 
+
     def compute_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy
@@ -364,6 +344,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
+
     def compute_reward_number_of_packets(self, action):
         '''
         Compute the reward based on the number of packets of the receiver
@@ -377,6 +358,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_number_of_packets, 0, 1)
         # return np.clip(- normalized_number_of_packets, -1, 1)
 
+
     def compute_individual_rewards(self, i, action):
         '''
         Compute the individual rewards
@@ -416,6 +398,7 @@ class WSNRoutingEnv(gym.Env):
 
         return np.concatenate((rewards_energy, rewards_performance))
 
+
     def network_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -428,6 +411,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
+
     def network_reward_consumption_energy(self):
         '''
         Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -440,6 +424,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)
 
+
     def compute_reward_packet_delivery_ratio(self):
         '''
         Compute the reward based on the packet delivery ratio
@@ -447,6 +432,7 @@ class WSNRoutingEnv(gym.Env):
         packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
         return np.clip(packet_delivery_ratio, 0, 1)
 
+
     def compute_reward_latency(self):
         '''
         Compute the reward based on the average latency
@@ -458,6 +444,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_latency, 0, 1)
         # return np.clip(- normalized_latency, -1, 1)
 
+
     def compute_reward_network_throughput(self):
         '''
         Compute the reward based on the network throughput
@@ -467,6 +454,7 @@ class WSNRoutingEnv(gym.Env):
         normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
         return np.clip(normalized_throughput, 0, 1)
 
+
     def compute_attention_rewards(self, rewards):
         '''
         Compute the attention-based rewards
@@ -475,6 +463,7 @@ class WSNRoutingEnv(gym.Env):
         final_reward = net(rewards)
         return final_reward
 
+
     def integrate_mobility(self):
         '''
         Integrate the mobility of the sensors after each step
@@ -487,6 +476,7 @@ class WSNRoutingEnv(gym.Env):
             if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
                 self.sensor_positions[i] -= displacement[i]
 
+
     def get_metrics(self):
         # Calculate network throughput
         self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
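Taken together, the changes above restore a conventional single-env interface: step() now returns the observation list, one averaged scalar reward, a single done flag, and the get_metrics() output as the info value. A minimal rollout sketch against that interface, assuming WSNRoutingEnv can be constructed with default arguments (its constructor signature is not part of this diff):

# Rollout sketch for the 3.0.265 interface; constructor defaults are assumed.
from gym_examples.envs import WSNRoutingEnv

env = WSNRoutingEnv()                     # adjust arguments to the real signature
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()    # MultiDiscrete: one next hop per agent
    obs, reward, done, info = env.step(action)
    # reward: scalar, np.mean of the per-sensor rewards
    # done:   True once every sensor is flagged done
    # info:   whatever get_metrics() returns (throughput, delivery ratio, latency, ...)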
gym_examples-{3.0.263 → 3.0.265}.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-examples
-Version: 3.0.263
+Version: 3.0.265
 Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
 Home-page: https://github.com/gedji/CODES.git
 Author: Georges Djimefo
gym_examples-{3.0.263 → 3.0.265}.dist-info/RECORD CHANGED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=F0u_yF7l2eD3fOQTUzN2XUFt7etMgtlhgDv5tFMubpE,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=2GvyLeNjLM-q9oLCobVzpTUrUqzjTyRn_H3BAQZZvbY,24821
+gym_examples-3.0.265.dist-info/METADATA,sha256=D0__HbG6kJd9Kb1SX_Lh_MysXCxkK_ToMFudnMPWqgU,412
+gym_examples-3.0.265.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.265.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.265.dist-info/RECORD,,
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=V-lDBqJirIbwK5vx8WNT5JyIwiwk22PGBiYSvm6JPo8,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=Ee3PTxhwKw2igwK5z465IdgK3mP0ParioXCni6BqXiE,26695
-gym_examples-3.0.263.dist-info/METADATA,sha256=rpRe1k5FCesyS6hKApj_Z-_sQAjJ01WM90Mzk_a5XPs,412
-gym_examples-3.0.263.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.263.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.263.dist-info/RECORD,,