gym-examples 3.0.263__py3-none-any.whl → 3.0.264__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
  entry_point="gym_examples.envs:WSNRoutingEnv",
  )
 
- __version__ = "3.0.263"
+ __version__ = "3.0.264"
gym_examples/envs/wsn_env.py CHANGED
@@ -81,9 +81,9 @@ class WSNRoutingEnv(gym.Env):
  )
 
  # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
- # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+ self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
  # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
- self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))
+ # self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))
 
  self.reset()
 
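Note on the hunk above: version 3.0.264 switches the environment back from a single flat Discrete action space to a MultiDiscrete space with one Discrete(n_sensors + 1) component per agent. The sketch below is illustrative only and not part of the package (the sizes are made up); it contrasts how an action is produced and decoded under the two encodings, using the same //- and %-based decoding the previous step() applied.

from gym.spaces import Discrete, MultiDiscrete

n_agents, n_sensors = 3, 5   # example sizes, not the package defaults

# Old encoding (now commented out): one flat index over every (agent, target) pair,
# decoded with integer division / modulo exactly as the previous step() did.
flat_space = Discrete(n_agents * (n_sensors + 1))
flat_action = flat_space.sample()
selected_sensor = flat_action // (n_sensors + 1)
target = flat_action % (n_sensors + 1)

# Restored encoding: one Discrete(n_sensors + 1) component per agent, so a sample
# is already a vector of per-agent targets and no decoding is needed.
multi_space = MultiDiscrete([n_sensors + 1] * n_agents)
per_agent_targets = multi_space.sample()   # e.g. array([4, 0, 5])
print(selected_sensor, target, per_agent_targets)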
@@ -131,16 +131,18 @@ class WSNRoutingEnv(gym.Env):
 
 
  def step(self, actions):
+ actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
  self.steps += 1
- # rewards = [-max_reward] * self.n_sensors
- reward = -max_reward
- # dones = [False] * self.n_sensors
- done = False
- # actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
- # for i, action in enumerate(actions):
- selected_sensor = actions // (self.n_sensors + 1)
- target = actions % (self.n_sensors + 1)
- for i, action in [np.array([selected_sensor, target])]: # This loop is for the PPO algorithm: actions is a numpy array of shape (1, 2)
+ rewards = [-max_reward] * self.n_sensors
+ dones = [False] * self.n_sensors
+ for i, action in enumerate(actions):
+ print(f"\nAction in WSNRoutingEnv: {action} and type: {type(action)}")
+ if action not in range(self.n_sensors + 1):
+ raise ValueError("Invalid action!")
+
+ if i >= self.n_sensors:
+ raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
+
  if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
  continue # Skip if sensor has no energy left or no packets to transmit
 
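The hunk above restores the per-agent loop in step(): the MultiDiscrete sample is unpacked into one Discrete action per agent, each action is range-checked, and per-sensor rewards/dones lists replace the previous scalar reward/done. A minimal standalone sketch of that validation logic, reusing the same variable names (the helper function itself is hypothetical, not part of the package):

import numpy as np

def validate_actions(actions, n_agents, n_sensors):
    # Unpack the MultiDiscrete sample into one Discrete action per agent,
    # mirroring the first added line of the new step().
    actions = [actions[i] for i in range(n_agents)]
    for i, action in enumerate(actions):
        if action not in range(n_sensors + 1):   # target must be a sensor index or the extra (presumably base-station) slot
            raise ValueError("Invalid action!")
        if i >= n_sensors:                       # more actions than sensors is a configuration error
            raise ValueError("Invalid sensor index!")
    return actions

validate_actions(np.array([2, 0, 5]), n_agents=3, n_sensors=5)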
@@ -166,10 +168,10 @@ class WSNRoutingEnv(gym.Env):
  self.total_latency += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- # rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
- reward = max_reward
- # dones[i] = True
- # done = True
+ # rewards[i] = self.compute_individual_rewards(i, action)
+ rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
+ # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
+ dones[i] = True
  else:
  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
  if distance > self.coverage_radius:
@@ -194,20 +196,18 @@ class WSNRoutingEnv(gym.Env):
  self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- # rewards[i] = self.compute_individual_rewards(i, action)
- reward = self.compute_individual_rewards(i, action)
+ rewards[i] = self.compute_individual_rewards(i, action)
 
  # Update the number of packets
  self.number_of_packets[action] += self.number_of_packets[i]
-
  self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
  # Calculate final reward
  # rewards[i] = self.compute_attention_rewards(rewards[i])
- # rewards[i] = np.mean(rewards[i])
- reward = np.mean(reward)
- # for i in range(self.n_sensors):
- # if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
- # dones[i] = True
+ rewards[i] = np.mean(rewards[i])
+ # rewards[i] = self.compute_weighted_sum_rewards(rewards[i])
+ for i in range(self.n_sensors):
+ if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
+ dones[i] = True
 
  # Integrate the mobility of the sensors
  # self.integrate_mobility()
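In these two hunks the scalar reward is replaced by per-sensor entries: each forwarding sensor receives the vector returned by compute_individual_rewards(i, action), reduced with np.mean, and a final sweep marks sensors that are out of energy or packets as done. A small illustrative sketch with made-up values (the arrays below are not taken from the package):

import numpy as np

# Hypothetical per-hop sub-rewards, each already in [0, 1] the way the
# compute_reward_* helpers clip their outputs (angle, distance, energy, dispersion, packets).
sub_rewards = np.array([0.5, 0.75, 1.0, 0.25, 0.0])
per_sensor_reward = np.mean(sub_rewards)   # step() reduces the vector from compute_individual_rewards to one scalar

# Final sweep of the hunk: a sensor is done once it is out of energy or out of packets.
remaining_energy = np.array([0.0, 2.5, 1.1])   # made-up values
number_of_packets = np.array([3, 0, 4])
dones = [(remaining_energy[i] <= 0) or (number_of_packets[i] <= 0) for i in range(3)]
print(per_sensor_reward, dones)   # 0.5 [True, True, False]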
@@ -219,45 +219,15 @@ class WSNRoutingEnv(gym.Env):
 
  self.get_metrics()
 
- # rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
- reward = reward.item() if isinstance(reward, torch.Tensor) else reward # Convert the reward to a float
- # rewards = np.mean(rewards) # Average the rewards of all agents
- # dones = all(dones) # Done if all agents are done
- # Check if done condition is met
- done = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
+ rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
+ # rewards = np.sum(rewards) # Sum the rewards of all agents
+ rewards = np.mean(rewards) # Average the rewards of all agents
+ # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
+ # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
+ dones = all(dones) # Done if all agents are done
 
- # if os.getenv('PRINT_STATS') == 'True': # We are trying to extract only the statistics for the PPO algorithm
- # self.number_of_steps += 1
+ return self._get_obs(), rewards, dones, self.get_metrics()
 
-
-
- # self.episode_return += reward
- # if self.number_of_steps >= self.num_timesteps:
- # self.episode_returns.append(self.episode_return)
- # self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
- # self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
- # self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
- # self.episode_network_throughput.append(self.network_throughput)
- # self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
- # self.episode_network_lifetime.append(self.network_lifetime)
- # self.episode_average_latency.append(self.average_latency)
-
- # metrics = {
- # "returns_PPO": self.episode_returns,
- # "std_remaining_energy_PPO": self.episode_std_remaining_energy,
- # "total_consumption_energy_PPO": self.episode_total_consumption_energy,
- # "mean_remaining_energy_PPO": self.episode_mean_remaining_energy,
- # "network_throughput_PPO": self.episode_network_throughput,
- # "packet_delivery_ratio_PPO": self.episode_packet_delivery_ratio,
- # "network_lifetime_PPO": self.episode_network_lifetime,
- # "average_latency_PPO": self.episode_average_latency
- # }
-
- # for metric_name, metric_value in metrics.items():
- # np.save(f"{base_back_up_dir}{metric_name}_{self.version}.npy", np.array(metric_value))
-
- # return self._get_obs(), rewards, dones, {}
- return self._get_obs(), reward, done, self.get_metrics()
 
  def _get_obs(self):
  return [{'remaining_energy': np.array([e]),
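The step() epilogue above goes back to aggregating the per-agent lists: torch tensors are converted to floats, the rewards are averaged into a single team reward, dones collapses with all(), and the method returns the classic (obs, reward, done, info) 4-tuple with get_metrics() as the info payload. A minimal sketch of that aggregation, assuming hypothetical per-agent values:

import numpy as np
import torch

# Hypothetical per-agent values just before the return: a mix of plain floats and
# 0-d torch tensors (e.g. produced by an attention-based reward head).
rewards = [0.25, torch.tensor(0.75), 0.5]
dones = [True, False, True]

rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards]  # tensors -> floats
rewards = np.mean(rewards)   # one team reward, averaged over all agents
dones = all(dones)           # the episode ends only when every agent is done

# step() then returns the classic 4-tuple of the old gym API, with the metrics dict as info:
# return self._get_obs(), rewards, dones, self.get_metrics()
print(rewards, dones)        # 0.5 False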
@@ -266,6 +236,7 @@ class WSNRoutingEnv(gym.Env):
  'number_of_packets': np.array([d])
  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
+
  def _get_observation_space(self):
  return Dict({
  'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
@@ -274,23 +245,29 @@ class WSNRoutingEnv(gym.Env):
  'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
  })
 
+
  def get_state(self):
  return self._get_obs()
 
+
  def get_avail_actions(self):
  return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]
 
+
  def update_sensor_energies(self, i, delta_energy):
  self.remaining_energy[i] -= delta_energy
 
+
  def transmission_energy(self, number_of_packets, distance):
  # energy consumption for transmitting data on a distance
  return number_of_packets * info_amount * (Eelec + Eamp * distance**2)
 
+
  def reception_energy(self, number_of_packets):
  # energy consumption for receiving data
  return number_of_packets * info_amount * Eelec
 
+
  def compute_angle_vectors(self, i, action):
  '''
  Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
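transmission_energy and reception_energy (shown as context above) implement a first-order radio model: both end points pay an electronics cost per bit, and the sender additionally pays an amplifier cost that grows with the square of the hop distance. A self-contained sketch with placeholder constants (Eelec, Eamp and info_amount are defined elsewhere in the package; the values here are assumptions for illustration):

# Placeholder constants for illustration only; the package defines its own values.
Eelec = 50e-9        # J per bit spent by the transceiver electronics (assumed)
Eamp = 100e-12       # J per bit per m^2 spent by the amplifier (assumed)
info_amount = 3072   # bits per packet (assumed)

def transmission_energy(number_of_packets, distance):
    # Sender pays electronics plus an amplifier term that grows with distance squared.
    return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

def reception_energy(number_of_packets):
    # Receiver only pays the electronics cost.
    return number_of_packets * info_amount * Eelec

# Sending one packet over a 30 m hop and receiving it at the other end:
print(transmission_energy(1, 30.0), reception_energy(1))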
@@ -304,6 +281,7 @@ class WSNRoutingEnv(gym.Env):
 
  return np.arccos(np.clip(cosine_angle, -1, 1))
 
+
  def compute_reward_angle(self, i, action):
  '''
  Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -316,6 +294,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_angle, 0, 1)
  # return np.clip(- normalized_angle, -1, 1)
 
+
  def compute_reward_distance(self, i, action):
  '''
  Compute the reward based on the distance to the next hop
@@ -330,6 +309,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
  # return np.clip(-normalized_distance_to_next_hop, -1, 1)
 
+
  def compute_reward_consumption_energy(self, i, action):
  '''
  Compute the reward based on the total energy consumption (transmission, reception)
@@ -352,6 +332,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_total_energy, 0, 1)
  # return np.clip(- normalized_total_energy, -1, 1)
 
+
  def compute_reward_dispersion_remaining_energy(self):
  '''
  Compute the reward based on the standard deviation of the remaining energy
@@ -364,6 +345,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
  # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
+
  def compute_reward_number_of_packets(self, action):
  '''
  Compute the reward based on the number of packets of the receiver
@@ -377,6 +359,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_number_of_packets, 0, 1)
  # return np.clip(- normalized_number_of_packets, -1, 1)
 
+
  def compute_individual_rewards(self, i, action):
  '''
  Compute the individual rewards
@@ -416,6 +399,7 @@ class WSNRoutingEnv(gym.Env):
 
  return np.concatenate((rewards_energy, rewards_performance))
 
+
  def network_reward_dispersion_remaining_energy(self):
  '''
  Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -428,6 +412,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
  # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
+
  def network_reward_consumption_energy(self):
  '''
  Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -440,6 +425,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_total_energy, 0, 1)
  # return np.clip(- normalized_total_energy, -1, 1)
 
+
  def compute_reward_packet_delivery_ratio(self):
  '''
  Compute the reward based on the packet delivery ratio
@@ -447,6 +433,7 @@ class WSNRoutingEnv(gym.Env):
  packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
  return np.clip(packet_delivery_ratio, 0, 1)
 
+
  def compute_reward_latency(self):
  '''
  Compute the reward based on the average latency
@@ -458,6 +445,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_latency, 0, 1)
  # return np.clip(- normalized_latency, -1, 1)
 
+
  def compute_reward_network_throughput(self):
  '''
  Compute the reward based on the network throughput
@@ -467,6 +455,7 @@ class WSNRoutingEnv(gym.Env):
  normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
  return np.clip(normalized_throughput, 0, 1)
 
+
  def compute_attention_rewards(self, rewards):
  '''
  Compute the attention-based rewards
@@ -475,6 +464,7 @@ class WSNRoutingEnv(gym.Env):
  final_reward = net(rewards)
  return final_reward
 
+
  def integrate_mobility(self):
  '''
  Integrate the mobility of the sensors after each step
@@ -487,6 +477,7 @@ class WSNRoutingEnv(gym.Env):
  if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
  self.sensor_positions[i] -= displacement[i]
 
+
  def get_metrics(self):
  # Calculate network throughput
  self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
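get_metrics() and the metric-style rewards in the surrounding hunks share one pattern: a ratio whose denominator is guarded by a small epsilon, optionally clipped to [0, 1]. An illustrative sketch (the epsilon value and the inputs below are made up, not the package's):

import numpy as np

epsilon = 1e-6   # placeholder; the package defines self.epsilon itself

def packet_delivery_ratio(packets_delivered, total_packets_sent):
    ratio = packets_delivered / (total_packets_sent + epsilon) if total_packets_sent > 0 else 0
    return np.clip(ratio, 0, 1)

def network_throughput(packets_delivered, steps):
    # delivered packets per environment step, as in get_metrics()
    return packets_delivered / (steps + epsilon) if steps > 0 else 0

print(packet_delivery_ratio(45, 60), network_throughput(45, 100))   # ~0.75 ~0.45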
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-examples
- Version: 3.0.263
+ Version: 3.0.264
  Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
  Home-page: https://github.com/gedji/CODES.git
  Author: Georges Djimefo
@@ -0,0 +1,7 @@
+ gym_examples/__init__.py,sha256=OE2WBgKyEbqBOaXf4v4N5a0qFaCkN5iUcgmZIdLghqk,166
+ gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+ gym_examples/envs/wsn_env.py,sha256=t0uJq77RsuDKn_VRKh8dY9khp4DE1etoHzaUd582OSw,24905
+ gym_examples-3.0.264.dist-info/METADATA,sha256=4l07YRLJpfbh8kvvuZIlaOTAls-N81hZPxxvNNAu8bM,412
+ gym_examples-3.0.264.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ gym_examples-3.0.264.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+ gym_examples-3.0.264.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- gym_examples/__init__.py,sha256=V-lDBqJirIbwK5vx8WNT5JyIwiwk22PGBiYSvm6JPo8,166
- gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
- gym_examples/envs/wsn_env.py,sha256=Ee3PTxhwKw2igwK5z465IdgK3mP0ParioXCni6BqXiE,26695
- gym_examples-3.0.263.dist-info/METADATA,sha256=rpRe1k5FCesyS6hKApj_Z-_sQAjJ01WM90Mzk_a5XPs,412
- gym_examples-3.0.263.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- gym_examples-3.0.263.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
- gym_examples-3.0.263.dist-info/RECORD,,