gym-examples 3.0.262__py3-none-any.whl → 3.0.264__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
  entry_point="gym_examples.envs:WSNRoutingEnv",
  )
 
- __version__ = "3.0.262"
+ __version__ = "3.0.264"
gym_examples/envs/wsn_env.py CHANGED
@@ -81,8 +81,9 @@ class WSNRoutingEnv(gym.Env):
  )
 
  # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
- # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
- self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
+ self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+ # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
+ # self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))
 
  self.reset()
 
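The hunk above replaces a single (agent index, next hop) action per step with one next-hop choice per agent per step. A minimal sketch of what that means for sampled action shapes, assuming gym's MultiDiscrete API; n_agents and n_sensors below are illustrative values, and the extra index is presumably the base station:

    from gym.spaces import MultiDiscrete  # gymnasium.spaces behaves the same way

    n_agents, n_sensors = 3, 5  # illustrative sizes, not taken from the package

    # 3.0.264: one Discrete(n_sensors + 1) choice per agent.
    per_agent_space = MultiDiscrete([n_sensors + 1] * n_agents)
    print(per_agent_space.sample())   # e.g. array([2, 5, 0]) -> shape (3,)

    # 3.0.262: a single (agent index, next hop) pair per step.
    pair_space = MultiDiscrete([n_agents, n_sensors + 1])
    print(pair_space.sample())        # e.g. array([1, 4]) -> shape (2,)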
@@ -130,14 +131,18 @@ class WSNRoutingEnv(gym.Env):
 
 
  def step(self, actions):
+ actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
  self.steps += 1
- # rewards = [-max_reward] * self.n_sensors
- reward = -max_reward
- # dones = [False] * self.n_sensors
- done = False
- # actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
- # for i, action in enumerate(actions):
- for i, action in [actions]: # This loop is for the PPO algorithm: actions is a numpy array of shape (1, 2)
+ rewards = [-max_reward] * self.n_sensors
+ dones = [False] * self.n_sensors
+ for i, action in enumerate(actions):
+ print(f"\nAction in WSNRoutingEnv: {action} and type: {type(action)}")
+ if action not in range(self.n_sensors + 1):
+ raise ValueError("Invalid action!")
+
+ if i >= self.n_sensors:
+ raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
+
  if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
  continue # Skip if sensor has no energy left or no packets to transmit
 
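This hunk reverts step() to per-sensor bookkeeping: the MultiDiscrete action is unpacked into one choice per agent, and a reward and a done flag are tracked for every sensor. A minimal, self-contained sketch of that pattern; the routing and reward logic below are placeholders, not the package's:

    import numpy as np

    def step_sketch(actions, n_sensors, max_reward=1.0):
        # One entry per sensor; default to the worst reward and "not done".
        rewards = [-max_reward] * n_sensors
        dones = [False] * n_sensors
        for i, action in enumerate(actions):
            if action not in range(n_sensors + 1):   # n_sensors + 1 possible next hops
                raise ValueError("Invalid action!")
            if i >= n_sensors:
                raise ValueError("Invalid sensor index!")
            # ... route the packet and score the hop here (placeholder) ...
            rewards[i] = 0.0
            dones[i] = (action == n_sensors)         # e.g. packet handed to the base station
        return np.mean(rewards), all(dones)

    # Example: 3 sensors, each picking a next hop in {0, 1, 2, 3}.
    print(step_sketch(np.array([1, 3, 0]), n_sensors=3))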
@@ -163,10 +168,10 @@ class WSNRoutingEnv(gym.Env):
  self.total_latency += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- # rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
- reward = max_reward
- # dones[i] = True
- # done = True
+ # rewards[i] = self.compute_individual_rewards(i, action)
+ rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
+ # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
+ dones[i] = True
  else:
  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
  if distance > self.coverage_radius:
@@ -191,20 +196,18 @@ class WSNRoutingEnv(gym.Env):
  self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- # rewards[i] = self.compute_individual_rewards(i, action)
- reward = self.compute_individual_rewards(i, action)
+ rewards[i] = self.compute_individual_rewards(i, action)
 
  # Update the number of packets
  self.number_of_packets[action] += self.number_of_packets[i]
-
  self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
  # Calculate final reward
  # rewards[i] = self.compute_attention_rewards(rewards[i])
- # rewards[i] = np.mean(rewards[i])
- reward = np.mean(reward)
- # for i in range(self.n_sensors):
- # if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
- # dones[i] = True
+ rewards[i] = np.mean(rewards[i])
+ # rewards[i] = self.compute_weighted_sum_rewards(rewards[i])
+ for i in range(self.n_sensors):
+ if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
+ dones[i] = True
 
  # Integrate the mobility of the sensors
  # self.integrate_mobility()
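As the hunk above shows, each forwarding hop now stores the vector returned by compute_individual_rewards and collapses it to a scalar with np.mean. A small illustration of that shape contract; the component values and their grouping below are made up for the example (the actual components are the compute_reward_* terms defined later in the file):

    import numpy as np

    # Hypothetical per-hop reward vector: energy-related terms followed by
    # performance-related terms, each already clipped to [0, 1].
    rewards_energy = np.array([0.8, 0.6, 0.9, 0.7])
    rewards_performance = np.array([0.5, 0.4])

    reward_vector = np.concatenate((rewards_energy, rewards_performance))
    rewards_i = np.mean(reward_vector)   # the scalar stored in rewards[i]
    print(rewards_i)                     # ~0.65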
@@ -216,45 +219,15 @@ class WSNRoutingEnv(gym.Env):
 
  self.get_metrics()
 
- # rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
- reward = reward.item() if isinstance(reward, torch.Tensor) else reward # Convert the reward to a float
- # rewards = np.mean(rewards) # Average the rewards of all agents
- # dones = all(dones) # Done if all agents are done
- # Check if done condition is met
- done = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
+ rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
+ # rewards = np.sum(rewards) # Sum the rewards of all agents
+ rewards = np.mean(rewards) # Average the rewards of all agents
+ # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
+ # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
+ dones = all(dones) # Done if all agents are done
 
- # if os.getenv('PRINT_STATS') == 'True': # We are trying to extract only the statistics for the PPO algorithm
- # self.number_of_steps += 1
+ return self._get_obs(), rewards, dones, self.get_metrics()
 
-
-
- # self.episode_return += reward
- # if self.number_of_steps >= self.num_timesteps:
- # self.episode_returns.append(self.episode_return)
- # self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
- # self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
- # self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
- # self.episode_network_throughput.append(self.network_throughput)
- # self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
- # self.episode_network_lifetime.append(self.network_lifetime)
- # self.episode_average_latency.append(self.average_latency)
-
- # metrics = {
- # "returns_PPO": self.episode_returns,
- # "std_remaining_energy_PPO": self.episode_std_remaining_energy,
- # "total_consumption_energy_PPO": self.episode_total_consumption_energy,
- # "mean_remaining_energy_PPO": self.episode_mean_remaining_energy,
- # "network_throughput_PPO": self.episode_network_throughput,
- # "packet_delivery_ratio_PPO": self.episode_packet_delivery_ratio,
- # "network_lifetime_PPO": self.episode_network_lifetime,
- # "average_latency_PPO": self.episode_average_latency
- # }
-
- # for metric_name, metric_value in metrics.items():
- # np.save(f"{base_back_up_dir}{metric_name}_{self.version}.npy", np.array(metric_value))
-
- # return self._get_obs(), rewards, dones, {}
- return self._get_obs(), reward, done, self.get_metrics()
 
  def _get_obs(self):
  return [{'remaining_energy': np.array([e]),
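The hunk above restores list-based aggregation at the end of step(): tensor rewards are converted to floats, averaged across agents, and the episode ends only when every sensor is done. A minimal sketch of that aggregation, assuming torch is available (as the diff's isinstance check implies); the values are made up:

    import numpy as np
    import torch

    rewards = [torch.tensor(0.7), 0.5, torch.tensor(0.9)]   # per-sensor rewards, mixed types
    dones = [True, False, True]                             # per-sensor done flags

    rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards]
    reward = np.mean(rewards)    # scalar reward handed back to the learner
    done = all(dones)            # episode ends only when every sensor is done
    print(reward, done)          # ~0.7 False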
@@ -263,6 +236,7 @@ class WSNRoutingEnv(gym.Env):
  'number_of_packets': np.array([d])
  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
+
  def _get_observation_space(self):
  return Dict({
  'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
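For reference, _get_obs() above returns one dict per sensor, while _get_observation_space() describes a single sensor's dict. A minimal sketch of such a per-sensor space, assuming gym's Dict/Box API; the bounds are illustrative, and only the keys visible in this hunk are included (the sensor-position entry is elided there):

    import numpy as np
    from gym.spaces import Box, Dict

    initial_energy = 1.0               # illustrative bound, not the package's constant
    n_sensors = 5
    initial_number_of_packets = 1

    single_sensor_space = Dict({
        'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
        'number_of_packets': Box(low=0, high=n_sensors * initial_number_of_packets + 1,
                                 shape=(1,), dtype=int),
    })
    print(single_sensor_space.sample())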
@@ -271,23 +245,29 @@ class WSNRoutingEnv(gym.Env):
  'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
  })
 
+
  def get_state(self):
  return self._get_obs()
 
+
  def get_avail_actions(self):
  return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]
 
+
  def update_sensor_energies(self, i, delta_energy):
  self.remaining_energy[i] -= delta_energy
 
+
  def transmission_energy(self, number_of_packets, distance):
  # energy consumption for transmitting data on a distance
  return number_of_packets * info_amount * (Eelec + Eamp * distance**2)
 
+
  def reception_energy(self, number_of_packets):
  # energy consumption for receiving data
  return number_of_packets * info_amount * Eelec
 
+
  def compute_angle_vectors(self, i, action):
  '''
  Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
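transmission_energy and reception_energy above follow a first-order radio energy model: a per-bit electronics cost plus a distance-squared amplifier cost for transmission, and the electronics cost alone for reception. A worked example with placeholder constants (Eelec, Eamp and info_amount are defined elsewhere in the package; the values below are assumptions for illustration only):

    Eelec = 50e-9        # J/bit, electronics cost (placeholder value)
    Eamp = 100e-12       # J/bit/m^2, amplifier cost (placeholder value)
    info_amount = 3072   # bits per packet (placeholder value)

    def transmission_energy(number_of_packets, distance):
        return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

    def reception_energy(number_of_packets):
        return number_of_packets * info_amount * Eelec

    # Forwarding 2 packets over 30 m costs the sender and the receiver:
    print(transmission_energy(2, 30.0))   # ~8.6e-4 J -> 2*3072*(50e-9 + 100e-12*900)
    print(reception_energy(2))            # ~3.1e-4 J -> 2*3072*50e-9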
@@ -301,6 +281,7 @@ class WSNRoutingEnv(gym.Env):
 
  return np.arccos(np.clip(cosine_angle, -1, 1))
 
+
  def compute_reward_angle(self, i, action):
  '''
  Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -313,6 +294,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_angle, 0, 1)
  # return np.clip(- normalized_angle, -1, 1)
 
+
  def compute_reward_distance(self, i, action):
  '''
  Compute the reward based on the distance to the next hop
@@ -327,6 +309,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
  # return np.clip(-normalized_distance_to_next_hop, -1, 1)
 
+
  def compute_reward_consumption_energy(self, i, action):
  '''
  Compute the reward based on the total energy consumption (transmission, reception)
@@ -349,6 +332,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_total_energy, 0, 1)
  # return np.clip(- normalized_total_energy, -1, 1)
 
+
  def compute_reward_dispersion_remaining_energy(self):
  '''
  Compute the reward based on the standard deviation of the remaining energy
@@ -361,6 +345,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
  # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
+
  def compute_reward_number_of_packets(self, action):
  '''
  Compute the reward based on the number of packets of the receiver
@@ -374,6 +359,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_number_of_packets, 0, 1)
  # return np.clip(- normalized_number_of_packets, -1, 1)
 
+
  def compute_individual_rewards(self, i, action):
  '''
  Compute the individual rewards
@@ -413,6 +399,7 @@ class WSNRoutingEnv(gym.Env):
 
  return np.concatenate((rewards_energy, rewards_performance))
 
+
  def network_reward_dispersion_remaining_energy(self):
  '''
  Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -425,6 +412,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
  # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
+
  def network_reward_consumption_energy(self):
  '''
  Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -437,6 +425,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_total_energy, 0, 1)
  # return np.clip(- normalized_total_energy, -1, 1)
 
+
  def compute_reward_packet_delivery_ratio(self):
  '''
  Compute the reward based on the packet delivery ratio
@@ -444,6 +433,7 @@ class WSNRoutingEnv(gym.Env):
  packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
  return np.clip(packet_delivery_ratio, 0, 1)
 
+
  def compute_reward_latency(self):
  '''
  Compute the reward based on the average latency
@@ -455,6 +445,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_latency, 0, 1)
  # return np.clip(- normalized_latency, -1, 1)
 
+
  def compute_reward_network_throughput(self):
  '''
  Compute the reward based on the network throughput
@@ -464,6 +455,7 @@ class WSNRoutingEnv(gym.Env):
  normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
  return np.clip(normalized_throughput, 0, 1)
 
+
  def compute_attention_rewards(self, rewards):
  '''
  Compute the attention-based rewards
@@ -472,6 +464,7 @@ class WSNRoutingEnv(gym.Env):
  final_reward = net(rewards)
  return final_reward
 
+
  def integrate_mobility(self):
  '''
  Integrate the mobility of the sensors after each step
@@ -484,6 +477,7 @@ class WSNRoutingEnv(gym.Env):
  if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
  self.sensor_positions[i] -= displacement[i]
 
+
  def get_metrics(self):
  # Calculate network throughput
  self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
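Most of the reward helpers touched by the blank-line-only hunks above share one pattern: normalize a cost-like quantity by its maximum (with a small epsilon to avoid division by zero) and clip 1 minus that into [0, 1]. A minimal sketch of the pattern with illustrative numbers:

    import numpy as np

    def normalized_cost_reward(value, max_value, epsilon=1e-6):
        # Reward in [0, 1]: zero cost -> 1, maximal cost -> 0.
        normalized = value / (max_value + epsilon)
        return np.clip(1 - normalized, 0, 1)

    print(normalized_cost_reward(0.0, 10.0))   # ~1.0
    print(normalized_cost_reward(10.0, 10.0))  # ~0.0
    print(normalized_cost_reward(25.0, 10.0))  # 0.0 (clipped)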
gym_examples-3.0.264.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-examples
- Version: 3.0.262
+ Version: 3.0.264
  Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
  Home-page: https://github.com/gedji/CODES.git
  Author: Georges Djimefo
gym_examples-3.0.264.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ gym_examples/__init__.py,sha256=OE2WBgKyEbqBOaXf4v4N5a0qFaCkN5iUcgmZIdLghqk,166
+ gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+ gym_examples/envs/wsn_env.py,sha256=t0uJq77RsuDKn_VRKh8dY9khp4DE1etoHzaUd582OSw,24905
+ gym_examples-3.0.264.dist-info/METADATA,sha256=4l07YRLJpfbh8kvvuZIlaOTAls-N81hZPxxvNNAu8bM,412
+ gym_examples-3.0.264.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ gym_examples-3.0.264.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+ gym_examples-3.0.264.dist-info/RECORD,,
gym_examples-3.0.262.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
- gym_examples/__init__.py,sha256=CYfXHf7cNLnC7-qIfJQRpOs1xZSNHDP-MXO0DIQF2r0,166
- gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
- gym_examples/envs/wsn_env.py,sha256=i2aXordG52k5GhxIYQ1AVP2EiBzHtFMHUqwjAmdhWBQ,26481
- gym_examples-3.0.262.dist-info/METADATA,sha256=nUKnPlBtDhKZCsEpJqp8sdmStdg6ePbgbKlglo7RMck,412
- gym_examples-3.0.262.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- gym_examples-3.0.262.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
- gym_examples-3.0.262.dist-info/RECORD,,