gym-examples 3.0.263__py3-none-any.whl → 3.0.265__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +50 -60
- {gym_examples-3.0.263.dist-info → gym_examples-3.0.265.dist-info}/METADATA +1 -1
- gym_examples-3.0.265.dist-info/RECORD +7 -0
- gym_examples-3.0.263.dist-info/RECORD +0 -7
- {gym_examples-3.0.263.dist-info → gym_examples-3.0.265.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.263.dist-info → gym_examples-3.0.265.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py CHANGED

gym_examples/envs/wsn_env.py CHANGED
@@ -81,9 +81,9 @@ class WSNRoutingEnv(gym.Env):
         )

         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-
+        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
         # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
-        self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))
+        # self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))

         self.reset()

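The substantive change in this hunk is which action space is active: 3.0.263 used a single flattened `Discrete(self.n_agents * (self.n_sensors + 1))`, while 3.0.265 switches to `MultiDiscrete([self.n_sensors + 1] * self.n_agents)`, i.e. one next-hop choice per agent rather than one joint index. A minimal sketch of the difference, using hypothetical values for `n_agents` and `n_sensors` (they are set in the constructor, which is not part of this diff); the `divmod` decoding is only illustrative, the old code's decoding is not visible here:

```python
from gym.spaces import Discrete, MultiDiscrete

n_agents, n_sensors = 3, 5  # hypothetical values; the real ones are set in WSNRoutingEnv.__init__

# Old (3.0.263): a single flat index that would have to encode (agent, next hop) jointly.
flat_space = Discrete(n_agents * (n_sensors + 1))
flat_action = flat_space.sample()                 # e.g. 13
agent, hop = divmod(flat_action, n_sensors + 1)   # illustrative decoding only

# New (3.0.265): one sub-action per agent, each in {0, ..., n_sensors}.
multi_space = MultiDiscrete([n_sensors + 1] * n_agents)
multi_action = multi_space.sample()               # e.g. array([2, 5, 0]), shape (n_agents,)
```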
@@ -131,16 +131,17 @@ class WSNRoutingEnv(gym.Env):


     def step(self, actions):
+        actions = [actions[i] for i in range(self.n_agents)]  # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
-
-
-
-
-
-
-
-
-
+        rewards = [-max_reward] * self.n_sensors
+        dones = [False] * self.n_sensors
+        for i, action in enumerate(actions):
+            if action not in range(self.n_sensors + 1):
+                raise ValueError("Invalid action!")
+
+            if i >= self.n_sensors:
+                raise ValueError("Invalid sensor index!")  # the number of actions is greater than the number of sensors
+
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue  # Skip if sensor has no energy left or no packets to transmit

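The rewritten `step` first converts the `MultiDiscrete` sample back into a plain per-agent list, initialises per-sensor `rewards` and `dones`, and validates every entry before routing. A standalone sketch of that prologue, with hypothetical values for `n_agents`, `n_sensors` and `max_reward` (module-level constants and attributes that do not appear in this hunk):

```python
import numpy as np

n_agents = n_sensors = 4   # hypothetical; the env reads self.n_agents / self.n_sensors
max_reward = 1.0           # hypothetical stand-in for the module-level constant

actions = np.array([4, 0, 2, 1])   # what a MultiDiscrete sample looks like

# Mirror of the added lines: back to a plain list, one entry per agent.
actions = [actions[i] for i in range(n_agents)]

rewards = [-max_reward] * n_sensors   # pessimistic default reward per sensor
dones = [False] * n_sensors

for i, action in enumerate(actions):
    if action not in range(n_sensors + 1):   # legal hops are 0..n_sensors (n_sensors presumably = base station)
        raise ValueError("Invalid action!")
    if i >= n_sensors:
        raise ValueError("Invalid sensor index!")
```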
@@ -166,10 +167,10 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                # rewards[i] =
-
-                #
-
+                # rewards[i] = self.compute_individual_rewards(i, action)
+                rewards[i] = np.ones(input_dim) * max_reward  # Reward for transmitting data to the base station
+                # rewards[i] = np.ones(input_dim)  # Reward for transmitting data to the base station
+                dones[i] = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
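In the branch above, delivering to the base station now fills the sensor's reward with a full vector, `np.ones(input_dim) * max_reward`, and marks the sensor done; `input_dim` and `max_reward` are module-level constants defined outside this hunk. A tiny sketch with assumed values:

```python
import numpy as np

input_dim = 7      # hypothetical: length of the per-sensor reward vector
max_reward = 1.0   # hypothetical module-level constant

# On delivery to the base station the sensor receives the maximum value in every
# reward component, and its done flag is set.
reward_vector = np.ones(input_dim) * max_reward
done = True
print(reward_vector)   # [1. 1. 1. 1. 1. 1. 1.]
```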
@@ -194,20 +195,18 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-
-                reward = self.compute_individual_rewards(i, action)
+                rewards[i] = self.compute_individual_rewards(i, action)

             # Update the number of packets
             self.number_of_packets[action] += self.number_of_packets[i]
-
             self.number_of_packets[i] = 0  # Reset the number of packets of the sensor i
             # Calculate final reward
             # rewards[i] = self.compute_attention_rewards(rewards[i])
-
-
-
-
-
+            rewards[i] = np.mean(rewards[i])
+            # rewards[i] = self.compute_weighted_sum_rewards(rewards[i])
+        for i in range(self.n_sensors):
+            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
+                dones[i] = True

         # Integrate the mobility of the sensors
         # self.integrate_mobility()
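For hops to another sensor, the per-sensor reward vector returned by `compute_individual_rewards` is now stored in `rewards[i]` and collapsed to a scalar with `np.mean`, replacing the commented-out attention and weighted-sum variants; a second loop then marks depleted sensors as done. A small sketch of the collapse step, with a hypothetical reward vector (each component is clipped to [0, 1] by the reward methods shown later in this diff):

```python
import numpy as np

# Hypothetical per-sensor reward vector, e.g. the concatenation of energy and
# performance terms that compute_individual_rewards returns.
reward_vector = np.array([0.9, 0.4, 0.7, 0.2, 0.8, 0.6])

# New in 3.0.265: simple averaging instead of attention or weighted-sum pooling.
scalar_reward = np.mean(reward_vector)
print(round(float(scalar_reward), 3))   # 0.6
```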
@@ -219,45 +218,15 @@ class WSNRoutingEnv(gym.Env):

         self.get_metrics()

-
-
-
-        #
-        #
-
+        rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards]  # Convert the rewards to a list of floats
+        # rewards = np.sum(rewards)  # Sum the rewards of all agents
+        rewards = np.mean(rewards)  # Average the rewards of all agents
+        # rewards = np.mean(self.compute_network_rewards())  # Average the rewards of all agents
+        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
+        dones = all(dones)  # Done if all agents are done

-
-        # self.number_of_steps += 1
+        return self._get_obs(), rewards, dones, self.get_metrics()

-
-
-        # self.episode_return += reward
-        # if self.number_of_steps >= self.num_timesteps:
-        # self.episode_returns.append(self.episode_return)
-        # self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
-        # self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
-        # self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
-        # self.episode_network_throughput.append(self.network_throughput)
-        # self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
-        # self.episode_network_lifetime.append(self.network_lifetime)
-        # self.episode_average_latency.append(self.average_latency)
-
-        # metrics = {
-        # "returns_PPO": self.episode_returns,
-        # "std_remaining_energy_PPO": self.episode_std_remaining_energy,
-        # "total_consumption_energy_PPO": self.episode_total_consumption_energy,
-        # "mean_remaining_energy_PPO": self.episode_mean_remaining_energy,
-        # "network_throughput_PPO": self.episode_network_throughput,
-        # "packet_delivery_ratio_PPO": self.episode_packet_delivery_ratio,
-        # "network_lifetime_PPO": self.episode_network_lifetime,
-        # "average_latency_PPO": self.episode_average_latency
-        # }
-
-        # for metric_name, metric_value in metrics.items():
-        # np.save(f"{base_back_up_dir}{metric_name}_{self.version}.npy", np.array(metric_value))
-
-        # return self._get_obs(), rewards, dones, {}
-        return self._get_obs(), reward, done, self.get_metrics()

     def _get_obs(self):
         return [{'remaining_energy': np.array([e]),
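The end of `step` is simplified: per-sensor rewards (possibly 0-d torch tensors, e.g. if `compute_attention_rewards` were used) are converted to floats, averaged into one scalar shared by all agents, and the episode ends only when every sensor is done. The method keeps the classic 4-tuple Gym return `obs, reward, done, info`, with `get_metrics()` serving as the info dict. A sketch of the aggregation, with hypothetical inputs:

```python
import numpy as np
import torch

# Hypothetical per-sensor rewards: plain floats mixed with 0-d torch tensors.
rewards = [0.5, torch.tensor(0.75), 0.25, torch.tensor(1.0)]
dones = [True, False, True, True]

# Mirror of the added lines: tensors -> floats, then one scalar reward and one done flag.
rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards]
reward = float(np.mean(rewards))   # 0.625
done = all(dones)                  # False
```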
@@ -266,6 +235,7 @@ class WSNRoutingEnv(gym.Env):
                  'number_of_packets': np.array([d])
                  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]

+
     def _get_observation_space(self):
         return Dict({
             'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
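`_get_obs` returns one dict per sensor and `_get_observation_space` a matching `Dict` of `Box` spaces; only the `remaining_energy` and `number_of_packets` fields are visible in this diff, and the comprehension also unpacks a position `p` that is not shown. A sketch of how one sample fits the space, with hypothetical constants:

```python
import numpy as np
from gym.spaces import Box, Dict

initial_energy = 1.0                          # hypothetical module-level constant
n_sensors, initial_number_of_packets = 5, 1   # hypothetical values

# Per-sensor space, limited to the fields visible in this hunk.
obs_space = Dict({
    'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
    'number_of_packets': Box(low=0, high=n_sensors * initial_number_of_packets + 1,
                             shape=(1,), dtype=int),
})

# One entry of the list returned by _get_obs.
obs = {'remaining_energy': np.array([0.42]), 'number_of_packets': np.array([3])}
print(obs_space['remaining_energy'].contains(obs['remaining_energy']))   # True
```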
@@ -274,23 +244,29 @@ class WSNRoutingEnv(gym.Env):
             'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
         })

+
     def get_state(self):
         return self._get_obs()

+
     def get_avail_actions(self):
         return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]

+
     def update_sensor_energies(self, i, delta_energy):
         self.remaining_energy[i] -= delta_energy

+
     def transmission_energy(self, number_of_packets, distance):
         # energy consumption for transmitting data on a distance
         return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

+
     def reception_energy(self, number_of_packets):
         # energy consumption for receiving data
         return number_of_packets * info_amount * Eelec

+
     def compute_angle_vectors(self, i, action):
         '''
         Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
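`transmission_energy` and `reception_energy` implement a first-order-radio style cost: an electronics term per bit plus, for transmission, an amplifier term that grows with the square of the distance. The constants `info_amount`, `Eelec` and `Eamp` are module-level values that do not appear in this diff; the numbers below are hypothetical stand-ins chosen only to make the arithmetic concrete:

```python
# Worked example of the energy model behind transmission_energy / reception_energy.
Eelec = 50e-9          # J/bit, electronics energy (hypothetical)
Eamp = 100e-12         # J/bit/m^2, amplifier energy (hypothetical)
info_amount = 3072     # bits per packet (hypothetical)

def transmission_energy(number_of_packets, distance):
    return number_of_packets * info_amount * (Eelec + Eamp * distance ** 2)

def reception_energy(number_of_packets):
    return number_of_packets * info_amount * Eelec

# Sending 2 packets over 30 m costs more than receiving them:
print(transmission_energy(2, 30.0))   # ~8.6e-4 J  (2 * 3072 * (50e-9 + 100e-12 * 900))
print(reception_energy(2))            # ~3.1e-4 J  (2 * 3072 * 50e-9)
```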
@@ -304,6 +280,7 @@ class WSNRoutingEnv(gym.Env):

         return np.arccos(np.clip(cosine_angle, -1, 1))

+
     def compute_reward_angle(self, i, action):
         '''
         Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
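`compute_angle_vectors` measures the angle between the direction to the chosen next hop and the direction to the base station, taking `arccos` of a cosine that is clipped to [-1, 1] so floating-point round-off cannot produce NaN. A sketch with hypothetical positions (the real method reads `self.sensor_positions` and the base station position):

```python
import numpy as np

sensor_i = np.array([10.0, 10.0])      # hypothetical positions
next_hop = np.array([20.0, 12.0])
base_station = np.array([50.0, 10.0])

v_hop = next_hop - sensor_i
v_bs = base_station - sensor_i

cosine_angle = np.dot(v_hop, v_bs) / (np.linalg.norm(v_hop) * np.linalg.norm(v_bs))
# np.clip guards against values like 1.0000000000000002 caused by rounding,
# which would otherwise make np.arccos return NaN.
angle = np.arccos(np.clip(cosine_angle, -1, 1))
print(np.degrees(angle))   # ~11.3 degrees
```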
@@ -316,6 +293,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_angle, 0, 1)
         # return np.clip(- normalized_angle, -1, 1)

+
     def compute_reward_distance(self, i, action):
         '''
         Compute the reward based on the distance to the next hop
@@ -330,6 +308,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
         # return np.clip(-normalized_distance_to_next_hop, -1, 1)

+
     def compute_reward_consumption_energy(self, i, action):
         '''
         Compute the reward based on the total energy consumption (transmission, reception)
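`compute_reward_consumption_energy`, like the other reward terms in this file, follows a normalise-then-clip pattern: the raw quantity is divided by a reference value and the reward is `np.clip(1 - normalized, 0, 1)`, so smaller consumption maps to a reward closer to 1. A tiny numeric sketch with a hypothetical normalisation constant:

```python
import numpy as np

max_possible_energy = 1e-3   # hypothetical upper bound in joules
total_energy = 4.3e-4        # e.g. one transmission from the example above

normalized_total_energy = total_energy / max_possible_energy
reward = np.clip(1 - normalized_total_energy, 0, 1)   # cheaper transmissions score closer to 1
print(reward)   # ~0.57
```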
@@ -352,6 +331,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)

+
     def compute_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy
@@ -364,6 +344,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
     def compute_reward_number_of_packets(self, action):
         '''
         Compute the reward based on the number of packets of the receiver
@@ -377,6 +358,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_number_of_packets, 0, 1)
         # return np.clip(- normalized_number_of_packets, -1, 1)

+
     def compute_individual_rewards(self, i, action):
         '''
         Compute the individual rewards
@@ -416,6 +398,7 @@ class WSNRoutingEnv(gym.Env):

         return np.concatenate((rewards_energy, rewards_performance))

+
     def network_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -428,6 +411,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
     def network_reward_consumption_energy(self):
         '''
         Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -440,6 +424,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)

+
     def compute_reward_packet_delivery_ratio(self):
         '''
         Compute the reward based on the packet delivery ratio
@@ -447,6 +432,7 @@ class WSNRoutingEnv(gym.Env):
         packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
         return np.clip(packet_delivery_ratio, 0, 1)

+
     def compute_reward_latency(self):
         '''
         Compute the reward based on the average latency
@@ -458,6 +444,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_latency, 0, 1)
         # return np.clip(- normalized_latency, -1, 1)

+
     def compute_reward_network_throughput(self):
         '''
         Compute the reward based on the network throughput
@@ -467,6 +454,7 @@ class WSNRoutingEnv(gym.Env):
         normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
         return np.clip(normalized_throughput, 0, 1)

+
     def compute_attention_rewards(self, rewards):
         '''
         Compute the attention-based rewards
@@ -475,6 +463,7 @@ class WSNRoutingEnv(gym.Env):
         final_reward = net(rewards)
         return final_reward

+
     def integrate_mobility(self):
         '''
         Integrate the mobility of the sensors after each step
@@ -487,6 +476,7 @@ class WSNRoutingEnv(gym.Env):
         if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
             self.sensor_positions[i] -= displacement[i]

+
     def get_metrics(self):
         # Calculate network throughput
         self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
gym_examples-3.0.265.dist-info/RECORD ADDED

@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=F0u_yF7l2eD3fOQTUzN2XUFt7etMgtlhgDv5tFMubpE,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=2GvyLeNjLM-q9oLCobVzpTUrUqzjTyRn_H3BAQZZvbY,24821
+gym_examples-3.0.265.dist-info/METADATA,sha256=D0__HbG6kJd9Kb1SX_Lh_MysXCxkK_ToMFudnMPWqgU,412
+gym_examples-3.0.265.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.265.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.265.dist-info/RECORD,,

gym_examples-3.0.263.dist-info/RECORD DELETED

@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=V-lDBqJirIbwK5vx8WNT5JyIwiwk22PGBiYSvm6JPo8,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=Ee3PTxhwKw2igwK5z465IdgK3mP0ParioXCni6BqXiE,26695
-gym_examples-3.0.263.dist-info/METADATA,sha256=rpRe1k5FCesyS6hKApj_Z-_sQAjJ01WM90Mzk_a5XPs,412
-gym_examples-3.0.263.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.263.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.263.dist-info/RECORD,,
{gym_examples-3.0.263.dist-info → gym_examples-3.0.265.dist-info}/WHEEL: File without changes

{gym_examples-3.0.263.dist-info → gym_examples-3.0.265.dist-info}/top_level.txt: File without changes