gym-examples 3.0.263-py3-none-any.whl → 3.0.264-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +51 -60
- {gym_examples-3.0.263.dist-info → gym_examples-3.0.264.dist-info}/METADATA +1 -1
- gym_examples-3.0.264.dist-info/RECORD +7 -0
- gym_examples-3.0.263.dist-info/RECORD +0 -7
- {gym_examples-3.0.263.dist-info → gym_examples-3.0.264.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.263.dist-info → gym_examples-3.0.264.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -81,9 +81,9 @@ class WSNRoutingEnv(gym.Env):
         )

         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-
+        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
         # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
-        self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))
+        # self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))

         self.reset()

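Net effect of this hunk: 3.0.264 replaces the flattened `Discrete(self.n_agents * (self.n_sensors + 1))` action space with `MultiDiscrete([self.n_sensors + 1] * self.n_agents)`, i.e. one sub-action per agent, each choosing a next hop among the sensors, with the extra index presumably standing for the base station. A minimal sketch of what that space looks like, assuming `gymnasium`-style spaces and example sizes (the classic `gym.spaces` API behaves the same way):

```python
from gymnasium.spaces import MultiDiscrete
import numpy as np

n_agents, n_sensors = 4, 4   # example values; the env derives these from its own configuration

# One sub-action per agent, each in {0, ..., n_sensors}; the last index is presumably the base station
action_space = MultiDiscrete([n_sensors + 1] * n_agents)

sample = action_space.sample()          # e.g. array([2, 4, 0, 1])
assert sample.shape == (n_agents,)
assert action_space.contains(sample)
```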
@@ -131,16 +131,18 @@ class WSNRoutingEnv(gym.Env):


     def step(self, actions):
+        actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
-
-
-
-
-
-
-
-
-
+        rewards = [-max_reward] * self.n_sensors
+        dones = [False] * self.n_sensors
+        for i, action in enumerate(actions):
+            print(f"\nAction in WSNRoutingEnv: {action} and type: {type(action)}")
+            if action not in range(self.n_sensors + 1):
+                raise ValueError("Invalid action!")
+
+            if i >= self.n_sensors:
+                raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
+
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue # Skip if sensor has no energy left or no packets to transmit

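The rewritten `step()` now starts by unpacking the MultiDiscrete action vector into a plain per-agent list and validating it: an out-of-range next hop raises `ValueError`, as does receiving more actions than there are sensors. A standalone sketch of that unpack-and-validate logic (the function name and arguments are illustrative, not part of the package):

```python
import numpy as np

def unpack_and_validate(actions, n_agents, n_sensors):
    # From the MultiDiscrete vector back to one Discrete choice per agent
    actions = [actions[i] for i in range(n_agents)]
    for i, action in enumerate(actions):
        if action not in range(n_sensors + 1):
            raise ValueError("Invalid action!")
        if i >= n_sensors:
            raise ValueError("Invalid sensor index!")
    return actions

print(unpack_and_validate(np.array([2, 4, 0, 1]), n_agents=4, n_sensors=4))
# -> [2, 4, 0, 1] (elements are numpy integers)
```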
@@ -166,10 +168,10 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                # rewards[i] =
-
-                #
-
+                # rewards[i] = self.compute_individual_rewards(i, action)
+                rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
+                # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
+                dones[i] = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
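In the branch where the packet reaches the base station, the reward slot is now filled with a whole vector of `max_reward` values, one per reward component, instead of a call to `compute_individual_rewards`; once the later `np.mean` collapses it, the sensor simply receives the maximum scalar reward. A tiny illustration, with `input_dim` and `max_reward` assumed (both are module-level constants whose values are not visible in this diff):

```python
import numpy as np

input_dim = 7      # assumed number of reward components
max_reward = 1.0   # assumed value of the module-level constant

reward_vector = np.ones(input_dim) * max_reward  # reward for delivering to the base station
print(np.mean(reward_vector))                    # 1.0 -> the best possible scalar reward
```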
@@ -194,20 +196,18 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-
-                reward = self.compute_individual_rewards(i, action)
+                rewards[i] = self.compute_individual_rewards(i, action)

                 # Update the number of packets
                 self.number_of_packets[action] += self.number_of_packets[i]
-
                 self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
                 # Calculate final reward
                 # rewards[i] = self.compute_attention_rewards(rewards[i])
-
-
-
-
-
+                rewards[i] = np.mean(rewards[i])
+                # rewards[i] = self.compute_weighted_sum_rewards(rewards[i])
+        for i in range(self.n_sensors):
+            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
+                dones[i] = True

         # Integrate the mobility of the sensors
         # self.integrate_mobility()
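For a hop to another sensor, the reward is now stored per sensor in `rewards[i]` (3.0.263 used a loop-local `reward`), collapsed to a scalar with `np.mean`, and a final sweep over all sensors marks any node without energy or packets as done. A compact sketch of that aggregation, with made-up values standing in for the clipped [0, 1] components that `compute_individual_rewards` concatenates:

```python
import numpy as np

# Stand-ins for the clipped [0, 1] components concatenated by compute_individual_rewards
rewards_energy      = np.array([0.8, 0.6, 0.9])   # e.g. angle, distance, consumption terms
rewards_performance = np.array([0.7, 0.5])        # e.g. delivery-ratio, latency terms
reward_vector = np.concatenate((rewards_energy, rewards_performance))

scalar_reward = np.mean(reward_vector)            # 0.7 -> value stored in rewards[i]

# Final sweep: a sensor is done once it has no energy or no packets left
remaining_energy  = np.array([0.0, 3.2, 1.1])
number_of_packets = np.array([4, 0, 2])
dones = [(e <= 0) or (p <= 0) for e, p in zip(remaining_energy, number_of_packets)]
print(scalar_reward, dones)                       # 0.7 [True, True, False]
```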
@@ -219,45 +219,15 @@ class WSNRoutingEnv(gym.Env):

         self.get_metrics()

-
-
-
-        #
-        #
-
+        rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
+        # rewards = np.sum(rewards) # Sum the rewards of all agents
+        rewards = np.mean(rewards) # Average the rewards of all agents
+        # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
+        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
+        dones = all(dones) # Done if all agents are done

-
-        # self.number_of_steps += 1
+        return self._get_obs(), rewards, dones, self.get_metrics()

-
-
-        # self.episode_return += reward
-        # if self.number_of_steps >= self.num_timesteps:
-        # self.episode_returns.append(self.episode_return)
-        # self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
-        # self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
-        # self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
-        # self.episode_network_throughput.append(self.network_throughput)
-        # self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
-        # self.episode_network_lifetime.append(self.network_lifetime)
-        # self.episode_average_latency.append(self.average_latency)
-
-        # metrics = {
-        # "returns_PPO": self.episode_returns,
-        # "std_remaining_energy_PPO": self.episode_std_remaining_energy,
-        # "total_consumption_energy_PPO": self.episode_total_consumption_energy,
-        # "mean_remaining_energy_PPO": self.episode_mean_remaining_energy,
-        # "network_throughput_PPO": self.episode_network_throughput,
-        # "packet_delivery_ratio_PPO": self.episode_packet_delivery_ratio,
-        # "network_lifetime_PPO": self.episode_network_lifetime,
-        # "average_latency_PPO": self.episode_average_latency
-        # }
-
-        # for metric_name, metric_value in metrics.items():
-        #     np.save(f"{base_back_up_dir}{metric_name}_{self.version}.npy", np.array(metric_value))
-
-        # return self._get_obs(), rewards, dones, {}
-        return self._get_obs(), reward, done, self.get_metrics()

     def _get_obs(self):
         return [{'remaining_energy': np.array([e]),
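Taken together, the end of `step()` now converts any `torch.Tensor` rewards to floats, averages them across agents into a single scalar, reduces `dones` with `all(...)`, and returns whatever `get_metrics()` produces as the info value, replacing the long commented-out PPO bookkeeping block of 3.0.263. A minimal consumer loop against that `(obs, reward, done, info)` signature, assuming `env` is an already-constructed `WSNRoutingEnv` whose `reset()` returns the initial observation:

```python
# Assumes `env` is a constructed WSNRoutingEnv and reset() returns the initial observation
obs = env.reset()
done = False
episode_return = 0.0

while not done:
    action = env.action_space.sample()              # one next-hop choice per agent (MultiDiscrete)
    obs, reward, done, info = env.step(action)      # scalar reward, single bool, metrics as info
    episode_return += reward

print(episode_return, info)
```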
@@ -266,6 +236,7 @@ class WSNRoutingEnv(gym.Env):
                  'number_of_packets': np.array([d])
                  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]

+
     def _get_observation_space(self):
         return Dict({
             'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
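The surrounding (unchanged) code pairs a per-sensor observation dict with a matching `Dict`/`Box` space in `_get_observation_space`. A sketch of that pairing with assumed constants and an assumed key name for the position entry (only `remaining_energy` and `number_of_packets` are visible in this hunk):

```python
import numpy as np
from gymnasium.spaces import Dict, Box

initial_energy = 1.0          # assumed module-level constant
area_side = 100.0             # assumed deployment-area bound for positions
max_packets = 10 * 5 + 1      # assumed n_sensors * initial_number_of_packets + 1

single_sensor_space = Dict({
    'remaining_energy':  Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
    'position':          Box(low=0, high=area_side, shape=(2,), dtype=np.float64),  # key name assumed
    'number_of_packets': Box(low=0, high=max_packets, shape=(1,), dtype=int),
})

obs = {'remaining_energy': np.array([0.7]),
       'position': np.array([12.5, 48.0]),
       'number_of_packets': np.array([3], dtype=int)}
assert single_sensor_space.contains(obs)
```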
@@ -274,23 +245,29 @@ class WSNRoutingEnv(gym.Env):
             'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
         })

+
     def get_state(self):
         return self._get_obs()

+
     def get_avail_actions(self):
         return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]

+
     def update_sensor_energies(self, i, delta_energy):
         self.remaining_energy[i] -= delta_energy

+
     def transmission_energy(self, number_of_packets, distance):
         # energy consumption for transmitting data on a distance
         return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

+
     def reception_energy(self, number_of_packets):
         # energy consumption for receiving data
         return number_of_packets * info_amount * Eelec

+
     def compute_angle_vectors(self, i, action):
         '''
         Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
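The unchanged `transmission_energy` / `reception_energy` helpers visible in this hunk implement the usual first-order radio model: sending `k` packets of `info_amount` bits over distance `d` costs `k · info_amount · (Eelec + Eamp · d²)`, and receiving costs `k · info_amount · Eelec`. A small numeric check with typical textbook constants (the package's actual `info_amount`, `Eelec`, `Eamp` values are not shown in this diff, so these are assumptions):

```python
# First-order radio model with commonly used constants (assumed, not taken from the package)
info_amount = 4000          # bits per packet
Eelec = 50e-9               # J/bit for the electronics
Eamp = 100e-12              # J/bit/m^2 for the amplifier

def transmission_energy(number_of_packets, distance):
    return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

def reception_energy(number_of_packets):
    return number_of_packets * info_amount * Eelec

print(transmission_energy(1, 30.0))   # 1 * 4000 * (50e-9 + 100e-12 * 900) = 5.6e-4 J
print(reception_energy(1))            # 1 * 4000 * 50e-9 = 2.0e-4 J
```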
@@ -304,6 +281,7 @@ class WSNRoutingEnv(gym.Env):

         return np.arccos(np.clip(cosine_angle, -1, 1))

+
     def compute_reward_angle(self, i, action):
         '''
         Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -316,6 +294,7 @@
         return np.clip(1 - normalized_angle, 0, 1)
         # return np.clip(- normalized_angle, -1, 1)

+
     def compute_reward_distance(self, i, action):
         '''
         Compute the reward based on the distance to the next hop
@@ -330,6 +309,7 @@
         return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
         # return np.clip(-normalized_distance_to_next_hop, -1, 1)

+
     def compute_reward_consumption_energy(self, i, action):
         '''
         Compute the reward based on the total energy consumption (transmission, reception)
@@ -352,6 +332,7 @@
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)

+
     def compute_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy
@@ -364,6 +345,7 @@
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
     def compute_reward_number_of_packets(self, action):
         '''
         Compute the reward based on the number of packets of the receiver
@@ -377,6 +359,7 @@
         return np.clip(1 - normalized_number_of_packets, 0, 1)
         # return np.clip(- normalized_number_of_packets, -1, 1)

+
     def compute_individual_rewards(self, i, action):
         '''
         Compute the individual rewards
@@ -416,6 +399,7 @@

         return np.concatenate((rewards_energy, rewards_performance))

+
     def network_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -428,6 +412,7 @@
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
     def network_reward_consumption_energy(self):
         '''
         Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -440,6 +425,7 @@
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)

+
     def compute_reward_packet_delivery_ratio(self):
         '''
         Compute the reward based on the packet delivery ratio
@@ -447,6 +433,7 @@
         packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
         return np.clip(packet_delivery_ratio, 0, 1)

+
     def compute_reward_latency(self):
         '''
         Compute the reward based on the average latency
@@ -458,6 +445,7 @@
         return np.clip(1 - normalized_latency, 0, 1)
         # return np.clip(- normalized_latency, -1, 1)

+
     def compute_reward_network_throughput(self):
         '''
         Compute the reward based on the network throughput
@@ -467,6 +455,7 @@
         normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
         return np.clip(normalized_throughput, 0, 1)

+
     def compute_attention_rewards(self, rewards):
         '''
         Compute the attention-based rewards
@@ -475,6 +464,7 @@
         final_reward = net(rewards)
         return final_reward

+
     def integrate_mobility(self):
         '''
         Integrate the mobility of the sensors after each step
@@ -487,6 +477,7 @@
         if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
             self.sensor_positions[i] -= displacement[i]

+
     def get_metrics(self):
         # Calculate network throughput
         self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
gym_examples-3.0.264.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=OE2WBgKyEbqBOaXf4v4N5a0qFaCkN5iUcgmZIdLghqk,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=t0uJq77RsuDKn_VRKh8dY9khp4DE1etoHzaUd582OSw,24905
+gym_examples-3.0.264.dist-info/METADATA,sha256=4l07YRLJpfbh8kvvuZIlaOTAls-N81hZPxxvNNAu8bM,412
+gym_examples-3.0.264.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.264.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.264.dist-info/RECORD,,
gym_examples-3.0.263.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=V-lDBqJirIbwK5vx8WNT5JyIwiwk22PGBiYSvm6JPo8,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=Ee3PTxhwKw2igwK5z465IdgK3mP0ParioXCni6BqXiE,26695
-gym_examples-3.0.263.dist-info/METADATA,sha256=rpRe1k5FCesyS6hKApj_Z-_sQAjJ01WM90Mzk_a5XPs,412
-gym_examples-3.0.263.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.263.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.263.dist-info/RECORD,,
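Each RECORD row follows the standard wheel layout `path,sha256=<urlsafe-base64 digest, no padding>,<size in bytes>`; only `gym_examples/__init__.py`, `wsn_env.py`, and `METADATA` get new hashes here, while `envs/__init__.py`, `WHEEL`, and `top_level.txt` keep the same ones. A small standard-library sketch of how such an entry can be recomputed for any file:

```python
import base64, hashlib
from pathlib import Path

def record_entry(path):
    """Build a 'path,sha256=...,size' line in the wheel RECORD format."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"

# e.g. record_entry("gym_examples/envs/wsn_env.py") run against the 3.0.264 wheel contents
# should reproduce: gym_examples/envs/wsn_env.py,sha256=t0uJq77RsuDKn_VRKh8dY9khp4DE1etoHzaUd582OSw,24905
```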
{gym_examples-3.0.263.dist-info → gym_examples-3.0.264.dist-info}/WHEEL
File without changes
{gym_examples-3.0.263.dist-info → gym_examples-3.0.264.dist-info}/top_level.txt
File without changes