gym-examples 3.0.282-py3-none-any.whl → 3.0.284-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +65 -26
- {gym_examples-3.0.282.dist-info → gym_examples-3.0.284.dist-info}/METADATA +1 -1
- gym_examples-3.0.284.dist-info/RECORD +7 -0
- gym_examples-3.0.282.dist-info/RECORD +0 -7
- {gym_examples-3.0.282.dist-info → gym_examples-3.0.284.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.282.dist-info → gym_examples-3.0.284.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -12,7 +12,7 @@ import os
 from collections import OrderedDict
 
 # Define the network parameters for the final reward function
-input_dim =
+input_dim = 4 # length of the individual rewards vector
 output_dim = 1 # final reward
 
 Eelec = 50e-9 # energy consumption per bit in joules
@@ -26,7 +26,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
 latency_per_hop = 1 # latency per hop in seconds
 
 base_back_up_dir = "results/data/"
-max_reward =
+max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
 
 # Define the final reward function using an attention mechanism
 class Attention(nn.Module):
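For context on the two constants above: input_dim = 4 is the length of the individual-rewards vector fed to the attention-based final-reward network declared just below, and max_reward = 3 is the reward for delivering data to the base station (its negative is used for unauthorized actions). The Attention class itself is unchanged in this release and therefore not shown in the diff; the snippet below is only a rough sketch of a module with that interface, and the layer sizes and forward pass are assumptions rather than the package's actual implementation.

import torch
import torch.nn as nn

class AttentionSketch(nn.Module):
    """Illustrative stand-in for the package's Attention net (input_dim -> scalar)."""
    def __init__(self, input_dim=4, output_dim=1):
        super().__init__()
        self.scores = nn.Linear(input_dim, input_dim)  # one attention score per reward component
        self.out = nn.Linear(input_dim, output_dim)    # scalar final reward

    def forward(self, rewards):                        # rewards: tensor of shape (input_dim,)
        weights = torch.softmax(self.scores(rewards), dim=-1)
        return self.out(weights * rewards)             # attention-weighted rewards -> single value

net = AttentionSketch().double()                       # mirrors `net = net.double()` in wsn_env.py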
@@ -49,7 +49,7 @@ net = net.double() # Convert the weights to Double
 
 class WSNRoutingEnv(gym.Env):
 
-
+    PRINT_STATS = "False" # Global flag to control printing of statistics
 
     def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4, num_timesteps = None, version = None):
 
@@ -74,7 +74,6 @@ class WSNRoutingEnv(gym.Env):
         self.episode_count = 0
         self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
         self.epsilon = 1e-10 # small value to avoid division by zero
-        # Initialize the position of the sensors randomly
 
         # Define observation space
         self.observation_space = Tuple(
@@ -82,7 +81,10 @@ class WSNRoutingEnv(gym.Env):
         )
 
         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
+        # self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station
+        self.action_space = Discrete((self.n_sensors + 1)**self.n_agents)
 
         self.reset()
 
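The hunk above is the central change in this release: the joint action space moves from MultiDiscrete([self.n_sensors + 1] * self.n_agents) to a single flat Discrete((self.n_sensors + 1)**self.n_agents), so one integer now encodes the choices of all agents as digits in base n_sensors + 1, and step() decodes it with the new to_base_n helper (see the hunks below and the method added at the end of the file). A minimal round-trip sketch, independent of the environment class; the helper names are illustrative and not part of the package:

def encode_joint_action(per_agent_actions, base):
    """Pack one action per agent (each in [0, base)) into a single flat integer."""
    number = 0
    for a in per_agent_actions:
        number = number * base + a
    return number

def decode_joint_action(number, base, n_agents):
    """Unpack the flat integer into one digit per agent, most significant digit first."""
    digits = []
    for _ in range(n_agents):
        digits.append(number % base)
        number //= base
    return digits[::-1]

# With the default n_sensors = 20 the base is 21; three agents round-trip cleanly:
assert decode_joint_action(encode_joint_action([4, 0, 20], 21), 21, 3) == [4, 0, 20]

Unlike this sketch, the package's to_base_n returns only the significant digits (no left padding to n_agents entries), which is worth keeping in mind when reading the step() loop below.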
@@ -113,12 +115,12 @@ class WSNRoutingEnv(gym.Env):
 
         return self._get_obs()
 
+
     def step(self, actions):
-        actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
         rewards = [-max_reward] * self.n_sensors
-        # rewards = [0] * self.n_sensors
         dones = [False] * self.n_sensors
+        actions = self.to_base_n(actions, self.n_sensors + 1)
         for i, action in enumerate(actions):
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue # Skip if sensor has no energy left or no packets to transmit
@@ -145,10 +147,8 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0
 
-
-
-                # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
-                dones[i] = True
+                rewards[i] = max_reward # Reward for transmitting data to the base station
+                dones[i] = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
@@ -173,18 +173,16 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0
 
-                rewards
+                rewards = self.compute_individual_rewards(i, action)
 
                 # Update the number of packets
                 self.number_of_packets[action] += self.number_of_packets[i]
+
                 self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
-            rewards[i] = self.compute_attention_rewards(rewards[i])
-
-
-            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
-                dones[i] = True
-
+            # rewards[i] = self.compute_attention_rewards(rewards[i])
+            rewards = np.mean(rewards)
+
         # Integrate the mobility of the sensors
         # self.integrate_mobility()
 
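Net effect of the step() hunks above: on a successful hop the individual-rewards vector returned by compute_individual_rewards is now collapsed with a plain mean instead of being passed through the attention network. A minimal illustration, assuming the vector holds four components already clipped to [0, 1] (their exact composition is defined elsewhere in the file and the values here are made up):

import numpy as np

individual_rewards = np.array([0.8, 0.6, 0.9, 0.7])  # assumed example values for the 4 components
step_reward = np.mean(individual_rewards)             # 0.75, replacing net(individual_rewards)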
@@ -195,15 +193,14 @@ class WSNRoutingEnv(gym.Env):
 
         self.get_metrics()
 
-        rewards = [
-
-
-
-        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
-        dones = all(dones) # Done if all agents are done
+        rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float
+        for i in range(self.n_sensors):
+            if not dones[i]:
+                dones[i] = self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0
 
         return self._get_obs(), rewards, dones, self.get_metrics()
 
+
     def _get_obs(self):
         return [{'remaining_energy': np.array([e]),
                 'consumption_energy': np.array([initial_energy - e]),
@@ -211,6 +208,7 @@ class WSNRoutingEnv(gym.Env):
                 'number_of_packets': np.array([d])
                 } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
+
     def _get_observation_space(self):
         return Dict(OrderedDict([
             ('remaining_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
@@ -219,23 +217,29 @@ class WSNRoutingEnv(gym.Env):
             ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int))
         ]))
 
+
     def get_state(self):
         return self._get_obs()
 
+
     def get_avail_actions(self):
         return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]
 
+
     def update_sensor_energies(self, i, delta_energy):
         self.remaining_energy[i] -= delta_energy
 
+
     def transmission_energy(self, number_of_packets, distance):
         # energy consumption for transmitting data on a distance
         return number_of_packets * info_amount * (Eelec + Eamp * distance**2)
 
+
     def reception_energy(self, number_of_packets):
         # energy consumption for receiving data
         return number_of_packets * info_amount * Eelec
 
+
     def compute_angle_vectors(self, i, action):
         '''
         Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
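The transmission_energy and reception_energy helpers touched above (whitespace only) implement the usual first-order radio model: sending k packets of info_amount bits over distance d costs k * info_amount * (Eelec + Eamp * d**2), and receiving them costs k * info_amount * Eelec. A worked example; Eelec comes from the hunk near the top of this diff, while Eamp and info_amount are defined elsewhere in wsn_env.py, so the values below are assumptions for illustration only:

Eelec = 50e-9          # J/bit (from the diff)
Eamp = 100e-12         # J/bit/m^2 (assumed value, not shown in this diff)
info_amount = 3072     # bits per packet (assumed value, not shown in this diff)

def transmission_energy(number_of_packets, distance):
    # energy to transmit over a given distance (quadratic path-loss term)
    return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

def reception_energy(number_of_packets):
    # energy to receive, independent of distance
    return number_of_packets * info_amount * Eelec

tx = transmission_energy(1, 50)   # 3072 * (50e-9 + 100e-12 * 2500) ≈ 9.2e-4 J
rx = reception_energy(1)          # 3072 * 50e-9 ≈ 1.5e-4 J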
@@ -249,6 +253,7 @@ class WSNRoutingEnv(gym.Env):
 
         return np.arccos(np.clip(cosine_angle, -1, 1))
 
+
     def compute_reward_angle(self, i, action):
         '''
         Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -261,6 +266,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_angle, 0, 1)
         # return np.clip(- normalized_angle, -1, 1)
 
+
     def compute_reward_distance(self, i, action):
         '''
         Compute the reward based on the distance to the next hop
@@ -275,6 +281,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
         # return np.clip(-normalized_distance_to_next_hop, -1, 1)
 
+
     def compute_reward_consumption_energy(self, i, action):
         '''
         Compute the reward based on the total energy consumption (transmission, reception)
@@ -297,6 +304,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)
 
+
     def compute_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy
@@ -309,6 +317,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
+
     def compute_reward_number_of_packets(self, action):
         '''
         Compute the reward based on the number of packets of the receiver
@@ -322,6 +331,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_number_of_packets, 0, 1)
         # return np.clip(- normalized_number_of_packets, -1, 1)
 
+
     def compute_individual_rewards(self, i, action):
         '''
         Compute the individual rewards
@@ -343,8 +353,9 @@ class WSNRoutingEnv(gym.Env):
 
         rewards_performance = np.array([reward_latency, reward_network_throughput, reward_packet_delivery_ratio])
 
-        return np.concatenate((rewards_energy, rewards_performance))
+        # return np.concatenate((rewards_energy, rewards_performance))
         # return np.array([reward_consumption_energy, reward_dispersion_remaining_energy])
+        return rewards_energy
 
 
     def compute_network_rewards(self):
@@ -360,6 +371,7 @@ class WSNRoutingEnv(gym.Env):
 
         return np.concatenate((rewards_energy, rewards_performance))
 
+
     def network_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -372,6 +384,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
+
     def network_reward_consumption_energy(self):
         '''
         Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -384,6 +397,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)
 
+
     def compute_reward_packet_delivery_ratio(self):
         '''
         Compute the reward based on the packet delivery ratio
@@ -391,6 +405,7 @@ class WSNRoutingEnv(gym.Env):
         packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
         return np.clip(packet_delivery_ratio, 0, 1)
 
+
     def compute_reward_latency(self):
         '''
         Compute the reward based on the average latency
@@ -402,6 +417,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_latency, 0, 1)
         # return np.clip(- normalized_latency, -1, 1)
 
+
     def compute_reward_network_throughput(self):
         '''
         Compute the reward based on the network throughput
@@ -411,6 +427,7 @@ class WSNRoutingEnv(gym.Env):
         normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
         return np.clip(normalized_throughput, 0, 1)
 
+
     def compute_attention_rewards(self, rewards):
         '''
         Compute the attention-based rewards
@@ -419,6 +436,7 @@ class WSNRoutingEnv(gym.Env):
         final_reward = net(rewards)
         return final_reward
 
+
     def integrate_mobility(self):
         '''
         Integrate the mobility of the sensors after each step
@@ -431,6 +449,7 @@ class WSNRoutingEnv(gym.Env):
             if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
                 self.sensor_positions[i] -= displacement[i]
 
+
     def get_metrics(self):
         # Calculate network throughput
         self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
@@ -449,4 +468,24 @@ class WSNRoutingEnv(gym.Env):
             "packet_delivery_ratio": self.packet_delivery_ratio,
             "network_lifetime": self.network_lifetime,
             "average_latency": self.average_latency
-        }
+        }
+
+
+    def find_next_sensor(self):
+        for offset in range(1, self.n_sensors):
+            next_index = (self.current_sensor + offset) % self.n_sensors
+            if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
+                return next_index
+        return None # If no such sensor is found
+
+
+    def to_base_n(self, number, base):
+        """Convert a number to a base-n number."""
+        if number == 0:
+            return [0] * (base - 1)
+
+        digits = []
+        while number:
+            digits.append(number % base)
+            number //= base
+        return digits[::-1] # Reverse the list to get the correct order
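The new to_base_n method is what step() uses to turn the flat Discrete action back into per-sensor digits. A standalone usage sketch of the same logic (copied out of the class so it can be run directly):

def to_base_n(number, base):
    """Convert a number to a base-n digit list, most significant digit first."""
    if number == 0:
        return [0] * (base - 1)
    digits = []
    while number:
        digits.append(number % base)
        number //= base
    return digits[::-1]

# With the default n_sensors = 20 the base is 21:
print(to_base_n(1784, 21))   # [4, 0, 20]
print(to_base_n(20, 21))     # [20] -- only the significant digits, no left padding
print(to_base_n(0, 21))      # [0] * 20

Because the digit list is not padded, step()'s enumerate(actions) loop sees fewer entries than there are agents whenever the leading digits of the joint action are zero; padding on the caller side (as in the sketch after the action-space hunk above) would make the mapping one-to-one.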
gym_examples-3.0.284.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=SbYVbDAngXH8S9SM6aQijqGwMUt-FSXYtOV00-_0ahI,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=VzbARUwPIdS6-RkQND3dxx0lLheFpP6NqaBwT5JY91o,23503
+gym_examples-3.0.284.dist-info/METADATA,sha256=U9huJCLpn-NIXHhbpD97MFIY_WpCrOxX7ebSgLBsf_w,412
+gym_examples-3.0.284.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.284.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.284.dist-info/RECORD,,
gym_examples-3.0.282.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=fBEwdHLyLAy9RQVdFO5HIxVvlyPKD2yEcyS6eJvwA80,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=bwXjZ9uS41ChZ7DvgeIwhPkLVUfkRzWCwPQjFDs34ho,23218
-gym_examples-3.0.282.dist-info/METADATA,sha256=PgGDOKmwo0s0Vx5X8CEgJntslyORoybP5IDEE-4CI7M,412
-gym_examples-3.0.282.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.282.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.282.dist-info/RECORD,,
gym_examples-3.0.282.dist-info/WHEEL → gym_examples-3.0.284.dist-info/WHEEL
File without changes
gym_examples-3.0.282.dist-info/top_level.txt → gym_examples-3.0.284.dist-info/top_level.txt
File without changes