gym-examples 3.0.282__py3-none-any.whl → 3.0.283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +81 -32
- {gym_examples-3.0.282.dist-info → gym_examples-3.0.283.dist-info}/METADATA +1 -1
- gym_examples-3.0.283.dist-info/RECORD +7 -0
- gym_examples-3.0.282.dist-info/RECORD +0 -7
- {gym_examples-3.0.282.dist-info → gym_examples-3.0.283.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.282.dist-info → gym_examples-3.0.283.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -12,7 +12,7 @@ import os
 from collections import OrderedDict

 # Define the network parameters for the final reward function
-input_dim =
+input_dim = 4 # length of the individual rewards vector
 output_dim = 1 # final reward

 Eelec = 50e-9 # energy consumption per bit in joules
@@ -26,7 +26,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
 latency_per_hop = 1 # latency per hop in seconds

 base_back_up_dir = "results/data/"
-max_reward =
+max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action

 # Define the final reward function using an attention mechanism
 class Attention(nn.Module):
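The constants above feed an attention-based reward head; the body of the `Attention` class is not touched by this hunk and is not shown in the diff. A minimal sketch of how such a combiner could be wired, assuming a simple softmax-weighted linear head (the layer names and the weighting scheme are assumptions, not the package's actual code):

```python
# Hypothetical sketch -- the Attention class body is not part of this diff.
# It assumes a softmax-weighted combiner that maps the input_dim=4 individual
# rewards to a single scalar (output_dim=1).
import torch
import torch.nn as nn

input_dim = 4   # length of the individual rewards vector
output_dim = 1  # final reward

class Attention(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.scores = nn.Linear(input_dim, input_dim)    # per-component attention scores (assumed)
        self.combine = nn.Linear(input_dim, output_dim)  # weighted rewards -> scalar reward (assumed)

    def forward(self, rewards):
        weights = torch.softmax(self.scores(rewards), dim=-1)
        return self.combine(weights * rewards)

net = Attention(input_dim, output_dim)
net = net.double()  # as in the module: weights converted to double precision
```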
@@ -49,7 +49,7 @@ net = net.double() # Convert the weights to Double

 class WSNRoutingEnv(gym.Env):

-
+    PRINT_STATS = "False" # Global flag to control printing of statistics

     def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4, num_timesteps = None, version = None):

@@ -74,7 +74,7 @@ class WSNRoutingEnv(gym.Env):
         self.episode_count = 0
         self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
         self.epsilon = 1e-10 # small value to avoid division by zero
-
+        self.current_sensor = 0 # Index of the current sensor

         # Define observation space
         self.observation_space = Tuple(
@@ -82,7 +82,9 @@ class WSNRoutingEnv(gym.Env):
         )

         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
+        self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station

         self.reset()

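This hunk replaces the joint MultiDiscrete action (one next-hop choice per sensor) with a single Discrete choice that applies to the sensor tracked in `self.current_sensor`. A small sketch of the two sampling patterns, assuming the default n_sensors = 20 from `__init__`:

```python
# Illustrative only: compares the old and new action interfaces.
from gym.spaces import Discrete, MultiDiscrete  # the package targets gym, not gymnasium

n_sensors = 20  # default from WSNRoutingEnv.__init__

# Before: one next-hop choice per sensor, sampled jointly.
old_space = MultiDiscrete([n_sensors + 1] * n_sensors)
joint_action = old_space.sample()    # array of length n_sensors

# After: a single next-hop choice (index n_sensors means "base station")
# that applies to the sensor the environment currently designates.
new_space = Discrete(n_sensors + 1)
single_action = new_space.sample()   # one integer in [0, n_sensors]
```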
@@ -113,13 +115,14 @@ class WSNRoutingEnv(gym.Env):

         return self._get_obs()

+
     def step(self, actions):
-        actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
-        rewards =
-        # rewards =
-        dones =
-
+        rewards = - max_reward
+        # rewards = 0
+        dones = False
+        actions = np.array([self.current_sensor, actions])
+        for i, action in [actions]:
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue # Skip if sensor has no energy left or no packets to transmit

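`step` now receives one scalar action and pairs it with `self.current_sensor`; wrapping the pair in a one-element list lets the old per-agent loop body run exactly once. An illustrative sketch of that unpacking, with made-up index values:

```python
# Illustrative only: shows why the loop still runs exactly once after the change.
import numpy as np

current_sensor = 3   # whichever sensor the env is currently routing for (made up)
action = 7           # next hop chosen by the agent (made up)

actions = np.array([current_sensor, action])
for i, action in [actions]:   # a one-element list, so a single iteration
    # i == 3, action == 7: the old per-agent loop body is reused unchanged
    print(i, action)
```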
@@ -134,6 +137,11 @@ class WSNRoutingEnv(gym.Env):
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
                 if self.remaining_energy[i] < transmission_energy:
                     self.remaining_energy[i] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the sensor does not have enough energy to transmit data to the base station

                 self.update_sensor_energies(i, transmission_energy)
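The five added lines (pick the next live sensor or end the episode) recur in two more failure branches below. A hedged refactor sketch that names the pattern; `_advance_or_finish` is a hypothetical helper, not part of the package:

```python
# Hypothetical helper -- not in the package; it only names the repeated pattern.
def _advance_or_finish(self):
    """Move on to the next sensor that still has energy and packets,
    or report that the episode is over when none is left."""
    next_sensor = self.find_next_sensor()
    if next_sensor is None:
        return True                    # corresponds to dones = True in the original branches
    self.current_sensor = next_sensor
    return False
```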
@@ -145,10 +153,12 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-
-
-
-
+                rewards = max_reward # Reward for transmitting data to the base station
+                next_sensor = self.find_next_sensor()
+                if next_sensor is None:
+                    dones = True
+                else:
+                    self.current_sensor = next_sensor
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
@@ -157,11 +167,21 @@ class WSNRoutingEnv(gym.Env):
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
                 reception_energy = self.reception_energy(self.number_of_packets[i])
                 if self.remaining_energy[i] < transmission_energy:
-                    self.remaining_energy[i] = 0
+                    self.remaining_energy[i] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the sensor does not have enough energy to transmit data to the next hop
                 if self.remaining_energy[action] < reception_energy:
                     self.number_of_packets[i] = 0
                     self.remaining_energy[action] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the next hop does not have enough energy to receive data

                 self.update_sensor_energies(i, transmission_energy)
@@ -173,18 +193,17 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                rewards
+                rewards = self.compute_individual_rewards(i, action)

             # Update the number of packets
             self.number_of_packets[action] += self.number_of_packets[i]
+
+            self.current_sensor = action
             self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
-            rewards[i] = self.compute_attention_rewards(rewards[i])
-
-
-            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
-                dones[i] = True
-
+            # rewards[i] = self.compute_attention_rewards(rewards[i])
+            rewards = np.mean(rewards)
+
         # Integrate the mobility of the sensors
         # self.integrate_mobility()

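With this hunk the per-step reward becomes the plain mean of the individual reward vector instead of the attention-net output, while the base-station and failure cases overwrite it with plus or minus max_reward. A sketch of the three cases with made-up component values:

```python
# Illustrative only: the component values below are made up.
import numpy as np

max_reward = 3
individual_rewards = np.array([0.9, 0.4, 0.7, 0.6])  # 4 reward terms, each clipped to [0, 1]

step_reward = np.mean(individual_rewards)   # 0.65, replaces the attention-net scalar
delivery_reward = max_reward                # overwrite when the packet reaches the base station
penalty = -max_reward                       # default value when the chosen action is invalid
```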
@@ -195,47 +214,54 @@ class WSNRoutingEnv(gym.Env):

         self.get_metrics()

-        rewards =
-
-
-        # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
-        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
-        dones = all(dones) # Done if all agents are done
+        rewards = rewards.item() if isinstance(rewards, torch.Tensor) else rewards # Convert the reward to a float
+        if not dones:
+            dones = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))

         return self._get_obs(), rewards, dones, self.get_metrics()

+
     def _get_obs(self):
         return [{'remaining_energy': np.array([e]),
                  'consumption_energy': np.array([initial_energy - e]),
                  'sensor_positions': p,
-                 'number_of_packets': np.array([d])
+                 'number_of_packets': np.array([d]),
+                 'curent_sensor': np.array([self.current_sensor])
                  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]

+
     def _get_observation_space(self):
         return Dict(OrderedDict([
             ('remaining_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
             ('consumption_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
             ('sensor_positions', Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64)),
-            ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int))
+            ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)),
+            ('current_sensor', Box(low=0, high=self.n_sensors - 1, shape=(1,), dtype=int))
         ]))

+
     def get_state(self):
         return self._get_obs()

+
     def get_avail_actions(self):
         return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]

+
     def update_sensor_energies(self, i, delta_energy):
         self.remaining_energy[i] -= delta_energy

+
     def transmission_energy(self, number_of_packets, distance):
         # energy consumption for transmitting data on a distance
         return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

+
     def reception_energy(self, number_of_packets):
         # energy consumption for receiving data
         return number_of_packets * info_amount * Eelec

+
     def compute_angle_vectors(self, i, action):
         '''
         Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
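`transmission_energy` and `reception_energy` implement a first-order radio model: a fixed electronics cost per bit plus an amplifier term that grows with the square of the distance. A worked example; Eelec comes from the module constants, while Eamp and info_amount are not visible in these hunks, so the values below are assumptions:

```python
# Worked example of the first-order radio model used above.
# Eelec comes from the diff; Eamp and info_amount are NOT shown in these hunks,
# so the values below are assumptions for illustration only.
Eelec = 50e-9          # J per bit (from the module constants)
Eamp = 100e-12         # J per bit per m^2  (assumed)
info_amount = 3072     # bits per packet    (assumed)

def transmission_energy(number_of_packets, distance):
    return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

def reception_energy(number_of_packets):
    return number_of_packets * info_amount * Eelec

print(transmission_energy(1, 30.0))  # 3072 * (50e-9 + 100e-12 * 900), about 4.3e-4 J
print(reception_energy(1))           # 3072 * 50e-9, about 1.5e-4 J
```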
@@ -249,6 +275,7 @@ class WSNRoutingEnv(gym.Env):

         return np.arccos(np.clip(cosine_angle, -1, 1))

+
     def compute_reward_angle(self, i, action):
         '''
         Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -261,6 +288,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_angle, 0, 1)
         # return np.clip(- normalized_angle, -1, 1)

+
     def compute_reward_distance(self, i, action):
         '''
         Compute the reward based on the distance to the next hop
@@ -275,6 +303,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
         # return np.clip(-normalized_distance_to_next_hop, -1, 1)

+
     def compute_reward_consumption_energy(self, i, action):
         '''
         Compute the reward based on the total energy consumption (transmission, reception)
@@ -297,6 +326,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)

+
     def compute_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy
@@ -309,6 +339,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
     def compute_reward_number_of_packets(self, action):
         '''
         Compute the reward based on the number of packets of the receiver
@@ -322,6 +353,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_number_of_packets, 0, 1)
         # return np.clip(- normalized_number_of_packets, -1, 1)

+
     def compute_individual_rewards(self, i, action):
         '''
         Compute the individual rewards
@@ -343,8 +375,9 @@ class WSNRoutingEnv(gym.Env):

         rewards_performance = np.array([reward_latency, reward_network_throughput, reward_packet_delivery_ratio])

-        return np.concatenate((rewards_energy, rewards_performance))
+        # return np.concatenate((rewards_energy, rewards_performance))
         # return np.array([reward_consumption_energy, reward_dispersion_remaining_energy])
+        return rewards_energy


     def compute_network_rewards(self):
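Returning only rewards_energy lines up with input_dim = 4 at the top of the module; the three performance terms are still computed but no longer fed into the reward vector. A shape sketch with placeholder values (the exact composition of rewards_energy is not shown in this hunk):

```python
# Illustrative only: rewards_energy's exact composition is not shown in this hunk;
# four placeholder components are used to match input_dim = 4.
import numpy as np

rewards_energy = np.array([0.8, 0.5, 0.9, 0.7])   # placeholder energy-related terms
rewards_performance = np.array([0.6, 0.4, 1.0])   # latency, throughput, packet delivery ratio

old_vector = np.concatenate((rewards_energy, rewards_performance))  # what used to be returned
new_vector = rewards_energy                                         # what is returned now, shape (4,)
```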
@@ -360,6 +393,7 @@ class WSNRoutingEnv(gym.Env):

         return np.concatenate((rewards_energy, rewards_performance))

+
     def network_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -372,6 +406,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
     def network_reward_consumption_energy(self):
         '''
         Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -384,6 +419,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)

+
     def compute_reward_packet_delivery_ratio(self):
         '''
         Compute the reward based on the packet delivery ratio
@@ -391,6 +427,7 @@ class WSNRoutingEnv(gym.Env):
         packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
         return np.clip(packet_delivery_ratio, 0, 1)

+
     def compute_reward_latency(self):
         '''
         Compute the reward based on the average latency
@@ -402,6 +439,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_latency, 0, 1)
         # return np.clip(- normalized_latency, -1, 1)

+
     def compute_reward_network_throughput(self):
         '''
         Compute the reward based on the network throughput
@@ -411,6 +449,7 @@ class WSNRoutingEnv(gym.Env):
         normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
         return np.clip(normalized_throughput, 0, 1)

+
     def compute_attention_rewards(self, rewards):
         '''
         Compute the attention-based rewards
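`compute_attention_rewards` still passes the reward vector through the double-precision net, even though its call in `step` is now commented out. A hedged sketch of the call path; the tensor conversion is an assumption, since the diff only shows `final_reward = net(rewards)`:

```python
# Hypothetical sketch of the call path; only `final_reward = net(rewards)` is visible in the diff.
import numpy as np
import torch

individual_rewards = np.array([0.8, 0.5, 0.9, 0.7])                        # length input_dim = 4
rewards_tensor = torch.as_tensor(individual_rewards, dtype=torch.float64)  # matches net.double()
# final_reward = net(rewards_tensor)   # a 1-element tensor (output_dim = 1)
# reward = final_reward.item()         # mirrors the rewards.item() conversion in step()
```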
@@ -419,6 +458,7 @@ class WSNRoutingEnv(gym.Env):
         final_reward = net(rewards)
         return final_reward

+
     def integrate_mobility(self):
         '''
         Integrate the mobility of the sensors after each step
@@ -431,6 +471,7 @@ class WSNRoutingEnv(gym.Env):
             if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
                 self.sensor_positions[i] -= displacement[i]

+
     def get_metrics(self):
         # Calculate network throughput
         self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
@@ -449,4 +490,12 @@ class WSNRoutingEnv(gym.Env):
             "packet_delivery_ratio": self.packet_delivery_ratio,
             "network_lifetime": self.network_lifetime,
             "average_latency": self.average_latency
-        }
+        }
+
+
+    def find_next_sensor(self):
+        for offset in range(1, self.n_sensors):
+            next_index = (self.current_sensor + offset) % self.n_sensors
+            if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
+                return next_index
+        return None # If no such sensor is found
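`find_next_sensor` scans forward from the current index, wrapping around, and returns the first sensor that still has both remaining energy and packets, or None when no sensor qualifies. A standalone illustration of the same scan with made-up arrays:

```python
# Standalone illustration of the round-robin scan; the arrays are made up.
import numpy as np

n_sensors = 5
current_sensor = 2
remaining_energy = np.array([0.0, 0.2, 0.0, 0.0, 0.5])
number_of_packets = np.array([0, 3, 1, 0, 2])

def find_next_sensor(current_sensor):
    for offset in range(1, n_sensors):
        next_index = (current_sensor + offset) % n_sensors
        if remaining_energy[next_index] > 0 and number_of_packets[next_index] > 0:
            return next_index
    return None

print(find_next_sensor(current_sensor))  # 4: sensor 3 has no energy, sensor 4 is the first that qualifies
```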
gym_examples-3.0.283.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=B6nFhjmZ3o9wglL3vYZps18eP8W7b436z2-pBFs_-2w,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=pi4-ErjIpxM1jrWjMr8vbUHmp0YSY9YaGSTXsL2k50I,24121
+gym_examples-3.0.283.dist-info/METADATA,sha256=0J7nhDUnmHi_7HwHNYycQqRikH_nlbSNNGmVVf6ujm0,412
+gym_examples-3.0.283.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.283.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.283.dist-info/RECORD,,
gym_examples-3.0.282.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=fBEwdHLyLAy9RQVdFO5HIxVvlyPKD2yEcyS6eJvwA80,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=bwXjZ9uS41ChZ7DvgeIwhPkLVUfkRzWCwPQjFDs34ho,23218
-gym_examples-3.0.282.dist-info/METADATA,sha256=PgGDOKmwo0s0Vx5X8CEgJntslyORoybP5IDEE-4CI7M,412
-gym_examples-3.0.282.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.282.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.282.dist-info/RECORD,,
{gym_examples-3.0.282.dist-info → gym_examples-3.0.283.dist-info}/WHEEL
File without changes
{gym_examples-3.0.282.dist-info → gym_examples-3.0.283.dist-info}/top_level.txt
File without changes