gym-examples 3.0.280-py3-none-any.whl → 3.0.282-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +32 -81
- {gym_examples-3.0.280.dist-info → gym_examples-3.0.282.dist-info}/METADATA +1 -1
- gym_examples-3.0.282.dist-info/RECORD +7 -0
- gym_examples-3.0.280.dist-info/RECORD +0 -7
- {gym_examples-3.0.280.dist-info → gym_examples-3.0.282.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.280.dist-info → gym_examples-3.0.282.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -12,7 +12,7 @@ import os
 from collections import OrderedDict
 
 # Define the network parameters for the final reward function
-input_dim =
+input_dim = 7 # length of the individual rewards vector
 output_dim = 1 # final reward
 
 Eelec = 50e-9 # energy consumption per bit in joules
@@ -26,7 +26,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
 latency_per_hop = 1 # latency per hop in seconds
 
 base_back_up_dir = "results/data/"
-max_reward =
+max_reward = 1 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
 
 # Define the final reward function using an attention mechanism
 class Attention(nn.Module):
@@ -49,7 +49,7 @@ net = net.double() # Convert the weights to Double
 
 class WSNRoutingEnv(gym.Env):
 
-
+    print_stats = False # Global flag to control printing of statistics
 
     def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4, num_timesteps = None, version = None):
 
@@ -74,7 +74,7 @@ class WSNRoutingEnv(gym.Env):
         self.episode_count = 0
         self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
         self.epsilon = 1e-10 # small value to avoid division by zero
-
+        # Initialize the position of the sensors randomly
 
         # Define observation space
         self.observation_space = Tuple(
@@ -82,9 +82,7 @@ class WSNRoutingEnv(gym.Env):
         )
 
         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-
-        # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
-        self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station
+        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
 
         self.reset()
 
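Note on the action-space change above: the environment moves from a single Discrete(n_sensors + 1) action to a joint MultiDiscrete action, one sub-action per agent, with the last index addressing the base station. A minimal sketch of how the new space is sampled and iterated (illustrative values only; it assumes gym's standard spaces API and that n_agents equals n_sensors, which this diff does not show):

import numpy as np
from gym.spaces import MultiDiscrete

n_sensors = 20                     # illustrative, mirrors the default in __init__
n_agents = n_sensors               # assumption: one agent per sensor
action_space = MultiDiscrete([n_sensors + 1] * n_agents)

actions = action_space.sample()    # ndarray of shape (n_agents,), entries in [0, n_sensors]
for i, action in enumerate(actions):
    is_base_station = (action == n_sensors)   # index n_sensors means "send to the base station"
    # route sensor i's packets to `action` (another sensor or the base station)

This mirrors the per-agent loop added to step() in the next hunk.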
@@ -115,14 +113,13 @@ class WSNRoutingEnv(gym.Env):
 
         return self._get_obs()
 
-
     def step(self, actions):
+        actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
-        rewards = -
-        # rewards = 0
-        dones = False
-
-        for i, action in [actions]:
+        rewards = [-max_reward] * self.n_sensors
+        # rewards = [0] * self.n_sensors
+        dones = [False] * self.n_sensors
+        for i, action in enumerate(actions):
            if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                continue # Skip if sensor has no energy left or no packets to transmit
 
@@ -137,11 +134,6 @@ class WSNRoutingEnv(gym.Env):
                transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
                if self.remaining_energy[i] < transmission_energy:
                    self.remaining_energy[i] = 0
-                    next_sensor = self.find_next_sensor()
-                    if next_sensor is None:
-                        dones = True
-                    else:
-                        self.current_sensor = next_sensor
                    continue # Skip if the sensor does not have enough energy to transmit data to the base station
 
                self.update_sensor_energies(i, transmission_energy)
@@ -153,12 +145,10 @@ class WSNRoutingEnv(gym.Env):
                self.total_latency += self.packet_latency[i] + latency_per_hop
                self.packet_latency[i] = 0
 
-                rewards =
-
-
-
-                else:
-                    self.current_sensor = next_sensor
+                # rewards[i] = self.compute_individual_rewards(i, action)
+                rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
+                # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
+                dones[i] = True
            else:
                distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                if distance > self.coverage_radius:
@@ -167,21 +157,11 @@ class WSNRoutingEnv(gym.Env):
                transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
                reception_energy = self.reception_energy(self.number_of_packets[i])
                if self.remaining_energy[i] < transmission_energy:
-                    self.remaining_energy[i] = 0
-                    next_sensor = self.find_next_sensor()
-                    if next_sensor is None:
-                        dones = True
-                    else:
-                        self.current_sensor = next_sensor
+                    self.remaining_energy[i] = 0
                    continue # Skip if the sensor does not have enough energy to transmit data to the next hop
                if self.remaining_energy[action] < reception_energy:
                    self.number_of_packets[i] = 0
                    self.remaining_energy[action] = 0
-                    next_sensor = self.find_next_sensor()
-                    if next_sensor is None:
-                        dones = True
-                    else:
-                        self.current_sensor = next_sensor
                    continue # Skip if the next hop does not have enough energy to receive data
 
                self.update_sensor_energies(i, transmission_energy)
@@ -193,17 +173,18 @@ class WSNRoutingEnv(gym.Env):
                self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                self.packet_latency[i] = 0
 
-                rewards = self.compute_individual_rewards(i, action)
+                rewards[i] = self.compute_individual_rewards(i, action)
 
                # Update the number of packets
                self.number_of_packets[action] += self.number_of_packets[i]
-
-                self.current_sensor = action
            self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
            # Calculate final reward
-
-            rewards = np.mean(rewards)
-
+            rewards[i] = self.compute_attention_rewards(rewards[i])
+            # rewards[i] = np.mean(rewards[i])
+        for i in range(self.n_sensors):
+            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
+                dones[i] = True
+
        # Integrate the mobility of the sensors
        # self.integrate_mobility()
 
@@ -214,54 +195,47 @@ class WSNRoutingEnv(gym.Env):
 
        self.get_metrics()
 
-        rewards =
-
-
+        rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
+        # rewards = np.sum(rewards) # Sum the rewards of all agents
+        rewards = np.mean(rewards) # Average the rewards of all agents
+        # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
+        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
+        dones = all(dones) # Done if all agents are done
 
        return self._get_obs(), rewards, dones, self.get_metrics()
 
-
    def _get_obs(self):
        return [{'remaining_energy': np.array([e]),
                 'consumption_energy': np.array([initial_energy - e]),
                 'sensor_positions': p,
-                 'number_of_packets': np.array([d])
-                 'curent_sensor': np.array([self.current_sensor])
+                 'number_of_packets': np.array([d])
                 } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
-
    def _get_observation_space(self):
        return Dict(OrderedDict([
            ('remaining_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
            ('consumption_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
            ('sensor_positions', Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64)),
-            ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int))
-            ('current_sensor', Box(low=0, high=self.n_sensors - 1, shape=(1,), dtype=int))
+            ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int))
        ]))
 
-
    def get_state(self):
        return self._get_obs()
 
-
    def get_avail_actions(self):
        return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]
 
-
    def update_sensor_energies(self, i, delta_energy):
        self.remaining_energy[i] -= delta_energy
 
-
    def transmission_energy(self, number_of_packets, distance):
        # energy consumption for transmitting data on a distance
        return number_of_packets * info_amount * (Eelec + Eamp * distance**2)
 
-
    def reception_energy(self, number_of_packets):
        # energy consumption for receiving data
        return number_of_packets * info_amount * Eelec
 
-
    def compute_angle_vectors(self, i, action):
        '''
        Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
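Taken together, the step() changes above replace the old scalar rewards/dones bookkeeping with per-sensor lists that are only collapsed at the very end of the step. A minimal sketch of that aggregation outside the environment (placeholder values, not taken from the package):

import numpy as np
import torch

rewards = [-1, torch.tensor(0.42, dtype=torch.float64), -1]   # per-sensor rewards; some come out of the attention net as tensors
dones = [False, True, False]                                   # per-sensor termination flags

rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards]  # tensors -> plain floats
reward = float(np.mean(rewards))   # single scalar reward returned by step()
done = all(dones)                  # the episode ends only when every sensor is done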
@@ -275,7 +249,6 @@ class WSNRoutingEnv(gym.Env):
 
        return np.arccos(np.clip(cosine_angle, -1, 1))
 
-
    def compute_reward_angle(self, i, action):
        '''
        Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -288,7 +261,6 @@ class WSNRoutingEnv(gym.Env):
        return np.clip(1 - normalized_angle, 0, 1)
        # return np.clip(- normalized_angle, -1, 1)
 
-
    def compute_reward_distance(self, i, action):
        '''
        Compute the reward based on the distance to the next hop
@@ -303,7 +275,6 @@ class WSNRoutingEnv(gym.Env):
        return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
        # return np.clip(-normalized_distance_to_next_hop, -1, 1)
 
-
    def compute_reward_consumption_energy(self, i, action):
        '''
        Compute the reward based on the total energy consumption (transmission, reception)
@@ -326,7 +297,6 @@ class WSNRoutingEnv(gym.Env):
        return np.clip(1 - normalized_total_energy, 0, 1)
        # return np.clip(- normalized_total_energy, -1, 1)
 
-
    def compute_reward_dispersion_remaining_energy(self):
        '''
        Compute the reward based on the standard deviation of the remaining energy
@@ -339,7 +309,6 @@ class WSNRoutingEnv(gym.Env):
        return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
        # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
-
    def compute_reward_number_of_packets(self, action):
        '''
        Compute the reward based on the number of packets of the receiver
@@ -353,7 +322,6 @@ class WSNRoutingEnv(gym.Env):
        return np.clip(1 - normalized_number_of_packets, 0, 1)
        # return np.clip(- normalized_number_of_packets, -1, 1)
 
-
    def compute_individual_rewards(self, i, action):
        '''
        Compute the individual rewards
@@ -375,9 +343,8 @@ class WSNRoutingEnv(gym.Env):
 
        rewards_performance = np.array([reward_latency, reward_network_throughput, reward_packet_delivery_ratio])
 
-
+        return np.concatenate((rewards_energy, rewards_performance))
        # return np.array([reward_consumption_energy, reward_dispersion_remaining_energy])
-        return rewards_energy
 
 
    def compute_network_rewards(self):
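With the restored return of np.concatenate((rewards_energy, rewards_performance)), each individual reward is again a vector whose length matches input_dim = 7 at the top of the module: the performance part contributes the three entries shown above (latency, network throughput, packet delivery ratio), so rewards_energy must contribute the remaining four (this hunk does not show which energy-related rewards those are). A small shape check with placeholder values:

import numpy as np

input_dim = 7
rewards_energy = np.array([0.9, 0.8, 0.7, 0.6])    # placeholder: the four energy-related components
rewards_performance = np.array([0.5, 0.4, 0.3])    # placeholder: latency, throughput, packet delivery ratio
individual_rewards = np.concatenate((rewards_energy, rewards_performance))
assert individual_rewards.shape == (input_dim,)    # this vector is what the attention net consumes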
@@ -393,7 +360,6 @@ class WSNRoutingEnv(gym.Env):
 
        return np.concatenate((rewards_energy, rewards_performance))
 
-
    def network_reward_dispersion_remaining_energy(self):
        '''
        Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -406,7 +372,6 @@ class WSNRoutingEnv(gym.Env):
        return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
        # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
-
    def network_reward_consumption_energy(self):
        '''
        Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -419,7 +384,6 @@ class WSNRoutingEnv(gym.Env):
        return np.clip(1 - normalized_total_energy, 0, 1)
        # return np.clip(- normalized_total_energy, -1, 1)
 
-
    def compute_reward_packet_delivery_ratio(self):
        '''
        Compute the reward based on the packet delivery ratio
@@ -427,7 +391,6 @@ class WSNRoutingEnv(gym.Env):
        packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
        return np.clip(packet_delivery_ratio, 0, 1)
 
-
    def compute_reward_latency(self):
        '''
        Compute the reward based on the average latency
@@ -439,7 +402,6 @@ class WSNRoutingEnv(gym.Env):
        return np.clip(1 - normalized_latency, 0, 1)
        # return np.clip(- normalized_latency, -1, 1)
 
-
    def compute_reward_network_throughput(self):
        '''
        Compute the reward based on the network throughput
@@ -449,7 +411,6 @@ class WSNRoutingEnv(gym.Env):
        normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
        return np.clip(normalized_throughput, 0, 1)
 
-
    def compute_attention_rewards(self, rewards):
        '''
        Compute the attention-based rewards
@@ -458,7 +419,6 @@ class WSNRoutingEnv(gym.Env):
        final_reward = net(rewards)
        return final_reward
 
-
    def integrate_mobility(self):
        '''
        Integrate the mobility of the sensors after each step
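compute_attention_rewards() feeds the 7-element individual-reward vector through the module-level net (an Attention instance converted to double precision) and returns its output, which step() later turns into a float via .item(). The Attention class itself is unchanged and therefore not shown in this diff, so the following is only a hypothetical stand-in consistent with input_dim = 7, output_dim = 1 and the net = net.double() / final_reward = net(rewards) lines, not the package's actual implementation:

import torch
import torch.nn as nn

input_dim, output_dim = 7, 1          # module-level constants from this file

class Attention(nn.Module):           # hypothetical stand-in, not the released code
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.scores = nn.Linear(input_dim, input_dim)   # one attention score per reward component
        self.out = nn.Linear(input_dim, output_dim)     # combine weighted rewards into one value

    def forward(self, x):
        weights = torch.softmax(self.scores(x), dim=-1) # attention weights over the 7 rewards
        return self.out(weights * x)                    # final reward, shape (1,)

net = Attention(input_dim, output_dim).double()         # double precision, as in the module
rewards = torch.ones(input_dim, dtype=torch.float64)    # e.g. the base-station case: np.ones(input_dim) * max_reward
final_reward = net(rewards)                             # 1-element tensor; .item() yields the float used in step()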
@@ -471,7 +431,6 @@ class WSNRoutingEnv(gym.Env):
            if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
                self.sensor_positions[i] -= displacement[i]
 
-
    def get_metrics(self):
        # Calculate network throughput
        self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
@@ -490,12 +449,4 @@ class WSNRoutingEnv(gym.Env):
            "packet_delivery_ratio": self.packet_delivery_ratio,
            "network_lifetime": self.network_lifetime,
            "average_latency": self.average_latency
-        }
-
-
-    def find_next_sensor(self):
-        for offset in range(1, self.n_sensors):
-            next_index = (self.current_sensor + offset) % self.n_sensors
-            if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
-                return next_index
-        return None # If no such sensor is found
+        }
gym_examples-3.0.282.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=fBEwdHLyLAy9RQVdFO5HIxVvlyPKD2yEcyS6eJvwA80,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=bwXjZ9uS41ChZ7DvgeIwhPkLVUfkRzWCwPQjFDs34ho,23218
+gym_examples-3.0.282.dist-info/METADATA,sha256=PgGDOKmwo0s0Vx5X8CEgJntslyORoybP5IDEE-4CI7M,412
+gym_examples-3.0.282.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.282.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.282.dist-info/RECORD,,
gym_examples-3.0.280.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=31Vwz4E6SJG5a7ryaw856CyfouCCdUwVnXmVGPjNG34,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=pi4-ErjIpxM1jrWjMr8vbUHmp0YSY9YaGSTXsL2k50I,24121
-gym_examples-3.0.280.dist-info/METADATA,sha256=PqwHgxTiRoujD1Lb5DqobUiD4YNdp__uiv45wkx38oQ,412
-gym_examples-3.0.280.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.280.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.280.dist-info/RECORD,,
{gym_examples-3.0.280.dist-info → gym_examples-3.0.282.dist-info}/WHEEL
File without changes
{gym_examples-3.0.280.dist-info → gym_examples-3.0.282.dist-info}/top_level.txt
File without changes