gym-examples 3.0.262__py3-none-any.whl → 3.0.264__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +52 -58
- {gym_examples-3.0.262.dist-info → gym_examples-3.0.264.dist-info}/METADATA +1 -1
- gym_examples-3.0.264.dist-info/RECORD +7 -0
- gym_examples-3.0.262.dist-info/RECORD +0 -7
- {gym_examples-3.0.262.dist-info → gym_examples-3.0.264.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.262.dist-info → gym_examples-3.0.264.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -81,8 +81,9 @@ class WSNRoutingEnv(gym.Env):
         )

         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-
-        self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
+        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
+        # self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))

         self.reset()

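Note on the hunk above: the 3.0.262 space sampled a single (agent, target) pair per step, whereas the 3.0.264 space samples one next-hop choice for every agent. A minimal sketch of the difference, assuming illustrative sizes n_agents = n_sensors = 3 (values chosen for the example, not taken from the package):

    from gym.spaces import MultiDiscrete

    n_agents, n_sensors = 3, 3

    old_space = MultiDiscrete([n_agents, n_sensors + 1])    # 3.0.262: one (agent, target) pair
    new_space = MultiDiscrete([n_sensors + 1] * n_agents)   # 3.0.264: one target per agent

    print(old_space.sample())   # e.g. [1 3]   -> shape (2,)
    print(new_space.sample())   # e.g. [2 0 3] -> shape (n_agents,)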
@@ -130,14 +131,18 @@ class WSNRoutingEnv(gym.Env):


     def step(self, actions):
+        actions = [actions[i] for i in range(self.n_agents)]  # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
-
-
-
-
-
-
-
+        rewards = [-max_reward] * self.n_sensors
+        dones = [False] * self.n_sensors
+        for i, action in enumerate(actions):
+            print(f"\nAction in WSNRoutingEnv: {action} and type: {type(action)}")
+            if action not in range(self.n_sensors + 1):
+                raise ValueError("Invalid action!")
+
+            if i >= self.n_sensors:
+                raise ValueError("Invalid sensor index!")  # the number of actions is greater than the number of sensors
+
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue  # Skip if sensor has no energy left or no packets to transmit

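The new step() body iterates over one action entry per agent and validates each against the range 0..n_sensors. Judging from the branches further down in this diff, a value equal to n_sensors appears to select the base station and any smaller value selects another sensor as the next hop; that reading is an inference from the diff, not something the package documents. A standalone sketch of the same decoding, using hypothetical names:

    import numpy as np

    def decode_actions(action_vector, n_sensors):
        # Illustrative helper, not part of wsn_env.py: map a flat MultiDiscrete
        # sample to a human-readable next hop per sensor.
        decoded = []
        for a in action_vector:
            if a not in range(n_sensors + 1):
                raise ValueError("Invalid action!")
            decoded.append("base_station" if a == n_sensors else f"sensor_{a}")
        return decoded

    print(decode_actions(np.array([2, 0, 3]), n_sensors=3))
    # ['sensor_2', 'sensor_0', 'base_station']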
@@ -163,10 +168,10 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                # rewards[i] =
-
-                #
-
+                # rewards[i] = self.compute_individual_rewards(i, action)
+                rewards[i] = np.ones(input_dim) * max_reward  # Reward for transmitting data to the base station
+                # rewards[i] = np.ones(input_dim)  # Reward for transmitting data to the base station
+                dones[i] = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
@@ -191,20 +196,18 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-
-                reward = self.compute_individual_rewards(i, action)
+                rewards[i] = self.compute_individual_rewards(i, action)

                 # Update the number of packets
                 self.number_of_packets[action] += self.number_of_packets[i]
-
             self.number_of_packets[i] = 0  # Reset the number of packets of the sensor i
             # Calculate final reward
             # rewards[i] = self.compute_attention_rewards(rewards[i])
-
-
-
-
-
+            rewards[i] = np.mean(rewards[i])
+            # rewards[i] = self.compute_weighted_sum_rewards(rewards[i])
+        for i in range(self.n_sensors):
+            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
+                dones[i] = True

         # Integrate the mobility of the sensors
         # self.integrate_mobility()
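In the hunks above each sensor's reward starts as a vector of per-criterion components (from compute_individual_rewards, or np.ones(input_dim) * max_reward on delivery to the base station) and is then collapsed to one scalar with np.mean. A minimal numeric illustration (the component values are invented; input_dim and max_reward are module-level values not shown in this diff):

    import numpy as np

    # Hypothetical per-criterion rewards for one sensor (angle, distance, energy, ...)
    individual = np.array([0.8, 0.6, 0.9, 0.7])

    scalar_reward = np.mean(individual)   # what `rewards[i] = np.mean(rewards[i])` produces
    print(scalar_reward)                  # 0.75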
@@ -216,45 +219,15 @@ class WSNRoutingEnv(gym.Env):

         self.get_metrics()

-
-
-
-        #
-        #
-
+        rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards]  # Convert the rewards to a list of floats
+        # rewards = np.sum(rewards)  # Sum the rewards of all agents
+        rewards = np.mean(rewards)  # Average the rewards of all agents
+        # rewards = np.mean(self.compute_network_rewards())  # Average the rewards of all agents
+        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
+        dones = all(dones)  # Done if all agents are done

-
-        # self.number_of_steps += 1
+        return self._get_obs(), rewards, dones, self.get_metrics()

-
-
-        # self.episode_return += reward
-        # if self.number_of_steps >= self.num_timesteps:
-        # self.episode_returns.append(self.episode_return)
-        # self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
-        # self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
-        # self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
-        # self.episode_network_throughput.append(self.network_throughput)
-        # self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
-        # self.episode_network_lifetime.append(self.network_lifetime)
-        # self.episode_average_latency.append(self.average_latency)
-
-        # metrics = {
-        # "returns_PPO": self.episode_returns,
-        # "std_remaining_energy_PPO": self.episode_std_remaining_energy,
-        # "total_consumption_energy_PPO": self.episode_total_consumption_energy,
-        # "mean_remaining_energy_PPO": self.episode_mean_remaining_energy,
-        # "network_throughput_PPO": self.episode_network_throughput,
-        # "packet_delivery_ratio_PPO": self.episode_packet_delivery_ratio,
-        # "network_lifetime_PPO": self.episode_network_lifetime,
-        # "average_latency_PPO": self.episode_average_latency
-        # }
-
-        # for metric_name, metric_value in metrics.items():
-        # np.save(f"{base_back_up_dir}{metric_name}_{self.version}.npy", np.array(metric_value))
-
-        # return self._get_obs(), rewards, dones, {}
-        return self._get_obs(), reward, done, self.get_metrics()

     def _get_obs(self):
         return [{'remaining_energy': np.array([e]),
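With the changes above, step() now returns a single averaged float reward, one boolean done flag (all(dones)), and the value of get_metrics() in the info slot. A usage sketch, assuming the environment can be constructed directly (its constructor arguments are not visible in this diff):

    from gym_examples.envs.wsn_env import WSNRoutingEnv

    env = WSNRoutingEnv()                        # constructor arguments, if any, not shown here
    obs = env.reset()
    action = env.action_space.sample()           # one next-hop choice per agent
    obs, reward, done, info = env.step(action)

    print(type(reward), reward)   # a single float: mean of the per-sensor rewards
    print(done)                   # True only once every sensor is done
    print(info)                   # whatever get_metrics() returns (throughput, latency, ...)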
@@ -263,6 +236,7 @@ class WSNRoutingEnv(gym.Env):
                 'number_of_packets': np.array([d])
                 } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]

+
     def _get_observation_space(self):
         return Dict({
             'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
@@ -271,23 +245,29 @@ class WSNRoutingEnv(gym.Env):
             'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
         })

+
     def get_state(self):
         return self._get_obs()

+
     def get_avail_actions(self):
         return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]

+
     def update_sensor_energies(self, i, delta_energy):
         self.remaining_energy[i] -= delta_energy

+
     def transmission_energy(self, number_of_packets, distance):
         # energy consumption for transmitting data on a distance
         return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

+
     def reception_energy(self, number_of_packets):
         # energy consumption for receiving data
         return number_of_packets * info_amount * Eelec

+
     def compute_angle_vectors(self, i, action):
         '''
         Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
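transmission_energy and reception_energy above follow a first-order radio model: sending k packets of info_amount bits over a distance d costs k * info_amount * (Eelec + Eamp * d**2), and receiving them costs k * info_amount * Eelec. A worked example with placeholder constants (Eelec, Eamp and info_amount are defined elsewhere in wsn_env.py and are not visible in this diff; the numbers below are conventional textbook values, not the package's):

    # Placeholder constants in the spirit of the usual first-order radio model.
    Eelec = 50e-9         # J per bit for the transceiver electronics
    Eamp = 100e-12        # J per bit per m^2 for the amplifier
    info_amount = 4000    # bits per packet

    def transmission_energy(number_of_packets, distance):
        return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

    def reception_energy(number_of_packets):
        return number_of_packets * info_amount * Eelec

    print(transmission_energy(1, 50))   # 4000 * (50e-9 + 100e-12 * 2500) = 0.0012 J
    print(reception_energy(1))          # 4000 * 50e-9 = 0.0002 J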
@@ -301,6 +281,7 @@ class WSNRoutingEnv(gym.Env):

         return np.arccos(np.clip(cosine_angle, -1, 1))

+
     def compute_reward_angle(self, i, action):
         '''
         Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
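compute_angle_vectors returns the arccos of a clipped cosine, i.e. the angle between the vector from sensor i to its chosen next hop and the vector from sensor i to the base station; only its docstring and return line appear in this diff. A standalone numpy sketch of that computation (positions and the helper name are invented for the example):

    import numpy as np

    def angle_between(sensor_pos, next_hop_pos, base_station_pos):
        # Angle in radians between (sensor -> next hop) and (sensor -> base station).
        v1 = next_hop_pos - sensor_pos
        v2 = base_station_pos - sensor_pos
        cosine_angle = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
        return np.arccos(np.clip(cosine_angle, -1, 1))

    print(angle_between(np.array([0.0, 0.0]), np.array([1.0, 0.0]), np.array([1.0, 1.0])))
    # ~0.785 rad (45 degrees)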
@@ -313,6 +294,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_angle, 0, 1)
         # return np.clip(- normalized_angle, -1, 1)

+
     def compute_reward_distance(self, i, action):
         '''
         Compute the reward based on the distance to the next hop
@@ -327,6 +309,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
         # return np.clip(-normalized_distance_to_next_hop, -1, 1)

+
     def compute_reward_consumption_energy(self, i, action):
         '''
         Compute the reward based on the total energy consumption (transmission, reception)
@@ -349,6 +332,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)

+
     def compute_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy
@@ -361,6 +345,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
     def compute_reward_number_of_packets(self, action):
         '''
         Compute the reward based on the number of packets of the receiver
@@ -374,6 +359,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_number_of_packets, 0, 1)
         # return np.clip(- normalized_number_of_packets, -1, 1)

+
     def compute_individual_rewards(self, i, action):
         '''
         Compute the individual rewards
@@ -413,6 +399,7 @@ class WSNRoutingEnv(gym.Env):

         return np.concatenate((rewards_energy, rewards_performance))

+
     def network_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -425,6 +412,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
         # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
     def network_reward_consumption_energy(self):
         '''
         Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -437,6 +425,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_total_energy, 0, 1)
         # return np.clip(- normalized_total_energy, -1, 1)

+
     def compute_reward_packet_delivery_ratio(self):
         '''
         Compute the reward based on the packet delivery ratio
@@ -444,6 +433,7 @@ class WSNRoutingEnv(gym.Env):
         packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
         return np.clip(packet_delivery_ratio, 0, 1)

+
     def compute_reward_latency(self):
         '''
         Compute the reward based on the average latency
@@ -455,6 +445,7 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(1 - normalized_latency, 0, 1)
         # return np.clip(- normalized_latency, -1, 1)

+
     def compute_reward_network_throughput(self):
         '''
         Compute the reward based on the network throughput
@@ -464,6 +455,7 @@ class WSNRoutingEnv(gym.Env):
         normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
         return np.clip(normalized_throughput, 0, 1)

+
     def compute_attention_rewards(self, rewards):
         '''
         Compute the attention-based rewards
@@ -472,6 +464,7 @@ class WSNRoutingEnv(gym.Env):
         final_reward = net(rewards)
         return final_reward

+
     def integrate_mobility(self):
         '''
         Integrate the mobility of the sensors after each step
@@ -484,6 +477,7 @@ class WSNRoutingEnv(gym.Env):
             if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
                 self.sensor_positions[i] -= displacement[i]

+
     def get_metrics(self):
         # Calculate network throughput
         self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
gym_examples-3.0.264.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=OE2WBgKyEbqBOaXf4v4N5a0qFaCkN5iUcgmZIdLghqk,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=t0uJq77RsuDKn_VRKh8dY9khp4DE1etoHzaUd582OSw,24905
+gym_examples-3.0.264.dist-info/METADATA,sha256=4l07YRLJpfbh8kvvuZIlaOTAls-N81hZPxxvNNAu8bM,412
+gym_examples-3.0.264.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.264.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.264.dist-info/RECORD,,
gym_examples-3.0.262.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=CYfXHf7cNLnC7-qIfJQRpOs1xZSNHDP-MXO0DIQF2r0,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=i2aXordG52k5GhxIYQ1AVP2EiBzHtFMHUqwjAmdhWBQ,26481
-gym_examples-3.0.262.dist-info/METADATA,sha256=nUKnPlBtDhKZCsEpJqp8sdmStdg6ePbgbKlglo7RMck,412
-gym_examples-3.0.262.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.262.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.262.dist-info/RECORD,,
{gym_examples-3.0.262.dist-info → gym_examples-3.0.264.dist-info}/WHEEL: file without changes
{gym_examples-3.0.262.dist-info → gym_examples-3.0.264.dist-info}/top_level.txt: file without changes