gym-examples 3.0.763__py3-none-any.whl → 3.0.764__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registry.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +34 -38
- {gym_examples-3.0.763.dist-info → gym_examples-3.0.764.dist-info}/METADATA +1 -1
- gym_examples-3.0.764.dist-info/RECORD +7 -0
- gym_examples-3.0.763.dist-info/RECORD +0 -7
- {gym_examples-3.0.763.dist-info → gym_examples-3.0.764.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.763.dist-info → gym_examples-3.0.764.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -150,7 +150,7 @@ class WSNRoutingEnv(gym.Env):
         self.number_of_steps += 1
         self.steps += 1
         # rewards = [-max_reward] * self.n_sensors
-        reward_init = [0] * input_dim
+        reward_init = np.array([0] * input_dim)
         rewards = [reward_init] * self.n_sensors
         dones = [False] * self.n_sensors
         for i, action in enumerate(actions):
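A note on this hunk: switching `reward_init` from a plain list to a NumPy array is what makes the attention arithmetic added later in this diff work, since `compute_attention_reward` multiplies float weights into these vectors, which is element-wise for arrays but repeats or raises for Python lists. A minimal sketch of the difference, with illustrative values:

import numpy as np

reward_list = [0] * 4           # Python list
reward_arr = np.array([0] * 4)  # NumPy array

print(reward_list * 2)  # [0, 0, 0, 0, 0, 0, 0, 0] -- list repetition
print(reward_arr * 2)   # [0 0 0 0]                -- element-wise multiply

# A float attention weight times a list raises TypeError;
# times an array it scales element-wise:
# 0.25 * reward_list                      -> TypeError
print(0.25 * np.array([1.0, 2.0, 3.0]))  # [0.25 0.5  0.75]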
@@ -179,7 +179,7 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                rewards[i] = [max_reward] * input_dim # Reward for transmitting data to the base station
+                rewards[i] = np.array([max_reward] * input_dim) # Reward for transmitting data to the base station
                 dones[i] = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
@@ -211,10 +211,6 @@ class WSNRoutingEnv(gym.Env):
                 self.number_of_packets[action] += self.number_of_packets[i]

             self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
-
-        for i in range(self.n_sensors):
-            # Calculate final reward
-            rewards[i] = self.compute_attention_rewards(rewards[i])

         # Integrate the mobility of the sensors
         # self.integrate_mobility()
@@ -227,7 +223,7 @@ class WSNRoutingEnv(gym.Env):
         self.get_metrics()

         # rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float
-        rewards =
+        rewards = self.compute_attention_reward(rewards) # Compute the attention-based reward

         for i in range(self.n_sensors):
             if not dones[i]:
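This hunk wires in the replacement for the deleted per-sensor loop: `compute_attention_reward` (added near the end of this diff) aggregates the whole list of per-sensor reward vectors in one pass. Reading off the added method, with d-dimensional reward vectors r_i and the constant query q = (max_reward, ..., max_reward), the value assigned to `rewards` is scaled dot-product attention in which each r_i acts as both key and value:

    s_i = (q · r_i) / √d,    a_i = exp(s_i) / Σ_j exp(s_j),    final_reward = Σ_i a_i · r_i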
@@ -381,7 +377,7 @@ class WSNRoutingEnv(gym.Env):
         reward_dispersion_remaining_energy = self.compute_reward_dispersion_remaining_energy()
         reward_number_of_packets = self.compute_reward_number_of_packets(action)

-        rewards_energy = [reward_angle, reward_consumption_energy, reward_dispersion_remaining_energy, reward_number_of_packets]
+        rewards_energy = np.array([reward_angle, reward_consumption_energy, reward_dispersion_remaining_energy, reward_number_of_packets])

         #-- rewards related to the performance metrics
         reward_latency = self.compute_reward_latency()
@@ -389,7 +385,7 @@ class WSNRoutingEnv(gym.Env):
         reward_network_throughput = self.compute_reward_network_throughput()
         reward_packet_delivery_ratio = self.compute_reward_packet_delivery_ratio()

-        rewards_performance = [reward_latency, reward_network_throughput, reward_packet_delivery_ratio]
+        rewards_performance = np.array([reward_latency, reward_network_throughput, reward_packet_delivery_ratio])

         return rewards_energy

@@ -464,44 +460,44 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(normalized_throughput, 0, 1)


-    def compute_attention_rewards(self, reward):
-        '''
-        Compute the attention-based rewards
-        '''
-        rewards_i = torch.tensor(reward, dtype=torch.double)
-        rewards_i = rewards_i.unsqueeze(0) # Add batch dimension
-        return net(rewards_i).item()
+    # def compute_attention_rewards(self, reward):
+    #     '''
+    #     Compute the attention-based rewards
+    #     '''
+    #     rewards_i = torch.tensor(reward, dtype=torch.double)
+    #     rewards_i = rewards_i.unsqueeze(0) # Add batch dimension
+    #     return net(rewards_i).item()


-
-
-
+    def compute_attention_reward(self, rewards):
+        '''
+        Compute the attention-based reward for the network with Q, V, and K matrices

-
-
-
-
-
-
-
+        Input:
+        - rewards: list of rewards for each sensor
+        Output:
+        - final_reward: final reward for the network
+        '''
+        # dimension of the sensor embedding
+        d = len(rewards[0])

-
-
+        # All sensors are represented by their raw rewards
+        query_vector = np.array([max_reward] * d) # Basically, the target is the base station

-
-
+        # Similarities between the query vector and the rewards
+        similarities = [np.dot(query_vector, reward) for reward in rewards]

-
-
+        # Similarities scaling
+        similarities = [similarity / np.sqrt(d) for similarity in similarities]

-
-
-
+        # Softmax operation
+        denominator = sum([np.exp(similarity) for similarity in similarities])
+        attention_weights = [np.exp(similarity) / denominator for similarity in similarities]

-
-
+        # Weighted sum of the rewards
+        final_reward = sum([attention_weight * reward for attention_weight, reward in zip(attention_weights, rewards)])

-
+        return final_reward


     def integrate_mobility(self):
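Taken together, the added `compute_attention_reward` is scaled dot-product attention where each sensor's reward vector serves as both key and value and the query is a constant vector of `max_reward`. Below is a self-contained sketch of the same computation, vectorized, with a max-shifted softmax added for numerical stability (a standard tweak that is not in the diff); the function name, the default `max_reward`, and the usage values are illustrative, not part of the package:

import numpy as np

def attention_reward(rewards, max_reward=1.0):
    """Scaled dot-product attention over per-sensor reward vectors.

    rewards: list of equal-length 1-D arrays, one per sensor;
    they act as both keys and values, the query is constant.
    """
    R = np.stack(rewards)               # (n_sensors, d)
    d = R.shape[1]
    query = np.full(d, max_reward)      # constant query vector
    scores = R @ query / np.sqrt(d)     # scaled similarities, (n_sensors,)
    scores -= scores.max()              # shift before exp for stability
    weights = np.exp(scores) / np.exp(scores).sum()  # softmax
    return weights @ R                  # weighted sum of rewards, (d,)

# Illustrative usage: two sensors, 4-dimensional reward vectors
print(attention_reward([np.zeros(4), np.ones(4)]))

One consequence of this formulation: because the query is a positive constant vector, q · r_i is proportional to the sum of a sensor's reward components, so sensors with larger total rewards receive exponentially more weight and the network-level reward is dominated by the best-performing sensors.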
gym_examples-3.0.764.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=iGclI8newmGbCQqTxcbiIdqdKpVF-L4HIE49f_Ad9qQ,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=mlurW0_198GO2zyTX2xlU5KrrodDUoES1pxPDGWAq6k,26452
+gym_examples-3.0.764.dist-info/METADATA,sha256=OYFWn9IOfYO_l0MHyDiedJLEO8T6ocdiBBLS9OLmdOY,412
+gym_examples-3.0.764.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.764.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.764.dist-info/RECORD,,
gym_examples-3.0.763.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=hOOzP2zBWU0qLRAhWtW_UNS7uLGUECyNRi0BoHR-Q1E,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=WLrpU3V0rIctPV5vgVtHno59gdhosWnRngnHp2JwM4g,26543
-gym_examples-3.0.763.dist-info/METADATA,sha256=_G0-Xric89P7LPhAbKhYaArOoiGJH3jleKGY2uIwc2M,412
-gym_examples-3.0.763.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.763.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.763.dist-info/RECORD,,
{gym_examples-3.0.763.dist-info → gym_examples-3.0.764.dist-info}/WHEEL
File without changes
{gym_examples-3.0.763.dist-info → gym_examples-3.0.764.dist-info}/top_level.txt
File without changes