gym-examples 3.0.748__py3-none-any.whl → 3.0.750__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +39 -38
- {gym_examples-3.0.748.dist-info → gym_examples-3.0.750.dist-info}/METADATA +1 -1
- gym_examples-3.0.750.dist-info/RECORD +7 -0
- gym_examples-3.0.748.dist-info/RECORD +0 -7
- {gym_examples-3.0.748.dist-info → gym_examples-3.0.750.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.748.dist-info → gym_examples-3.0.750.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -150,7 +150,7 @@ class WSNRoutingEnv(gym.Env):
         self.number_of_steps += 1
         self.steps += 1
         # rewards = [-max_reward] * self.n_sensors
-        reward_init =
+        reward_init = [0] * input_dim
         rewards = [reward_init] * self.n_sensors
         dones = [False] * self.n_sensors
         for i, action in enumerate(actions):
@@ -179,7 +179,7 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                rewards[i] =
+                rewards[i] = [max_reward] * input_dim # Reward for transmitting data to the base station
                 dones[i] = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
@@ -225,10 +225,10 @@ class WSNRoutingEnv(gym.Env):

         self.get_metrics()

-        rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float
+        # rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float

-        rewards = self.
-        rewards = np.mean(rewards)
+        rewards = self.compute_attention_rewards(rewards)
+        # rewards = np.mean(rewards)

         for i in range(self.n_sensors):
             if not dones[i]:
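This hunk stops collapsing the per-sensor rewards with `np.mean`: each sensor now keeps a vector of reward components that is handed to the new `compute_attention_rewards` helper added further down in this diff. A minimal sketch of the shapes the change implies, assuming `input_dim` reward components per sensor; the concrete sizes below are illustrative, not taken from the package:

```python
# Sketch only: shapes inferred from the diff; the values of n_sensors and input_dim are assumptions.
import torch

n_sensors, input_dim = 10, 4
rewards = [[0.0] * input_dim for _ in range(n_sensors)]          # per-sensor reward vectors, as built from reward_init
batch = torch.tensor(rewards, dtype=torch.double).unsqueeze(0)   # shape (1, n_sensors, input_dim)
# compute_attention_rewards() forwards a tensor like `batch` through the attention
# network and returns its output in place of the scalar mean used in 3.0.748.
```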
@@ -382,7 +382,7 @@ class WSNRoutingEnv(gym.Env):
         reward_dispersion_remaining_energy = self.compute_reward_dispersion_remaining_energy()
         reward_number_of_packets = self.compute_reward_number_of_packets(action)

-        rewards_energy =
+        rewards_energy = [reward_angle, reward_consumption_energy, reward_dispersion_remaining_energy, reward_number_of_packets]

         #-- rewards related to the performance metrics
         reward_latency = self.compute_reward_latency()
@@ -390,7 +390,7 @@ class WSNRoutingEnv(gym.Env):
         reward_network_throughput = self.compute_reward_network_throughput()
         reward_packet_delivery_ratio = self.compute_reward_packet_delivery_ratio()

-        rewards_performance =
+        rewards_performance = [reward_latency, reward_network_throughput, reward_packet_delivery_ratio]

         return rewards_energy

@@ -399,12 +399,12 @@ class WSNRoutingEnv(gym.Env):

         reward_consumption_energy = self.network_reward_consumption_energy()
         reward_dispersion_remaining_energy = self.network_reward_dispersion_remaining_energy()
-        rewards_energy =
+        rewards_energy = [reward_consumption_energy, reward_dispersion_remaining_energy]

         reward_latency = self.compute_reward_latency()
         reward_network_throughput = self.compute_reward_network_throughput()
         reward_packet_delivery_ratio = self.compute_reward_packet_delivery_ratio()
-        rewards_performance =
+        rewards_performance = [reward_latency, reward_network_throughput, reward_packet_delivery_ratio]

         return np.concatenate((rewards_energy, rewards_performance))

@@ -465,44 +465,45 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(normalized_throughput, 0, 1)


-
-
-
-
-
-
-
+    def compute_attention_rewards(self, rewards):
+        '''
+        Compute the attention-based rewards
+        '''
+        rewards = torch.tensor(rewards, dtype=torch.double)
+        rewards = rewards.unsqueeze(0) # Add batch dimension
+        final_reward = net(rewards)
+        return final_reward


-    def compute_attention_reward(self, rewards):
-
-
+    # def compute_attention_reward(self, rewards):
+    #     '''
+    #     Compute the attention-based reward for the network with Q, V, and K matrices

-
-
-
-
-
-
-
+    #     Input:
+    #     - rewards: list of rewards for each sensor
+    #     Output:
+    #     - final_reward: final reward for the network
+    #     '''
+    #     # dimension of the sensor embedding
+    #     d = len(rewards[0])

-
-
+    #     # All sensors are represented by their raw rewards
+    #     query_vector = np.array([max_reward] * d) # Basically, the target is the base station

-
-
+    #     # Similarities between the query vector and the rewards
+    #     similarities = [np.dot(query_vector, reward) for reward in rewards]

-
-
+    #     # Similarities scaling
+    #     similarities = [similarity / np.sqrt(d) for similarity in similarities]

-
-
-
+    #     # Softmax operation
+    #     denominator = sum([np.exp(similarity) for similarity in similarities])
+    #     attention_weights = [np.exp(similarity) / denominator for similarity in similarities]

-
-
+    #     # Weighted sum of the rewards
+    #     final_reward = sum([attention_weight * reward for attention_weight, reward in zip(attention_weights, rewards)])

-
+    #     return final_reward


     def integrate_mobility(self):
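The new `compute_attention_rewards` calls a module-level `net` whose definition is not part of this diff. For orientation only, a scaled dot-product attention aggregator of the kind that call appears to expect could look like the sketch below; the class name, layer sizes, and pooling are assumptions, not the package's actual code:

```python
# Illustrative sketch only: the real `net` used by compute_attention_rewards is not shown in this diff.
import torch
import torch.nn as nn

class AttentionAggregator(nn.Module):  # hypothetical name and architecture
    def __init__(self, input_dim: int, hidden_dim: int = 64):
        super().__init__()
        # Q, K, V projections for scaled dot-product attention over the per-sensor reward vectors
        self.query = nn.Linear(input_dim, hidden_dim)
        self.key = nn.Linear(input_dim, hidden_dim)
        self.value = nn.Linear(input_dim, hidden_dim)
        self.out = nn.Linear(hidden_dim, 1)

    def forward(self, rewards: torch.Tensor) -> torch.Tensor:
        # rewards: (batch, n_sensors, input_dim), as produced by compute_attention_rewards
        q, k, v = self.query(rewards), self.key(rewards), self.value(rewards)
        scores = torch.softmax(q @ k.transpose(-2, -1) / k.shape[-1] ** 0.5, dim=-1)
        pooled = (scores @ v).mean(dim=1)       # aggregate attended values over sensors
        return self.out(pooled).squeeze(-1)     # one scalar reward per batch element

# .double() matches the torch.double tensor built in compute_attention_rewards; input_dim=4 is an assumption.
net = AttentionAggregator(input_dim=4).double()
```

Whatever the real definition is, it has to accept a double-precision tensor of shape `(1, n_sensors, input_dim)` and return the aggregate reward, since that is how the new method invokes it.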
gym_examples-3.0.750.dist-info/RECORD
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=qaPZA7Q93uDypjo58Stoco6cD-o4R03zzeYSCAE2JhU,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=8ne-Dm4xfoY-AYDyzOiZ1RjDtG6RjuWrXiiFQSix8Ac,26639
+gym_examples-3.0.750.dist-info/METADATA,sha256=LdlfRyDsFXXtHKkA1m-f2Gmb0_wXxb2dQPwpYuVD0OM,412
+gym_examples-3.0.750.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.750.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.750.dist-info/RECORD,,
gym_examples-3.0.748.dist-info/RECORD
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=eYYXxWIuq0ncCeOZlkOdiiSyvf-tuxzRHAPrh_PA3_c,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=MEp84-JlL4oPg0YTGl-PvVIOZdC4xi8cyBCfNkbtdDQ,26601
-gym_examples-3.0.748.dist-info/METADATA,sha256=j3_GmDZ2TiworAz03Jo9cnA5PrCbt1LlaBD5izqbJnU,412
-gym_examples-3.0.748.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.748.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.748.dist-info/RECORD,,
{gym_examples-3.0.748.dist-info → gym_examples-3.0.750.dist-info}/WHEEL
File without changes
{gym_examples-3.0.748.dist-info → gym_examples-3.0.750.dist-info}/top_level.txt
File without changes