gym-examples 3.0.770__py3-none-any.whl → 3.0.772__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +34 -34
- {gym_examples-3.0.770.dist-info → gym_examples-3.0.772.dist-info}/METADATA +1 -1
- gym_examples-3.0.772.dist-info/RECORD +7 -0
- gym_examples-3.0.770.dist-info/RECORD +0 -7
- {gym_examples-3.0.770.dist-info → gym_examples-3.0.772.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.770.dist-info → gym_examples-3.0.772.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -150,7 +150,7 @@ class WSNRoutingEnv(gym.Env):
|
|
150
150
|
self.number_of_steps += 1
|
151
151
|
self.steps += 1
|
152
152
|
# rewards = [-max_reward] * self.n_sensors
|
153
|
-
reward_init = [
|
153
|
+
reward_init = np.array([- max_reward] * input_dim)
|
154
154
|
rewards = [reward_init] * self.n_sensors
|
155
155
|
dones = [False] * self.n_sensors
|
156
156
|
for i, action in enumerate(actions):
|
@@ -181,7 +181,7 @@ class WSNRoutingEnv(gym.Env):
|
|
181
181
|
|
182
182
|
self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
|
183
183
|
|
184
|
-
rewards[i] = [max_reward] * input_dim # Reward for transmitting data to the base station
|
184
|
+
rewards[i] = np.array([max_reward] * input_dim) # Reward for transmitting data to the base station
|
185
185
|
dones[i] = True
|
186
186
|
else:
|
187
187
|
distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
|
@@ -219,7 +219,7 @@ class WSNRoutingEnv(gym.Env):
|
|
219
219
|
|
220
220
|
self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
|
221
221
|
|
222
|
-
rewards[i] = self.compute_attention_rewards(rewards[i]) # Compute the attention-based reward
|
222
|
+
# rewards[i] = self.compute_attention_rewards(rewards[i]) # Compute the attention-based reward
|
223
223
|
|
224
224
|
# Integrate the mobility of the sensors
|
225
225
|
# self.integrate_mobility()
|
@@ -231,9 +231,9 @@ class WSNRoutingEnv(gym.Env):
|
|
231
231
|
|
232
232
|
self.get_metrics()
|
233
233
|
|
234
|
-
rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float
|
235
|
-
|
236
|
-
rewards = np.sum(rewards) # Sum the rewards of all the sensors
|
234
|
+
# rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float
|
235
|
+
rewards = self.compute_attention_reward(rewards) # Compute the attention-based reward
|
236
|
+
# rewards = np.sum(rewards) # Sum the rewards of all the sensors
|
237
237
|
|
238
238
|
for i in range(self.n_sensors):
|
239
239
|
if not dones[i]:
|
@@ -470,43 +470,43 @@ class WSNRoutingEnv(gym.Env):
|
|
470
470
|
return np.clip(normalized_throughput, 0, 1)
|
471
471
|
|
472
472
|
|
473
|
-
def compute_attention_rewards(self, reward):
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
473
|
+
# def compute_attention_rewards(self, reward):
|
474
|
+
# '''
|
475
|
+
# Compute the attention-based rewards
|
476
|
+
# '''
|
477
|
+
# rewards_i = torch.tensor(reward, dtype=torch.double)
|
478
|
+
# return net(rewards_i)
|
479
479
|
|
480
480
|
|
481
|
-
|
482
|
-
|
483
|
-
|
481
|
+
def compute_attention_reward(self, rewards, query_value=None):
    '''
    Compute the attention-based reward for the network.

    Every sensor's reward vector is scored against a constant query vector
    with a scaled dot product, the scores are converted to attention weights
    with a softmax, and the attention-weighted sum of the reward vectors is
    reduced to a single scalar.

    Input:
        - rewards: list of per-sensor reward vectors, all of the same length d
        - query_value: value used for every entry of the query vector; when
          None, the `max_reward` name from the surrounding module is used
    Output:
        - final_reward: scalar reward for the network (0.0 when `rewards`
          is empty)
    '''
    # Nothing to attend over: an empty weighted sum is zero.
    if len(rewards) == 0:
        return 0.0

    if query_value is None:
        # `max_reward` is a free name here; it is resolved from the module
        # this method lives in, exactly as before.
        query_value = max_reward

    # One row per sensor, one column per reward dimension.
    reward_matrix = np.asarray(rewards, dtype=float)

    # Dimension of the sensor embedding
    d = reward_matrix.shape[1]

    # The query is a constant vector (the "ideal" reward of reaching the
    # base station).
    query_vector = np.full(d, query_value, dtype=float)

    # Scaled dot-product similarities between the query and each reward
    scores = reward_matrix @ query_vector / np.sqrt(d)

    # Softmax. Subtracting the largest score leaves the weights unchanged
    # mathematically but keeps np.exp from overflowing to inf (which would
    # turn every weight into nan).
    exp_scores = np.exp(scores - scores.max())
    attention_weights = exp_scores / exp_scores.sum()

    # Attention-weighted sum of the reward vectors, shape (d,)
    final_reward = attention_weights @ reward_matrix

    return np.sum(final_reward)
|
510
510
|
|
511
511
|
|
512
512
|
def integrate_mobility(self):
|
@@ -0,0 +1,7 @@
|
|
1
|
+
gym_examples/__init__.py,sha256=cx4oUpQUqBd33c8NwakCIA6-mn1WlAR-6VrWE9vk4rw,166
|
2
|
+
gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
|
3
|
+
gym_examples/envs/wsn_env.py,sha256=CEI6uM78EVY5QEXjslxfaTXDP-wTSklrM9yghckQ-vk,26926
|
4
|
+
gym_examples-3.0.772.dist-info/METADATA,sha256=d0VVwwj9LwPD3GmPrJ1XD7yn73uQ_tg6RDwzoFcSOeQ,412
|
5
|
+
gym_examples-3.0.772.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
6
|
+
gym_examples-3.0.772.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
|
7
|
+
gym_examples-3.0.772.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
gym_examples/__init__.py,sha256=7wzqkNR1J58WjxpbMblWYIIpQrdittcbz6AHBlfddF0,166
|
2
|
-
gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
|
3
|
-
gym_examples/envs/wsn_env.py,sha256=GcJY-lbqHO-68SUcOAhSsz4q-BNdiYd_H0uIUISGrHc,26911
|
4
|
-
gym_examples-3.0.770.dist-info/METADATA,sha256=-EY1uciNYFEHVyoXMCBUtOSpA3du7bYkpk-GsbPop7U,412
|
5
|
-
gym_examples-3.0.770.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
6
|
-
gym_examples-3.0.770.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
|
7
|
-
gym_examples-3.0.770.dist-info/RECORD,,
|
File without changes
|
File without changes
|