gym-examples 3.0.769__py3-none-any.whl → 3.0.770__py3-none-any.whl
This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +35 -33
- {gym_examples-3.0.769.dist-info → gym_examples-3.0.770.dist-info}/METADATA +1 -1
- gym_examples-3.0.770.dist-info/RECORD +7 -0
- gym_examples-3.0.769.dist-info/RECORD +0 -7
- {gym_examples-3.0.769.dist-info → gym_examples-3.0.770.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.769.dist-info → gym_examples-3.0.770.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -150,7 +150,7 @@ class WSNRoutingEnv(gym.Env):
|
|
150
150
|
self.number_of_steps += 1
|
151
151
|
self.steps += 1
|
152
152
|
# rewards = [-max_reward] * self.n_sensors
|
153
|
-
reward_init =
|
153
|
+
reward_init = [0]
|
154
154
|
rewards = [reward_init] * self.n_sensors
|
155
155
|
dones = [False] * self.n_sensors
|
156
156
|
for i, action in enumerate(actions):
|
@@ -181,7 +181,7 @@ class WSNRoutingEnv(gym.Env):
|
|
181
181
|
|
182
182
|
self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
|
183
183
|
|
184
|
-
rewards[i] =
|
184
|
+
rewards[i] = [max_reward] * input_dim # Reward for transmitting data to the base station
|
185
185
|
dones[i] = True
|
186
186
|
else:
|
187
187
|
distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
|
@@ -219,6 +219,8 @@ class WSNRoutingEnv(gym.Env):
|
|
219
219
|
|
220
220
|
self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
|
221
221
|
|
222
|
+
rewards[i] = self.compute_attention_rewards(rewards[i]) # Compute the attention-based reward
|
223
|
+
|
222
224
|
# Integrate the mobility of the sensors
|
223
225
|
# self.integrate_mobility()
|
224
226
|
|
@@ -229,8 +231,9 @@ class WSNRoutingEnv(gym.Env):
|
|
229
231
|
|
230
232
|
self.get_metrics()
|
231
233
|
|
232
|
-
|
233
|
-
rewards = self.compute_attention_reward(rewards) # Compute the attention-based reward
|
234
|
+
rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float
|
235
|
+
# rewards = self.compute_attention_reward(rewards) # Compute the attention-based reward
|
236
|
+
rewards = np.sum(rewards) # Sum the rewards of all the sensors
|
234
237
|
|
235
238
|
for i in range(self.n_sensors):
|
236
239
|
if not dones[i]:
|
@@ -467,44 +470,43 @@ class WSNRoutingEnv(gym.Env):
|
|
467
470
|
return np.clip(normalized_throughput, 0, 1)
|
468
471
|
|
469
472
|
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
# return net(rewards_i).item()
|
473
|
+
def compute_attention_rewards(self, reward):
|
474
|
+
'''
|
475
|
+
Compute the attention-based rewards
|
476
|
+
'''
|
477
|
+
rewards_i = torch.tensor(reward, dtype=torch.double)
|
478
|
+
return net(rewards_i)
|
477
479
|
|
478
480
|
|
479
|
-
def compute_attention_reward(self, rewards):
|
480
|
-
|
481
|
-
|
481
|
+
# def compute_attention_reward(self, rewards):
|
482
|
+
# '''
|
483
|
+
# Compute the attention-based reward for the network with Q, V, and K matrices
|
482
484
|
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
485
|
+
# Input:
|
486
|
+
# - rewards: list of rewards for each sensor
|
487
|
+
# Output:
|
488
|
+
# - final_reward: final reward for the network
|
489
|
+
# '''
|
490
|
+
# # dimension of the sensor embedding
|
491
|
+
# d = len(rewards[0])
|
490
492
|
|
491
|
-
|
492
|
-
|
493
|
+
# # All sensors are represented by their raw rewards
|
494
|
+
# query_vector = np.array([max_reward] * d) # Basically, the target is the base station
|
493
495
|
|
494
|
-
|
495
|
-
|
496
|
+
# # Similarities between the query vector and the rewards
|
497
|
+
# similarities = [np.dot(query_vector, reward) for reward in rewards]
|
496
498
|
|
497
|
-
|
498
|
-
|
499
|
+
# # Similarities scaling
|
500
|
+
# similarities = [similarity / np.sqrt(d) for similarity in similarities]
|
499
501
|
|
500
|
-
|
501
|
-
|
502
|
-
|
502
|
+
# # Softmax operation
|
503
|
+
# denominator = sum([np.exp(similarity) for similarity in similarities])
|
504
|
+
# attention_weights = [np.exp(similarity) / denominator for similarity in similarities]
|
503
505
|
|
504
|
-
|
505
|
-
|
506
|
+
# # Weighted sum of the rewards
|
507
|
+
# final_reward = sum([attention_weight * reward for attention_weight, reward in zip(attention_weights, rewards)])
|
506
508
|
|
507
|
-
|
509
|
+
# return np.sum(final_reward)
|
508
510
|
|
509
511
|
|
510
512
|
def integrate_mobility(self):
|
@@ -0,0 +1,7 @@
|
|
1
|
+
gym_examples/__init__.py,sha256=7wzqkNR1J58WjxpbMblWYIIpQrdittcbz6AHBlfddF0,166
|
2
|
+
gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
|
3
|
+
gym_examples/envs/wsn_env.py,sha256=GcJY-lbqHO-68SUcOAhSsz4q-BNdiYd_H0uIUISGrHc,26911
|
4
|
+
gym_examples-3.0.770.dist-info/METADATA,sha256=-EY1uciNYFEHVyoXMCBUtOSpA3du7bYkpk-GsbPop7U,412
|
5
|
+
gym_examples-3.0.770.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
6
|
+
gym_examples-3.0.770.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
|
7
|
+
gym_examples-3.0.770.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
gym_examples/__init__.py,sha256=JrDpTNbLnzE4_62ux2BYcurQHhbfV4ZjpaK9TfNmgvM,166
|
2
|
-
gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
|
3
|
-
gym_examples/envs/wsn_env.py,sha256=O2DXjVkYubXn5aLNeCf2AwQ1FUZlwp8R2xYCR1MDqCk,26806
|
4
|
-
gym_examples-3.0.769.dist-info/METADATA,sha256=6oumqB5XQ0yhGHDK7G4E5yj2JeK0TxSqFqb3XUsUjHw,412
|
5
|
-
gym_examples-3.0.769.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
6
|
-
gym_examples-3.0.769.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
|
7
|
-
gym_examples-3.0.769.dist-info/RECORD,,
|
File without changes
|
File without changes
|