gym-examples 3.0.768-py3-none-any.whl → 3.0.770-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
  entry_point="gym_examples.envs:WSNRoutingEnv",
  )

- __version__ = "3.0.768"
+ __version__ = "3.0.770"
gym_examples/envs/wsn_env.py CHANGED
@@ -150,7 +150,7 @@ class WSNRoutingEnv(gym.Env):
  self.number_of_steps += 1
  self.steps += 1
  # rewards = [-max_reward] * self.n_sensors
- reward_init = [0] * input_dim
+ reward_init = [0]
  rewards = [reward_init] * self.n_sensors
  dones = [False] * self.n_sensors
  for i, action in enumerate(actions):
@@ -181,7 +181,7 @@ class WSNRoutingEnv(gym.Env):

  self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i

- rewards[i] = np.array([max_reward] * input_dim) # Reward for transmitting data to the base station
+ rewards[i] = [max_reward] * input_dim # Reward for transmitting data to the base station
  dones[i] = True
  else:
  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
@@ -219,6 +219,8 @@ class WSNRoutingEnv(gym.Env):

  self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i

+ rewards[i] = self.compute_attention_rewards(rewards[i]) # Compute the attention-based reward
+
  # Integrate the mobility of the sensors
  # self.integrate_mobility()

@@ -229,8 +231,9 @@ class WSNRoutingEnv(gym.Env):

  self.get_metrics()

- # rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float
- rewards = self.compute_attention_reward(rewards) # Compute the attention-based reward
+ rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float
+ # rewards = self.compute_attention_reward(rewards) # Compute the attention-based reward
+ rewards = np.sum(rewards) # Sum the rewards of all the sensors

  for i in range(self.n_sensors):
  if not dones[i]:
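
Read together with the hunks above, step() now builds a per-sensor reward list, passes each entry through the learned attention module via compute_attention_rewards, converts any torch tensors to floats, and sums everything into a single scalar reward. Below is a minimal, self-contained sketch of that aggregation; attention_net, max_reward, and the small sensor counts are placeholder assumptions for illustration, since the real net module and constants are defined outside this diff.

import numpy as np
import torch

# Stand-in for the package's `net` attention module (not part of this diff).
def attention_net(reward_vec: torch.Tensor) -> torch.Tensor:
    return reward_vec.mean()  # placeholder: collapse the reward vector to one value

max_reward = 1.0              # assumed constant; defined elsewhere in the package
n_sensors, input_dim = 4, 3   # illustrative sizes

# Mirrors the updated step() reward pipeline:
rewards = [[0]] * n_sensors                       # reward_init = [0] per sensor
rewards[0] = [max_reward] * input_dim             # e.g. sensor 0 delivered to the base station
rewards = [attention_net(torch.tensor(r, dtype=torch.double)) for r in rewards]
rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards]
total_reward = np.sum(rewards)                    # single scalar returned for the step
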
@@ -467,44 +470,43 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(normalized_throughput, 0, 1)


- # def compute_attention_rewards(self, reward):
- # '''
- # Compute the attention-based rewards
- # '''
- # rewards_i = torch.tensor(reward, dtype=torch.double)
- # rewards_i = rewards_i.unsqueeze(0) # Add batch dimension
- # return net(rewards_i).item()
+ def compute_attention_rewards(self, reward):
+ '''
+ Compute the attention-based rewards
+ '''
+ rewards_i = torch.tensor(reward, dtype=torch.double)
+ return net(rewards_i)


- def compute_attention_reward(self, rewards):
- '''
- Compute the attention-based reward for the network with Q, V, and K matrices
+ # def compute_attention_reward(self, rewards):
+ # '''
+ # Compute the attention-based reward for the network with Q, V, and K matrices

- Input:
- - rewards: list of rewards for each sensor
- Output:
- - final_reward: final reward for the network
- '''
- # dimension of the sensor embedding
- d = len(rewards[0])
+ # Input:
+ # - rewards: list of rewards for each sensor
+ # Output:
+ # - final_reward: final reward for the network
+ # '''
+ # # dimension of the sensor embedding
+ # d = len(rewards[0])

- # All sensors are represented by their raw rewards
- query_vector = np.array([max_reward] * d) # Basically, the target is the base station
+ # # All sensors are represented by their raw rewards
+ # query_vector = np.array([max_reward] * d) # Basically, the target is the base station

- # Similarities between the query vector and the rewards
- similarities = [np.dot(query_vector, reward) for reward in rewards if isinstance(reward, np.ndarray)]
+ # # Similarities between the query vector and the rewards
+ # similarities = [np.dot(query_vector, reward) for reward in rewards]

- # Similarities scaling
- similarities = [similarity / np.sqrt(d) for similarity in similarities]
+ # # Similarities scaling
+ # similarities = [similarity / np.sqrt(d) for similarity in similarities]

- # Softmax operation
- denominator = sum([np.exp(similarity) for similarity in similarities])
- attention_weights = [np.exp(similarity) / denominator for similarity in similarities]
+ # # Softmax operation
+ # denominator = sum([np.exp(similarity) for similarity in similarities])
+ # attention_weights = [np.exp(similarity) / denominator for similarity in similarities]

- # Weighted sum of the rewards
- final_reward = sum([attention_weight * reward for attention_weight, reward in zip(attention_weights, rewards) if isinstance(reward, np.ndarray)])
+ # # Weighted sum of the rewards
+ # final_reward = sum([attention_weight * reward for attention_weight, reward in zip(attention_weights, rewards)])

- return np.sum(final_reward)
+ # return np.sum(final_reward)


  def integrate_mobility(self):
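
For reference, the compute_attention_reward path being commented out above is a single-query scaled dot-product attention over the per-sensor reward vectors: the query is a vector of max_reward values (the base-station target), dot-product similarities are scaled by sqrt(d), normalised with a softmax, and used to weight the rewards before summing. A minimal standalone sketch of that computation follows; max_reward and the sample reward vectors are placeholder values, not taken from the package.

import numpy as np

max_reward = 1.0  # placeholder; the real constant is defined outside this diff

def attention_reward(rewards):
    """Single-query scaled dot-product attention over per-sensor reward vectors."""
    d = len(rewards[0])                                    # reward-vector dimension
    query = np.array([max_reward] * d)                     # the "target" is the base station
    sims = [np.dot(query, r) / np.sqrt(d) for r in rewards]
    weights = np.exp(sims) / np.sum(np.exp(sims))          # softmax over sensors
    weighted = sum(w * r for w, r in zip(weights, rewards))
    return float(np.sum(weighted))                         # collapse to a single scalar

# Example with three sensors and 3-dimensional reward vectors (illustrative values):
print(attention_reward([np.zeros(3), np.ones(3), np.full(3, 0.5)]))
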
gym_examples-3.0.768.dist-info/METADATA → gym_examples-3.0.770.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-examples
- Version: 3.0.768
+ Version: 3.0.770
  Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
  Home-page: https://github.com/gedji/CODES.git
  Author: Georges Djimefo
gym_examples-3.0.770.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ gym_examples/__init__.py,sha256=7wzqkNR1J58WjxpbMblWYIIpQrdittcbz6AHBlfddF0,166
+ gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+ gym_examples/envs/wsn_env.py,sha256=GcJY-lbqHO-68SUcOAhSsz4q-BNdiYd_H0uIUISGrHc,26911
+ gym_examples-3.0.770.dist-info/METADATA,sha256=-EY1uciNYFEHVyoXMCBUtOSpA3du7bYkpk-GsbPop7U,412
+ gym_examples-3.0.770.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ gym_examples-3.0.770.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+ gym_examples-3.0.770.dist-info/RECORD,,
gym_examples-3.0.768.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
- gym_examples/__init__.py,sha256=ErBo6hRVSUgQvxu0zaJqLRmZM9LTYXQ_1HIgV4bg01o,166
- gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
- gym_examples/envs/wsn_env.py,sha256=baJSRHpxNnasenWc-lGGjeHvXkJQaSLSpr_pSP9SrmQ,26864
- gym_examples-3.0.768.dist-info/METADATA,sha256=U9TYdpAF3BmSEktZwNYaDSjCV3bjKezIUbAMsQBgfGs,412
- gym_examples-3.0.768.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- gym_examples-3.0.768.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
- gym_examples-3.0.768.dist-info/RECORD,,