gym-examples 3.0.763__py3-none-any.whl → 3.0.764__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
     entry_point="gym_examples.envs:WSNRoutingEnv",
 )
 
-__version__ = "3.0.763"
+__version__ = "3.0.764"
gym_examples/envs/wsn_env.py CHANGED
@@ -150,7 +150,7 @@ class WSNRoutingEnv(gym.Env):
 self.number_of_steps += 1
 self.steps += 1
 # rewards = [-max_reward] * self.n_sensors
-reward_init = [0] * input_dim
+reward_init = np.array([0] * input_dim)
 rewards = [reward_init] * self.n_sensors
 dones = [False] * self.n_sensors
 for i, action in enumerate(actions):
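The switch from a plain list to a NumPy array matters later in this release: the new compute_attention_reward scales each sensor's reward vector by a float attention weight, which a Python list cannot do. A minimal sketch of the difference, assuming illustrative values input_dim = 4 and max_reward = 1.0 (the actual module constants are not shown in this diff):

import numpy as np

input_dim = 4      # assumed size: one slot per reward component
max_reward = 1.0   # assumed value of the module-level constant

reward_as_list = [max_reward] * input_dim
reward_as_array = np.array([max_reward] * input_dim)

weight = 0.25
# weight * reward_as_list  -> TypeError: can't multiply sequence by non-int of type 'float'
print(weight * reward_as_array)   # [0.25 0.25 0.25 0.25]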
@@ -179,7 +179,7 @@ class WSNRoutingEnv(gym.Env):
 self.total_latency += self.packet_latency[i] + latency_per_hop
 self.packet_latency[i] = 0
 
-rewards[i] = [max_reward] * input_dim # Reward for transmitting data to the base station
+rewards[i] = np.array([max_reward] * input_dim) # Reward for transmitting data to the base station
 dones[i] = True
 else:
 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
@@ -211,10 +211,6 @@ class WSNRoutingEnv(gym.Env):
 self.number_of_packets[action] += self.number_of_packets[i]
 
 self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
-
-for i in range(self.n_sensors):
-    # Calculate final reward
-    rewards[i] = self.compute_attention_rewards(rewards[i])
 
 # Integrate the mobility of the sensors
 # self.integrate_mobility()
@@ -227,7 +223,7 @@ class WSNRoutingEnv(gym.Env):
 self.get_metrics()
 
 # rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float
-rewards = np.mean(rewards)
+rewards = self.compute_attention_reward(rewards) # Compute the attention-based reward
 
 for i in range(self.n_sensors):
     if not dones[i]:
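This is the behavioural core of the release: instead of collapsing every sensor's reward vector into one flat mean, step() now delegates to the new compute_attention_reward (shown in full further down). A toy comparison, with illustrative numbers and a hypothetical env instance:

import numpy as np

# Two sensors, input_dim = 4; values are illustrative only.
rewards = [np.array([0.9, 0.8, 0.7, 1.0]),   # sensor that delivered to the base station
           np.array([0.1, 0.2, 0.3, 0.0])]   # sensor that only made a local hop

old_aggregate = np.mean(rewards)   # previous behaviour: flat average over all entries -> 0.5
# New behaviour (see the method below): env.compute_attention_reward(rewards) weights each
# sensor's vector by its softmax similarity to an all-max_reward query, so the
# well-performing sensor dominates the aggregated reward.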
@@ -381,7 +377,7 @@ class WSNRoutingEnv(gym.Env):
 reward_dispersion_remaining_energy = self.compute_reward_dispersion_remaining_energy()
 reward_number_of_packets = self.compute_reward_number_of_packets(action)
 
-rewards_energy = [reward_angle, reward_consumption_energy, reward_dispersion_remaining_energy, reward_number_of_packets]
+rewards_energy = np.array([reward_angle, reward_consumption_energy, reward_dispersion_remaining_energy, reward_number_of_packets])
 
 #-- rewards related to the performance metrics
 reward_latency = self.compute_reward_latency()
@@ -389,7 +385,7 @@ class WSNRoutingEnv(gym.Env):
 reward_network_throughput = self.compute_reward_network_throughput()
 reward_packet_delivery_ratio = self.compute_reward_packet_delivery_ratio()
 
-rewards_performance = [reward_latency, reward_network_throughput, reward_packet_delivery_ratio]
+rewards_performance = np.array([reward_latency, reward_network_throughput, reward_packet_delivery_ratio])
 
 return rewards_energy
 
@@ -464,44 +460,44 @@ class WSNRoutingEnv(gym.Env):
 return np.clip(normalized_throughput, 0, 1)
 
 
-def compute_attention_rewards(self, reward):
-    '''
-    Compute the attention-based rewards
-    '''
-    rewards_i = torch.tensor(reward, dtype=torch.double)
-    rewards_i = rewards_i.unsqueeze(0) # Add batch dimension
-    return np.mean(net(rewards_i).item())
+# def compute_attention_rewards(self, reward):
+# '''
+# Compute the attention-based rewards
+# '''
+# rewards_i = torch.tensor(reward, dtype=torch.double)
+# rewards_i = rewards_i.unsqueeze(0) # Add batch dimension
+# return net(rewards_i).item()
 
 
-# def compute_attention_reward(self, rewards):
-# '''
-# Compute the attention-based reward for the network with Q, V, and K matrices
+def compute_attention_reward(self, rewards):
+    '''
+    Compute the attention-based reward for the network with Q, V, and K matrices
 
-# Input:
-# - rewards: list of rewards for each sensor
-# Output:
-# - final_reward: final reward for the network
-# '''
-# # dimension of the sensor embedding
-# d = len(rewards[0])
+    Input:
+    - rewards: list of rewards for each sensor
+    Output:
+    - final_reward: final reward for the network
+    '''
+    # dimension of the sensor embedding
+    d = len(rewards[0])
 
-# # All sensors are represented by their raw rewards
-# query_vector = np.array([max_reward] * d) # Basically, the target is the base station
+    # All sensors are represented by their raw rewards
+    query_vector = np.array([max_reward] * d) # Basically, the target is the base station
 
-# # Similarities between the query vector and the rewards
-# similarities = [np.dot(query_vector, reward) for reward in rewards]
+    # Similarities between the query vector and the rewards
+    similarities = [np.dot(query_vector, reward) for reward in rewards]
 
-# # Similarities scaling
-# similarities = [similarity / np.sqrt(d) for similarity in similarities]
+    # Similarities scaling
+    similarities = [similarity / np.sqrt(d) for similarity in similarities]
 
-# # Softmax operation
-# denominator = sum([np.exp(similarity) for similarity in similarities])
-# attention_weights = [np.exp(similarity) / denominator for similarity in similarities]
+    # Softmax operation
+    denominator = sum([np.exp(similarity) for similarity in similarities])
+    attention_weights = [np.exp(similarity) / denominator for similarity in similarities]
 
-# # Weighted sum of the rewards
-# final_reward = sum([attention_weight * reward for attention_weight, reward in zip(attention_weights, rewards)])
+    # Weighted sum of the rewards
+    final_reward = sum([attention_weight * reward for attention_weight, reward in zip(attention_weights, rewards)])
 
-# return final_reward
+    return final_reward
 
 
 def integrate_mobility(self):
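For reference, the newly enabled compute_attention_reward amounts to scaled dot-product attention with a fixed query: similarities between an all-max_reward query vector and each sensor's reward vector are scaled by sqrt(d), softmaxed into weights, and used to form a weighted sum. Despite the docstring's mention of Q, V, and K matrices, the code as written uses that single fixed query and the raw per-sensor reward vectors as both keys and values, and the result is a length-d vector (a weighted combination of whole reward vectors), not a scalar. The standalone sketch below mirrors that logic outside the class; the name attention_aggregate, the default max_reward = 1.0, and the toy inputs are assumptions for illustration only.

import numpy as np

def attention_aggregate(rewards, max_reward=1.0):
    """Sketch of the diff's compute_attention_reward, outside the env class.

    rewards: list of equally sized 1-D NumPy arrays, one per sensor.
    Returns the attention-weighted combination of those reward vectors.
    """
    d = len(rewards[0])                                         # sensor embedding dimension
    query = np.array([max_reward] * d)                          # fixed query: the "ideal" reward
    scores = [np.dot(query, r) / np.sqrt(d) for r in rewards]   # scaled dot-product similarities
    weights = np.exp(scores) / np.sum(np.exp(scores))           # softmax over sensors
    return sum(w * r for w, r in zip(weights, rewards))         # weighted sum of reward vectors

# Toy usage: the sensor whose rewards sit closest to the all-max_reward query gets most weight.
sensor_rewards = [np.array([0.9, 0.8, 0.7, 1.0]), np.array([0.1, 0.2, 0.3, 0.0])]
print(attention_aggregate(sensor_rewards))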
gym_examples-3.0.764.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-examples
-Version: 3.0.763
+Version: 3.0.764
 Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
 Home-page: https://github.com/gedji/CODES.git
 Author: Georges Djimefo
gym_examples-3.0.764.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=iGclI8newmGbCQqTxcbiIdqdKpVF-L4HIE49f_Ad9qQ,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=mlurW0_198GO2zyTX2xlU5KrrodDUoES1pxPDGWAq6k,26452
+gym_examples-3.0.764.dist-info/METADATA,sha256=OYFWn9IOfYO_l0MHyDiedJLEO8T6ocdiBBLS9OLmdOY,412
+gym_examples-3.0.764.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.764.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.764.dist-info/RECORD,,
gym_examples-3.0.763.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=hOOzP2zBWU0qLRAhWtW_UNS7uLGUECyNRi0BoHR-Q1E,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=WLrpU3V0rIctPV5vgVtHno59gdhosWnRngnHp2JwM4g,26543
-gym_examples-3.0.763.dist-info/METADATA,sha256=_G0-Xric89P7LPhAbKhYaArOoiGJH3jleKGY2uIwc2M,412
-gym_examples-3.0.763.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.763.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.763.dist-info/RECORD,,