gym-examples 3.0.762__py3-none-any.whl → 3.0.764__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
    entry_point="gym_examples.envs:WSNRoutingEnv",
)

-__version__ = "3.0.762"
+__version__ = "3.0.764"
@@ -150,7 +150,7 @@ class WSNRoutingEnv(gym.Env):
150
150
  self.number_of_steps += 1
151
151
  self.steps += 1
152
152
  # rewards = [-max_reward] * self.n_sensors
153
- reward_init = [0] * input_dim
153
+ reward_init = np.array([0] * input_dim)
154
154
  rewards = [reward_init] * self.n_sensors
155
155
  dones = [False] * self.n_sensors
156
156
  for i, action in enumerate(actions):
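A hedged aside on the list-to-array switch above: the per-sensor reward vectors are later combined by the attention aggregation introduced further down in this diff, which presumably needs to scale each vector by a float weight. That operation broadcasts for a NumPy array but fails for a plain Python list, as this minimal sketch with made-up values shows:

    import numpy as np

    weight = 0.5                     # illustrative attention weight
    as_list = [0] * 4                # old representation of a per-sensor reward
    as_array = np.array([0] * 4)     # new representation

    print(weight * as_array)         # -> [0. 0. 0. 0.]  (element-wise broadcast)
    # weight * as_list               # would raise TypeError: a list cannot be scaled by a float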
@@ -179,7 +179,7 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                rewards[i] = [max_reward] * input_dim  # Reward for transmitting data to the base station
+                rewards[i] = np.array([max_reward] * input_dim)  # Reward for transmitting data to the base station
                 dones[i] = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
@@ -211,10 +211,6 @@ class WSNRoutingEnv(gym.Env):
                 self.number_of_packets[action] += self.number_of_packets[i]

             self.number_of_packets[i] = 0  # Reset the number of packets of the sensor i
-
-        for i in range(self.n_sensors):
-            # Calculate final reward
-            rewards[i] = self.compute_attention_rewards(rewards[i])

         # Integrate the mobility of the sensors
         # self.integrate_mobility()
@@ -227,7 +223,7 @@ class WSNRoutingEnv(gym.Env):
         self.get_metrics()

         # rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards]  # Convert the reward to a float
-        rewards = np.mean(rewards)
+        rewards = self.compute_attention_reward(rewards)  # Compute the attention-based reward

         for i in range(self.n_sensors):
             if not dones[i]:
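A note on what this one-line change does, judging only from the shapes visible in this diff: np.mean(rewards) averaged every component of every per-sensor reward vector into a single scalar, whereas the new compute_attention_reward (shown further below) returns a softmax-weighted sum of the vectors, i.e. an array of length input_dim. A toy comparison with illustrative values:

    import numpy as np

    rewards = [np.array([0.2, 0.4]), np.array([0.6, 0.8])]   # two sensors, 2-dim reward vectors

    print(np.mean(rewards))                                  # 0.5 -> single scalar (old behaviour)

    weights = [0.5, 0.5]                                     # stand-in for the attention weights
    print(sum(w * r for w, r in zip(weights, rewards)))      # [0.4 0.6] -> per-component vector (new behaviour)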
@@ -381,7 +377,7 @@ class WSNRoutingEnv(gym.Env):
         reward_dispersion_remaining_energy = self.compute_reward_dispersion_remaining_energy()
         reward_number_of_packets = self.compute_reward_number_of_packets(action)

-        rewards_energy = [reward_angle, reward_consumption_energy, reward_dispersion_remaining_energy, reward_number_of_packets]
+        rewards_energy = np.array([reward_angle, reward_consumption_energy, reward_dispersion_remaining_energy, reward_number_of_packets])

         #-- rewards related to the performance metrics
         reward_latency = self.compute_reward_latency()
@@ -389,7 +385,7 @@ class WSNRoutingEnv(gym.Env):
         reward_network_throughput = self.compute_reward_network_throughput()
         reward_packet_delivery_ratio = self.compute_reward_packet_delivery_ratio()

-        rewards_performance = [reward_latency, reward_network_throughput, reward_packet_delivery_ratio]
+        rewards_performance = np.array([reward_latency, reward_network_throughput, reward_packet_delivery_ratio])

         return rewards_energy

@@ -464,45 +460,44 @@ class WSNRoutingEnv(gym.Env):
         return np.clip(normalized_throughput, 0, 1)


-    def compute_attention_rewards(self, reward):
-        '''
-        Compute the attention-based rewards
-        '''
-        rewards_i = torch.tensor(reward, dtype=torch.double)
-        rewards_i = rewards_i.unsqueeze(0)  # Add batch dimension
-        print(f"net(rewards_i).item(): {net(rewards_i).item()}")
-        return np.mean(net(rewards_i).item())
+    # def compute_attention_rewards(self, reward):
+    #     '''
+    #     Compute the attention-based rewards
+    #     '''
+    #     rewards_i = torch.tensor(reward, dtype=torch.double)
+    #     rewards_i = rewards_i.unsqueeze(0)  # Add batch dimension
+    #     return net(rewards_i).item()


-    # def compute_attention_reward(self, rewards):
-    #     '''
-    #     Compute the attention-based reward for the network with Q, V, and K matrices
+    def compute_attention_reward(self, rewards):
+        '''
+        Compute the attention-based reward for the network with Q, V, and K matrices

-    #     Input:
-    #     - rewards: list of rewards for each sensor
-    #     Output:
-    #     - final_reward: final reward for the network
-    #     '''
-    #     # dimension of the sensor embedding
-    #     d = len(rewards[0])
+        Input:
+        - rewards: list of rewards for each sensor
+        Output:
+        - final_reward: final reward for the network
+        '''
+        # dimension of the sensor embedding
+        d = len(rewards[0])

-    #     # All sensors are represented by their raw rewards
-    #     query_vector = np.array([max_reward] * d)  # Basically, the target is the base station
+        # All sensors are represented by their raw rewards
+        query_vector = np.array([max_reward] * d)  # Basically, the target is the base station

-    #     # Similarities between the query vector and the rewards
-    #     similarities = [np.dot(query_vector, reward) for reward in rewards]
+        # Similarities between the query vector and the rewards
+        similarities = [np.dot(query_vector, reward) for reward in rewards]

-    #     # Similarities scaling
-    #     similarities = [similarity / np.sqrt(d) for similarity in similarities]
+        # Similarities scaling
+        similarities = [similarity / np.sqrt(d) for similarity in similarities]

-    #     # Softmax operation
-    #     denominator = sum([np.exp(similarity) for similarity in similarities])
-    #     attention_weights = [np.exp(similarity) / denominator for similarity in similarities]
+        # Softmax operation
+        denominator = sum([np.exp(similarity) for similarity in similarities])
+        attention_weights = [np.exp(similarity) / denominator for similarity in similarities]

-    #     # Weighted sum of the rewards
-    #     final_reward = sum([attention_weight * reward for attention_weight, reward in zip(attention_weights, rewards)])
+        # Weighted sum of the rewards
+        final_reward = sum([attention_weight * reward for attention_weight, reward in zip(attention_weights, rewards)])

-    #     return final_reward
+        return final_reward


     def integrate_mobility(self):
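For readers skimming the hunk above, here is a minimal self-contained sketch of the aggregation that the newly activated compute_attention_reward performs. The value of max_reward and the reward vectors are invented for illustration; note that, despite the docstring's mention of Q, K, and V matrices, the method uses the raw per-sensor reward vectors as keys and values and a fixed query filled with max_reward:

    import numpy as np

    max_reward = 1.0                                   # illustrative; the package defines its own constant
    rewards = [np.array([0.2, 0.5, 0.1, 0.9]),         # per-sensor reward vectors (invented values)
               np.array([0.8, 0.7, 0.6, 0.4]),
               np.array([0.1, 0.2, 0.3, 0.1])]

    d = len(rewards[0])                                # dimension of the sensor "embedding"
    query = np.array([max_reward] * d)                 # the ideal reward acts as the query

    scores = [np.dot(query, r) / np.sqrt(d) for r in rewards]    # scaled dot-product similarities
    weights = np.exp(scores) / np.sum(np.exp(scores))            # softmax attention weights

    final_reward = sum(w * r for w, r in zip(weights, rewards))  # weighted sum, still a length-d vector
    print(weights, final_reward)

Sensors whose reward vector lies closer to the all-max_reward query receive larger weights, so the aggregate is pulled toward the best-performing sensors instead of being a plain mean.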
{gym_examples-3.0.762.dist-info → gym_examples-3.0.764.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-examples
-Version: 3.0.762
+Version: 3.0.764
 Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
 Home-page: https://github.com/gedji/CODES.git
 Author: Georges Djimefo
gym_examples-3.0.764.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=iGclI8newmGbCQqTxcbiIdqdKpVF-L4HIE49f_Ad9qQ,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=mlurW0_198GO2zyTX2xlU5KrrodDUoES1pxPDGWAq6k,26452
+gym_examples-3.0.764.dist-info/METADATA,sha256=OYFWn9IOfYO_l0MHyDiedJLEO8T6ocdiBBLS9OLmdOY,412
+gym_examples-3.0.764.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.764.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.764.dist-info/RECORD,,
gym_examples-3.0.762.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=fQGHHtmkYG0B6YCGmVYN9lXXBt2FkWSOzbzbxeok_-Q,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=DpQTrUKg2pwCFgRos3uFzMSBEzkarkPxMPVwUIufOUg,26609
-gym_examples-3.0.762.dist-info/METADATA,sha256=NwHlvSZZu_HmvnJZ8Vusc8MseOysNnaJAjZ1VWyuN_g,412
-gym_examples-3.0.762.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.762.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.762.dist-info/RECORD,,