gym-examples 3.0.265__py3-none-any.whl → 3.0.267__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
  entry_point="gym_examples.envs:WSNRoutingEnv",
  )
 
- __version__ = "3.0.265"
+ __version__ = "3.0.267"
gym_examples/envs/wsn_env.py CHANGED
@@ -25,7 +25,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
  latency_per_hop = 1 # latency per hop in seconds
 
  base_back_up_dir = "results/data/"
- max_reward = 5 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
+ max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
 
  # Define the final reward function using an attention mechanism
  class Attention(nn.Module):
@@ -73,7 +73,7 @@ class WSNRoutingEnv(gym.Env):
  self.episode_count = 0
  self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
  self.epsilon = 1e-10 # small value to avoid division by zero
- # Initialize the position of the sensors randomly
+ self.current_sensor = 0 # Index of the current sensor
 
  # Define observation space
  self.observation_space = Tuple(
@@ -81,28 +81,13 @@ class WSNRoutingEnv(gym.Env):
  )
 
  # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
- self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+ # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
  # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
- # self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))
+ self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station
 
  self.reset()
 
  def reset(self):
-
- # if self.episode_count > 1 and os.getenv('PRINT_STATS') == 'True': # Statistics for the PPO algorithm during the training phase
- # self.episode_returns.append(self.episode_return)
- # self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
- # self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
- # self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
- # self.episode_network_throughput.append(self.network_throughput)
- # self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
- # self.episode_network_lifetime.append(self.network_lifetime)
- # self.episode_average_latency.append(self.average_latency)
-
- # if self.episode_count > 1 and os.getenv('PRINT_STATS') == 'True-False': # Statistics for the PPO algorithm during the evaluation phase
- # print(f"Episode: {self.episode_count}")
- # print(self.get_metrics())
-
  self.episode_return = 0
  self.sensor_positions = np.random.rand(self.n_sensors, 2) * (upper_bound - lower_bound) + lower_bound
  self.distance_to_base = np.linalg.norm(self.sensor_positions - base_station_position, axis=1)
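
Illustrative sketch (not part of the package): with the MultiDiscrete space commented out, the agent now chooses a single next hop for self.current_sensor from Discrete(n_sensors + 1), where index n_sensors denotes the base station. The value of n_sensors below is assumed for the example.

    from gym.spaces import Discrete

    n_sensors = 5                            # assumed network size for this sketch
    action_space = Discrete(n_sensors + 1)   # 0..4 forward to that sensor, 5 = send to the base station
    action = action_space.sample()           # a single integer instead of one action per agent
    print(action_space.n, action)
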
@@ -131,17 +116,13 @@ class WSNRoutingEnv(gym.Env):
 
 
  def step(self, actions):
- actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
  self.steps += 1
- rewards = [-max_reward] * self.n_sensors
- dones = [False] * self.n_sensors
- for i, action in enumerate(actions):
- if action not in range(self.n_sensors + 1):
- raise ValueError("Invalid action!")
-
- if i >= self.n_sensors:
- raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
-
+ rewards = - max_reward
+ # rewards = 0
+ dones = False
+ actions = np.array([self.current_sensor, actions])
+ print(f"\nactions from step in WSNRoutingEnv: {actions}\n")
+ for i, action in [actions]:
  if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
  continue # Skip if sensor has no energy left or no packets to transmit
 
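
An illustrative sketch (not from the package) of how the rewritten loop header consumes the single action: step pairs the scalar action with self.current_sensor, and iterating over the one-element list unpacks that pair exactly once per call. The values below are hypothetical.

    import numpy as np

    current_sensor = 2                        # hypothetical index of the sensor whose turn it is
    action = 5                                # hypothetical next hop chosen by the agent
    pair = np.array([current_sensor, action])

    for i, a in [pair]:                       # one-element list, so the body runs once with (i, a) = (2, 5)
        print(i, a)
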
@@ -156,6 +137,11 @@ class WSNRoutingEnv(gym.Env):
  transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
  if self.remaining_energy[i] < transmission_energy:
  self.remaining_energy[i] = 0
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  continue # Skip if the sensor does not have enough energy to transmit data to the base station
 
  self.update_sensor_energies(i, transmission_energy)
@@ -167,10 +153,12 @@ class WSNRoutingEnv(gym.Env):
  self.total_latency += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- # rewards[i] = self.compute_individual_rewards(i, action)
- rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
- # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
- dones[i] = True
+ rewards = max_reward # Reward for transmitting data to the base station
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  else:
  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
  if distance > self.coverage_radius:
@@ -179,11 +167,21 @@ class WSNRoutingEnv(gym.Env):
  transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
  reception_energy = self.reception_energy(self.number_of_packets[i])
  if self.remaining_energy[i] < transmission_energy:
- self.remaining_energy[i] = 0
+ self.remaining_energy[i] = 0
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  continue # Skip if the sensor does not have enough energy to transmit data to the next hop
  if self.remaining_energy[action] < reception_energy:
  self.number_of_packets[i] = 0
  self.remaining_energy[action] = 0
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  continue # Skip if the next hop does not have enough energy to receive data
 
  self.update_sensor_energies(i, transmission_energy)
@@ -195,19 +193,17 @@ class WSNRoutingEnv(gym.Env):
  self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- rewards[i] = self.compute_individual_rewards(i, action)
+ rewards = self.compute_individual_rewards(i, action)
 
  # Update the number of packets
  self.number_of_packets[action] += self.number_of_packets[i]
+
+ self.current_sensor = action
  self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
  # Calculate final reward
  # rewards[i] = self.compute_attention_rewards(rewards[i])
- rewards[i] = np.mean(rewards[i])
- # rewards[i] = self.compute_weighted_sum_rewards(rewards[i])
- for i in range(self.n_sensors):
- if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
- dones[i] = True
-
+ rewards = np.mean(rewards)
+
  # Integrate the mobility of the sensors
  # self.integrate_mobility()
 
@@ -218,13 +214,10 @@ class WSNRoutingEnv(gym.Env):
 
  self.get_metrics()
 
- rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
- # rewards = np.sum(rewards) # Sum the rewards of all agents
- rewards = np.mean(rewards) # Average the rewards of all agents
- # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
- # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
- dones = all(dones) # Done if all agents are done
-
+ rewards = rewards.item() if isinstance(rewards, torch.Tensor) else rewards # Convert the reward to a float
+ if not dones:
+ dones = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
+
  return self._get_obs(), rewards, dones, self.get_metrics()
 
 
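
A minimal driver sketch for the reworked step API, which now returns a scalar reward and a single done flag per call; the registered environment id and its constructor arguments are assumptions, not taken from this diff.

    import gym
    import gym_examples  # registers the environment on import

    env = gym.make("gym_examples/WSNRoutingEnv-v0")  # hypothetical id; see the register() call for the real one
    obs = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()           # next hop for the current sensor
        obs, reward, done, info = env.step(action)   # scalar reward, single done flag, metrics dict
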
@@ -232,7 +225,8 @@ class WSNRoutingEnv(gym.Env):
  return [{'remaining_energy': np.array([e]),
  'consumption_energy': np.array([initial_energy - e]),
  'sensor_positions': p,
- 'number_of_packets': np.array([d])
+ 'number_of_packets': np.array([d]),
+ 'curent_sensor': np.array([self.current_sensor])
  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
 
@@ -241,7 +235,8 @@ class WSNRoutingEnv(gym.Env):
  'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
  'consumption_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
  'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
- 'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
+ 'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int),
+ 'current_sensor': Box(low=0, high=self.n_sensors - 1, shape=(1,), dtype=int)
  })
 
 
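
A self-contained sketch of one sensor's observation space as extended above, with the new current_sensor entry; the constants here are placeholders for illustration, not the package's values.

    import numpy as np
    from gym.spaces import Box, Dict

    # Placeholder constants for illustration only.
    initial_energy = 1.0
    lower_bound, upper_bound = 0.0, 100.0
    n_sensors = 5
    initial_number_of_packets = 1

    per_sensor_space = Dict({
        'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
        'consumption_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
        'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
        'number_of_packets': Box(low=0, high=n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int),
        'current_sensor': Box(low=0, high=n_sensors - 1, shape=(1,), dtype=int),
    })
    print(per_sensor_space.sample())
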
@@ -495,4 +490,12 @@ class WSNRoutingEnv(gym.Env):
  "packet_delivery_ratio": self.packet_delivery_ratio,
  "network_lifetime": self.network_lifetime,
  "average_latency": self.average_latency
- }
+ }
+
+
+ def find_next_sensor(self):
+ for offset in range(1, self.n_sensors):
+ next_index = (self.current_sensor + offset) % self.n_sensors
+ if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
+ return next_index
+ return None # If no such sensor is found
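
A standalone sketch of the round-robin scan the new find_next_sensor method performs, with made-up energy and packet values: starting just after the current sensor and wrapping around, it returns the first index that still has both energy and packets, or None once every other sensor is exhausted.

    # Illustrative re-implementation with example data; not the packaged method itself.
    def find_next_sensor(current_sensor, remaining_energy, number_of_packets):
        n_sensors = len(remaining_energy)
        for offset in range(1, n_sensors):
            next_index = (current_sensor + offset) % n_sensors
            if remaining_energy[next_index] > 0 and number_of_packets[next_index] > 0:
                return next_index
        return None  # every other sensor is out of energy or packets

    print(find_next_sensor(2, [0.5, 0.0, 0.2, 0.7], [1, 3, 0, 2]))  # -> 3
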
gym_examples-3.0.265.dist-info/METADATA → gym_examples-3.0.267.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-examples
- Version: 3.0.265
+ Version: 3.0.267
  Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
  Home-page: https://github.com/gedji/CODES.git
  Author: Georges Djimefo
gym_examples-3.0.267.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ gym_examples/__init__.py,sha256=P0-KlIsONNVBKDPxlYZseVoScqQ3WTdLbUXOfsbtc7Y,166
+ gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+ gym_examples/envs/wsn_env.py,sha256=MTP_6sqJT4T2IBGedoLOGaKVZras8gT2NXvWHfggOXA,24154
+ gym_examples-3.0.267.dist-info/METADATA,sha256=D5AgSNWQ3VeV8JapskViS-CoIkUkgwQPZ74N6WEcEAk,412
+ gym_examples-3.0.267.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ gym_examples-3.0.267.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+ gym_examples-3.0.267.dist-info/RECORD,,
gym_examples-3.0.265.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
- gym_examples/__init__.py,sha256=F0u_yF7l2eD3fOQTUzN2XUFt7etMgtlhgDv5tFMubpE,166
- gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
- gym_examples/envs/wsn_env.py,sha256=2GvyLeNjLM-q9oLCobVzpTUrUqzjTyRn_H3BAQZZvbY,24821
- gym_examples-3.0.265.dist-info/METADATA,sha256=D0__HbG6kJd9Kb1SX_Lh_MysXCxkK_ToMFudnMPWqgU,412
- gym_examples-3.0.265.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- gym_examples-3.0.265.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
- gym_examples-3.0.265.dist-info/RECORD,,