gym-examples 3.0.248__py3-none-any.whl → 3.0.250__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
      entry_point="gym_examples.envs:WSNRoutingEnv",
  )
 
- __version__ = "3.0.248"
+ __version__ = "3.0.250"
gym_examples/envs/wsn_env.py CHANGED
@@ -48,7 +48,7 @@ net = net.double() # Convert the weights to Double
 
  class WSNRoutingEnv(gym.Env):
 
-     print_stats = False # Global flag to control printing of statistics
+     PRINT_STATS = "False" # Global flag to control printing of statistics
 
      def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4, num_timesteps = None, version = None):
 
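Note on this change: the class-level flag is now the string "False" rather than the boolean False, and the removed code further down in this diff gates statistics on os.getenv('PRINT_STATS') == 'True', a string comparison against an environment variable rather than this attribute. A short illustration of the language semantics (not code from the package):

# Illustration only -- not code from gym_examples. A non-empty string such as
# "False" is truthy in Python, so a bare truthiness check no longer behaves
# like the old boolean flag did.
print_stats = False
if print_stats:
    print("never runs: the boolean flag is False")

PRINT_STATS = "False"
if PRINT_STATS:
    print("runs: a non-empty string is truthy")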
@@ -128,20 +128,16 @@ class WSNRoutingEnv(gym.Env):
 
          return self._get_obs()
 
+ 
      def step(self, actions):
          self.steps += 1
-         rewards = [-max_reward] * self.n_sensors
-         # rewards = [0] * self.n_sensors
-         dones = [False] * self.n_sensors
+         # rewards = [-max_reward] * self.n_sensors
+         reward = -max_reward
+         # dones = [False] * self.n_sensors
+         done = False
 
          # actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
          # for i, action in enumerate(actions):
-         for i, action in [actions]:
-             if action not in range(self.n_sensors + 1):
-                 raise ValueError("Invalid action!")
- 
-             if i >= self.n_sensors:
-                 raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
- 
+         for i, action in [actions]: # This loop is for the PPO algorithm: actions is a numpy array of shape (1, 2)
              if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                  continue # Skip if sensor has no energy left or no packets to transmit
 
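This refactor replaces the per-sensor rewards and dones lists with a single scalar reward and a single done flag, and the loop now unpacks exactly one (sensor index, action) pair from actions. A minimal illustration of why `for i, action in [actions]` iterates once (the values are made up, not from the package):

import numpy as np

# Illustration only: wrapping a length-2 array in a list makes the loop run
# exactly once, unpacking the pair into i (sensor index) and action (next hop).
actions = np.array([3, 7])        # hypothetical (sensor index, action) pair
for i, action in [actions]:
    print(i, action)              # -> 3 7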
@@ -167,10 +163,10 @@ class WSNRoutingEnv(gym.Env):
                  self.total_latency += self.packet_latency[i] + latency_per_hop
                  self.packet_latency[i] = 0
 
-                 # rewards[i] = self.compute_individual_rewards(i, action)
-                 rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
-                 # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
-                 dones[i] = True
+                 # rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
+                 reward = max_reward
+                 # dones[i] = True
+                 # done = True
              else:
                  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                  if distance > self.coverage_radius:
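For hops to another sensor, the environment measures the Euclidean distance between the two positions and rejects the hop when it exceeds coverage_radius. A self-contained illustration with made-up coordinates (not package code):

import numpy as np

# Illustration only: Euclidean distance between two 2-D sensor positions,
# compared against the coverage radius. Coordinates and radius are made up.
sensor_positions = np.array([[0.0, 0.0], [3.0, 4.0]])
coverage_radius = 4.5
distance = np.linalg.norm(sensor_positions[0] - sensor_positions[1])  # 5.0
print(distance > coverage_radius)  # True -> this hop would be out of range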
@@ -195,18 +191,20 @@ class WSNRoutingEnv(gym.Env):
                  self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                  self.packet_latency[i] = 0
 
-                 rewards[i] = self.compute_individual_rewards(i, action)
+                 # rewards[i] = self.compute_individual_rewards(i, action)
+                 reward = self.compute_individual_rewards(i, action)
 
              # Update the number of packets
              self.number_of_packets[action] += self.number_of_packets[i]
+ 
              self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
              # Calculate final reward
              # rewards[i] = self.compute_attention_rewards(rewards[i])
-             rewards[i] = np.mean(rewards[i])
-             # rewards[i] = self.compute_weighted_sum_rewards(rewards[i])
-         for i in range(self.n_sensors):
-             if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
-                 dones[i] = True
+             # rewards[i] = np.mean(rewards[i])
+             reward = np.mean(reward)
+             # for i in range(self.n_sensors):
+             #     if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
+             #         dones[i] = True
 
          # Integrate the mobility of the sensors
          # self.integrate_mobility()
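compute_individual_rewards appears to return a vector of per-criterion rewards, which np.mean then collapses into the single scalar that the new step() returns. A tiny illustration with made-up values (not package code):

import numpy as np

# Illustration only: collapsing a hypothetical per-criterion reward vector
# into the scalar reward that step() now returns.
per_criterion_reward = np.array([0.2, 0.8, 0.5])
reward = np.mean(per_criterion_reward)
print(reward)  # 0.5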
@@ -218,41 +216,45 @@ class WSNRoutingEnv(gym.Env):
 
          self.get_metrics()
 
-         rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
-         # rewards = np.sum(rewards) # Sum the rewards of all agents
-         rewards = np.mean(rewards) # Average the rewards of all agents
-         # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
-         # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
-         dones = all(dones) # Done if all agents are done
+         # rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
+         reward = reward.item() if isinstance(reward, torch.Tensor) else reward # Convert the reward to a float
+         # rewards = np.mean(rewards) # Average the rewards of all agents
+         # dones = all(dones) # Done if all agents are done
+         # Check if done condition is met
+         done = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
 
-         if os.getenv('PRINT_STATS') == 'True': # We are trying to extract only the statistics for the PPO algorithm
-             self.number_of_steps += 1
-             self.episode_return += rewards
-             if self.number_of_steps >= self.num_timesteps:
-                 self.episode_returns.append(self.episode_return)
-                 self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
-                 self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
-                 self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
-                 self.episode_network_throughput.append(self.network_throughput)
-                 self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
-                 self.episode_network_lifetime.append(self.network_lifetime)
-                 self.episode_average_latency.append(self.average_latency)
- 
-                 metrics = {
-                     "returns_PPO": self.episode_returns,
-                     "std_remaining_energy_PPO": self.episode_std_remaining_energy,
-                     "total_consumption_energy_PPO": self.episode_total_consumption_energy,
-                     "mean_remaining_energy_PPO": self.episode_mean_remaining_energy,
-                     "network_throughput_PPO": self.episode_network_throughput,
-                     "packet_delivery_ratio_PPO": self.episode_packet_delivery_ratio,
-                     "network_lifetime_PPO": self.episode_network_lifetime,
-                     "average_latency_PPO": self.episode_average_latency
-                 }
- 
-                 for metric_name, metric_value in metrics.items():
-                     np.save(f"{base_back_up_dir}{metric_name}_{self.version}.npy", np.array(metric_value))
- 
-         return self._get_obs(), rewards, dones, {}
+         # if os.getenv('PRINT_STATS') == 'True': # We are trying to extract only the statistics for the PPO algorithm
+         #     self.number_of_steps += 1
+ 
+ 
+ 
+         #     self.episode_return += reward
+         #     if self.number_of_steps >= self.num_timesteps:
+         #         self.episode_returns.append(self.episode_return)
+         #         self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
+         #         self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
+         #         self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
+         #         self.episode_network_throughput.append(self.network_throughput)
+         #         self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
+         #         self.episode_network_lifetime.append(self.network_lifetime)
+         #         self.episode_average_latency.append(self.average_latency)
+ 
+         #     metrics = {
+         #         "returns_PPO": self.episode_returns,
+         #         "std_remaining_energy_PPO": self.episode_std_remaining_energy,
+         #         "total_consumption_energy_PPO": self.episode_total_consumption_energy,
+         #         "mean_remaining_energy_PPO": self.episode_mean_remaining_energy,
+         #         "network_throughput_PPO": self.episode_network_throughput,
+         #         "packet_delivery_ratio_PPO": self.episode_packet_delivery_ratio,
+         #         "network_lifetime_PPO": self.episode_network_lifetime,
+         #         "average_latency_PPO": self.episode_average_latency
+         #     }
+ 
+         #     for metric_name, metric_value in metrics.items():
+         #         np.save(f"{base_back_up_dir}{metric_name}_{self.version}.npy", np.array(metric_value))
+ 
+         # return self._get_obs(), rewards, dones, {}
+         return self._get_obs(), reward, done, self.get_metrics()
 
      def _get_obs(self):
          return [{'remaining_energy': np.array([e]),
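With these changes, step() follows the classic single-agent Gym API and returns a 4-tuple of (observation, reward, done, info). A minimal rollout sketch under stated assumptions: the action encoding as a (sensor index, next hop) pair and the use of index n_sensors for the base station are inferred from the loop and branch comments above, not confirmed by the package.

import numpy as np
from gym_examples.envs import WSNRoutingEnv

# Sketch only, not package documentation. Assumes the constructor defaults
# shown in this diff and that an action is a length-2 array of
# (sensor index, next hop), with index n_sensors meaning the base station.
env = WSNRoutingEnv(n_sensors=20)
obs = env.reset()

done = False
for _ in range(1000):                    # cap the rollout for the sketch
    action = np.array([0, 20])           # hypothetical: sensor 0 forwards to the base station
    obs, reward, done, info = env.step(action)
    if done:
        break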
{gym_examples-3.0.248.dist-info → gym_examples-3.0.250.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-examples
- Version: 3.0.248
+ Version: 3.0.250
  Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
  Home-page: https://github.com/gedji/CODES.git
  Author: Georges Djimefo
gym_examples-3.0.250.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ gym_examples/__init__.py,sha256=SCP88qKNLXNrQnomR5aDwTUKRGcubrWoYrMLMh_Jv_E,166
+ gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+ gym_examples/envs/wsn_env.py,sha256=YWHO8EnD2HiG-72eWOtOtvEuR4x1tiUEQGjfUGCnN1g,26424
+ gym_examples-3.0.250.dist-info/METADATA,sha256=iNb1JdpzCy-fSeT2rhapPJK-McEwXRcoGqY7cJIdHSo,412
+ gym_examples-3.0.250.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ gym_examples-3.0.250.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+ gym_examples-3.0.250.dist-info/RECORD,,
gym_examples-3.0.248.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
- gym_examples/__init__.py,sha256=BjAXRifSQtOSJYJbOPIngjGcITUUcHk6qRjyJl_zT5A,166
- gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
- gym_examples/envs/wsn_env.py,sha256=qNvcWbmU5fv6t-CRHNWzWfPU_Z3qp_Qu9m6IPe-EuGg,26505
- gym_examples-3.0.248.dist-info/METADATA,sha256=sq4qBRNZXMFG1ohtgJM-Xrs8znvFxDEoEKniZ85JQZs,412
- gym_examples-3.0.248.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- gym_examples-3.0.248.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
- gym_examples-3.0.248.dist-info/RECORD,,