gym-examples 3.0.266__py3-none-any.whl → 3.0.267__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
  entry_point="gym_examples.envs:WSNRoutingEnv",
  )
 
- __version__ = "3.0.266"
+ __version__ = "3.0.267"
gym_examples/envs/wsn_env.py CHANGED
@@ -25,7 +25,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
  latency_per_hop = 1 # latency per hop in seconds
 
  base_back_up_dir = "results/data/"
- max_reward = 5 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
+ max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
 
  # Define the final reward function using an attention mechanism
  class Attention(nn.Module):
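The hunk above only shows the declaration of the Attention module used for the final reward; its body is unchanged and not part of this diff. As a rough orientation, an attention-style aggregator over the per-criterion reward vector (of length input_dim, as used later in step()) might look like the following sketch; the layer sizes and the softmax-weighted sum are assumptions, not the package's actual implementation:

    import torch
    import torch.nn as nn

    class AttentionSketch(nn.Module):
        # Hypothetical stand-in for the Attention class referenced above.
        def __init__(self, input_dim, hidden_dim=16):
            super().__init__()
            self.score = nn.Sequential(
                nn.Linear(input_dim, hidden_dim),
                nn.Tanh(),
                nn.Linear(hidden_dim, input_dim),
            )

        def forward(self, rewards):
            # rewards: per-criterion reward vector of shape (input_dim,)
            weights = torch.softmax(self.score(rewards), dim=-1)
            return (weights * rewards).sum()  # scalar final reward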
@@ -73,7 +73,7 @@ class WSNRoutingEnv(gym.Env):
  self.episode_count = 0
  self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
  self.epsilon = 1e-10 # small value to avoid division by zero
- # Initialize the position of the sensors randomly
+ self.current_sensor = 0 # Index of the current sensor
 
  # Define observation space
  self.observation_space = Tuple(
@@ -81,28 +81,13 @@
  )
 
  # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
- self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+ # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
  # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
- # self.action_space = Discrete(self.n_agents * (self.n_sensors + 1))
+ self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station
 
  self.reset()
 
  def reset(self):
-
- # if self.episode_count > 1 and os.getenv('PRINT_STATS') == 'True': # Statistics for the PPO algorithm during the training phase
- # self.episode_returns.append(self.episode_return)
- # self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
- # self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
- # self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
- # self.episode_network_throughput.append(self.network_throughput)
- # self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
- # self.episode_network_lifetime.append(self.network_lifetime)
- # self.episode_average_latency.append(self.average_latency)
-
- # if self.episode_count > 1 and os.getenv('PRINT_STATS') == 'True-False': # Statistics for the PPO algorithm during the evaluation phase
- # print(f"Episode: {self.episode_count}")
- # print(self.get_metrics())
-
  self.episode_return = 0
  self.sensor_positions = np.random.rand(self.n_sensors, 2) * (upper_bound - lower_bound) + lower_bound
  self.distance_to_base = np.linalg.norm(self.sensor_positions - base_station_position, axis=1)
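This hunk is the core interface change of 3.0.267: the environment now exposes a single Discrete(n_sensors + 1) action per step instead of a MultiDiscrete vector with one entry per agent. Judging from the step() changes below, an action k < n_sensors appears to route the current sensor's packets to sensor k, while k == n_sensors targets the base station. A minimal sketch of what sampling from the old and new spaces looks like (the sizes are illustrative values, not the package defaults):

    from gym.spaces import Discrete, MultiDiscrete

    n_sensors, n_agents = 10, 10  # illustrative values only

    old_space = MultiDiscrete([n_sensors + 1] * n_agents)
    new_space = Discrete(n_sensors + 1)

    old_space.sample()  # e.g. array([ 3, 10,  7, ...]), one next hop per agent
    new_space.sample()  # e.g. 4, a single next hop; the value n_sensors means the base station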
@@ -131,18 +116,13 @@
 
 
  def step(self, actions):
- actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
  self.steps += 1
- # rewards = [-max_reward] * self.n_sensors
- rewards = [0] * self.n_sensors
- dones = [False] * self.n_sensors
- for i, action in enumerate(actions):
- if action not in range(self.n_sensors + 1):
- raise ValueError("Invalid action!")
-
- if i >= self.n_sensors:
- raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
-
+ rewards = - max_reward
+ # rewards = 0
+ dones = False
+ actions = np.array([self.current_sensor, actions])
+ print(f"\nactions from step in WSNRoutingEnv: {actions}\n")
+ for i, action in [actions]:
  if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
  continue # Skip if sensor has no energy left or no packets to transmit
 
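The rewritten step() now receives a single scalar action and pairs it with the sensor currently being scheduled, so the loop body runs once per call. Roughly, assuming current_sensor is 2 and the agent chose next hop 5 (illustrative values):

    import numpy as np

    current_sensor, action = 2, 5                # illustrative values only
    actions = np.array([current_sensor, action])

    for i, a in [actions]:                       # unpacks the single (sensor, next_hop) pair
        print(i, a)                              # -> 2 5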
@@ -157,6 +137,11 @@
  transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
  if self.remaining_energy[i] < transmission_energy:
  self.remaining_energy[i] = 0
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  continue # Skip if the sensor does not have enough energy to transmit data to the base station
 
  self.update_sensor_energies(i, transmission_energy)
@@ -168,10 +153,12 @@
  self.total_latency += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- # rewards[i] = self.compute_individual_rewards(i, action)
- # rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
- rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
- dones[i] = True
+ rewards = max_reward # Reward for transmitting data to the base station
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  else:
  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
  if distance > self.coverage_radius:
@@ -180,11 +167,21 @@
  transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
  reception_energy = self.reception_energy(self.number_of_packets[i])
  if self.remaining_energy[i] < transmission_energy:
- self.remaining_energy[i] = 0
+ self.remaining_energy[i] = 0
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  continue # Skip if the sensor does not have enough energy to transmit data to the next hop
  if self.remaining_energy[action] < reception_energy:
  self.number_of_packets[i] = 0
  self.remaining_energy[action] = 0
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  continue # Skip if the next hop does not have enough energy to receive data
 
  self.update_sensor_energies(i, transmission_energy)
@@ -196,19 +193,17 @@
  self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- rewards[i] = self.compute_individual_rewards(i, action)
+ rewards = self.compute_individual_rewards(i, action)
 
  # Update the number of packets
  self.number_of_packets[action] += self.number_of_packets[i]
+
+ self.current_sensor = action
  self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
  # Calculate final reward
  # rewards[i] = self.compute_attention_rewards(rewards[i])
- rewards[i] = np.mean(rewards[i])
- # rewards[i] = self.compute_weighted_sum_rewards(rewards[i])
- for i in range(self.n_sensors):
- if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
- dones[i] = True
-
+ rewards = np.mean(rewards)
+
  # Integrate the mobility of the sensors
  # self.integrate_mobility()
 
@@ -219,13 +214,10 @@
 
  self.get_metrics()
 
- rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
- # rewards = np.sum(rewards) # Sum the rewards of all agents
- rewards = np.mean(rewards) # Average the rewards of all agents
- # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
- # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
- dones = all(dones) # Done if all agents are done
-
+ rewards = rewards.item() if isinstance(rewards, torch.Tensor) else rewards # Convert the reward to a float
+ if not dones:
+ dones = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
+
  return self._get_obs(), rewards, dones, self.get_metrics()
 
 
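With a scalar reward, a single done flag, and the metrics dict returned as the info value, the environment is now driven like a classic single-agent Gym environment. A minimal rollout sketch, assuming the constructor defaults are sufficient and the old-style four-value step API shown above:

    from gym_examples.envs import WSNRoutingEnv

    env = WSNRoutingEnv()                             # assumes default constructor arguments
    obs = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()            # one next-hop index from Discrete(n_sensors + 1)
        obs, reward, done, info = env.step(action)    # scalar reward, single done flag, metrics dict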
@@ -233,7 +225,8 @@
  return [{'remaining_energy': np.array([e]),
  'consumption_energy': np.array([initial_energy - e]),
  'sensor_positions': p,
- 'number_of_packets': np.array([d])
+ 'number_of_packets': np.array([d]),
+ 'curent_sensor': np.array([self.current_sensor])
  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
 
@@ -242,7 +235,8 @@
  'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
  'consumption_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
  'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
- 'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
+ 'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int),
+ 'current_sensor': Box(low=0, high=self.n_sensors - 1, shape=(1,), dtype=int)
  })
 
 
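Each per-sensor observation is a Dict space, and the overall observation_space (built in the constructor hunk above) is a Tuple of one such Dict per sensor; this hunk adds the scheduling index to that Dict. A compact sketch of how the extended space fits together, with illustrative values standing in for the module-level constants:

    import numpy as np
    from gym.spaces import Box, Dict, Tuple

    n_sensors, initial_energy = 10, 1.0           # illustrative values only
    lower_bound, upper_bound = 0.0, 100.0
    initial_number_of_packets = 1

    sensor_space = Dict({
        'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
        'consumption_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
        'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
        'number_of_packets': Box(low=0, high=n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int),
        'current_sensor': Box(low=0, high=n_sensors - 1, shape=(1,), dtype=int),
    })
    observation_space = Tuple([sensor_space] * n_sensors)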
@@ -496,4 +490,12 @@
  "packet_delivery_ratio": self.packet_delivery_ratio,
  "network_lifetime": self.network_lifetime,
  "average_latency": self.average_latency
- }
+ }
+
+
+ def find_next_sensor(self):
+ for offset in range(1, self.n_sensors):
+ next_index = (self.current_sensor + offset) % self.n_sensors
+ if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
+ return next_index
+ return None # If no such sensor is found
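find_next_sensor() scans the other sensors in round-robin order starting after current_sensor and returns the first one that still has both energy and packets, or None when no sensor can act, at which point step() sets the done flag. A standalone sketch of the same scan on illustrative state:

    import numpy as np

    remaining_energy = np.array([0.0, 5.0, 0.0, 3.0])   # illustrative values only
    number_of_packets = np.array([0, 2, 1, 0])
    current_sensor, n_sensors = 3, 4

    next_sensor = None
    for offset in range(1, n_sensors):
        next_index = (current_sensor + offset) % n_sensors
        if remaining_energy[next_index] > 0 and number_of_packets[next_index] > 0:
            next_sensor = next_index
            break

    print(next_sensor)  # -> 1 (sensor 0 has no packets, sensor 2 has no energy left)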
gym_examples-3.0.266.dist-info/METADATA → gym_examples-3.0.267.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-examples
- Version: 3.0.266
+ Version: 3.0.267
  Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
  Home-page: https://github.com/gedji/CODES.git
  Author: Georges Djimefo
gym_examples-3.0.267.dist-info/RECORD
@@ -0,0 +1,7 @@
+ gym_examples/__init__.py,sha256=P0-KlIsONNVBKDPxlYZseVoScqQ3WTdLbUXOfsbtc7Y,166
+ gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+ gym_examples/envs/wsn_env.py,sha256=MTP_6sqJT4T2IBGedoLOGaKVZras8gT2NXvWHfggOXA,24154
+ gym_examples-3.0.267.dist-info/METADATA,sha256=D5AgSNWQ3VeV8JapskViS-CoIkUkgwQPZ74N6WEcEAk,412
+ gym_examples-3.0.267.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ gym_examples-3.0.267.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+ gym_examples-3.0.267.dist-info/RECORD,,
gym_examples-3.0.266.dist-info/RECORD
@@ -1,7 +0,0 @@
- gym_examples/__init__.py,sha256=KqEqczXOEfVmbkVd7dsDXk4gyzc5VC_HrycnPdSH_8M,166
- gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
- gym_examples/envs/wsn_env.py,sha256=4CXrSu-8epidbKW7JgPIzbXPS-WPtCC82FIsdPPAlMQ,24863
- gym_examples-3.0.266.dist-info/METADATA,sha256=KdqFD13218m6mSDcVJ5fpru17gAS0IuYts1m4uMPUTE,412
- gym_examples-3.0.266.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- gym_examples-3.0.266.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
- gym_examples-3.0.266.dist-info/RECORD,,