gym-examples 2.0.29__py3-none-any.whl → 2.0.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -6,4 +6,4 @@ register(
     max_episode_steps=50,
 )
 
-__version__ = "2.0.29"
+__version__ = "2.0.31"
gym_examples/envs/wsn_env.py CHANGED
@@ -22,7 +22,7 @@ initial_energy = 1 # initial energy of each sensor (in joules)
 lower_bound = 0 # lower bound of the sensor positions
 upper_bound = 100 # upper bound of the sensor positions
 base_station_position = np.array([(upper_bound - lower_bound)/2, (upper_bound - lower_bound)/2]) # position of the base station
-
+initial_number_of_packets = 1 # initial number of packets to transmit
 
 
 # Define the final reward function using an attention mechanism
@@ -74,7 +74,7 @@ class WSNRoutingEnv(gym.Env):
         # Initialize remaining energy of each sensor to initial_energy joule
         self.remaining_energy = np.ones(self.n_sensors) * initial_energy
         self.consumption_energy = np.zeros(self.n_sensors)
-        self.number_of_packets = np.ones(self.n_sensors, dtype=int) # Number of packets to transmit
+        self.number_of_packets = np.ones(self.n_sensors, dtype=int) * initial_number_of_packets # number of packets to transmit
         self.episode_count += 1
         return self._get_obs()
 
@@ -95,31 +95,27 @@ class WSNRoutingEnv(gym.Env):
             if (action == i):
                 continue # Skip if sensor tries to transmit data to itself
 
-            neighbors_i = self.eligible_receivers(i)
-            keys_neighbors_i = list(neighbors_i.keys())
-            if len(neighbors_i) == 0 or action not in keys_neighbors_i:
-                continue
-
-            remaining_energy_before = copy.deepcopy(self.remaining_energy)
             if action == self.n_sensors:
-                rewards[i] = self.compute_individual_rewards(i, action, neighbors_i, remaining_energy_before)
-                dones[i] = True
                 # Calculate the energy consumption and remaining for transmitting data to the base station
-                self.update_sensor_energies(i, neighbors_i[action]['transmission_energy'])
+                transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
+                self.update_sensor_energies(i, transmission_energy)
+                rewards[i] = self.compute_individual_rewards(i, action)
+                dones[i] = True
             else:
-                self.update_sensor_energies(i, neighbors_i[action]['transmission_energy'])
-                self.update_sensor_energies(action, neighbors_i[action]['reception_energy'])
-                # Update the number of packets of the sensor action
-                self.number_of_packets[action] += self.number_of_packets[i]
-                self.distance_to_base[action] = np.linalg.norm(self.sensor_positions[action] - base_station_position)
+                distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
+                transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
+                reception_energy = self.reception_energy(self.number_of_packets[i])
+                self.update_sensor_energies(i, transmission_energy)
+                self.update_sensor_energies(action, reception_energy)
                 # Compute individual rewards
-                rewards[i] = self.compute_individual_rewards(i, action, neighbors_i, remaining_energy_before)
+                rewards[i] = self.compute_individual_rewards(i, action)
+                # Update the number of packets
+                self.number_of_packets[action] += self.number_of_packets[i]
             self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
             # rewards_individual = torch.tensor(rewards[i], dtype=torch.double)
             # final_reward = net(rewards_individual)
-            # final_reward = sum(rewards[i])
-            final_reward = np.mean(rewards[i])
+            final_reward = np.sum(rewards[i])
             rewards[i] = final_reward
 
         for i in range(self.n_sensors):
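For orientation, the refactored step() now charges energy directly through the new stateless helpers instead of looking it up in eligible_receivers(). Below is a minimal standalone sketch of that per-sensor bookkeeping; the function name forward_packets, the plain-array arguments, and the injected energy callables are illustrative assumptions, not part of the package.

    import numpy as np

    def forward_packets(i, action, n_sensors, positions, base_pos,
                        packets, remaining_energy, tx_energy, rx_energy):
        # action == n_sensors means "transmit straight to the base station"
        if action == n_sensors:
            d = np.linalg.norm(positions[i] - base_pos)
            remaining_energy[i] -= tx_energy(packets[i], d)
        else:
            d = np.linalg.norm(positions[i] - positions[action])
            remaining_energy[i] -= tx_energy(packets[i], d)
            remaining_energy[action] -= rx_energy(packets[i])
            packets[action] += packets[i]  # the receiver queues the forwarded packets
        packets[i] = 0                     # the sender's buffer is emptied either way

Passing the environment's transmission_energy/reception_energy helpers (or any stand-ins) as tx_energy/rx_energy reproduces the bookkeeping shown in the hunk above.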
@@ -129,6 +125,8 @@ class WSNRoutingEnv(gym.Env):
         # Integrate the mobility of the sensors
         self.integrate_mobility()
 
+        self.distance_to_base = np.linalg.norm(self.sensor_positions - base_station_position, axis=1)
+
         return self._get_obs(), rewards, dones, {}
 
     def _get_obs(self):
@@ -155,13 +153,13 @@ class WSNRoutingEnv(gym.Env):
         self.consumption_energy[i] += delta_energy
         self.remaining_energy[i] -= delta_energy
 
-    def transmission_energy(self, i, distance):
+    def transmission_energy(self, number_of_packets, distance):
         # energy consumption for transmitting data on a distance
-        return self.number_of_packets[i] * info_amount * (Eelec + Eamp * distance**2)
+        return number_of_packets * info_amount * (Eelec + Eamp * distance**2)
 
-    def reception_energy(self, i):
+    def reception_energy(self, number_of_packets):
         # energy consumption for receiving data
-        return self.number_of_packets[i] * info_amount * Eelec
+        return number_of_packets * info_amount * Eelec
 
     def compute_angle_vectors(self, i, action):
         '''
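As the hunk above shows, both energy helpers are now pure functions of a packet count and a distance, i.e. the usual first-order radio model: E_tx = k * info_amount * (Eelec + Eamp * d^2) and E_rx = k * info_amount * Eelec. A minimal sketch, with placeholder constants rather than the values actually defined in wsn_env.py:

    # Placeholder constants for illustration only; wsn_env.py defines its own values.
    info_amount = 3072   # bits per packet (assumed)
    Eelec = 50e-9        # electronics energy per bit, in J/bit (assumed)
    Eamp = 100e-12       # amplifier energy per bit per m^2, in J/bit/m^2 (assumed)

    def transmission_energy(number_of_packets, distance):
        # cost of sending number_of_packets over `distance` metres
        return number_of_packets * info_amount * (Eelec + Eamp * distance ** 2)

    def reception_energy(number_of_packets):
        # cost of receiving number_of_packets
        return number_of_packets * info_amount * Eelec

    # e.g. forwarding 4 packets over 30 m, then receiving them at the next hop
    print(transmission_energy(4, 30), reception_energy(4))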
@@ -176,101 +174,83 @@ class WSNRoutingEnv(gym.Env):
 
         return np.arccos(np.clip(cosine_angle, -1, 1))
 
-    def compute_reward_angle(self, i, action, neighbors_i):
+    def compute_reward_angle(self, i, action):
         '''
         Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
         '''
-        if len(neighbors_i) == 1:
-            return 1
-        else:
-            # Calculate the angle in radians between the vectors formed by (i, action) and (i, base station)
-            angle = self.compute_angle_vectors(i, action)
-            # Normalize the angle
-            total_angles_without_direction = np.sum([abs(self.compute_angle_vectors(i, x)) for x in neighbors_i])
-            normalized_angle = abs(angle) / total_angles_without_direction
+        # Calculate the angle in radians between the vectors formed by (i, action) and (i, base station)
+        angle = self.compute_angle_vectors(i, action)
+        # Normalize the angle
+        normalized_angle = abs(angle) / np.pi
 
-            return 1 - normalized_angle
+        return 1 - normalized_angle
 
-    def compute_reward_distance(self, action, neighbors_i):
+    def compute_reward_distance(self, i, action):
         '''
         Compute the reward based on the distance to the next hop
         '''
-        if len(neighbors_i) == 1:
-            return 1
+        if action == self.n_sensors:
+            distance = np.linalg.norm(self.sensor_positions[i] - base_station_position)
         else:
-            total_distances = np.sum([neighbors_i[x]['distance'] for x in neighbors_i])
-            # Normalize the distance to the next hop
-            normalized_distance_to_next_hop = neighbors_i[action]['distance'] / total_distances
+            distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
+        # Normalize the distance to the next hop
+        normalized_distance_to_next_hop = distance / self.coverage_radius
 
-            return 1 - normalized_distance_to_next_hop
+        return 1 - normalized_distance_to_next_hop
 
-    def compute_reward_consumption_energy(self, action, neighbor_i):
+    def compute_reward_consumption_energy(self, i, action):
         '''
         Compute the reward based on the total energy consumption (transmission, reception)
         '''
-        if len(neighbor_i) == 1:
-            return 1
+        # Calculate the total energy consumption (transmission, reception)
+        if action == self.n_sensors:
+            total_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
         else:
-            # Calculate the total energy consumption (transmission, reception)
-            total_energy = neighbor_i[action]['transmission_energy'] + neighbor_i[action]['reception_energy']
-
-            # Normalize the total energy consumption
-            total_transmission_energies = np.sum([neighbor_i[x]['transmission_energy'] for x in neighbor_i])
-            total_reception_energies = np.sum([neighbor_i[x]['reception_energy'] for x in neighbor_i])
-            total_energies = total_transmission_energies + total_reception_energies
-            normalized_total_energy = total_energy / total_energies
-
-            return 1 - normalized_total_energy
+            distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
+            transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
+            reception_energy = self.reception_energy(self.number_of_packets[i])
+            total_energy = transmission_energy + reception_energy
+
+        # Normalize the total energy consumption
+        max_transmission_energy = self.transmission_energy(self.n_sensors * initial_number_of_packets, self.coverage_radius)
+        max_reception_energy = self.reception_energy(self.n_sensors * initial_number_of_packets)
+        max_total_energy = max_transmission_energy + max_reception_energy
+        normalized_total_energy = total_energy / max_total_energy
+
+        return 1 - normalized_total_energy
 
-    def compute_dispersion_remaining_energy(self, i, action, neighbor_i, remaining_energy_before):
-        '''
-        Compute the variation of sensors remaining energy after transmission and reception
-        '''
-        temporary_remaining_energy = copy.deepcopy(remaining_energy_before)
-        temporary_remaining_energy[i] -= neighbor_i[action]['transmission_energy']
-        if action != self.n_sensors:
-            temporary_remaining_energy[action] -= neighbor_i[action]['reception_energy']
-        dispersion_remaining_energy = np.std(temporary_remaining_energy)
-
-        return dispersion_remaining_energy
-
-    def compute_reward_dispersion_remaining_energy(self, i, action, neighbor_i, remaining_energy_before):
+    def compute_reward_dispersion_remaining_energy(self):
         '''
         Compute the reward based on the standard deviation of the remaining energy
         '''
-        if len(neighbor_i) == 1:
-            return 1
-        else:
-            dispersion_remaining_energy = self.compute_dispersion_remaining_energy(i, action, neighbor_i, remaining_energy_before)
-            # Normalize the standard deviation of the remaining energy
-            total_dispersion_remaining_energy = np.sum([self.compute_dispersion_remaining_energy(i, x, neighbor_i, remaining_energy_before) for x in neighbor_i])
-            normalized_dispersion_remaining_energy = dispersion_remaining_energy / total_dispersion_remaining_energy
+        dispersion_remaining_energy = np.std(self.remaining_energy)
+        # Normalize the standard deviation of the remaining energy
+        max_dispersion_remaining_energy = initial_energy / 2 # maximum standard deviation of the remaining energy if n_sensors is even
+        normalized_dispersion_remaining_energy = dispersion_remaining_energy / max_dispersion_remaining_energy
 
-            return 1 - normalized_dispersion_remaining_energy
+        return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
 
-    def compute_reward_number_of_packets(self, i, action, neighbors_i):
+    def compute_reward_number_of_packets(self, action):
         '''
-        Compute the reward based on the number of packets of each sensor in the neighborhood
+        Compute the reward based on the number of packets of the receiver
         '''
-        if len(neighbors_i) == 1 or action == self.n_sensors:
-            return 1
-        elif self.n_sensors in neighbors_i.keys():
-            return 0
-        else:
-            total_number_of_packets = np.sum([self.number_of_packets[x] for x in neighbors_i])
-            normalized_number_of_packets = self.number_of_packets[action] / total_number_of_packets
+        max_number_of_packets = self.n_sensors * initial_number_of_packets
+        if action == self.n_sensors:
+            normalized_number_of_packets = 0
+        else:
+            normalized_number_of_packets = self.number_of_packets[action] / max_number_of_packets
 
-            return 1 - normalized_number_of_packets
+        return 1 - normalized_number_of_packets
 
-    def compute_individual_rewards(self, i, action, neighbors_i, remaining_energy_before):
+    def compute_individual_rewards(self, i, action):
         '''
         Compute the individual rewards
         '''
-        reward_angle = self.compute_reward_angle(i, action, neighbors_i)
-        reward_distance = self.compute_reward_distance(action, neighbors_i)
-        reward_consumption_energy = self.compute_reward_consumption_energy(action, neighbors_i)
-        reward_dispersion_remaining_energy = self.compute_reward_dispersion_remaining_energy(i, action, neighbors_i, remaining_energy_before)
-        reward_number_of_packets = self.compute_reward_number_of_packets(i, action, neighbors_i)
+        reward_angle = self.compute_reward_angle(i, action)
+        reward_distance = self.compute_reward_distance(i, action)
+        reward_consumption_energy = self.compute_reward_consumption_energy(i, action)
+        reward_dispersion_remaining_energy = self.compute_reward_dispersion_remaining_energy()
+        reward_number_of_packets = self.compute_reward_number_of_packets(action)
 
         return [reward_angle, reward_distance, reward_consumption_energy, reward_dispersion_remaining_energy, reward_number_of_packets]
 
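The common thread in this hunk is that each reward component is now normalized against a fixed bound (pi for the angle, coverage_radius for the hop distance, a worst-case energy budget for consumption, initial_energy / 2 for the dispersion of remaining energy, and n_sensors * initial_number_of_packets for queue length) rather than against whatever neighborhood the old eligible_receivers() returned. A small sketch with assumed values, just to show the scaling of the first two components:

    import numpy as np

    coverage_radius = 50.0   # assumed value; the environment sets its own
    angle = np.pi / 4        # angle between (i -> action) and (i -> base station)
    hop_distance = 20.0      # distance from sensor i to the chosen receiver

    reward_angle = 1 - abs(angle) / np.pi                  # 1.0 when aimed straight at the base station
    reward_distance = 1 - hop_distance / coverage_radius   # approaches 1.0 for very short hops
    print(round(reward_angle, 3), round(reward_distance, 3))  # 0.75 0.6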
@@ -284,37 +264,4 @@ class WSNRoutingEnv(gym.Env):
         # Cancel the displacement if the sensor goes out of bounds
         for i in range(self.n_sensors):
             if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
-                self.sensor_positions[i] -= displacement[i]
-
-    def eligible_receivers(self, i):
-        '''
-        Get the list of eligible receivers for the current sensor
-        '''
-        eligible_receivers = {}
-        # eligibility for sensors apart the base station
-        for j in range(self.n_sensors):
-            if i != j:
-                distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[j])
-                transmission_energy = self.transmission_energy(i, distance)
-                reception_energy = self.reception_energy(j)
-                condition_i = (self.remaining_energy[i] >= transmission_energy) and (distance <= self.coverage_radius)
-                condition_j = (self.remaining_energy[j] >= reception_energy) and (distance <= self.coverage_radius)
-                if condition_i and condition_j:
-                    eligible_receivers[j] = {
-                        'distance': distance,
-                        'transmission_energy': transmission_energy,
-                        'reception_energy': reception_energy
-                    }
-
-        # eligibility for the base station
-        distance = np.linalg.norm(self.sensor_positions[i] - base_station_position)
-        transmission_energy = self.transmission_energy(i, distance)
-        condition_i = (self.remaining_energy[i] >= transmission_energy) and (distance <= self.coverage_radius)
-        if condition_i:
-            eligible_receivers[self.n_sensors] = {
-                'distance': distance,
-                'transmission_energy': transmission_energy,
-                'reception_energy': 0
-            }
-
-        return eligible_receivers
+                self.sensor_positions[i] -= displacement[i]
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gym-examples
-Version: 2.0.29
+Version: 2.0.31
 Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
 Home-page: https://github.com/gedji/CODES.git
 Author: Georges Djimefo
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=-Zi6pjtuGP4mWLhMJ5XoRMBap3wEBnvbILvg_aozuTw,193
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=M2bo_-wLWkXTJajQc7OgkT7vsXlCyGBTkfXA3ipFSc0,13265
+gym_examples-2.0.31.dist-info/METADATA,sha256=XZzdSuDCJ2CXoAF6osyaCT0nKN9xRQoL5GksbJvxnq4,411
+gym_examples-2.0.31.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-2.0.31.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-2.0.31.dist-info/RECORD,,
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=JrKY4JGnl71Q355qUxX0MfuMRVQmVenwidMKN4QdVEY,193
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=sCdEj9QwacOrNQjGw5Hq-CLdWLmCYSGCf-iW_byEWY4,15928
-gym_examples-2.0.29.dist-info/METADATA,sha256=iZt19QQ1a4-_hFcVN3KkzCDPk1i55lbM47K_zCVJcj0,411
-gym_examples-2.0.29.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-2.0.29.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-2.0.29.dist-info/RECORD,,