gym-examples 3.0.280__py3-none-any.whl → 3.0.282__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
  entry_point="gym_examples.envs:WSNRoutingEnv",
  )
 
- __version__ = "3.0.280"
+ __version__ = "3.0.282"

gym_examples/envs/wsn_env.py CHANGED
@@ -12,7 +12,7 @@ import os
  from collections import OrderedDict
 
  # Define the network parameters for the final reward function
- input_dim = 4 # length of the individual rewards vector
+ input_dim = 7 # length of the individual rewards vector
  output_dim = 1 # final reward
 
  Eelec = 50e-9 # energy consumption per bit in joules
@@ -26,7 +26,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
  latency_per_hop = 1 # latency per hop in seconds
 
  base_back_up_dir = "results/data/"
- max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
+ max_reward = 1 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
 
  # Define the final reward function using an attention mechanism
  class Attention(nn.Module):
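
The body of the Attention class is not part of this diff, so the sketch below is only an illustration of what an attention-style reward aggregator with these dimensions could look like; the layer layout and names are assumptions, not the package's actual implementation.

    import torch
    import torch.nn as nn

    class AttentionSketch(nn.Module):
        # Hypothetical layout: score each reward component, then reduce to a single scalar.
        def __init__(self, input_dim, output_dim):
            super().__init__()
            self.scores = nn.Linear(input_dim, input_dim)  # one attention score per reward component
            self.out = nn.Linear(input_dim, output_dim)    # reduction to the final reward

        def forward(self, rewards):
            weights = torch.softmax(self.scores(rewards), dim=-1)
            return self.out(weights * rewards)

    net = AttentionSketch(7, 1).double()                    # mirrors input_dim = 7, output_dim = 1 and net.double()
    final_reward = net(torch.ones(7, dtype=torch.float64))  # e.g. the all-ones vector earned for reaching the base station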
@@ -49,7 +49,7 @@ net = net.double() # Convert the weights to Double
 
  class WSNRoutingEnv(gym.Env):
 
- PRINT_STATS = "False" # Global flag to control printing of statistics
+ print_stats = False # Global flag to control printing of statistics
 
  def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4, num_timesteps = None, version = None):
 
@@ -74,7 +74,7 @@ class WSNRoutingEnv(gym.Env):
  self.episode_count = 0
  self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
  self.epsilon = 1e-10 # small value to avoid division by zero
- self.current_sensor = 0 # Index of the current sensor
+ # Initialize the position of the sensors randomly
 
  # Define observation space
  self.observation_space = Tuple(
@@ -82,9 +82,7 @@ class WSNRoutingEnv(gym.Env):
  )
 
  # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
- # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
- # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
- self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station
+ self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
 
  self.reset()
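
With this release the action space is again the joint MultiDiscrete space: one next-hop choice per agent, with the extra index n_sensors standing for the base station. A minimal sketch of what the environment now expects from a policy (it assumes, as the rewritten step() below suggests, that n_agents equals n_sensors; the numbers are illustrative):

    import numpy as np
    from gym.spaces import MultiDiscrete

    n_sensors = 20                                              # the default from __init__ above
    action_space = MultiDiscrete([n_sensors + 1] * n_sensors)   # one choice in {0, ..., n_sensors} per sensor

    actions = action_space.sample()       # e.g. array([ 3, 20,  7, ...]) with shape (n_sensors,)
    assert actions.shape == (n_sensors,)
    assert np.all(actions <= n_sensors)   # index n_sensors means "transmit directly to the base station"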
 
@@ -115,14 +113,13 @@ class WSNRoutingEnv(gym.Env):
 
  return self._get_obs()
 
-
  def step(self, actions):
+ actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
  self.steps += 1
- rewards = - max_reward
- # rewards = 0
- dones = False
- actions = np.array([self.current_sensor, actions])
- for i, action in [actions]:
+ rewards = [-max_reward] * self.n_sensors
+ # rewards = [0] * self.n_sensors
+ dones = [False] * self.n_sensors
+ for i, action in enumerate(actions):
  if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
  continue # Skip if sensor has no energy left or no packets to transmit
 
@@ -137,11 +134,6 @@ class WSNRoutingEnv(gym.Env):
  transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
  if self.remaining_energy[i] < transmission_energy:
  self.remaining_energy[i] = 0
- next_sensor = self.find_next_sensor()
- if next_sensor is None:
- dones = True
- else:
- self.current_sensor = next_sensor
  continue # Skip if the sensor does not have enough energy to transmit data to the base station
 
  self.update_sensor_energies(i, transmission_energy)
@@ -153,12 +145,10 @@ class WSNRoutingEnv(gym.Env):
  self.total_latency += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- rewards = max_reward # Reward for transmitting data to the base station
- next_sensor = self.find_next_sensor()
- if next_sensor is None:
- dones = True
- else:
- self.current_sensor = next_sensor
+ # rewards[i] = self.compute_individual_rewards(i, action)
+ rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
+ # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
+ dones[i] = True
  else:
  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
  if distance > self.coverage_radius:
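
The base-station reward is now a full-length vector rather than a scalar, so it has the same shape as the output of compute_individual_rewards and can be fed to the attention network unchanged. With the constants from this release:

    import numpy as np

    input_dim, max_reward = 7, 1
    rewards_i = np.ones(input_dim) * max_reward   # array([1., 1., 1., 1., 1., 1., 1.])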
@@ -167,21 +157,11 @@ class WSNRoutingEnv(gym.Env):
  transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
  reception_energy = self.reception_energy(self.number_of_packets[i])
  if self.remaining_energy[i] < transmission_energy:
- self.remaining_energy[i] = 0
- next_sensor = self.find_next_sensor()
- if next_sensor is None:
- dones = True
- else:
- self.current_sensor = next_sensor
+ self.remaining_energy[i] = 0
  continue # Skip if the sensor does not have enough energy to transmit data to the next hop
  if self.remaining_energy[action] < reception_energy:
  self.number_of_packets[i] = 0
  self.remaining_energy[action] = 0
- next_sensor = self.find_next_sensor()
- if next_sensor is None:
- dones = True
- else:
- self.current_sensor = next_sensor
  continue # Skip if the next hop does not have enough energy to receive data
 
  self.update_sensor_energies(i, transmission_energy)
@@ -193,17 +173,18 @@ class WSNRoutingEnv(gym.Env):
  self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- rewards = self.compute_individual_rewards(i, action)
+ rewards[i] = self.compute_individual_rewards(i, action)
 
  # Update the number of packets
  self.number_of_packets[action] += self.number_of_packets[i]
-
- self.current_sensor = action
  self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
  # Calculate final reward
- # rewards[i] = self.compute_attention_rewards(rewards[i])
- rewards = np.mean(rewards)
-
+ rewards[i] = self.compute_attention_rewards(rewards[i])
+ # rewards[i] = np.mean(rewards[i])
+ for i in range(self.n_sensors):
+ if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
+ dones[i] = True
+
  # Integrate the mobility of the sensors
  # self.integrate_mobility()
 
@@ -214,54 +195,47 @@ class WSNRoutingEnv(gym.Env):
 
  self.get_metrics()
 
- rewards = rewards.item() if isinstance(rewards, torch.Tensor) else rewards # Convert the reward to a float
- if not dones:
- dones = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
+ rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
+ # rewards = np.sum(rewards) # Sum the rewards of all agents
+ rewards = np.mean(rewards) # Average the rewards of all agents
+ # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
+ # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
+ dones = all(dones) # Done if all agents are done
 
  return self._get_obs(), rewards, dones, self.get_metrics()
 
-
  def _get_obs(self):
  return [{'remaining_energy': np.array([e]),
  'consumption_energy': np.array([initial_energy - e]),
  'sensor_positions': p,
- 'number_of_packets': np.array([d]),
- 'curent_sensor': np.array([self.current_sensor])
+ 'number_of_packets': np.array([d])
  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
-
  def _get_observation_space(self):
  return Dict(OrderedDict([
  ('remaining_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
  ('consumption_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
  ('sensor_positions', Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64)),
- ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)),
- ('current_sensor', Box(low=0, high=self.n_sensors - 1, shape=(1,), dtype=int))
+ ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int))
  ]))
 
-
  def get_state(self):
  return self._get_obs()
 
-
  def get_avail_actions(self):
  return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]
 
-
  def update_sensor_energies(self, i, delta_energy):
  self.remaining_energy[i] -= delta_energy
 
-
  def transmission_energy(self, number_of_packets, distance):
  # energy consumption for transmitting data on a distance
  return number_of_packets * info_amount * (Eelec + Eamp * distance**2)
 
-
  def reception_energy(self, number_of_packets):
  # energy consumption for receiving data
  return number_of_packets * info_amount * Eelec
 
-
  def compute_angle_vectors(self, i, action):
  '''
  Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
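
The two energy helpers above implement the usual first-order radio model, E_tx = k * (Eelec + Eamp * d^2) and E_rx = k * Eelec for k bits over distance d. Only Eelec = 50e-9 J/bit is visible in this diff; the Eamp and info_amount values below are illustrative placeholders, not the package's actual constants:

    Eelec = 50e-9        # J per bit, from the diff
    Eamp = 100e-12       # J per bit per m^2 (assumed for illustration)
    info_amount = 3072   # bits per packet (assumed for illustration)

    def transmission_energy(number_of_packets, distance):
        return number_of_packets * info_amount * (Eelec + Eamp * distance ** 2)

    def reception_energy(number_of_packets):
        return number_of_packets * info_amount * Eelec

    print(transmission_energy(1, 50))  # one packet over a 50 m hop: ~9.2e-4 J
    print(reception_energy(1))         # receiving that packet:      ~1.5e-4 J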
@@ -275,7 +249,6 @@ class WSNRoutingEnv(gym.Env):
 
  return np.arccos(np.clip(cosine_angle, -1, 1))
 
-
  def compute_reward_angle(self, i, action):
  '''
  Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -288,7 +261,6 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_angle, 0, 1)
  # return np.clip(- normalized_angle, -1, 1)
 
-
  def compute_reward_distance(self, i, action):
  '''
  Compute the reward based on the distance to the next hop
@@ -303,7 +275,6 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
  # return np.clip(-normalized_distance_to_next_hop, -1, 1)
 
-
  def compute_reward_consumption_energy(self, i, action):
  '''
  Compute the reward based on the total energy consumption (transmission, reception)
@@ -326,7 +297,6 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_total_energy, 0, 1)
  # return np.clip(- normalized_total_energy, -1, 1)
 
-
  def compute_reward_dispersion_remaining_energy(self):
  '''
  Compute the reward based on the standard deviation of the remaining energy
@@ -339,7 +309,6 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
  # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
-
  def compute_reward_number_of_packets(self, action):
  '''
  Compute the reward based on the number of packets of the receiver
@@ -353,7 +322,6 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_number_of_packets, 0, 1)
  # return np.clip(- normalized_number_of_packets, -1, 1)
 
-
  def compute_individual_rewards(self, i, action):
  '''
  Compute the individual rewards
@@ -375,9 +343,8 @@ class WSNRoutingEnv(gym.Env):
 
  rewards_performance = np.array([reward_latency, reward_network_throughput, reward_packet_delivery_ratio])
 
- # return np.concatenate((rewards_energy, rewards_performance))
+ return np.concatenate((rewards_energy, rewards_performance))
  # return np.array([reward_consumption_energy, reward_dispersion_remaining_energy])
- return rewards_energy
 
 
  def compute_network_rewards(self):
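
Restoring this return value means each sensor's individual reward is the concatenation of four energy-related terms and the three performance terms (latency, network throughput, packet delivery ratio), which is what motivates the input_dim change from 4 to 7 at the top of the file. Schematically, with placeholder numbers:

    import numpy as np

    rewards_energy = np.array([0.8, 0.6, 0.9, 0.7])   # the four energy-related rewards (values illustrative)
    rewards_performance = np.array([0.5, 0.4, 1.0])   # reward_latency, reward_network_throughput, reward_packet_delivery_ratio

    individual_rewards = np.concatenate((rewards_energy, rewards_performance))
    assert individual_rewards.shape == (7,)            # matches the new input_dim = 7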
@@ -393,7 +360,6 @@ class WSNRoutingEnv(gym.Env):
 
  return np.concatenate((rewards_energy, rewards_performance))
 
-
  def network_reward_dispersion_remaining_energy(self):
  '''
  Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -406,7 +372,6 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
  # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
-
  def network_reward_consumption_energy(self):
  '''
  Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -419,7 +384,6 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_total_energy, 0, 1)
  # return np.clip(- normalized_total_energy, -1, 1)
 
-
  def compute_reward_packet_delivery_ratio(self):
  '''
  Compute the reward based on the packet delivery ratio
@@ -427,7 +391,6 @@ class WSNRoutingEnv(gym.Env):
  packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
  return np.clip(packet_delivery_ratio, 0, 1)
 
-
  def compute_reward_latency(self):
  '''
  Compute the reward based on the average latency
@@ -439,7 +402,6 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_latency, 0, 1)
  # return np.clip(- normalized_latency, -1, 1)
 
-
  def compute_reward_network_throughput(self):
  '''
  Compute the reward based on the network throughput
@@ -449,7 +411,6 @@ class WSNRoutingEnv(gym.Env):
  normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
  return np.clip(normalized_throughput, 0, 1)
 
-
  def compute_attention_rewards(self, rewards):
  '''
  Compute the attention-based rewards
@@ -458,7 +419,6 @@ class WSNRoutingEnv(gym.Env):
  final_reward = net(rewards)
  return final_reward
 
-
  def integrate_mobility(self):
  '''
  Integrate the mobility of the sensors after each step
@@ -471,7 +431,6 @@ class WSNRoutingEnv(gym.Env):
  if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
  self.sensor_positions[i] -= displacement[i]
 
-
  def get_metrics(self):
  # Calculate network throughput
  self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
@@ -490,12 +449,4 @@ class WSNRoutingEnv(gym.Env):
  "packet_delivery_ratio": self.packet_delivery_ratio,
  "network_lifetime": self.network_lifetime,
  "average_latency": self.average_latency
- }
-
-
- def find_next_sensor(self):
- for offset in range(1, self.n_sensors):
- next_index = (self.current_sensor + offset) % self.n_sensors
- if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
- return next_index
- return None # If no such sensor is found
+ }
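
Taken together, step() now takes one action per agent, averages the per-sensor rewards into a single float, and signals done only when every sensor has run out of energy or packets. A minimal rollout sketch under those assumptions (the import path follows the registered entry_point gym_examples.envs:WSNRoutingEnv; argument values are illustrative):

    from gym_examples.envs import WSNRoutingEnv

    env = WSNRoutingEnv(n_sensors=20)        # coverage_radius, num_timesteps and version keep their defaults
    obs = env.reset()

    for _ in range(1000):                    # cap the rollout length for this sketch
        actions = env.action_space.sample()  # one next-hop index per sensor (MultiDiscrete)
        obs, reward, done, info = env.step(actions)
        # reward: mean over sensors (a float); info: the metrics dict returned by get_metrics()
        if done:
            break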
gym_examples-3.0.280.dist-info/METADATA → gym_examples-3.0.282.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-examples
- Version: 3.0.280
+ Version: 3.0.282
  Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
  Home-page: https://github.com/gedji/CODES.git
  Author: Georges Djimefo
gym_examples-3.0.282.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ gym_examples/__init__.py,sha256=fBEwdHLyLAy9RQVdFO5HIxVvlyPKD2yEcyS6eJvwA80,166
+ gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+ gym_examples/envs/wsn_env.py,sha256=bwXjZ9uS41ChZ7DvgeIwhPkLVUfkRzWCwPQjFDs34ho,23218
+ gym_examples-3.0.282.dist-info/METADATA,sha256=PgGDOKmwo0s0Vx5X8CEgJntslyORoybP5IDEE-4CI7M,412
+ gym_examples-3.0.282.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ gym_examples-3.0.282.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+ gym_examples-3.0.282.dist-info/RECORD,,
gym_examples-3.0.280.dist-info/RECORD REMOVED
@@ -1,7 +0,0 @@
- gym_examples/__init__.py,sha256=31Vwz4E6SJG5a7ryaw856CyfouCCdUwVnXmVGPjNG34,166
- gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
- gym_examples/envs/wsn_env.py,sha256=pi4-ErjIpxM1jrWjMr8vbUHmp0YSY9YaGSTXsL2k50I,24121
- gym_examples-3.0.280.dist-info/METADATA,sha256=PqwHgxTiRoujD1Lb5DqobUiD4YNdp__uiv45wkx38oQ,412
- gym_examples-3.0.280.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- gym_examples-3.0.280.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
- gym_examples-3.0.280.dist-info/RECORD,,