gym-examples 3.0.282__py3-none-any.whl → 3.0.284__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
  entry_point="gym_examples.envs:WSNRoutingEnv",
  )

- __version__ = "3.0.282"
+ __version__ = "3.0.284"
gym_examples/envs/wsn_env.py CHANGED
@@ -12,7 +12,7 @@ import os
  from collections import OrderedDict

  # Define the network parameters for the final reward function
- input_dim = 7 # length of the individual rewards vector
+ input_dim = 4 # length of the individual rewards vector
  output_dim = 1 # final reward

  Eelec = 50e-9 # energy consumption per bit in joules
@@ -26,7 +26,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
  latency_per_hop = 1 # latency per hop in seconds

  base_back_up_dir = "results/data/"
- max_reward = 1 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
+ max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action

  # Define the final reward function using an attention mechanism
  class Attention(nn.Module):
@@ -49,7 +49,7 @@ net = net.double() # Convert the weights to Double

  class WSNRoutingEnv(gym.Env):

- print_stats = False # Global flag to control printing of statistics
+ PRINT_STATS = "False" # Global flag to control printing of statistics

  def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4, num_timesteps = None, version = None):

@@ -74,7 +74,6 @@ class WSNRoutingEnv(gym.Env):
  self.episode_count = 0
  self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
  self.epsilon = 1e-10 # small value to avoid division by zero
- # Initialize the position of the sensors randomly

  # Define observation space
  self.observation_space = Tuple(
@@ -82,7 +81,10 @@ class WSNRoutingEnv(gym.Env):
  )

  # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
- self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+ # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+ # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
+ # self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station
+ self.action_space = Discrete((self.n_sensors + 1)**self.n_agents)

  self.reset()

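The new action space flattens the joint choice of all agents into a single Discrete of size (n_sensors + 1)**n_agents: each of the n_agents sensors picks one of n_sensors + 1 targets (the extra index standing for the base station), and the tuple of per-agent picks is encoded as one integer. A minimal sketch of that flattening, assuming this reading of the encoding; the counts below are illustrative and not taken from the package:

    import numpy as np

    n_agents, n_sensors = 3, 5
    base = n_sensors + 1                      # per-agent choices: sensors 0..4 plus the base station
    dims = (base,) * n_agents

    per_agent = (2, 5, 0)                     # agent 0 -> sensor 2, agent 1 -> base station, agent 2 -> sensor 0
    flat = int(np.ravel_multi_index(per_agent, dims))            # 102, a valid sample from Discrete(base**n_agents)
    assert tuple(int(d) for d in np.unravel_index(flat, dims)) == per_agent

This is the mapping that step() now undoes with the to_base_n helper added at the end of this diff, which peels base-(n_sensors + 1) digits off the flat index.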
@@ -113,12 +115,12 @@ class WSNRoutingEnv(gym.Env):

  return self._get_obs()

+
  def step(self, actions):
- actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
  self.steps += 1
  rewards = [-max_reward] * self.n_sensors
- # rewards = [0] * self.n_sensors
  dones = [False] * self.n_sensors
+ actions = self.to_base_n(actions, self.n_sensors + 1)
  for i, action in enumerate(actions):
  if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
  continue # Skip if sensor has no energy left or no packets to transmit
@@ -145,10 +147,8 @@ class WSNRoutingEnv(gym.Env):
  self.total_latency += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0

- # rewards[i] = self.compute_individual_rewards(i, action)
- rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
- # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
- dones[i] = True
+ rewards[i] = max_reward # Reward for transmitting data to the base station
+ dones[i] = True
  else:
  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
  if distance > self.coverage_radius:
@@ -173,18 +173,16 @@ class WSNRoutingEnv(gym.Env):
  self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0

- rewards[i] = self.compute_individual_rewards(i, action)
+ rewards = self.compute_individual_rewards(i, action)

  # Update the number of packets
  self.number_of_packets[action] += self.number_of_packets[i]
+
  self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
  # Calculate final reward
- rewards[i] = self.compute_attention_rewards(rewards[i])
- # rewards[i] = np.mean(rewards[i])
- for i in range(self.n_sensors):
- if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
- dones[i] = True
-
+ # rewards[i] = self.compute_attention_rewards(rewards[i])
+ rewards = np.mean(rewards)
+
  # Integrate the mobility of the sensors
  # self.integrate_mobility()

@@ -195,15 +193,14 @@ class WSNRoutingEnv(gym.Env):

  self.get_metrics()

- rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
- # rewards = np.sum(rewards) # Sum the rewards of all agents
- rewards = np.mean(rewards) # Average the rewards of all agents
- # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
- # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
- dones = all(dones) # Done if all agents are done
+ rewards = [reward.item() if isinstance(reward, torch.Tensor) else reward for reward in rewards] # Convert the reward to a float
+ for i in range(self.n_sensors):
+ if not dones[i]:
+ dones[i] = self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0

  return self._get_obs(), rewards, dones, self.get_metrics()

+
  def _get_obs(self):
  return [{'remaining_energy': np.array([e]),
  'consumption_energy': np.array([initial_energy - e]),
@@ -211,6 +208,7 @@ class WSNRoutingEnv(gym.Env):
  'number_of_packets': np.array([d])
  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]

+
  def _get_observation_space(self):
  return Dict(OrderedDict([
  ('remaining_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
@@ -219,23 +217,29 @@ class WSNRoutingEnv(gym.Env):
  ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int))
  ]))

+
  def get_state(self):
  return self._get_obs()

+
  def get_avail_actions(self):
  return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]

+
  def update_sensor_energies(self, i, delta_energy):
  self.remaining_energy[i] -= delta_energy

+
  def transmission_energy(self, number_of_packets, distance):
  # energy consumption for transmitting data on a distance
  return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

+
  def reception_energy(self, number_of_packets):
  # energy consumption for receiving data
  return number_of_packets * info_amount * Eelec

+
  def compute_angle_vectors(self, i, action):
  '''
  Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
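For context on the constants used above, transmission_energy and reception_energy follow a first-order radio energy model: E_tx = number_of_packets * info_amount * (Eelec + Eamp * d**2) and E_rx = number_of_packets * info_amount * Eelec. A rough numeric sketch; Eelec = 50e-9 comes from the top of wsn_env.py, while the Eamp and info_amount values below are placeholders because their definitions are not shown in this diff:

    Eelec = 50e-9            # J/bit, from the constants near the top of wsn_env.py
    Eamp = 100e-12           # J/bit/m^2, placeholder value (not shown in this diff)
    info_amount = 3072       # bits per packet, placeholder value (not shown in this diff)

    def transmission_energy(number_of_packets, distance):
        # same expression as the context line above
        return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

    def reception_energy(number_of_packets):
        return number_of_packets * info_amount * Eelec

    print(transmission_energy(1, 10.0))   # ~1.84e-4 J for one packet over 10 m with these placeholders
    print(reception_energy(1))            # ~1.54e-4 J to receive the same packet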
@@ -249,6 +253,7 @@ class WSNRoutingEnv(gym.Env):

  return np.arccos(np.clip(cosine_angle, -1, 1))

+
  def compute_reward_angle(self, i, action):
  '''
  Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -261,6 +266,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_angle, 0, 1)
  # return np.clip(- normalized_angle, -1, 1)

+
  def compute_reward_distance(self, i, action):
  '''
  Compute the reward based on the distance to the next hop
@@ -275,6 +281,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
  # return np.clip(-normalized_distance_to_next_hop, -1, 1)

+
  def compute_reward_consumption_energy(self, i, action):
  '''
  Compute the reward based on the total energy consumption (transmission, reception)
@@ -297,6 +304,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_total_energy, 0, 1)
  # return np.clip(- normalized_total_energy, -1, 1)

+
  def compute_reward_dispersion_remaining_energy(self):
  '''
  Compute the reward based on the standard deviation of the remaining energy
@@ -309,6 +317,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
  # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
  def compute_reward_number_of_packets(self, action):
  '''
  Compute the reward based on the number of packets of the receiver
@@ -322,6 +331,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_number_of_packets, 0, 1)
  # return np.clip(- normalized_number_of_packets, -1, 1)

+
  def compute_individual_rewards(self, i, action):
  '''
  Compute the individual rewards
@@ -343,8 +353,9 @@ class WSNRoutingEnv(gym.Env):

  rewards_performance = np.array([reward_latency, reward_network_throughput, reward_packet_delivery_ratio])

- return np.concatenate((rewards_energy, rewards_performance))
+ # return np.concatenate((rewards_energy, rewards_performance))
  # return np.array([reward_consumption_energy, reward_dispersion_remaining_energy])
+ return rewards_energy


  def compute_network_rewards(self):
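The return-value change above lines up with the earlier input_dim change (7 -> 4): rewards_performance contributes three terms (latency, throughput, packet delivery ratio), so dropping it from the concatenation leaves a four-element individual reward vector. A quick shape check, with placeholder numbers standing in for the actual reward values:

    import numpy as np

    rewards_energy = np.array([0.1, 0.2, 0.3, 0.4])    # four energy/geometry terms -> new input_dim = 4
    rewards_performance = np.array([0.5, 0.6, 0.7])    # reward_latency, reward_network_throughput, reward_packet_delivery_ratio

    assert np.concatenate((rewards_energy, rewards_performance)).shape == (7,)   # old return value / old input_dim
    assert rewards_energy.shape == (4,)                                          # new return value / new input_dim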
@@ -360,6 +371,7 @@ class WSNRoutingEnv(gym.Env):

  return np.concatenate((rewards_energy, rewards_performance))

+
  def network_reward_dispersion_remaining_energy(self):
  '''
  Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -372,6 +384,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
  # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

+
  def network_reward_consumption_energy(self):
  '''
  Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -384,6 +397,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_total_energy, 0, 1)
  # return np.clip(- normalized_total_energy, -1, 1)

+
  def compute_reward_packet_delivery_ratio(self):
  '''
  Compute the reward based on the packet delivery ratio
@@ -391,6 +405,7 @@ class WSNRoutingEnv(gym.Env):
  packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
  return np.clip(packet_delivery_ratio, 0, 1)

+
  def compute_reward_latency(self):
  '''
  Compute the reward based on the average latency
@@ -402,6 +417,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_latency, 0, 1)
  # return np.clip(- normalized_latency, -1, 1)

+
  def compute_reward_network_throughput(self):
  '''
  Compute the reward based on the network throughput
@@ -411,6 +427,7 @@ class WSNRoutingEnv(gym.Env):
  normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
  return np.clip(normalized_throughput, 0, 1)

+
  def compute_attention_rewards(self, rewards):
  '''
  Compute the attention-based rewards
@@ -419,6 +436,7 @@ class WSNRoutingEnv(gym.Env):
  final_reward = net(rewards)
  return final_reward

+
  def integrate_mobility(self):
  '''
  Integrate the mobility of the sensors after each step
@@ -431,6 +449,7 @@ class WSNRoutingEnv(gym.Env):
  if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
  self.sensor_positions[i] -= displacement[i]

+
  def get_metrics(self):
  # Calculate network throughput
  self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
@@ -449,4 +468,24 @@ class WSNRoutingEnv(gym.Env):
  "packet_delivery_ratio": self.packet_delivery_ratio,
  "network_lifetime": self.network_lifetime,
  "average_latency": self.average_latency
- }
+ }
+
+
+ def find_next_sensor(self):
+ for offset in range(1, self.n_sensors):
+ next_index = (self.current_sensor + offset) % self.n_sensors
+ if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
+ return next_index
+ return None # If no such sensor is found
+
+
+ def to_base_n(self, number, base):
+ """Convert a number to a base-n number."""
+ if number == 0:
+ return [0] * (base - 1)
+
+ digits = []
+ while number:
+ digits.append(number % base)
+ number //= base
+ return digits[::-1] # Reverse the list to get the correct order
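For reference, this is how the new to_base_n helper behaves when step() uses it to unpack the flattened action; the standalone copy and the agent/sensor counts below are only for illustration:

    def to_base_n(number, base):
        # Standalone mirror of WSNRoutingEnv.to_base_n, for illustration only.
        if number == 0:
            return [0] * (base - 1)
        digits = []
        while number:
            digits.append(number % base)
            number //= base
        return digits[::-1]

    # With 3 agents and 5 sensors (base = n_sensors + 1 = 6), action index 102 decodes to one target per agent:
    print(to_base_n(102, 6))    # [2, 5, 0] -> agent 0: sensor 2, agent 1: base station (index 5), agent 2: sensor 0
    # Small indices produce fewer digits than there are agents, e.g. to_base_n(7, 6) == [1, 1],
    # so callers expecting exactly n_agents entries would need to left-pad with zeros.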
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-examples
- Version: 3.0.282
+ Version: 3.0.284
  Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
  Home-page: https://github.com/gedji/CODES.git
  Author: Georges Djimefo
@@ -0,0 +1,7 @@
+ gym_examples/__init__.py,sha256=SbYVbDAngXH8S9SM6aQijqGwMUt-FSXYtOV00-_0ahI,166
+ gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+ gym_examples/envs/wsn_env.py,sha256=VzbARUwPIdS6-RkQND3dxx0lLheFpP6NqaBwT5JY91o,23503
+ gym_examples-3.0.284.dist-info/METADATA,sha256=U9huJCLpn-NIXHhbpD97MFIY_WpCrOxX7ebSgLBsf_w,412
+ gym_examples-3.0.284.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ gym_examples-3.0.284.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+ gym_examples-3.0.284.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- gym_examples/__init__.py,sha256=fBEwdHLyLAy9RQVdFO5HIxVvlyPKD2yEcyS6eJvwA80,166
- gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
- gym_examples/envs/wsn_env.py,sha256=bwXjZ9uS41ChZ7DvgeIwhPkLVUfkRzWCwPQjFDs34ho,23218
- gym_examples-3.0.282.dist-info/METADATA,sha256=PgGDOKmwo0s0Vx5X8CEgJntslyORoybP5IDEE-4CI7M,412
- gym_examples-3.0.282.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- gym_examples-3.0.282.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
- gym_examples-3.0.282.dist-info/RECORD,,