gym-examples 3.0.282__py3-none-any.whl → 3.0.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -5,4 +5,4 @@ register(
  entry_point="gym_examples.envs:WSNRoutingEnv",
  )
 
- __version__ = "3.0.282"
+ __version__ = "3.0.283"
gym_examples/envs/wsn_env.py CHANGED
@@ -12,7 +12,7 @@ import os
  from collections import OrderedDict
 
  # Define the network parameters for the final reward function
- input_dim = 7 # length of the individual rewards vector
+ input_dim = 4 # length of the individual rewards vector
  output_dim = 1 # final reward
 
  Eelec = 50e-9 # energy consumption per bit in joules
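For context on the `input_dim` change: this constant sizes the reward vector fed to the attention network defined further down in this file (`class Attention(nn.Module)`, `net = net.double()`). The module's internals are not part of this diff, so the block below is only a minimal sketch of an attention-style scorer with matching dimensions, not the package's actual `Attention` class:

import torch
import torch.nn as nn

input_dim = 4   # length of the individual rewards vector (as of 3.0.283)
output_dim = 1  # final reward

class Attention(nn.Module):
    # Hypothetical sketch: score each reward component, softmax the scores,
    # and return the attention-weighted sum as the scalar final reward.
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.scorer = nn.Linear(1, 1)        # assumed per-component scorer
        self.out = nn.Linear(1, output_dim)  # assumed output projection

    def forward(self, rewards):              # rewards: tensor of shape (input_dim,)
        x = rewards.view(-1, 1)              # one row per reward component
        weights = torch.softmax(self.scorer(x), dim=0)
        pooled = (weights * x).sum(dim=0)    # attention-weighted sum
        return self.out(pooled)

net = Attention(input_dim, output_dim)
net = net.double()                           # weights in double precision, as in the env
final_reward = net(torch.ones(input_dim, dtype=torch.float64))  # example 4-dim reward vector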
@@ -26,7 +26,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
  latency_per_hop = 1 # latency per hop in seconds
 
  base_back_up_dir = "results/data/"
- max_reward = 1 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
+ max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action
 
  # Define the final reward function using an attention mechanism
  class Attention(nn.Module):
@@ -49,7 +49,7 @@ net = net.double() # Convert the weights to Double
 
  class WSNRoutingEnv(gym.Env):
 
- print_stats = False # Global flag to control printing of statistics
+ PRINT_STATS = "False" # Global flag to control printing of statistics
 
  def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4, num_timesteps = None, version = None):
 
@@ -74,7 +74,7 @@ class WSNRoutingEnv(gym.Env):
  self.episode_count = 0
  self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
  self.epsilon = 1e-10 # small value to avoid division by zero
- # Initialize the position of the sensors randomly
+ self.current_sensor = 0 # Index of the current sensor
 
  # Define observation space
  self.observation_space = Tuple(
@@ -82,7 +82,9 @@ class WSNRoutingEnv(gym.Env):
  )
 
  # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
- self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+ # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+ # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
+ self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station
 
  self.reset()
 
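With this change the environment exposes a single `Discrete(n_sensors + 1)` action space instead of one sub-action per agent: indices `0..n_sensors-1` select a next-hop sensor and, per the `# +1 for the base station` comment, the last index routes to the base station. A minimal sketch of how such an action is sampled and interpreted, assuming the standard gym `Discrete` API and the default `n_sensors = 20`:

from gym.spaces import Discrete

n_sensors = 20                          # default from __init__
action_space = Discrete(n_sensors + 1)  # 0..n_sensors-1 = next-hop sensor, n_sensors = base station

action = action_space.sample()          # a single int per step, not one action per agent
if action == n_sensors:
    print("send the current sensor's packets to the base station")
else:
    print(f"forward the current sensor's packets to sensor {action}")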
@@ -113,13 +115,14 @@ class WSNRoutingEnv(gym.Env):
 
  return self._get_obs()
 
+
  def step(self, actions):
- actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
  self.steps += 1
- rewards = [-max_reward] * self.n_sensors
- # rewards = [0] * self.n_sensors
- dones = [False] * self.n_sensors
- for i, action in enumerate(actions):
+ rewards = - max_reward
+ # rewards = 0
+ dones = False
+ actions = np.array([self.current_sensor, actions])
+ for i, action in [actions]:
  if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
  continue # Skip if sensor has no energy left or no packets to transmit
 
@@ -134,6 +137,11 @@ class WSNRoutingEnv(gym.Env):
  transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
  if self.remaining_energy[i] < transmission_energy:
  self.remaining_energy[i] = 0
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  continue # Skip if the sensor does not have enough energy to transmit data to the base station
 
  self.update_sensor_energies(i, transmission_energy)
@@ -145,10 +153,12 @@ class WSNRoutingEnv(gym.Env):
  self.total_latency += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- # rewards[i] = self.compute_individual_rewards(i, action)
- rewards[i] = np.ones(input_dim) * max_reward # Reward for transmitting data to the base station
- # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
- dones[i] = True
+ rewards = max_reward # Reward for transmitting data to the base station
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  else:
  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
  if distance > self.coverage_radius:
@@ -157,11 +167,21 @@ class WSNRoutingEnv(gym.Env):
  transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
  reception_energy = self.reception_energy(self.number_of_packets[i])
  if self.remaining_energy[i] < transmission_energy:
- self.remaining_energy[i] = 0
+ self.remaining_energy[i] = 0
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  continue # Skip if the sensor does not have enough energy to transmit data to the next hop
  if self.remaining_energy[action] < reception_energy:
  self.number_of_packets[i] = 0
  self.remaining_energy[action] = 0
+ next_sensor = self.find_next_sensor()
+ if next_sensor is None:
+ dones = True
+ else:
+ self.current_sensor = next_sensor
  continue # Skip if the next hop does not have enough energy to receive data
 
  self.update_sensor_energies(i, transmission_energy)
@@ -173,18 +193,17 @@ class WSNRoutingEnv(gym.Env):
  self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
  self.packet_latency[i] = 0
 
- rewards[i] = self.compute_individual_rewards(i, action)
+ rewards = self.compute_individual_rewards(i, action)
 
  # Update the number of packets
  self.number_of_packets[action] += self.number_of_packets[i]
+
+ self.current_sensor = action
  self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
  # Calculate final reward
- rewards[i] = self.compute_attention_rewards(rewards[i])
- # rewards[i] = np.mean(rewards[i])
- for i in range(self.n_sensors):
- if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
- dones[i] = True
-
+ # rewards[i] = self.compute_attention_rewards(rewards[i])
+ rewards = np.mean(rewards)
+
  # Integrate the mobility of the sensors
  # self.integrate_mobility()
 
@@ -195,47 +214,54 @@ class WSNRoutingEnv(gym.Env):
 
  self.get_metrics()
 
- rewards = [r.item() if isinstance(r, torch.Tensor) else r for r in rewards] # Convert the rewards to a list of floats
- # rewards = np.sum(rewards) # Sum the rewards of all agents
- rewards = np.mean(rewards) # Average the rewards of all agents
- # rewards = np.mean(self.compute_network_rewards()) # Average the rewards of all agents
- # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
- dones = all(dones) # Done if all agents are done
+ rewards = rewards.item() if isinstance(rewards, torch.Tensor) else rewards # Convert the reward to a float
+ if not dones:
+ dones = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
 
  return self._get_obs(), rewards, dones, self.get_metrics()
 
+
  def _get_obs(self):
  return [{'remaining_energy': np.array([e]),
  'consumption_energy': np.array([initial_energy - e]),
  'sensor_positions': p,
- 'number_of_packets': np.array([d])
+ 'number_of_packets': np.array([d]),
+ 'curent_sensor': np.array([self.current_sensor])
  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
+
  def _get_observation_space(self):
  return Dict(OrderedDict([
  ('remaining_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
  ('consumption_energy', Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64)),
  ('sensor_positions', Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64)),
- ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int))
+ ('number_of_packets', Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)),
+ ('current_sensor', Box(low=0, high=self.n_sensors - 1, shape=(1,), dtype=int))
  ]))
 
+
  def get_state(self):
  return self._get_obs()
 
+
  def get_avail_actions(self):
  return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]
 
+
  def update_sensor_energies(self, i, delta_energy):
  self.remaining_energy[i] -= delta_energy
 
+
  def transmission_energy(self, number_of_packets, distance):
  # energy consumption for transmitting data on a distance
  return number_of_packets * info_amount * (Eelec + Eamp * distance**2)
 
+
  def reception_energy(self, number_of_packets):
  # energy consumption for receiving data
  return number_of_packets * info_amount * Eelec
 
+
  def compute_angle_vectors(self, i, action):
  '''
  Compute the angle in radians between the vectors formed by (i, action) and (i, base station)
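The `transmission_energy` and `reception_energy` helpers in the hunk above follow the usual first-order radio model: transmitting k packets of `info_amount` bits over distance d costs k · info_amount · (Eelec + Eamp · d²), while receiving them costs k · info_amount · Eelec. Only `Eelec = 50e-9` is visible in this diff; the `Eamp` and `info_amount` values below are illustrative assumptions, not the package's constants:

Eelec = 50e-9        # J/bit, from this diff
Eamp = 100e-12       # J/bit/m^2 -- assumed for illustration, not shown in this diff
info_amount = 3072   # bits per packet -- assumed for illustration, not shown in this diff

def transmission_energy(number_of_packets, distance):
    # first-order radio model: electronics cost plus amplifier cost growing with distance^2
    return number_of_packets * info_amount * (Eelec + Eamp * distance**2)

def reception_energy(number_of_packets):
    # receiving only pays the electronics cost
    return number_of_packets * info_amount * Eelec

print(transmission_energy(1, 30))  # 3072 * (50e-9 + 100e-12 * 900) ~= 4.3e-4 J
print(reception_energy(1))         # 3072 * 50e-9 ~= 1.5e-4 J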
@@ -249,6 +275,7 @@ class WSNRoutingEnv(gym.Env):
 
  return np.arccos(np.clip(cosine_angle, -1, 1))
 
+
  def compute_reward_angle(self, i, action):
  '''
  Compute the reward based on the angle between the vectors formed by (i, action) and (i, base station)
@@ -261,6 +288,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_angle, 0, 1)
  # return np.clip(- normalized_angle, -1, 1)
 
+
  def compute_reward_distance(self, i, action):
  '''
  Compute the reward based on the distance to the next hop
@@ -275,6 +303,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
  # return np.clip(-normalized_distance_to_next_hop, -1, 1)
 
+
  def compute_reward_consumption_energy(self, i, action):
  '''
  Compute the reward based on the total energy consumption (transmission, reception)
@@ -297,6 +326,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_total_energy, 0, 1)
  # return np.clip(- normalized_total_energy, -1, 1)
 
+
  def compute_reward_dispersion_remaining_energy(self):
  '''
  Compute the reward based on the standard deviation of the remaining energy
@@ -309,6 +339,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
  # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
+
  def compute_reward_number_of_packets(self, action):
  '''
  Compute the reward based on the number of packets of the receiver
@@ -322,6 +353,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_number_of_packets, 0, 1)
  # return np.clip(- normalized_number_of_packets, -1, 1)
 
+
  def compute_individual_rewards(self, i, action):
  '''
  Compute the individual rewards
@@ -343,8 +375,9 @@ class WSNRoutingEnv(gym.Env):
 
  rewards_performance = np.array([reward_latency, reward_network_throughput, reward_packet_delivery_ratio])
 
- return np.concatenate((rewards_energy, rewards_performance))
+ # return np.concatenate((rewards_energy, rewards_performance))
  # return np.array([reward_consumption_energy, reward_dispersion_remaining_energy])
+ return rewards_energy
 
 
  def compute_network_rewards(self):
@@ -360,6 +393,7 @@ class WSNRoutingEnv(gym.Env):
 
  return np.concatenate((rewards_energy, rewards_performance))
 
+
  def network_reward_dispersion_remaining_energy(self):
  '''
  Compute the reward based on the standard deviation of the remaining energy at the network level
@@ -372,6 +406,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
  # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
 
+
  def network_reward_consumption_energy(self):
  '''
  Compute the reward based on the total energy consumption (transmission, reception) at the network level
@@ -384,6 +419,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_total_energy, 0, 1)
  # return np.clip(- normalized_total_energy, -1, 1)
 
+
  def compute_reward_packet_delivery_ratio(self):
  '''
  Compute the reward based on the packet delivery ratio
@@ -391,6 +427,7 @@ class WSNRoutingEnv(gym.Env):
  packet_delivery_ratio = self.packets_delivered / (self.total_packets_sent_by_sensors + self.epsilon) if self.total_packets_sent_by_sensors > 0 else 0
  return np.clip(packet_delivery_ratio, 0, 1)
 
+
  def compute_reward_latency(self):
  '''
  Compute the reward based on the average latency
@@ -402,6 +439,7 @@ class WSNRoutingEnv(gym.Env):
  return np.clip(1 - normalized_latency, 0, 1)
  # return np.clip(- normalized_latency, -1, 1)
 
+
  def compute_reward_network_throughput(self):
  '''
  Compute the reward based on the network throughput
@@ -411,6 +449,7 @@ class WSNRoutingEnv(gym.Env):
  normalized_throughput = network_throughput / (maximum_throughput + self.epsilon)
  return np.clip(normalized_throughput, 0, 1)
 
+
  def compute_attention_rewards(self, rewards):
  '''
  Compute the attention-based rewards
@@ -419,6 +458,7 @@ class WSNRoutingEnv(gym.Env):
  final_reward = net(rewards)
  return final_reward
 
+
  def integrate_mobility(self):
  '''
  Integrate the mobility of the sensors after each step
@@ -431,6 +471,7 @@ class WSNRoutingEnv(gym.Env):
  if not(np.all(self.sensor_positions[i] >= lower_bound) and np.all(self.sensor_positions[i] <= upper_bound)):
  self.sensor_positions[i] -= displacement[i]
 
+
  def get_metrics(self):
  # Calculate network throughput
  self.network_throughput = self.packets_delivered / (self.steps + self.epsilon) if self.steps > 0 else 0
@@ -449,4 +490,12 @@ class WSNRoutingEnv(gym.Env):
  "packet_delivery_ratio": self.packet_delivery_ratio,
  "network_lifetime": self.network_lifetime,
  "average_latency": self.average_latency
- }
+ }
+
+
+ def find_next_sensor(self):
+ for offset in range(1, self.n_sensors):
+ next_index = (self.current_sensor + offset) % self.n_sensors
+ if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
+ return next_index
+ return None # If no such sensor is found
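Taken together, the wsn_env.py changes turn each step into a single-active-sensor transition: the env tracks `current_sensor`, `step()` takes one `Discrete` action for that sensor and returns a scalar reward, a single done flag and the metrics dict, and `find_next_sensor()` advances round-robin to the next sensor that still has energy and packets. A minimal rollout sketch based only on the signatures visible in this diff, constructing the class directly rather than through gym registration (the step cap is illustrative):

from gym_examples.envs import WSNRoutingEnv  # entry point named in gym_examples/__init__.py

env = WSNRoutingEnv(n_sensors=20)            # defaults shown in the diff
obs = env.reset()

total_reward = 0.0
for _ in range(10_000):                      # illustrative step cap
    action = env.action_space.sample()       # one action for env.current_sensor
    obs, reward, done, metrics = env.step(action)
    total_reward += reward                   # scalar reward as of 3.0.283
    if done:
        break

print(total_reward, metrics["packet_delivery_ratio"])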
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-examples
- Version: 3.0.282
+ Version: 3.0.283
  Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
  Home-page: https://github.com/gedji/CODES.git
  Author: Georges Djimefo
@@ -0,0 +1,7 @@
+ gym_examples/__init__.py,sha256=B6nFhjmZ3o9wglL3vYZps18eP8W7b436z2-pBFs_-2w,166
+ gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+ gym_examples/envs/wsn_env.py,sha256=pi4-ErjIpxM1jrWjMr8vbUHmp0YSY9YaGSTXsL2k50I,24121
+ gym_examples-3.0.283.dist-info/METADATA,sha256=0J7nhDUnmHi_7HwHNYycQqRikH_nlbSNNGmVVf6ujm0,412
+ gym_examples-3.0.283.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ gym_examples-3.0.283.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+ gym_examples-3.0.283.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- gym_examples/__init__.py,sha256=fBEwdHLyLAy9RQVdFO5HIxVvlyPKD2yEcyS6eJvwA80,166
- gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
- gym_examples/envs/wsn_env.py,sha256=bwXjZ9uS41ChZ7DvgeIwhPkLVUfkRzWCwPQjFDs34ho,23218
- gym_examples-3.0.282.dist-info/METADATA,sha256=PgGDOKmwo0s0Vx5X8CEgJntslyORoybP5IDEE-4CI7M,412
- gym_examples-3.0.282.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- gym_examples-3.0.282.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
- gym_examples-3.0.282.dist-info/RECORD,,