gym-examples 2.0.80-py3-none-any.whl → 2.0.82-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gym_examples/__init__.py CHANGED
@@ -6,4 +6,4 @@ register(
  max_episode_steps=50,
  )
 
- __version__ = "2.0.80"
+ __version__ = "2.0.82"
gym_examples/envs/wsn_env.py CHANGED
@@ -10,11 +10,9 @@ import torch.nn as nn
  import torch.nn.functional as F
 
  # Define the network parameters for the final reward function
- input_dim = 5 # number of individual rewards
+ input_dim = 4 # number of individual rewards
  output_dim = 1 # final reward
 
-
- stats_file_path_base = 'C:\\Users\\djime\\Documents\\PHD\\THESIS\\CODES\\RL_Routing\\Results_EPyMARL\\stats_over_time'
  Eelec = 50e-9 # energy consumption per bit in joules
  Eamp = 100e-12 # energy consumption per bit per square meter in joules
  info_amount = 3072 # data size in bits
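For context on these constants: Eelec and Eamp are the usual first-order radio model parameters, and the environment later calls self.transmission_energy(number_of_packets, distance) and self.reception_energy(number_of_packets). The method bodies are outside the hunks shown here, so the following is only a sketch of how such constants are typically combined, not the package's verified implementation.

# Sketch only: assumes the classic first-order radio model; the actual
# transmission_energy / reception_energy bodies are not shown in this diff.
Eelec = 50e-9        # J per bit spent by the radio electronics
Eamp = 100e-12       # J per bit per square meter spent by the amplifier
info_amount = 3072   # bits per packet

def transmission_energy(number_of_packets, distance):
    total_bits = number_of_packets * info_amount
    return total_bits * Eelec + total_bits * Eamp * distance ** 2

def reception_energy(number_of_packets):
    total_bits = number_of_packets * info_amount
    return total_bits * Eelec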
@@ -47,7 +45,7 @@ net = net.double() # Convert the weights to Double
 
 
  class WSNRoutingEnv(gym.Env):
- def __init__(self, n_sensors = 20, coverage_radius=50):
+ def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/2):
 
  super(WSNRoutingEnv, self).__init__()
 
@@ -57,7 +55,7 @@ class WSNRoutingEnv(gym.Env):
  self.episode_count = 0
  self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
  self.epsilon = 1e-10 # small value to avoid division by zero
- self.rewards_individual = [0] * self.n_sensors
+ # self.rewards_individual = [0] * self.n_sensors
  # Initialize the position of the sensors randomly
  self.sensor_positions = np.random.rand(self.n_sensors, 2) * (upper_bound - lower_bound) + lower_bound
  self.distance_to_base = np.linalg.norm(self.sensor_positions - base_station_position, axis=1)
@@ -75,21 +73,20 @@ class WSNRoutingEnv(gym.Env):
  def reset(self):
  # print individual rewards
 
- if self.rewards_individual != []:
- print("\n=================================================")
- print(f"Episode: {self.episode_count}")
- print(f"Rewards: {self.rewards_individual}")
- print("=================================================\n")
+ # if self.rewards_individual != []:
+ # print("\n=================================================")
+ # print(f"Episode: {self.episode_count}")
+ # print(f"Rewards: {self.rewards_individual}")
+ # print("=================================================\n")
  # Initialize remaining energy of each sensor to initial_energy joule
  self.remaining_energy = np.ones(self.n_sensors) * initial_energy
- self.consumption_energy = np.zeros(self.n_sensors)
  self.number_of_packets = np.ones(self.n_sensors, dtype=int) * initial_number_of_packets # number of packets to transmit
  self.episode_count += 1
  return self._get_obs()
 
  def step(self, actions):
  rewards = [0] * self.n_sensors
- self.rewards_individual = [0] * self.n_sensors
+ # self.rewards_individual = [0] * self.n_sensors
  dones = [False] * self.n_sensors
  for i, action in enumerate(actions):
 
@@ -106,15 +103,25 @@ class WSNRoutingEnv(gym.Env):
  continue # Skip if sensor tries to transmit data to itself
 
  if action == self.n_sensors:
- # Calculate the energy consumption and remaining for transmitting data to the base station
+ if self.distance_to_base[i] > self.coverage_radius:
+ continue # Skip if the distance to the base station is greater than the coverage radius
+
+ # Calculate the energy consumption for transmitting data to the base station
  transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
+ if self.remaining_energy[i] < transmission_energy:
+ continue # Skip if the sensor does not have enough energy to transmit data to the base station
  self.update_sensor_energies(i, transmission_energy)
  rewards[i] = self.compute_individual_rewards(i, action)
  dones[i] = True
  else:
  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
+ if distance > self.coverage_radius:
+ continue # Skip if the distance to the next hop is greater than the coverage radius
+
  transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
  reception_energy = self.reception_energy(self.number_of_packets[i])
+ if self.remaining_energy[i] < transmission_energy or self.remaining_energy[action] < reception_energy:
+ continue
  self.update_sensor_energies(i, transmission_energy)
  self.update_sensor_energies(action, reception_energy)
  # Compute individual rewards
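Two kinds of guards are added to step() here: a range check against coverage_radius (which, per the __init__ change above, now defaults to (upper_bound - lower_bound)/2; for illustration, if lower_bound were 0 and upper_bound 100, that evaluates to the previous hard-coded 50) and an energy-budget check on sender and, for relaying, receiver. A hypothetical helper condensing the same checks (this function does not exist in the package; the name and signature are illustrative only):

def hop_is_feasible(distance, coverage_radius, tx_energy, sender_energy,
                    rx_energy=0.0, receiver_energy=None):
    """Mirror of the guards added in step(); illustrative only."""
    if distance > coverage_radius:
        return False            # next hop is out of radio range
    if sender_energy < tx_energy:
        return False            # sender cannot afford the transmission
    if receiver_energy is not None and receiver_energy < rx_energy:
        return False            # relay sensor cannot afford the reception
    return True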
@@ -125,7 +132,7 @@ class WSNRoutingEnv(gym.Env):
  # Calculate final reward
  # rewards_individual = torch.tensor(rewards[i], dtype=torch.double)
  # final_reward = net(rewards_individual)
- self.rewards_individual[i] = rewards[i]
+ # self.rewards_individual[i] = rewards[i]
  final_reward = np.sum(rewards[i])
  rewards[i] = final_reward
 
@@ -133,7 +140,7 @@ class WSNRoutingEnv(gym.Env):
  # rewards = [0.5 * r + 0.5 * (self.network_reward_consumption_energy() + self.network_reward_dispersion_remaining_energy()) for r in rewards]
 
  # Only proceed if network consumption energy is not zero to avoid unnecessary list comprehension
- self.rewards_individual = [r for r in self.rewards_individual if ((r != 0) and (r[len(r) -1] < 1))]
+ # self.rewards_individual = [r for r in self.rewards_individual if ((r != 0) and (r[len(r) -1] < 1))]
 
  # self.rewards_individual = [{"ind": r, "net_consumption_energy": self.network_reward_consumption_energy(), "net_dispersion_energy": self.network_reward_dispersion_remaining_energy()} for r in self.rewards_individual if ((r != 0) and (self.network_reward_consumption_energy() != 0))]
  for i in range(self.n_sensors):
@@ -149,15 +156,15 @@ class WSNRoutingEnv(gym.Env):
 
  def _get_obs(self):
  return [{'remaining_energy': np.array([e]),
+ 'consumption_energy': np.array([initial_energy - e]),
  'sensor_positions': p,
- 'consumption_energy': np.array([c]),
- 'number_of_packets': np.array([d])} for e, p, c, d in zip(self.remaining_energy, self.sensor_positions, self.consumption_energy, self.number_of_packets)]
+ 'number_of_packets': np.array([d])} for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
  def _get_observation_space(self):
  return Dict({
  'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
- 'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
  'consumption_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
+ 'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
  'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
  })
 
@@ -168,7 +175,6 @@ class WSNRoutingEnv(gym.Env):
  return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]
 
  def update_sensor_energies(self, i, delta_energy):
- self.consumption_energy[i] += delta_energy
  self.remaining_energy[i] -= delta_energy
 
  def transmission_energy(self, number_of_packets, distance):
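With the separate self.consumption_energy array removed here and in reset(), the observation derives consumption as initial_energy minus remaining energy. A minimal sanity check of that invariant, assuming an already-constructed WSNRoutingEnv instance named env and the module-level initial_energy constant in scope (both assumptions for illustration):

import numpy as np

# Illustrative check: the two energy fields of every observation entry
# should now sum to initial_energy.
for entry in env._get_obs():
    total = entry['remaining_energy'] + entry['consumption_energy']
    assert np.allclose(total, initial_energy)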
@@ -208,7 +214,7 @@ class WSNRoutingEnv(gym.Env):
  Compute the reward based on the distance to the next hop
  '''
  if action == self.n_sensors:
- distance = np.linalg.norm(self.sensor_positions[i] - base_station_position)
+ distance = np.linalg.norm(self.sensor_positions[i] - self.distance_to_base[i])
  else:
  distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
  # Normalize the distance to the next hop
@@ -228,37 +234,35 @@ class WSNRoutingEnv(gym.Env):
  transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
  reception_energy = self.reception_energy(self.number_of_packets[i])
  total_energy = transmission_energy + reception_energy
- # total_energy = transmission_energy
-
+
  # Normalize the total energy consumption
  max_transmission_energy = self.transmission_energy(self.n_sensors * initial_number_of_packets, self.coverage_radius)
  max_reception_energy = self.reception_energy(self.n_sensors * initial_number_of_packets)
  max_total_energy = max_transmission_energy + max_reception_energy
- # max_total_energy = max_transmission_energy
  normalized_total_energy = total_energy / max_total_energy
 
  return np.clip(1 - normalized_total_energy, 0, 1)
 
- def compute_reward_dispersion_remaining_energy(self,i):
- '''
- Compute the reward based on the difference between the remaining energy of the sensor i and the mean remaining energy of all sensors
- '''
- difference = np.abs(self.remaining_energy[i] - np.mean(self.remaining_energy))
- # Normalize the difference
- normalized_difference = difference / initial_energy
-
- return np.clip(1 - normalized_difference, 0, 1)
-
- # def compute_reward_dispersion_remaining_energy(self):
+ # def compute_reward_dispersion_remaining_energy(self,i):
  # '''
- # Compute the reward based on the standard deviation of the remaining energy
+ # Compute the reward based on the difference between the remaining energy of the sensor i and the mean remaining energy of all sensors
  # '''
- # dispersion_remaining_energy = np.std(self.remaining_energy)
- # # Normalize the standard deviation of the remaining energy
- # max_dispersion_remaining_energy = initial_energy / 2 # maximum standard deviation of the remaining energy if n_sensors is even
- # normalized_dispersion_remaining_energy = dispersion_remaining_energy / max_dispersion_remaining_energy
+ # difference = np.abs(self.remaining_energy[i] - np.mean(self.remaining_energy))
+ # # Normalize the difference
+ # normalized_difference = difference / initial_energy
 
- # return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
+ # return np.clip(1 - normalized_difference, 0, 1)
+
+ def compute_reward_dispersion_remaining_energy(self):
+ '''
+ Compute the reward based on the standard deviation of the remaining energy
+ '''
+ dispersion_remaining_energy = np.std(self.remaining_energy)
+ # Normalize the standard deviation of the remaining energy
+ max_dispersion_remaining_energy = initial_energy / 2 # maximum standard deviation of the remaining energy if n_sensors is even
+ normalized_dispersion_remaining_energy = dispersion_remaining_energy / max_dispersion_remaining_energy
+
+ return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
 
  def compute_reward_number_of_packets(self, action):
  '''
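The per-sensor dispersion reward is thus replaced by a network-level version: the standard deviation of remaining energy, normalized by initial_energy / 2 and clipped to [0, 1]. A small worked example with illustrative numbers (initial_energy = 2 J is assumed, not taken from the package):

import numpy as np

initial_energy = 2.0                                      # assumed for illustration
remaining_energy = np.array([0.5, 1.0, 1.0, 1.5])         # joules left on four sensors
dispersion = np.std(remaining_energy)                     # ~0.354
max_dispersion = initial_energy / 2                       # 1.0
reward = np.clip(1 - dispersion / max_dispersion, 0, 1)   # ~0.65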
@@ -286,27 +290,27 @@ class WSNRoutingEnv(gym.Env):
  # return [reward_angle, reward_distance, reward_consumption_energy, reward_number_of_packets]
  return [reward_angle, reward_distance, reward_dispersion_remaining_energy, reward_number_of_packets]
 
- def network_reward_dispersion_remaining_energy(self):
- '''
- Compute the reward based on the standard deviation of the remaining energy at the network level
- '''
- dispersion_remaining_energy = np.std(self.remaining_energy)
- # Normalize the standard deviation of the remaining energy
- max_dispersion_remaining_energy = initial_energy / 2 # maximum standard deviation of the remaining energy if n_sensors is even
- normalized_dispersion_remaining_energy = dispersion_remaining_energy / max_dispersion_remaining_energy
+ # def network_reward_dispersion_remaining_energy(self):
+ # '''
+ # Compute the reward based on the standard deviation of the remaining energy at the network level
+ # '''
+ # dispersion_remaining_energy = np.std(self.remaining_energy)
+ # # Normalize the standard deviation of the remaining energy
+ # max_dispersion_remaining_energy = initial_energy / 2 # maximum standard deviation of the remaining energy if n_sensors is even
+ # normalized_dispersion_remaining_energy = dispersion_remaining_energy / max_dispersion_remaining_energy
 
- return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
+ # return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
 
- def network_reward_consumption_energy(self):
- '''
- Compute the reward based on the total energy consumption (transmission, reception) at the network level
- '''
- total_energy = np.sum(self.consumption_energy)
- # Normalize the total energy consumption
- max_total_energy = self.n_sensors * initial_energy
- normalized_total_energy = total_energy / max_total_energy
+ # def network_reward_consumption_energy(self):
+ # '''
+ # Compute the reward based on the total energy consumption (transmission, reception) at the network level
+ # '''
+ # total_energy = self.n_sensors * initial_energy - np.sum(self.remaining_energy)
+ # # Normalize the total energy consumption
+ # max_total_energy = self.n_sensors * initial_energy
+ # normalized_total_energy = total_energy / max_total_energy
 
- return np.clip(1 - normalized_total_energy, 0, 1)
+ # return np.clip(1 - normalized_total_energy, 0, 1)
 
  def integrate_mobility(self):
  '''
gym_examples-2.0.80.dist-info/METADATA → gym_examples-2.0.82.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: gym-examples
- Version: 2.0.80
+ Version: 2.0.82
  Summary: A custom environment for multi-agent reinforcement learning focused on WSN routing.
  Home-page: https://github.com/gedji/CODES.git
  Author: Georges Djimefo
gym_examples-2.0.82.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ gym_examples/__init__.py,sha256=0a6V_iaBbwkYsQh-6xcZfi7W2c80bYCfqPCbB9J335k,193
+ gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+ gym_examples/envs/wsn_env.py,sha256=smTNOcnzz5rgGkvpdgZycyVbeAJA3DpXVA1BJoRsQ4s,17142
+ gym_examples-2.0.82.dist-info/METADATA,sha256=OfTN21UuVUewI4w-o8_84TLg4LJW3r4c7fNFsZ8obBY,411
+ gym_examples-2.0.82.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ gym_examples-2.0.82.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+ gym_examples-2.0.82.dist-info/RECORD,,
gym_examples-2.0.80.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
- gym_examples/__init__.py,sha256=1BNt94Y-ZBFKM09grE3oS4yWNI7qkuapGX3c7aVQxyk,193
- gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
- gym_examples/envs/wsn_env.py,sha256=SyD3DwXcsuSpLHxZhaVFnL3oe4KbdvQvP-bm8qIquhs,16706
- gym_examples-2.0.80.dist-info/METADATA,sha256=rRA4HZBKdJwffh-LYmwbiRulGQ-Q_8K2Lophc-KjWrE,411
- gym_examples-2.0.80.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- gym_examples-2.0.80.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
- gym_examples-2.0.80.dist-info/RECORD,,