gym-examples 2.0.80__py3-none-any.whl → 2.0.82__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +62 -58
- {gym_examples-2.0.80.dist-info → gym_examples-2.0.82.dist-info}/METADATA +1 -1
- gym_examples-2.0.82.dist-info/RECORD +7 -0
- gym_examples-2.0.80.dist-info/RECORD +0 -7
- {gym_examples-2.0.80.dist-info → gym_examples-2.0.82.dist-info}/WHEEL +0 -0
- {gym_examples-2.0.80.dist-info → gym_examples-2.0.82.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -10,11 +10,9 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 # Define the network parameters for the final reward function
-input_dim =
+input_dim = 4 # number of individual rewards
 output_dim = 1 # final reward
 
-
-stats_file_path_base = 'C:\\Users\\djime\\Documents\\PHD\\THESIS\\CODES\\RL_Routing\\Results_EPyMARL\\stats_over_time'
 Eelec = 50e-9 # energy consumption per bit in joules
 Eamp = 100e-12 # energy consumption per bit per square meter in joules
 info_amount = 3072 # data size in bits
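For context, Eelec, Eamp, and info_amount parameterize the standard first-order radio energy model. The method bodies are unchanged in this diff, so the sketch below is an assumption about what transmission_energy and reception_energy compute, inferred from these constants and the method signatures visible in later hunks, not the package's verbatim code:

    Eelec = 50e-9        # J per bit spent by the radio electronics (TX and RX)
    Eamp = 100e-12       # J per bit per m^2 spent by the TX amplifier
    info_amount = 3072   # bits per packet

    def transmission_energy(number_of_packets, distance):
        bits = number_of_packets * info_amount
        # electronics cost plus amplifier cost, quadratic in range
        return bits * Eelec + bits * Eamp * distance ** 2

    def reception_energy(number_of_packets):
        bits = number_of_packets * info_amount
        return bits * Eelec  # receiving only pays the electronics cost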
@@ -47,7 +45,7 @@ net = net.double() # Convert the weights to Double
 
 
 class WSNRoutingEnv(gym.Env):
-    def __init__(self, n_sensors = 20, coverage_radius=
+    def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/2):
 
         super(WSNRoutingEnv, self).__init__()
 
@@ -57,7 +55,7 @@ class WSNRoutingEnv(gym.Env):
         self.episode_count = 0
         self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
         self.epsilon = 1e-10 # small value to avoid division by zero
-        self.rewards_individual = [0] * self.n_sensors
+        # self.rewards_individual = [0] * self.n_sensors
         # Initialize the position of the sensors randomly
         self.sensor_positions = np.random.rand(self.n_sensors, 2) * (upper_bound - lower_bound) + lower_bound
         self.distance_to_base = np.linalg.norm(self.sensor_positions - base_station_position, axis=1)
@@ -75,21 +73,20 @@ class WSNRoutingEnv(gym.Env):
     def reset(self):
         # print individual rewards
 
-        if self.rewards_individual != []:
-            print("\n=================================================")
-            print(f"Episode: {self.episode_count}")
-            print(f"Rewards: {self.rewards_individual}")
-            print("=================================================\n")
+        # if self.rewards_individual != []:
+        #     print("\n=================================================")
+        #     print(f"Episode: {self.episode_count}")
+        #     print(f"Rewards: {self.rewards_individual}")
+        #     print("=================================================\n")
         # Initialize remaining energy of each sensor to initial_energy joule
         self.remaining_energy = np.ones(self.n_sensors) * initial_energy
-        self.consumption_energy = np.zeros(self.n_sensors)
         self.number_of_packets = np.ones(self.n_sensors, dtype=int) * initial_number_of_packets # number of packets to transmit
         self.episode_count += 1
         return self._get_obs()
 
     def step(self, actions):
         rewards = [0] * self.n_sensors
-        self.rewards_individual = [0] * self.n_sensors
+        # self.rewards_individual = [0] * self.n_sensors
         dones = [False] * self.n_sensors
         for i, action in enumerate(actions):
 
@@ -106,15 +103,25 @@ class WSNRoutingEnv(gym.Env):
                 continue # Skip if sensor tries to transmit data to itself
 
             if action == self.n_sensors:
-
+                if self.distance_to_base[i] > self.coverage_radius:
+                    continue # Skip if the distance to the base station is greater than the coverage radius
+
+                # Calculate the energy consumption for transmitting data to the base station
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
+                if self.remaining_energy[i] < transmission_energy:
+                    continue # Skip if the sensor does not have enough energy to transmit data to the base station
                 self.update_sensor_energies(i, transmission_energy)
                 rewards[i] = self.compute_individual_rewards(i, action)
                 dones[i] = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
+                if distance > self.coverage_radius:
+                    continue # Skip if the distance to the next hop is greater than the coverage radius
+
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
                 reception_energy = self.reception_energy(self.number_of_packets[i])
+                if self.remaining_energy[i] < transmission_energy or self.remaining_energy[action] < reception_energy:
+                    continue
                 self.update_sensor_energies(i, transmission_energy)
                 self.update_sensor_energies(action, reception_energy)
                 # Compute individual rewards
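The hunk above adds three guard clauses to step(): an action is now silently skipped when the chosen hop is out of radio range, when the sender cannot afford the transmission energy, or (for sensor-to-sensor hops) when the receiver cannot afford the reception energy. A condensed restatement of that gating, with local stand-in names rather than the env's attributes:

    def transmission_allowed(distance, coverage_radius, sender_energy, tx_energy,
                             receiver_energy=None, rx_energy=None):
        if distance > coverage_radius:
            return False  # next hop (or base station) is out of radio range
        if sender_energy < tx_energy:
            return False  # sender cannot afford the transmission
        if rx_energy is not None and receiver_energy < rx_energy:
            return False  # receiving sensor cannot afford the reception
        return True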
@@ -125,7 +132,7 @@ class WSNRoutingEnv(gym.Env):
             # Calculate final reward
             # rewards_individual = torch.tensor(rewards[i], dtype=torch.double)
             # final_reward = net(rewards_individual)
-            self.rewards_individual[i] = rewards[i]
+            # self.rewards_individual[i] = rewards[i]
             final_reward = np.sum(rewards[i])
             rewards[i] = final_reward
 
@@ -133,7 +140,7 @@ class WSNRoutingEnv(gym.Env):
         # rewards = [0.5 * r + 0.5 * (self.network_reward_consumption_energy() + self.network_reward_dispersion_remaining_energy()) for r in rewards]
 
         # Only proceed if network consumption energy is not zero to avoid unnecessary list comprehension
-        self.rewards_individual = [r for r in self.rewards_individual if ((r != 0) and (r[len(r) -1] < 1))]
+        # self.rewards_individual = [r for r in self.rewards_individual if ((r != 0) and (r[len(r) -1] < 1))]
 
         # self.rewards_individual = [{"ind": r, "net_consumption_energy": self.network_reward_consumption_energy(), "net_dispersion_energy": self.network_reward_dispersion_remaining_energy()} for r in self.rewards_individual if ((r != 0) and (self.network_reward_consumption_energy() != 0))]
         for i in range(self.n_sensors):
@@ -149,15 +156,15 @@ class WSNRoutingEnv(gym.Env):
 
     def _get_obs(self):
         return [{'remaining_energy': np.array([e]),
+                 'consumption_energy': np.array([initial_energy - e]),
                  'sensor_positions': p,
-                 '
-                 'number_of_packets': np.array([d])} for e, p, c, d in zip(self.remaining_energy, self.sensor_positions, self.consumption_energy, self.number_of_packets)]
+                 'number_of_packets': np.array([d])} for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]
 
     def _get_observation_space(self):
         return Dict({
             'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
-            'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
             'consumption_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
+            'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
             'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
         })
 
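The refactor drops the separately tracked consumption_energy array (see the reset() and update_sensor_energies hunks) and instead derives each sensor's consumption on the fly in _get_obs as initial_energy - e, so the observation Dict keeps the same keys. A tiny check of the invariant that makes this safe, with hypothetical values:

    import numpy as np

    initial_energy = 1.0  # hypothetical; the real constant is defined at module level in wsn_env.py
    remaining_energy = np.array([0.92, 0.75, 1.0])

    # what the new _get_obs reports per sensor, without a separate running total:
    consumption_energy = initial_energy - remaining_energy
    assert np.allclose(remaining_energy + consumption_energy, initial_energy)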
@@ -168,7 +175,6 @@ class WSNRoutingEnv(gym.Env):
         return [list(range(self.n_sensors + 1)) for _ in range(self.n_sensors)]
 
     def update_sensor_energies(self, i, delta_energy):
-        self.consumption_energy[i] += delta_energy
         self.remaining_energy[i] -= delta_energy
 
     def transmission_energy(self, number_of_packets, distance):
@@ -208,7 +214,7 @@ class WSNRoutingEnv(gym.Env):
         Compute the reward based on the distance to the next hop
         '''
         if action == self.n_sensors:
-            distance = np.linalg.norm(self.sensor_positions[i] -
+            distance = np.linalg.norm(self.sensor_positions[i] - self.distance_to_base[i])
         else:
             distance = np.linalg.norm(self.sensor_positions[i] - self.distance_to_base[i]) if False else np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
         # Normalize the distance to the next hop
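One thing worth flagging in the new line: self.distance_to_base[i] is a scalar (it was computed with np.linalg.norm in __init__), so self.sensor_positions[i] - self.distance_to_base[i] broadcasts a scalar against a 2-D position and the outer norm does not measure the distance to the base station. A runnable sketch of what this branch presumably intends, an assumption rather than the shipped code:

    import numpy as np

    base_station_position = np.array([0.5, 0.5])   # placeholder value
    sensor_positions = np.random.rand(3, 2)        # placeholder positions
    distance_to_base = np.linalg.norm(sensor_positions - base_station_position, axis=1)

    i, action, n_sensors = 0, 3, 3
    if action == n_sensors:
        distance = distance_to_base[i]             # scalar distance, already precomputed
    else:
        distance = np.linalg.norm(sensor_positions[i] - sensor_positions[action])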
@@ -228,37 +234,35 @@ class WSNRoutingEnv(gym.Env):
         transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
         reception_energy = self.reception_energy(self.number_of_packets[i])
         total_energy = transmission_energy + reception_energy
-
-
+
         # Normalize the total energy consumption
         max_transmission_energy = self.transmission_energy(self.n_sensors * initial_number_of_packets, self.coverage_radius)
         max_reception_energy = self.reception_energy(self.n_sensors * initial_number_of_packets)
         max_total_energy = max_transmission_energy + max_reception_energy
-        # max_total_energy = max_transmission_energy
         normalized_total_energy = total_energy / max_total_energy
 
         return np.clip(1 - normalized_total_energy, 0, 1)
 
-    def compute_reward_dispersion_remaining_energy(self,i):
-        '''
-        Compute the reward based on the difference between the remaining energy of the sensor i and the mean remaining energy of all sensors
-        '''
-        difference = np.abs(self.remaining_energy[i] - np.mean(self.remaining_energy))
-        # Normalize the difference
-        normalized_difference = difference / initial_energy
-
-        return np.clip(1 - normalized_difference, 0, 1)
-
-    # def compute_reward_dispersion_remaining_energy(self):
+    # def compute_reward_dispersion_remaining_energy(self,i):
     #     '''
-    #     Compute the reward based on the
+    #     Compute the reward based on the difference between the remaining energy of the sensor i and the mean remaining energy of all sensors
     #     '''
-    #
-    #     # Normalize the
-    #
-    #     normalized_dispersion_remaining_energy = dispersion_remaining_energy / max_dispersion_remaining_energy
+    #     difference = np.abs(self.remaining_energy[i] - np.mean(self.remaining_energy))
+    #     # Normalize the difference
+    #     normalized_difference = difference / initial_energy
 
-    #     return np.clip(1 -
+    #     return np.clip(1 - normalized_difference, 0, 1)
+
+    def compute_reward_dispersion_remaining_energy(self):
+        '''
+        Compute the reward based on the standard deviation of the remaining energy
+        '''
+        dispersion_remaining_energy = np.std(self.remaining_energy)
+        # Normalize the standard deviation of the remaining energy
+        max_dispersion_remaining_energy = initial_energy / 2 # maximum standard deviation of the remaining energy if n_sensors is even
+        normalized_dispersion_remaining_energy = dispersion_remaining_energy / max_dispersion_remaining_energy
+
+        return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
 
     def compute_reward_number_of_packets(self, action):
         '''
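To make the normalization in the new compute_reward_dispersion_remaining_energy concrete: for an even number of sensors, the standard deviation of remaining energies is maximized when half the sensors sit at 0 J and half at initial_energy, giving std = initial_energy / 2, which is why that value is used as the divisor. A worked example with hypothetical energies:

    import numpy as np

    initial_energy = 1.0                        # hypothetical value
    remaining_energy = np.array([1.0, 0.6, 0.2, 0.8])

    dispersion = np.std(remaining_energy)       # ≈ 0.2958
    max_dispersion = initial_energy / 2         # 0.5, the even-n worst case
    reward = np.clip(1 - dispersion / max_dispersion, 0, 1)
    print(reward)                               # ≈ 0.4084: more balanced energy gives a higher reward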
@@ -286,27 +290,27 @@ class WSNRoutingEnv(gym.Env):
         # return [reward_angle, reward_distance, reward_consumption_energy, reward_number_of_packets]
         return [reward_angle, reward_distance, reward_dispersion_remaining_energy, reward_number_of_packets]
 
-    def network_reward_dispersion_remaining_energy(self):
-        '''
-        Compute the reward based on the standard deviation of the remaining energy at the network level
-        '''
-        dispersion_remaining_energy = np.std(self.remaining_energy)
-        # Normalize the standard deviation of the remaining energy
-        max_dispersion_remaining_energy = initial_energy / 2 # maximum standard deviation of the remaining energy if n_sensors is even
-        normalized_dispersion_remaining_energy = dispersion_remaining_energy / max_dispersion_remaining_energy
+    # def network_reward_dispersion_remaining_energy(self):
+    #     '''
+    #     Compute the reward based on the standard deviation of the remaining energy at the network level
+    #     '''
+    #     dispersion_remaining_energy = np.std(self.remaining_energy)
+    #     # Normalize the standard deviation of the remaining energy
+    #     max_dispersion_remaining_energy = initial_energy / 2 # maximum standard deviation of the remaining energy if n_sensors is even
+    #     normalized_dispersion_remaining_energy = dispersion_remaining_energy / max_dispersion_remaining_energy
 
-        return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
+    #     return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
 
-    def network_reward_consumption_energy(self):
-        '''
-        Compute the reward based on the total energy consumption (transmission, reception) at the network level
-        '''
-        total_energy = self.n_sensors * initial_energy - np.sum(self.remaining_energy)
-        # Normalize the total energy consumption
-        max_total_energy = self.n_sensors * initial_energy
-        normalized_total_energy = total_energy / max_total_energy
+    # def network_reward_consumption_energy(self):
+    #     '''
+    #     Compute the reward based on the total energy consumption (transmission, reception) at the network level
+    #     '''
+    #     total_energy = self.n_sensors * initial_energy - np.sum(self.remaining_energy)
+    #     # Normalize the total energy consumption
+    #     max_total_energy = self.n_sensors * initial_energy
+    #     normalized_total_energy = total_energy / max_total_energy
 
-        return np.clip(1 - normalized_total_energy, 0, 1)
+    #     return np.clip(1 - normalized_total_energy, 0, 1)
 
     def integrate_mobility(self):
         '''
gym_examples-2.0.82.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=0a6V_iaBbwkYsQh-6xcZfi7W2c80bYCfqPCbB9J335k,193
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=smTNOcnzz5rgGkvpdgZycyVbeAJA3DpXVA1BJoRsQ4s,17142
+gym_examples-2.0.82.dist-info/METADATA,sha256=OfTN21UuVUewI4w-o8_84TLg4LJW3r4c7fNFsZ8obBY,411
+gym_examples-2.0.82.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-2.0.82.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-2.0.82.dist-info/RECORD,,
gym_examples-2.0.80.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=1BNt94Y-ZBFKM09grE3oS4yWNI7qkuapGX3c7aVQxyk,193
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=SyD3DwXcsuSpLHxZhaVFnL3oe4KbdvQvP-bm8qIquhs,16706
-gym_examples-2.0.80.dist-info/METADATA,sha256=rRA4HZBKdJwffh-LYmwbiRulGQ-Q_8K2Lophc-KjWrE,411
-gym_examples-2.0.80.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-2.0.80.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-2.0.80.dist-info/RECORD,,
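Only the RECORD entries for gym_examples/__init__.py and gym_examples/envs/wsn_env.py differ between the two wheels (new hashes, with wsn_env.py growing from 16706 to 17142 bytes); WHEEL and top_level.txt keep identical hashes, consistent with the unchanged files noted below. Per the wheel spec, each hash is the urlsafe-base64 SHA-256 digest of the file with trailing '=' padding stripped, which can be reproduced like this:

    import base64
    import hashlib

    def record_hash(path):
        # sha256= plus the urlsafe-base64 digest, '=' padding removed (wheel RECORD format)
        digest = hashlib.sha256(open(path, 'rb').read()).digest()
        return 'sha256=' + base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')

    # record_hash('gym_examples/envs/wsn_env.py') on the extracted 2.0.82 wheel
    # should return 'sha256=smTNOcnzz5rgGkvpdgZycyVbeAJA3DpXVA1BJoRsQ4s'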
{gym_examples-2.0.80.dist-info → gym_examples-2.0.82.dist-info}/WHEEL
File without changes
{gym_examples-2.0.80.dist-info → gym_examples-2.0.82.dist-info}/top_level.txt
File without changes