gym-examples 3.0.79__py3-none-any.whl → 3.0.81__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +24 -24
- {gym_examples-3.0.79.dist-info → gym_examples-3.0.81.dist-info}/METADATA +1 -1
- gym_examples-3.0.81.dist-info/RECORD +7 -0
- gym_examples-3.0.79.dist-info/RECORD +0 -7
- {gym_examples-3.0.79.dist-info → gym_examples-3.0.81.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.79.dist-info → gym_examples-3.0.81.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -40,6 +40,9 @@ class Attention(nn.Module):
         x = self.linear2(x) # Pass the result through another linear layer
         return x

+net = Attention(input_dim, output_dim)
+net = net.double() # Convert the weights to Double
+
 class WSNRoutingEnv(gym.Env):
     def __init__(self, n_sensors = 20, coverage_radius=(upper_bound - lower_bound)/4):

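The two added lines move construction of the attention network to module scope, so a single double-precision Attention instance is created at import time and reused by every call to compute_attention_rewards (see the last hunk of this file), instead of a fresh, randomly initialized network being built on each call. A rough, self-contained sketch of that setup; the Attention body, hidden size, and the values of input_dim/output_dim are assumptions, since only the tail of forward() is visible in this diff:

import torch
import torch.nn as nn

# Hypothetical dimensions: the real input_dim / output_dim are defined
# elsewhere in wsn_env.py and are not shown in this diff.
input_dim = 4
output_dim = 1

class Attention(nn.Module):
    # Minimal stand-in mirroring the two linear layers visible above.
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, 64)
        self.linear2 = nn.Linear(64, output_dim)

    def forward(self, x):
        x = torch.relu(self.linear1(x))
        x = self.linear2(x)  # Pass the result through another linear layer
        return x

# Module-level instantiation, as added in 3.0.81: the same (untrained) weights
# are now shared by every reward aggregation call.
net = Attention(input_dim, output_dim)
net = net.double()  # Convert the weights to Double so torch.double inputs are accepted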
@@ -88,7 +91,7 @@ class WSNRoutingEnv(gym.Env):

     def step(self, actions):
         self.steps += 1
-        rewards = [
+        rewards = [0] * self.n_sensors
         dones = [False] * self.n_sensors
         for i, action in enumerate(actions):
             if action not in range(self.n_sensors + 1):
@@ -121,8 +124,8 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-
-                rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
+                rewards[i] = self.compute_individual_rewards(i, action)
+                # rewards[i] = np.ones(input_dim) # Reward for transmitting data to the base station
                 dones[i] = True
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
@@ -149,8 +152,8 @@ class WSNRoutingEnv(gym.Env):
                 self.number_of_packets[action] += self.number_of_packets[i]
                 self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
-
-            rewards[i] = np.mean(rewards[i])
+            rewards[i] = self.compute_attention_rewards(rewards[i])
+            # rewards[i] = np.mean(rewards[i])
         for i in range(self.n_sensors):
             if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
                 dones[i] = True
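Taken together, the two hunks above change how a sensor's reward is produced in step(): the per-criterion reward vector now comes from compute_individual_rewards(i, action), and instead of being averaged it is passed through the shared attention network. A small, self-contained sketch of that aggregation difference; the feed-forward net, its dimensions, and the sample reward values below are placeholders rather than the package's actual Attention class:

import numpy as np
import torch
import torch.nn as nn

# Placeholder aggregator standing in for the module-level Attention net.
input_dim, output_dim = 4, 1
net = nn.Sequential(nn.Linear(input_dim, 16), nn.ReLU(), nn.Linear(16, output_dim)).double()

# Hypothetical per-criterion rewards for one sensor (angle, distance, energy, ...).
individual_rewards = np.array([0.8, 0.6, 0.9, 0.5])

old_reward = np.mean(individual_rewards)                                 # 3.0.79: plain mean
new_reward = net(torch.tensor(individual_rewards, dtype=torch.double))  # 3.0.81: learned aggregation
print(old_reward, new_reward.item())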
@@ -222,8 +225,8 @@ class WSNRoutingEnv(gym.Env):
         # Normalize the angle
         normalized_angle = abs(angle) / np.pi

-
-        return np.clip(- normalized_angle, -1, 1)
+        return np.clip(1 - normalized_angle, 0, 1)
+        # return np.clip(- normalized_angle, -1, 1)

     def compute_reward_distance(self, i, action):
         '''
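This hunk, and the six like it that follow, apply the same rescaling to every reward term: a penalty clipped to [-1, 0] becomes a bonus clipped to [0, 1], so the best case (normalized value 0) now scores 1 instead of 0 and the worst case (normalized value 1) scores 0 instead of -1. A quick numeric check of the two formulas:

import numpy as np

normalized_angle = 0.25  # example value in [0, 1]

old = np.clip(-normalized_angle, -1, 1)    # 3.0.79: -0.25, reward lives in [-1, 0]
new = np.clip(1 - normalized_angle, 0, 1)  # 3.0.81:  0.75, reward lives in [0, 1]
print(old, new)  # -0.25 0.75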
@@ -236,8 +239,8 @@ class WSNRoutingEnv(gym.Env):
         # Normalize the distance to the next hop
         normalized_distance_to_next_hop = distance / self.coverage_radius

-
-        return np.clip(-normalized_distance_to_next_hop, -1, 1)
+        return np.clip(1 - normalized_distance_to_next_hop, 0, 1)
+        # return np.clip(-normalized_distance_to_next_hop, -1, 1)

     def compute_reward_consumption_energy(self, i, action):
         '''
@@ -258,8 +261,8 @@ class WSNRoutingEnv(gym.Env):
         max_total_energy = max_transmission_energy + max_reception_energy
         normalized_total_energy = total_energy / (max_total_energy + self.epsilon)

-
-        return np.clip(- normalized_total_energy, -1, 1)
+        return np.clip(1 - normalized_total_energy, 0, 1)
+        # return np.clip(- normalized_total_energy, -1, 1)

     def compute_reward_dispersion_remaining_energy(self):
         '''
@@ -270,8 +273,8 @@ class WSNRoutingEnv(gym.Env):
         max_dispersion_remaining_energy = initial_energy / 2 # maximum standard deviation of the remaining energy if n_sensors is even
         normalized_dispersion_remaining_energy = dispersion_remaining_energy / (max_dispersion_remaining_energy + self.epsilon)

-
-        return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
+        return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
+        # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

     def compute_reward_number_of_packets(self, action):
         '''
@@ -283,8 +286,8 @@ class WSNRoutingEnv(gym.Env):
         else:
             normalized_number_of_packets = self.number_of_packets[action] / (max_number_of_packets + self.epsilon)

-
-        return np.clip(- normalized_number_of_packets, -1, 1)
+        return np.clip(1 - normalized_number_of_packets, 0, 1)
+        # return np.clip(- normalized_number_of_packets, -1, 1)

     def compute_individual_rewards(self, i, action):
         '''
@@ -318,8 +321,8 @@ class WSNRoutingEnv(gym.Env):
         max_dispersion_remaining_energy = initial_energy / 2 # maximum standard deviation of the remaining energy if n_sensors is even
         normalized_dispersion_remaining_energy = dispersion_remaining_energy / (max_dispersion_remaining_energy + self.epsilon)

-
-        return np.clip(- normalized_dispersion_remaining_energy, -1, 1)
+        return np.clip(1 - normalized_dispersion_remaining_energy, 0, 1)
+        # return np.clip(- normalized_dispersion_remaining_energy, -1, 1)

     def network_reward_consumption_energy(self):
         '''
@@ -330,8 +333,8 @@ class WSNRoutingEnv(gym.Env):
         max_total_energy = self.n_sensors * initial_energy
         normalized_total_energy = total_energy / (max_total_energy + self.epsilon)

-
-        return np.clip(- normalized_total_energy, -1, 1)
+        return np.clip(1 - normalized_total_energy, 0, 1)
+        # return np.clip(- normalized_total_energy, -1, 1)

     def compute_reward_packet_delivery_ratio(self):
         '''
@@ -348,8 +351,8 @@ class WSNRoutingEnv(gym.Env):
         max_latency = self.n_sensors * self.steps
         normalized_latency = self.total_latency / (max_latency + self.epsilon)

-
-        return np.clip(- normalized_latency, -1, 1)
+        return np.clip(1 - normalized_latency, 0, 1)
+        # return np.clip(- normalized_latency, -1, 1)

     def compute_reward_network_throughput(self):
         '''
@@ -371,10 +374,7 @@ class WSNRoutingEnv(gym.Env):
         '''
         Compute the attention-based rewards
         '''
-        input_dim = len(rewards)
         rewards = torch.tensor(rewards, dtype=torch.double)
-        net = Attention(input_dim, output_dim)
-        net = net.double() # Convert the weights to Double
         final_reward = net(rewards)
         return final_reward

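With the network hoisted to module level, compute_attention_rewards reduces to a tensor conversion and a forward pass. Pieced together from the context lines above (the enclosing class and the module-level net are as in the first hunk), the method now reads roughly as follows:

def compute_attention_rewards(self, rewards):
    '''
    Compute the attention-based rewards
    '''
    rewards = torch.tensor(rewards, dtype=torch.double)
    final_reward = net(rewards)  # shared module-level Attention instance
    return final_reward

Note that the value returned here, and stored in rewards[i] by step(), is a torch tensor rather than a Python float.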
gym_examples-3.0.81.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=-tNBdDGkUAe9gRz0t2fK_jAloQqZCns0wnEGIXGdB_s,193
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=onqDtWhDb4uXQtcpWZFILFujrH-AbVZXluV-pgYi3nA,20385
+gym_examples-3.0.81.dist-info/METADATA,sha256=Id-4MFw_RH7WUdIcUlhbAsjDQMX05OIilQgWzKJ1h00,411
+gym_examples-3.0.81.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.81.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.81.dist-info/RECORD,,
gym_examples-3.0.79.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=Zx6apdel9zNMkb769ArqkX8c7CG_d8CHIpz4BQoJUnc,193
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=z4l1ctB9RlQmtxwb1Nl-QGtERQ1C_YVVhacx8fzfp7o,20434
-gym_examples-3.0.79.dist-info/METADATA,sha256=Tk9FJq9C_mAWmJoehqotrD_lB16I1t70kybNMWtHaVA,411
-gym_examples-3.0.79.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.79.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.79.dist-info/RECORD,,
{gym_examples-3.0.79.dist-info → gym_examples-3.0.81.dist-info}/WHEEL
File without changes

{gym_examples-3.0.79.dist-info → gym_examples-3.0.81.dist-info}/top_level.txt
File without changes