gym-examples 3.0.265__py3-none-any.whl → 3.0.267__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +54 -51
- {gym_examples-3.0.265.dist-info → gym_examples-3.0.267.dist-info}/METADATA +1 -1
- gym_examples-3.0.267.dist-info/RECORD +7 -0
- gym_examples-3.0.265.dist-info/RECORD +0 -7
- {gym_examples-3.0.265.dist-info → gym_examples-3.0.267.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.265.dist-info → gym_examples-3.0.267.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py CHANGED
gym_examples/envs/wsn_env.py CHANGED
@@ -25,7 +25,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
 latency_per_hop = 1 # latency per hop in seconds

 base_back_up_dir = "results/data/"
-max_reward =
+max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action

 # Define the final reward function using an attention mechanism
 class Attention(nn.Module):
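The new `max_reward` constant doubles as both the positive and the negative bound of the per-step reward. A minimal sketch of that convention as it is used further down in `step()` (the outcome flag here is hypothetical, for illustration only):

```python
max_reward = 3  # same constant as in the diff above

delivered_to_base_station = True   # hypothetical outcome of one step

reward = -max_reward               # default: penalise an invalid or failed action
if delivered_to_base_station:
    reward = max_reward            # reward a packet that reaches the base station
print(reward)  # -> 3
```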
@@ -73,7 +73,7 @@ class WSNRoutingEnv(gym.Env):
         self.episode_count = 0
         self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
         self.epsilon = 1e-10 # small value to avoid division by zero
-
+        self.current_sensor = 0 # Index of the current sensor

         # Define observation space
         self.observation_space = Tuple(
@@ -81,28 +81,13 @@ class WSNRoutingEnv(gym.Env):
         )

         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
         # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
-
+        self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station

         self.reset()

     def reset(self):
-
-        # if self.episode_count > 1 and os.getenv('PRINT_STATS') == 'True': # Statistics for the PPO algorithm during the training phase
-        # self.episode_returns.append(self.episode_return)
-        # self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
-        # self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
-        # self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
-        # self.episode_network_throughput.append(self.network_throughput)
-        # self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
-        # self.episode_network_lifetime.append(self.network_lifetime)
-        # self.episode_average_latency.append(self.average_latency)
-
-        # if self.episode_count > 1 and os.getenv('PRINT_STATS') == 'True-False': # Statistics for the PPO algorithm during the evaluation phase
-        # print(f"Episode: {self.episode_count}")
-        # print(self.get_metrics())
-
         self.episode_return = 0
         self.sensor_positions = np.random.rand(self.n_sensors, 2) * (upper_bound - lower_bound) + lower_bound
         self.distance_to_base = np.linalg.norm(self.sensor_positions - base_station_position, axis=1)
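The action space shrinks from one sub-action per agent (`MultiDiscrete`) to a single `Discrete(n_sensors + 1)` choice made for the currently scheduled sensor, with the extra index standing for the base station. A minimal sketch of the two spaces, assuming `gym.spaces` and a hypothetical network size:

```python
from gym.spaces import Discrete, MultiDiscrete

n_sensors = 10          # hypothetical number of sensors
n_agents = n_sensors    # one agent per sensor in the old formulation

old_space = MultiDiscrete([n_sensors + 1] * n_agents)  # 3.0.265: one choice per agent
new_space = Discrete(n_sensors + 1)                    # 3.0.267: one choice per step

action = new_space.sample()
if action == n_sensors:
    print("transmit to the base station")
else:
    print(f"forward to sensor {action}")
```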
@@ -131,17 +116,13 @@ class WSNRoutingEnv(gym.Env):


     def step(self, actions):
-        actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
-        rewards =
-
-
-
-
-
-            if i >= self.n_sensors:
-                raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
-
+        rewards = - max_reward
+        # rewards = 0
+        dones = False
+        actions = np.array([self.current_sensor, actions])
+        print(f"\nactions from step in WSNRoutingEnv: {actions}\n")
+        for i, action in [actions]:
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue # Skip if sensor has no energy left or no packets to transmit

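`step()` now expects a single `Discrete` action and pairs it with `self.current_sensor` before entering the (now single-iteration) loop. A small sketch of how that pairing unpacks, with hypothetical values:

```python
import numpy as np

current_sensor = 2   # hypothetical index of the sensor scheduled to act
action = 5           # single Discrete action chosen by the agent

pair = np.array([current_sensor, action])
# Wrapping the pair in a list gives exactly one loop iteration, which
# unpacks into the transmitting sensor and its chosen next hop.
for i, a in [pair]:
    print(i, a)  # -> 2 5
```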
@@ -156,6 +137,11 @@ class WSNRoutingEnv(gym.Env):
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
                 if self.remaining_energy[i] < transmission_energy:
                     self.remaining_energy[i] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the sensor does not have enough energy to transmit data to the base station

                 self.update_sensor_energies(i, transmission_energy)
@@ -167,10 +153,12 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-
-
-
-
+                rewards = max_reward # Reward for transmitting data to the base station
+                next_sensor = self.find_next_sensor()
+                if next_sensor is None:
+                    dones = True
+                else:
+                    self.current_sensor = next_sensor
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
@@ -179,11 +167,21 @@ class WSNRoutingEnv(gym.Env):
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
                 reception_energy = self.reception_energy(self.number_of_packets[i])
                 if self.remaining_energy[i] < transmission_energy:
-                    self.remaining_energy[i] = 0
+                    self.remaining_energy[i] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the sensor does not have enough energy to transmit data to the next hop
                 if self.remaining_energy[action] < reception_energy:
                     self.number_of_packets[i] = 0
                     self.remaining_energy[action] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the next hop does not have enough energy to receive data

                 self.update_sensor_energies(i, transmission_energy)
@@ -195,19 +193,17 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                rewards
+                rewards = self.compute_individual_rewards(i, action)

             # Update the number of packets
             self.number_of_packets[action] += self.number_of_packets[i]
+
+            self.current_sensor = action
             self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
             # rewards[i] = self.compute_attention_rewards(rewards[i])
-            rewards
-
-        for i in range(self.n_sensors):
-            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
-                dones[i] = True
-
+            rewards = np.mean(rewards)
+
         # Integrate the mobility of the sensors
         # self.integrate_mobility()

@@ -218,13 +214,10 @@ class WSNRoutingEnv(gym.Env):

         self.get_metrics()

-        rewards =
-
-
-
-        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
-        dones = all(dones) # Done if all agents are done
-
+        rewards = rewards.item() if isinstance(rewards, torch.Tensor) else rewards # Convert the reward to a float
+        if not dones:
+            dones = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
+
         return self._get_obs(), rewards, dones, self.get_metrics()


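Episode termination is now a single boolean: either `find_next_sensor()` has already reported that no sensor can act, or every sensor is out of energy or packets at the end of the step. A standalone sketch of that final check, with hypothetical arrays:

```python
import numpy as np

remaining_energy = np.array([0.0, 0.4, 0.2])   # hypothetical per-sensor energy
number_of_packets = np.array([0, 0, 3])        # hypothetical per-sensor queue sizes

# Done only when every sensor is drained or has nothing left to send
done = all(remaining_energy[i] <= 0 or number_of_packets[i] == 0
           for i in range(len(remaining_energy)))
print(done)  # -> False: sensor 2 still has both energy and packets
```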
@@ -232,7 +225,8 @@ class WSNRoutingEnv(gym.Env):
         return [{'remaining_energy': np.array([e]),
                  'consumption_energy': np.array([initial_energy - e]),
                  'sensor_positions': p,
-                 'number_of_packets': np.array([d])
+                 'number_of_packets': np.array([d]),
+                 'curent_sensor': np.array([self.current_sensor])
                 } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]


@@ -241,7 +235,8 @@ class WSNRoutingEnv(gym.Env):
             'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
             'consumption_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
             'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
-            'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
+            'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int),
+            'current_sensor': Box(low=0, high=self.n_sensors - 1, shape=(1,), dtype=int)
         })


@@ -495,4 +490,12 @@ class WSNRoutingEnv(gym.Env):
             "packet_delivery_ratio": self.packet_delivery_ratio,
             "network_lifetime": self.network_lifetime,
             "average_latency": self.average_latency
-        }
+        }
+
+
+    def find_next_sensor(self):
+        for offset in range(1, self.n_sensors):
+            next_index = (self.current_sensor + offset) % self.n_sensors
+            if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
+                return next_index
+        return None # If no such sensor is found
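The new `find_next_sensor()` helper is a round-robin scan: it starts just after the current sensor and returns the first sensor that still has both energy and packets, or `None` when the network is exhausted. A self-contained version of the same logic, for illustration with hypothetical state:

```python
import numpy as np

def find_next_sensor(current_sensor, remaining_energy, number_of_packets):
    """Return the index of the next live sensor after current_sensor, or None."""
    n_sensors = len(remaining_energy)
    for offset in range(1, n_sensors):
        next_index = (current_sensor + offset) % n_sensors
        if remaining_energy[next_index] > 0 and number_of_packets[next_index] > 0:
            return next_index
    return None

# Sensor 1 is drained, so the scan starting after sensor 0 lands on sensor 2
print(find_next_sensor(0, np.array([1.0, 0.0, 0.7]), np.array([2, 5, 1])))  # -> 2
```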
gym_examples-3.0.267.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=P0-KlIsONNVBKDPxlYZseVoScqQ3WTdLbUXOfsbtc7Y,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=MTP_6sqJT4T2IBGedoLOGaKVZras8gT2NXvWHfggOXA,24154
+gym_examples-3.0.267.dist-info/METADATA,sha256=D5AgSNWQ3VeV8JapskViS-CoIkUkgwQPZ74N6WEcEAk,412
+gym_examples-3.0.267.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.267.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.267.dist-info/RECORD,,
gym_examples-3.0.265.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=F0u_yF7l2eD3fOQTUzN2XUFt7etMgtlhgDv5tFMubpE,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=2GvyLeNjLM-q9oLCobVzpTUrUqzjTyRn_H3BAQZZvbY,24821
-gym_examples-3.0.265.dist-info/METADATA,sha256=D0__HbG6kJd9Kb1SX_Lh_MysXCxkK_ToMFudnMPWqgU,412
-gym_examples-3.0.265.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.265.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.265.dist-info/RECORD,,
File without changes
File without changes