gym-examples 3.0.266__py3-none-any.whl → 3.0.268__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gym_examples/__init__.py +1 -1
- gym_examples/envs/wsn_env.py +54 -52
- {gym_examples-3.0.266.dist-info → gym_examples-3.0.268.dist-info}/METADATA +1 -1
- gym_examples-3.0.268.dist-info/RECORD +7 -0
- gym_examples-3.0.266.dist-info/RECORD +0 -7
- {gym_examples-3.0.266.dist-info → gym_examples-3.0.268.dist-info}/WHEEL +0 -0
- {gym_examples-3.0.266.dist-info → gym_examples-3.0.268.dist-info}/top_level.txt +0 -0
gym_examples/__init__.py
CHANGED
gym_examples/envs/wsn_env.py
CHANGED
@@ -25,7 +25,7 @@ initial_number_of_packets = 1 # initial number of packets to transmit
 latency_per_hop = 1 # latency per hop in seconds

 base_back_up_dir = "results/data/"
-max_reward =
+max_reward = 3 # maximum reward value when the sensors sent data to the base station. The opposite value is when the sensors perform an unauthorized action

 # Define the final reward function using an attention mechanism
 class Attention(nn.Module):
@@ -73,7 +73,7 @@ class WSNRoutingEnv(gym.Env):
         self.episode_count = 0
         self.scale_displacement = 0.01 * (upper_bound - lower_bound) # scale of the random displacement of the sensors
         self.epsilon = 1e-10 # small value to avoid division by zero
-
+        self.current_sensor = 0 # Index of the current sensor

         # Define observation space
         self.observation_space = Tuple(
@@ -81,28 +81,13 @@ class WSNRoutingEnv(gym.Env):
         )

         # self.action_space = Tuple(tuple([Discrete(self.n_sensors + 1)] * self.n_agents))
-        self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
+        # self.action_space = MultiDiscrete([self.n_sensors + 1] * self.n_agents)
         # self.action_space = MultiDiscrete([self.n_agents, self.n_sensors + 1])
-
+        self.action_space = Discrete(self.n_sensors + 1) # +1 for the base station

         self.reset()

     def reset(self):
-
-        # if self.episode_count > 1 and os.getenv('PRINT_STATS') == 'True': # Statistics for the PPO algorithm during the training phase
-        #     self.episode_returns.append(self.episode_return)
-        #     self.episode_std_remaining_energy.append(np.std(self.remaining_energy))
-        #     self.episode_mean_remaining_energy.append(np.mean(self.remaining_energy))
-        #     self.episode_total_consumption_energy.append(np.sum(initial_energy - self.remaining_energy))
-        #     self.episode_network_throughput.append(self.network_throughput)
-        #     self.episode_packet_delivery_ratio.append(self.packet_delivery_ratio)
-        #     self.episode_network_lifetime.append(self.network_lifetime)
-        #     self.episode_average_latency.append(self.average_latency)
-
-        # if self.episode_count > 1 and os.getenv('PRINT_STATS') == 'True-False': # Statistics for the PPO algorithm during the evaluation phase
-        #     print(f"Episode: {self.episode_count}")
-        #     print(self.get_metrics())
-
         self.episode_return = 0
         self.sensor_positions = np.random.rand(self.n_sensors, 2) * (upper_bound - lower_bound) + lower_bound
         self.distance_to_base = np.linalg.norm(self.sensor_positions - base_station_position, axis=1)
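The central change in this hunk is the switch from a per-agent MultiDiscrete action space to a single Discrete action space, with the environment itself tracking which sensor acts via self.current_sensor. As a rough illustration of what that means for a caller, here is a minimal sketch; the n_sensors and n_agents values are hypothetical and not taken from the package:

from gym.spaces import Discrete, MultiDiscrete

n_sensors, n_agents = 5, 5  # hypothetical sizes for illustration

# 3.0.266: one next-hop choice per agent, sampled as a vector
old_space = MultiDiscrete([n_sensors + 1] * n_agents)
print(old_space.sample())   # e.g. array([3, 0, 5, 1, 2])

# 3.0.268: a single next-hop choice for the sensor tracked by self.current_sensor
new_space = Discrete(n_sensors + 1)  # index n_sensors means "send to the base station"
print(new_space.sample())   # e.g. 4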
@@ -131,18 +116,13 @@ class WSNRoutingEnv(gym.Env):


     def step(self, actions):
-        actions = [actions[i] for i in range(self.n_agents)] # We want to go back from the MultiDiscrete action space to a tuple of tuple of Discrete action spaces
         self.steps += 1
-
-        rewards =
-        dones =
-
-
-
-
-            if i >= self.n_sensors:
-                raise ValueError("Invalid sensor index!") # the number of actions is greater than the number of sensors
-
+        rewards = - max_reward
+        # rewards = 0
+        dones = False
+        actions = np.array([self.current_sensor, actions])
+        print(f"\nactions from step in WSNRoutingEnv: {actions}\n")
+        for i, action in [actions]:
             if self.remaining_energy[i] <= 0 or self.number_of_packets[i] <= 0:
                 continue # Skip if sensor has no energy left or no packets to transmit

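Because the environment now tracks self.current_sensor itself, step() receives a single scalar action and pairs it with that index before entering the loop. A small standalone sketch of how that pairing and the single-element loop behave; the concrete values are made up for illustration:

import numpy as np

current_sensor = 2      # index the environment is currently routing for (hypothetical)
action = 5              # next hop chosen by the agent; with n_sensors = 5 this is the base station

pair = np.array([current_sensor, action])
for i, a in [pair]:     # a one-element list, so the body runs exactly once
    print(i, a)         # -> 2 5: sensor 2 forwards its packets toward index 5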
@@ -157,6 +137,11 @@ class WSNRoutingEnv(gym.Env):
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], self.distance_to_base[i])
                 if self.remaining_energy[i] < transmission_energy:
                     self.remaining_energy[i] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the sensor does not have enough energy to transmit data to the base station

                 self.update_sensor_energies(i, transmission_energy)
@@ -168,10 +153,12 @@ class WSNRoutingEnv(gym.Env):
                 self.total_latency += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-
-
-
-
+                rewards = max_reward # Reward for transmitting data to the base station
+                next_sensor = self.find_next_sensor()
+                if next_sensor is None:
+                    dones = True
+                else:
+                    self.current_sensor = next_sensor
             else:
                 distance = np.linalg.norm(self.sensor_positions[i] - self.sensor_positions[action])
                 if distance > self.coverage_radius:
@@ -180,11 +167,21 @@ class WSNRoutingEnv(gym.Env):
                 transmission_energy = self.transmission_energy(self.number_of_packets[i], distance)
                 reception_energy = self.reception_energy(self.number_of_packets[i])
                 if self.remaining_energy[i] < transmission_energy:
-                    self.remaining_energy[i] = 0
+                    self.remaining_energy[i] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the sensor does not have enough energy to transmit data to the next hop
                 if self.remaining_energy[action] < reception_energy:
                     self.number_of_packets[i] = 0
                     self.remaining_energy[action] = 0
+                    next_sensor = self.find_next_sensor()
+                    if next_sensor is None:
+                        dones = True
+                    else:
+                        self.current_sensor = next_sensor
                     continue # Skip if the next hop does not have enough energy to receive data

                 self.update_sensor_energies(i, transmission_energy)
@@ -196,19 +193,17 @@ class WSNRoutingEnv(gym.Env):
                 self.packet_latency[action] += self.packet_latency[i] + latency_per_hop
                 self.packet_latency[i] = 0

-                rewards
+                rewards = self.compute_individual_rewards(i, action)

             # Update the number of packets
             self.number_of_packets[action] += self.number_of_packets[i]
+
+            self.current_sensor = action
             self.number_of_packets[i] = 0 # Reset the number of packets of the sensor i
             # Calculate final reward
             # rewards[i] = self.compute_attention_rewards(rewards[i])
-            rewards
-
-        for i in range(self.n_sensors):
-            if (self.remaining_energy[i] <= 0) or (self.number_of_packets[i] <= 0):
-                dones[i] = True
-
+            rewards = np.mean(rewards)
+
         # Integrate the mobility of the sensors
         # self.integrate_mobility()

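Taken together with the earlier hunks, the per-step reward is now a single scalar: it starts at -max_reward, becomes +max_reward when the current sensor delivers to the base station, and otherwise becomes the mean of compute_individual_rewards(i, action) when packets are forwarded to another sensor. The following is only a hedged sketch of that implied scheme, not the package's actual method; scalar_reward and its arguments are invented for illustration:

import numpy as np

max_reward = 3  # module-level constant from the diff

def scalar_reward(delivered_to_base, individual_rewards=None):
    # Sketch of the reward scheme implied by this hunk, not the package's own code
    if delivered_to_base:
        return max_reward                           # data reached the base station
    if individual_rewards is not None:
        return float(np.mean(individual_rewards))   # forwarded to another sensor
    return -max_reward                              # default / failed or unauthorized action

print(scalar_reward(True))                    # 3
print(scalar_reward(False, [0.2, 0.4, 0.6]))  # 0.4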
@@ -219,13 +214,10 @@ class WSNRoutingEnv(gym.Env):

         self.get_metrics()

-        rewards =
-
-
-
-        # print(f"Step: {self.steps}, Rewards: {rewards}, Done: {dones}")
-        dones = all(dones) # Done if all agents are done
-
+        rewards = rewards.item() if isinstance(rewards, torch.Tensor) else rewards # Convert the reward to a float
+        if not dones:
+            dones = all(self.remaining_energy[i] <= 0 or self.number_of_packets[i] == 0 for i in range(self.n_sensors))
+
         return self._get_obs(), rewards, dones, self.get_metrics()


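The termination check also changes: instead of reducing a per-agent dones list with all(dones), a single boolean is set once every sensor is either out of energy or out of packets. A minimal standalone check of that condition, with made-up arrays:

import numpy as np

# Hypothetical network state: sensors 0 and 2 are drained or empty, sensor 1 still has work to do
remaining_energy  = np.array([0.0, 0.4, 0.0])
number_of_packets = np.array([2,   3,   0  ])
n_sensors = 3

done = all(remaining_energy[i] <= 0 or number_of_packets[i] == 0 for i in range(n_sensors))
print(done)  # False: sensor 1 can still transmit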
@@ -233,7 +225,8 @@ class WSNRoutingEnv(gym.Env):
         return [{'remaining_energy': np.array([e]),
                  'consumption_energy': np.array([initial_energy - e]),
                  'sensor_positions': p,
-                 'number_of_packets': np.array([d])
+                 'number_of_packets': np.array([d]),
+                 'curent_sensor': np.array([self.current_sensor])
                  } for e, p, d in zip(self.remaining_energy, self.sensor_positions, self.number_of_packets)]


@@ -242,7 +235,8 @@ class WSNRoutingEnv(gym.Env):
             'remaining_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
             'consumption_energy': Box(low=0, high=initial_energy, shape=(1,), dtype=np.float64),
             'sensor_positions': Box(low=lower_bound, high=upper_bound, shape=(2,), dtype=np.float64),
-            'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int)
+            'number_of_packets': Box(low=0, high=self.n_sensors * initial_number_of_packets + 1, shape=(1,), dtype=int),
+            'current_sensor': Box(low=0, high=self.n_sensors - 1, shape=(1,), dtype=int)
             })


@@ -496,4 +490,12 @@ class WSNRoutingEnv(gym.Env):
             "packet_delivery_ratio": self.packet_delivery_ratio,
             "network_lifetime": self.network_lifetime,
             "average_latency": self.average_latency
-        }
+        }
+
+
+    def find_next_sensor(self):
+        for offset in range(1, self.n_sensors):
+            next_index = (self.current_sensor + offset) % self.n_sensors
+            if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
+                return next_index
+        return None # If no such sensor is found
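find_next_sensor() scans the other sensors in round-robin order, starting just after current_sensor, and returns the first one that still has both energy and packets, or None when the network is exhausted. A quick behavioural check under assumed state; the _Probe class and its arrays are invented for illustration and carry only the attributes the method reads:

import numpy as np

class _Probe:
    # Minimal stand-in with only the attributes find_next_sensor uses
    def __init__(self):
        self.n_sensors = 4
        self.current_sensor = 1
        self.remaining_energy = np.array([0.5, 0.0, 0.0, 0.8])
        self.number_of_packets = np.array([1, 0, 2, 0])

    def find_next_sensor(self):
        for offset in range(1, self.n_sensors):
            next_index = (self.current_sensor + offset) % self.n_sensors
            if self.remaining_energy[next_index] > 0 and self.number_of_packets[next_index] > 0:
                return next_index
        return None  # If no such sensor is found

print(_Probe().find_next_sensor())  # -> 0: sensor 2 has no energy, sensor 3 has no packets, sensor 0 has both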
gym_examples-3.0.268.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+gym_examples/__init__.py,sha256=HpffWs56Jd1UDNQGyBLzk4OLzrDNZMUx_SN58BZaJA8,166
+gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
+gym_examples/envs/wsn_env.py,sha256=MTP_6sqJT4T2IBGedoLOGaKVZras8gT2NXvWHfggOXA,24154
+gym_examples-3.0.268.dist-info/METADATA,sha256=NvjGw3Ag9m0cr-a5mu_DQn2IkoEiiCUmZjeFAQye0wI,412
+gym_examples-3.0.268.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+gym_examples-3.0.268.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
+gym_examples-3.0.268.dist-info/RECORD,,
gym_examples-3.0.266.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
-gym_examples/__init__.py,sha256=KqEqczXOEfVmbkVd7dsDXk4gyzc5VC_HrycnPdSH_8M,166
-gym_examples/envs/__init__.py,sha256=lgMe4pyOuUTgTBUddM0iwMlETsYTwFShny6ifm8PGM8,53
-gym_examples/envs/wsn_env.py,sha256=4CXrSu-8epidbKW7JgPIzbXPS-WPtCC82FIsdPPAlMQ,24863
-gym_examples-3.0.266.dist-info/METADATA,sha256=KdqFD13218m6mSDcVJ5fpru17gAS0IuYts1m4uMPUTE,412
-gym_examples-3.0.266.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-gym_examples-3.0.266.dist-info/top_level.txt,sha256=rJRksoAF32M6lTLBEwYzRdo4PgtejceaNnnZ3HeY_Rk,13
-gym_examples-3.0.266.dist-info/RECORD,,
{gym_examples-3.0.266.dist-info → gym_examples-3.0.268.dist-info}/WHEEL
File without changes
{gym_examples-3.0.266.dist-info → gym_examples-3.0.268.dist-info}/top_level.txt
File without changes