epyt-flow 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- epyt_flow/EPANET/EPANET/SRC_engines/AUTHORS +28 -0
- epyt_flow/EPANET/EPANET/SRC_engines/LICENSE +21 -0
- epyt_flow/EPANET/EPANET/SRC_engines/Readme_SRC_Engines.txt +18 -0
- epyt_flow/EPANET/EPANET/SRC_engines/enumstxt.h +134 -0
- epyt_flow/EPANET/EPANET/SRC_engines/epanet.c +5578 -0
- epyt_flow/EPANET/EPANET/SRC_engines/epanet2.c +865 -0
- epyt_flow/EPANET/EPANET/SRC_engines/epanet2.def +131 -0
- epyt_flow/EPANET/EPANET/SRC_engines/errors.dat +73 -0
- epyt_flow/EPANET/EPANET/SRC_engines/funcs.h +193 -0
- epyt_flow/EPANET/EPANET/SRC_engines/genmmd.c +1000 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hash.c +177 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hash.h +28 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hydcoeffs.c +1151 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hydraul.c +1117 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hydsolver.c +720 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hydstatus.c +476 -0
- epyt_flow/EPANET/EPANET/SRC_engines/include/epanet2.h +431 -0
- epyt_flow/EPANET/EPANET/SRC_engines/include/epanet2_2.h +1786 -0
- epyt_flow/EPANET/EPANET/SRC_engines/include/epanet2_enums.h +468 -0
- epyt_flow/EPANET/EPANET/SRC_engines/inpfile.c +810 -0
- epyt_flow/EPANET/EPANET/SRC_engines/input1.c +707 -0
- epyt_flow/EPANET/EPANET/SRC_engines/input2.c +864 -0
- epyt_flow/EPANET/EPANET/SRC_engines/input3.c +2170 -0
- epyt_flow/EPANET/EPANET/SRC_engines/main.c +93 -0
- epyt_flow/EPANET/EPANET/SRC_engines/mempool.c +142 -0
- epyt_flow/EPANET/EPANET/SRC_engines/mempool.h +24 -0
- epyt_flow/EPANET/EPANET/SRC_engines/output.c +852 -0
- epyt_flow/EPANET/EPANET/SRC_engines/project.c +1359 -0
- epyt_flow/EPANET/EPANET/SRC_engines/quality.c +685 -0
- epyt_flow/EPANET/EPANET/SRC_engines/qualreact.c +743 -0
- epyt_flow/EPANET/EPANET/SRC_engines/qualroute.c +694 -0
- epyt_flow/EPANET/EPANET/SRC_engines/report.c +1489 -0
- epyt_flow/EPANET/EPANET/SRC_engines/rules.c +1362 -0
- epyt_flow/EPANET/EPANET/SRC_engines/smatrix.c +871 -0
- epyt_flow/EPANET/EPANET/SRC_engines/text.h +497 -0
- epyt_flow/EPANET/EPANET/SRC_engines/types.h +874 -0
- epyt_flow/EPANET/EPANET-MSX/MSX_Updates.txt +53 -0
- epyt_flow/EPANET/EPANET-MSX/Src/dispersion.h +27 -0
- epyt_flow/EPANET/EPANET-MSX/Src/hash.c +107 -0
- epyt_flow/EPANET/EPANET-MSX/Src/hash.h +28 -0
- epyt_flow/EPANET/EPANET-MSX/Src/include/epanetmsx.h +102 -0
- epyt_flow/EPANET/EPANET-MSX/Src/include/epanetmsx_export.h +42 -0
- epyt_flow/EPANET/EPANET-MSX/Src/mathexpr.c +937 -0
- epyt_flow/EPANET/EPANET-MSX/Src/mathexpr.h +39 -0
- epyt_flow/EPANET/EPANET-MSX/Src/mempool.c +204 -0
- epyt_flow/EPANET/EPANET-MSX/Src/mempool.h +24 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxchem.c +1285 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxcompiler.c +368 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxdict.h +42 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxdispersion.c +586 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxerr.c +116 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxfile.c +260 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxfuncs.c +175 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxfuncs.h +35 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxinp.c +1504 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxout.c +401 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxproj.c +791 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxqual.c +2010 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxrpt.c +400 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxtank.c +422 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxtoolkit.c +1164 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxtypes.h +551 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxutils.c +524 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxutils.h +56 -0
- epyt_flow/EPANET/EPANET-MSX/Src/newton.c +158 -0
- epyt_flow/EPANET/EPANET-MSX/Src/newton.h +34 -0
- epyt_flow/EPANET/EPANET-MSX/Src/rk5.c +287 -0
- epyt_flow/EPANET/EPANET-MSX/Src/rk5.h +39 -0
- epyt_flow/EPANET/EPANET-MSX/Src/ros2.c +293 -0
- epyt_flow/EPANET/EPANET-MSX/Src/ros2.h +35 -0
- epyt_flow/EPANET/EPANET-MSX/Src/smatrix.c +816 -0
- epyt_flow/EPANET/EPANET-MSX/Src/smatrix.h +29 -0
- epyt_flow/EPANET/EPANET-MSX/readme.txt +14 -0
- epyt_flow/EPANET/compile.sh +4 -0
- epyt_flow/VERSION +1 -0
- epyt_flow/__init__.py +24 -0
- epyt_flow/data/__init__.py +0 -0
- epyt_flow/data/benchmarks/__init__.py +11 -0
- epyt_flow/data/benchmarks/batadal.py +257 -0
- epyt_flow/data/benchmarks/batadal_data.py +28 -0
- epyt_flow/data/benchmarks/battledim.py +473 -0
- epyt_flow/data/benchmarks/battledim_data.py +51 -0
- epyt_flow/data/benchmarks/gecco_water_quality.py +267 -0
- epyt_flow/data/benchmarks/leakdb.py +592 -0
- epyt_flow/data/benchmarks/leakdb_data.py +18923 -0
- epyt_flow/data/benchmarks/water_usage.py +123 -0
- epyt_flow/data/networks.py +650 -0
- epyt_flow/gym/__init__.py +4 -0
- epyt_flow/gym/control_gyms.py +47 -0
- epyt_flow/gym/scenario_control_env.py +101 -0
- epyt_flow/metrics.py +404 -0
- epyt_flow/models/__init__.py +2 -0
- epyt_flow/models/event_detector.py +31 -0
- epyt_flow/models/sensor_interpolation_detector.py +118 -0
- epyt_flow/rest_api/__init__.py +4 -0
- epyt_flow/rest_api/base_handler.py +70 -0
- epyt_flow/rest_api/res_manager.py +95 -0
- epyt_flow/rest_api/scada_data_handler.py +476 -0
- epyt_flow/rest_api/scenario_handler.py +352 -0
- epyt_flow/rest_api/server.py +106 -0
- epyt_flow/serialization.py +438 -0
- epyt_flow/simulation/__init__.py +5 -0
- epyt_flow/simulation/events/__init__.py +6 -0
- epyt_flow/simulation/events/actuator_events.py +259 -0
- epyt_flow/simulation/events/event.py +81 -0
- epyt_flow/simulation/events/leakages.py +404 -0
- epyt_flow/simulation/events/sensor_faults.py +267 -0
- epyt_flow/simulation/events/sensor_reading_attack.py +185 -0
- epyt_flow/simulation/events/sensor_reading_event.py +170 -0
- epyt_flow/simulation/events/system_event.py +88 -0
- epyt_flow/simulation/parallel_simulation.py +147 -0
- epyt_flow/simulation/scada/__init__.py +3 -0
- epyt_flow/simulation/scada/advanced_control.py +134 -0
- epyt_flow/simulation/scada/scada_data.py +1589 -0
- epyt_flow/simulation/scada/scada_data_export.py +255 -0
- epyt_flow/simulation/scenario_config.py +608 -0
- epyt_flow/simulation/scenario_simulator.py +1897 -0
- epyt_flow/simulation/scenario_visualizer.py +61 -0
- epyt_flow/simulation/sensor_config.py +1289 -0
- epyt_flow/topology.py +290 -0
- epyt_flow/uncertainty/__init__.py +3 -0
- epyt_flow/uncertainty/model_uncertainty.py +302 -0
- epyt_flow/uncertainty/sensor_noise.py +73 -0
- epyt_flow/uncertainty/uncertainties.py +555 -0
- epyt_flow/uncertainty/utils.py +206 -0
- epyt_flow/utils.py +306 -0
- epyt_flow-0.1.0.dist-info/LICENSE +21 -0
- epyt_flow-0.1.0.dist-info/METADATA +139 -0
- epyt_flow-0.1.0.dist-info/RECORD +131 -0
- epyt_flow-0.1.0.dist-info/WHEEL +5 -0
- epyt_flow-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,592 @@
+"""
+LeakDB (Leakage Diagnosis Benchmark) by Vrachimis, S. G., Kyriakou, M. S., Eliades, D. G.,
+and Polycarpou, M. M. (2018), is a realistic leakage dataset for water distribution networks.
+The dataset consists of 1000 artificially created but realistic leakage
+scenarios, on different water distribution networks, under varying conditions.
+
+See https://github.com/KIOS-Research/LeakDB/ for details.
+
+This module provides functions for loading the original LeakDB data set
+:func:`~epyt_flow.data.benchmarks.leakdb.load_data`, as well as methods for loading the scenarios
+:func:`~epyt_flow.data.benchmarks.leakdb.load_scenarios` and pre-generated SCADA data
+:func:`~epyt_flow.data.benchmarks.leakdb.load_scada_data`.
+The official scoring/evaluation is implemented in
+:func:`~epyt_flow.data.benchmarks.leakdb.compute_evaluation_score` -- i.e. those results can be
+directly compared to the official paper.
+Besides this, the user can choose to evaluate predictions using any other metric from
+:mod:`~epyt_flow.metrics`.
+"""
+import os
+from typing import Union
+import math
+import json
+import scipy
+import numpy as np
+import pandas as pd
+from scipy.sparse import bsr_array
+
+from ..networks import load_net1, load_hanoi
+from .leakdb_data import NET1_LEAKAGES, HANOI_LEAKAGES
+from ...utils import get_temp_folder, to_seconds, unpack_zip_archive, create_path_if_not_exist, \
+    download_if_necessary
+from ...metrics import f1_score, true_positive_rate, true_negative_rate
+from ...simulation import ScenarioSimulator
+from ...simulation.events import AbruptLeakage, IncipientLeakage
+from ...simulation import ScenarioConfig
+from ...simulation.scada import ScadaData
+from ...uncertainty import ModelUncertainty, UniformUncertainty
+
+
+def __leak_time_to_idx(t: int, round_up: bool = False, hydraulic_time_step: int = 1800):
+    if round_up is False:
+        return math.floor(t / hydraulic_time_step)
+    else:
+        return math.ceil(t / hydraulic_time_step)
+
+
+def __get_leak_time_windows(s_id: int, leaks_info: dict,
+                            hydraulic_time_step: int = 1800) -> list[tuple[int, int]]:
+    time_windows = []
+    if str(s_id) in leaks_info:
+        for leak in leaks_info[str(s_id)]:
+            t_idx_start = __leak_time_to_idx(leak["leak_start_time"] * hydraulic_time_step)
+            t_idx_end = __leak_time_to_idx(leak["leak_end_time"] * hydraulic_time_step,
+                                           round_up=True)
+
+            time_windows.append((t_idx_start, t_idx_end))
+
+    return time_windows
+
+
+def __create_labels(s_id: int, n_time_steps: int, nodes: list[str],
+                    leaks_info: dict,
+                    hydraulic_time_step: int = 1800) -> tuple[np.ndarray, scipy.sparse.bsr_array]:
+    y = np.zeros(n_time_steps)
+
+    leak_locations_row = []
+    leak_locations_col = []
+    if str(s_id) in leaks_info:
+        for leak in leaks_info[str(s_id)]:
+            t_idx_start = __leak_time_to_idx(leak["leak_start_time"] * hydraulic_time_step)
+            t_idx_end = __leak_time_to_idx(leak["leak_end_time"] * hydraulic_time_step,
+                                           round_up=True)
+
+            leak_node_idx = nodes.index(leak["node_id"])
+
+            for t in range(t_idx_end - t_idx_start):
+                leak_locations_row.append(t_idx_start + t)
+                leak_locations_col.append(leak_node_idx)
+
+            y[t_idx_start:t_idx_end] = 1
+
+    y_leak_locations = bsr_array(
+        (np.ones(len(leak_locations_row)), (leak_locations_row, leak_locations_col)),
+        shape=(n_time_steps, len(nodes)))
+
+    return y, y_leak_locations
+
+
+def compute_evaluation_score(scenarios_id: list[int], use_net1: bool,
+                             y_pred_labels_per_scenario: list[np.ndarray]) -> dict:
+    """
+    Evaluates the predictions (leakage detection) for a list of given scenarios.
+
+    Parameters
+    ----------
+    scenarios_id : `list[int]`
+        List of scenario IDs that are to be evaluated -- there is a total of 1000 scenarios.
+    use_net1 : `bool`
+        If True, the Net1 LeakDB will be used for evaluation; otherwise, the Hanoi LeakDB will be used.
+    y_pred_labels_per_scenario : `list[numpy.ndarray]`
+        Predicted binary labels (over time) for each scenario in `scenarios_id`.
+
+    Returns
+    -------
+    `dict`
+        Dictionary containing the f1-score, true positive rate, true negative rate,
+        and early detection score.
+    """
+    # Original MATLAB implementation: https://github.com/KIOS-Research/LeakDB/blob/master/CCWI-WDSA2018/Scoring%20Function/scoring_algorithm.m
+    if len(scenarios_id) != len(y_pred_labels_per_scenario):
+        raise ValueError("Number of scenarios does not match number of predictions -- " +
+                         f"expected {len(scenarios_id)} but got {len(y_pred_labels_per_scenario)}")
+
+    # Load ground truth
+    if use_net1 is True:
+        leaks_info = json.loads(NET1_LEAKAGES)
+    else:
+        leaks_info = json.loads(HANOI_LEAKAGES)
+
+    network_config = load_net1() if use_net1 is True \
+        else load_hanoi()
+    nodes = network_config.sensor_config.nodes
+
+    y_true = []
+    for i, s_id in enumerate(scenarios_id):
+        y, _ = __create_labels(s_id, len(y_pred_labels_per_scenario[i]), nodes, leaks_info)
+        if len(y) != len(y_pred_labels_per_scenario[i]):
+            raise ValueError("A prediction must be provided for each time step -- " +
+                             f"mismatch for scenario {i}, expected {len(y)} but got " +
+                             f"{len(y_pred_labels_per_scenario[i])}")
+        y_true.append(y)
+
+    y_true = np.stack(y_true, axis=0)
+    y_pred = np.stack(y_pred_labels_per_scenario, axis=0)
+
+    # Evaluate predictions
+    f1 = f1_score(y_pred, y_true)
+    tpr = true_positive_rate(y_pred, y_true)
+    tnr = true_negative_rate(y_pred, y_true)
+
+    early_detection_score = 0
+    normalizing = []
+    n_time_steps_tolerance = 10
+    detection_threshold = .75
+    for i, s_id in enumerate(scenarios_id):
+        y_pred_i = y_pred_labels_per_scenario[i]
+        leaks_time_window = __get_leak_time_windows(s_id, leaks_info)
+
+        scores = []
+        for t0, _ in leaks_time_window:
+            normalizing.append(1.)
+
+            y_pred_window = y_pred_i[t0:t0+n_time_steps_tolerance]
+            if 1 in y_pred_window and \
+                    np.sum(y_pred_window) / len(y_pred_window) > detection_threshold:
+                t_idx = np.argwhere(y_pred_window)[0] + 1
+                scores.append(2. / (1 + np.exp((5. / n_time_steps_tolerance) * t_idx)))
+            else:
+                scores.append(0.)
+
+        early_detection_score += np.sum(scores)
+
+    early_detection_score = early_detection_score / np.sum(normalizing)
+
+    return {"f1_score": f1, "true_positive_rate": tpr,
+            "true_negative_rate": tnr, "early_detection_score": early_detection_score}
+
+
+def load_data(scenarios_id: list[int], use_net1: bool, download_dir: str = None,
+              return_X_y: bool = False, return_features_desc: bool = False,
+              return_leak_locations: bool = False, verbose: bool = True) -> dict:
+    """
+    Loads the original LeakDB benchmark data set.
+
+    .. warning::
+
+        All scenarios together are a huge data set -- approx. 8GB for Net1 and 25GB for Hanoi.
+        Downloading and loading might take some time! Also, a sufficient amount of free hard
+        disk space is required.
+
+    Parameters
+    ----------
+    scenarios_id : `list[int]`
+        List of scenario IDs that are to be loaded -- there is a total of 1000 scenarios.
+    use_net1 : `bool`
+        If True, the Net1 LeakDB will be loaded; otherwise, the Hanoi LeakDB will be loaded.
+    download_dir : `str`, optional
+        Path to the data files -- if None, the temp folder will be used.
+        If the path does not exist, the data files will be downloaded to the given path.
+
+        The default is None.
+    return_X_y : `bool`, optional
+        If True, the data is returned together with the labels (presence of a leakage) as
+        two Numpy arrays; otherwise, the data is returned as Pandas data frames.
+
+        The default is False.
+    return_features_desc : `bool`, optional
+        If True and if `return_X_y` is True, the returned dictionary contains the
+        features' descriptions (i.e. names) under the key "features_desc".
+
+        The default is False.
+    return_leak_locations : `bool`
+        If True and if `return_X_y` is True, the leak locations are returned as well --
+        as an instance of `scipy.sparse.bsr_array`.
+
+        The default is False.
+    verbose : `bool`, optional
+        If True, a progress bar is shown while downloading files.
+
+        The default is True.
+
+    Returns
+    -------
+    `dict`
+        Dictionary containing the scenario data sets. Data of each requested scenario
+        can be accessed by using the scenario ID as a key.
+    """
+    url_data = "https://filedn.com/lumBFq2P9S74PNoLPWtzxG4/EPyT-Flow/LeakDB-Original/" +\
+        f"{'Net1_CMH/' if use_net1 is True else 'Hanoi_CMH/'}"
+
+    if use_net1 is True:
+        network_desc = "Net1"
+        leaks_info = json.loads(NET1_LEAKAGES)
+    else:
+        network_desc = "Hanoi"
+        leaks_info = json.loads(HANOI_LEAKAGES)
+
+    download_dir = download_dir if download_dir is not None else get_temp_folder()
+    download_dir = os.path.join(download_dir, network_desc)
+    create_path_if_not_exist(download_dir)
+
+    results = {}
+    for s_id in scenarios_id:
+        scenario_data = f"Scenario-{s_id}.zip"
+        scenario_data_url = url_data + scenario_data
+        scenario_data_file_in = os.path.join(download_dir, scenario_data)
+        scenario_data_folder_in = os.path.join(download_dir, f"Scenario-{s_id}")
+
+        download_if_necessary(scenario_data_file_in, scenario_data_url, verbose)
+        create_path_if_not_exist(scenario_data_folder_in)
+        unpack_zip_archive(scenario_data_file_in, scenario_data_folder_in)
+
+        # Load and parse data
+        pressure_files = list(filter(lambda d: d.endswith(".csv"),
+                                     os.listdir(os.path.join(scenario_data_folder_in,
+                                                             "Pressures"))))
+        pressure_readings = {}
+        all_nodes = []
+        for f_in in pressure_files:
+            df = pd.read_csv(os.path.join(scenario_data_folder_in, "Pressures", f_in))
+            node_id = f_in.replace(".csv", "")
+            all_nodes.append(node_id)
+            pressure_readings[f"Pressure-{node_id}"] = df["Value"]
+
+        flow_files = list(filter(lambda d: d.endswith(".csv"),
+                                 os.listdir(os.path.join(scenario_data_folder_in, "Flows"))))
+        flow_readings = {}
+        for f_in in flow_files:
+            df = pd.read_csv(os.path.join(scenario_data_folder_in, "Flows", f_in))
+            flow_readings[f"Flow-{f_in.replace('.csv', '')}"] = df["Value"]
+
+        df_labels = pd.read_csv(os.path.join(scenario_data_folder_in, "Labels.csv"))
+        labels = df_labels["Label"]
+
+        df_timestamps = pd.read_csv(os.path.join(scenario_data_folder_in, "Timestamps.csv"))
+        sensor_reading_times = df_timestamps["Timestamp"]
+
+        df_final = pd.DataFrame(pressure_readings | flow_readings |
+                                {"labels": labels, "timestamps": sensor_reading_times})
+
+        # Prepare final data
+        if return_X_y is True:
+            X = df_final[list(pressure_readings.keys()) + list(flow_readings.keys())].to_numpy()
+            y = labels.to_numpy()
+
+            network_config = load_net1(download_dir) if use_net1 is True \
+                else load_hanoi(download_dir)
+            nodes = network_config.sensor_config.nodes
+            _, y_leak_locations = __create_labels(s_id, X.shape[0], nodes, leaks_info)
+
+            if return_features_desc is True and "features_desc" not in results:
+                results["features_desc"] = list(pressure_readings.keys()) + \
+                    list(flow_readings.keys())
+
+            if return_leak_locations is True:
+                results[s_id] = (X, y, y_leak_locations)
+            else:
+                results[s_id] = (X, y)
+        else:
+            results[s_id] = df_final
+
+    return results
+
+
+def load_scada_data(scenarios_id: list[int], use_net1: bool = True, download_dir: str = None,
+                    return_X_y: bool = False, return_leak_locations: bool = False,
+                    verbose: bool = True
+                    ) -> Union[list[ScadaData], list[tuple[np.ndarray, np.ndarray]]]:
+    """
+    Loads the SCADA data of the simulated LeakDB benchmark scenarios -- see
+    :func:`~epyt_flow.data.benchmarks.leakdb.load_scenarios`.
+
+    .. note::
+        Note that due to the randomness in the demand creation as well as in the model
+        uncertainties, the SCADA data differs from the original data set,
+        which can be loaded by calling :func:`~epyt_flow.data.benchmarks.leakdb.load_data`.
+        However, the leakages (i.e. location and profile) are consistent with the original data set.
+
+    Parameters
+    ----------
+    scenarios_id : `list[int]`
+        List of scenario IDs that are to be loaded -- there is a total of 1000 scenarios.
+    use_net1 : `bool`, optional
+        If True, the Net1 LeakDB will be loaded; otherwise, the Hanoi LeakDB will be loaded.
+
+        The default is True.
+    download_dir : `str`, optional
+        Path to the data files -- if None, the temp folder will be used.
+        If the path does not exist, the data files will be downloaded to the given path.
+
+        The default is None.
+    return_X_y : `bool`, optional
+        If True, the data is returned together with the labels (presence of a leakage) as
+        two Numpy arrays; otherwise, the data is returned as
+        :class:`~epyt_flow.simulation.scada.scada_data.ScadaData` instances.
+
+        The default is False.
+    return_leak_locations : `bool`
+        If True, the leak locations are returned as well --
+        as an instance of `scipy.sparse.bsr_array`.
+
+        The default is False.
+    verbose : `bool`, optional
+        If True, a progress bar is shown while downloading files.
+
+        The default is True.
+
+    Returns
+    -------
+    list[:class:`~epyt_flow.simulation.scada.scada_data.ScadaData`] or `list[tuple[numpy.ndarray, numpy.ndarray]]`
+        The simulated benchmark scenarios as either a list of
+        :class:`~epyt_flow.simulation.scada.scada_data.ScadaData` instances or as a list of
+        (X, y) Numpy arrays. If `return_leak_locations` is True, the leak locations are included
+        as an instance of `scipy.sparse.bsr_array` as well.
+    """
+    download_dir = download_dir if download_dir is not None else get_temp_folder()
+
+    url_data = "https://filedn.com/lumBFq2P9S74PNoLPWtzxG4/EPyT-Flow/LeakDB/" +\
+        f"{'Net1/' if use_net1 is True else 'Hanoi/'}"
+
+    if use_net1 is True:
+        leaks_info = json.loads(NET1_LEAKAGES)
+    else:
+        leaks_info = json.loads(HANOI_LEAKAGES)
+
+    r = []
+
+    for s_id in scenarios_id:
+        f_in = f"{'Net1_ID' if use_net1 is True else 'Hanoi_ID'}={s_id}.epytflow_scada_data"
+        download_if_necessary(os.path.join(download_dir, f_in), url_data + f_in, verbose)
+
+        data = ScadaData.load_from_file(os.path.join(download_dir, f_in))
+
+        X = data.get_data()
+        y, y_leak_locations = __create_labels(s_id, X.shape[0], data.sensor_config.nodes,
+                                              leaks_info)
+
+        if return_X_y is True:
+            if return_leak_locations is True:
+                r.append((X, y, y_leak_locations))
+            else:
+                r.append((X, y))
+        else:
+            if return_leak_locations is True:
+                r.append((data, y_leak_locations))
+            else:
+                r.append(data)
+
+    return r
+
+
+def load_scenarios(scenarios_id: list[int], use_net1: bool = True,
+                   download_dir: str = None, verbose: bool = True) -> list[ScenarioConfig]:
+    """
+    Creates and returns the LeakDB scenarios -- they can be either modified or
+    passed directly to the simulator
+    :class:`~epyt_flow.simulation.scenario_simulator.ScenarioSimulator`.
+
+    .. note::
+        Note that due to the randomness in the demand creation as well as in the model
+        uncertainties, the simulation results will differ between different runs, and
+        will also differ from the original data set
+        (see :func:`~epyt_flow.data.benchmarks.leakdb.load_data`).
+        However, the leakages (i.e. location and profile) will always be the same and
+        consistent with the original data set.
+
+    Parameters
+    ----------
+    scenarios_id : `list[int]`
+        List of scenario IDs that are to be loaded -- there is a total of 1000 scenarios.
+    use_net1 : `bool`, optional
+        If True, the Net1 network will be used; otherwise, the Hanoi network will be used.
+
+        The default is True.
+    download_dir : `str`, optional
+        Path to the Net1.inp or Hanoi.inp file -- if None, the temp folder will be used.
+        If the path does not exist, the .inp file will be downloaded to the given path.
+
+        The default is None.
+    verbose : `bool`, optional
+        If True, a progress bar is shown while downloading files.
+
+        The default is True.
+
+    Returns
+    -------
+    list[:class:`~epyt_flow.simulation.scenario_config.ScenarioConfig`]
+        LeakDB scenarios.
+    """
+    scenarios_inp = []
+
+    # Load the network
+    load_network = load_net1 if use_net1 is True else load_hanoi
+    download_dir = download_dir if download_dir is not None else get_temp_folder()
+    network_config = load_network(download_dir)
+
+    # Set simulation duration
+    hydraulic_time_step = to_seconds(minutes=30)  # 30min time steps
+    general_params = {"simulation_duration": to_seconds(days=365),  # One year
+                      "hydraulic_time_step": hydraulic_time_step,
+                      "reporting_time_step": hydraulic_time_step} | network_config.general_params
+
+    # Add demand patterns
+    def gen_dem(download_dir, use_net1):
+        # Taken from https://github.com/KIOS-Research/LeakDB/blob/master/CCWI-WDSA2018/Dataset_Generator_Py3/demandGenerator.py
+        week_pat = scipy.io.loadmat(os.path.join(download_dir, "weekPat_30min.mat"))
+        a_w = week_pat['Aw']
+        nw = week_pat['nw']
+        year_offset = scipy.io.loadmat(os.path.join(download_dir, "yearOffset_30min.mat"))
+        a_y = year_offset['Ay']
+        ny = year_offset['ny']
+
+        # Create yearly component
+        days = 365
+
+        t = (288/6)*days  # one year period in 30min time steps (288/6 = 48 steps per day)
+        w = 2*np.pi/t
+        k = np.arange(1, days*288/6+1, 1)  # number of time steps in time series
+        n = ny[0][0]  # number of fourier coefficients
+        h_y = [1]*len(k)
+
+        for i in range(1, n+1):
+            h_y = np.column_stack((h_y, np.sin(i*w*k), np.cos(i*w*k)))
+
+        unc_y = 0.1
+        a_y_r = a_y*(1-unc_y + 2*unc_y*np.random.rand(int(a_y.shape[0]), int(a_y.shape[1])))
+        year_offset = np.dot(h_y, a_y_r)
+
+        # Create weekly component
+        t = (288/6)*7  # one week period in 30min time steps
+        w = 2*np.pi/t
+        k = np.arange(1, days*288/6+1, 1)  # number of time steps in time series
+        n = nw[0][0]  # number of fourier coefficients
+        h_w = [1]*len(k)
+        for i in range(1, n+1):
+            h_w = np.column_stack((h_w, np.sin(i*w*k), np.cos(i*w*k)))
+
+        unc_w = 0.1
+        a_w_r = a_w*(1-unc_w + 2*unc_w*np.random.rand(int(a_w.shape[0]), int(a_w.shape[1])))
+        week_year_pat = np.dot(h_w, a_w_r)
+
+        # Create random component
+        unc_r = 0.05
+        random = np.random.normal(0, (-unc_r+2*unc_r),
+                                  (int(week_year_pat.shape[0]), int(week_year_pat.shape[1])))
+
+        # Create demand
+        if use_net1 is True:
+            base = 1
+        else:
+            base = 0.3  # Avoid negative pressure in Hanoi
+        variation = 0.75 + np.random.normal(0, 0.07)  # from 0 to 1
+        dem = base * (year_offset+1) * (week_year_pat*variation+1) * (random+1)
+        dem = dem.tolist()
+        dem_final = []
+        for d in dem:
+            dem_final.append(d[0])
+
+        return dem_final
+
+    week_pattern_url = "https://github.com/KIOS-Research/LeakDB/raw/master/CCWI-WDSA2018/" +\
+        "Dataset_Generator_Py3/weekPat_30min.mat"
+    year_offset_url = "https://github.com/KIOS-Research/LeakDB/raw/master/CCWI-WDSA2018/" +\
+        "Dataset_Generator_Py3/yearOffset_30min.mat"
+
+    download_if_necessary(os.path.join(download_dir, "weekPat_30min.mat"),
+                          week_pattern_url, verbose)
+    download_if_necessary(os.path.join(download_dir, "yearOffset_30min.mat"),
+                          year_offset_url, verbose)
+
+    for s_id in scenarios_id:  # Create new .inp files with demands if necessary
+        f_inp_in = os.path.join(download_dir,
+                                f"{'Net1' if use_net1 is True else 'Hanoi'}_LeakDB_ID={s_id}.inp")
+        scenarios_inp.append(f_inp_in)
+
+        if not os.path.exists(f_inp_in):
+            with ScenarioSimulator(f_inp_in=network_config.f_inp_in) as wdn:
+                wdn.epanet_api.setTimeHydraulicStep(general_params["hydraulic_time_step"])
+                wdn.epanet_api.setTimeSimulationDuration(general_params["simulation_duration"])
+                wdn.epanet_api.setTimePatternStep(general_params["hydraulic_time_step"])
+
+                wdn.epanet_api.deletePatternsAll()
+
+                reservoir_nodes_id = wdn.epanet_api.getNodeReservoirNameID()
+                for node_id in network_config.sensor_config.nodes:
+                    if node_id in network_config.sensor_config.tanks or\
+                            node_id in reservoir_nodes_id:
+                        continue
+
+                    node_idx = wdn.epanet_api.getNodeIndex(node_id)
+                    base_demand = wdn.epanet_api.getNodeBaseDemands(node_idx)[1][0]
+
+                    my_demand_pattern = np.array(gen_dem(download_dir, use_net1))
+
+                    wdn.set_node_demand_pattern(node_id=node_id, base_demand=base_demand,
+                                                demand_pattern_id=f"demand_{node_id}",
+                                                demand_pattern=my_demand_pattern)
+
+                wdn.epanet_api.saveInputFile(f_inp_in)
+
+    # Create uncertainties
+    class MyUniformUncertainty(UniformUncertainty):
+        """
+        Custom uniform uncertainty for LeakDB scenarios.
+        """
+        def __init__(self, **kwds):
+            super().__init__(**kwds)
+
+        def apply(self, data: float) -> float:
+            z = data * np.random.uniform(low=self.low, high=self.high)
+            lower = data - z
+            upper = data + z
+            return lower + np.random.uniform() * (upper - lower)
+
+    model_uncertainty = ModelUncertainty(pipe_length_uncertainty=MyUniformUncertainty(low=0,
+                                                                                      high=0.25),
+                                         pipe_diameter_uncertainty=MyUniformUncertainty(low=0,
+                                                                                        high=0.25),
+                                         pipe_roughness_uncertainty=MyUniformUncertainty(low=0,
+                                                                                         high=0.25),
+                                         demand_base_uncertainty=MyUniformUncertainty(low=0,
+                                                                                      high=0.25))
+
+    # Create sensor config (place pressure and flow sensors everywhere)
+    sensor_config = network_config.sensor_config
+    sensor_config.pressure_sensors = sensor_config.nodes
+    sensor_config.flow_sensors = sensor_config.links
+
+    # Add leakages
+    leaks_all = []
+
+    if use_net1 is True:
+        leaks_info = json.loads(NET1_LEAKAGES)
+    else:
+        leaks_info = json.loads(HANOI_LEAKAGES)
+
+    for s_id in scenarios_id:
+        leaks_data = []
+
+        if str(s_id) in leaks_info:
+            for leak in leaks_info[str(s_id)]:
+                if leak["leak_type"] == "incipient":
+                    leaks_data.append(
+                        IncipientLeakage(node_id=leak["node_id"], link_id=None,
+                                         diameter=leak["leak_diameter"],
+                                         start_time=leak["leak_start_time"] * hydraulic_time_step,
+                                         end_time=leak["leak_end_time"] * hydraulic_time_step,
+                                         peak_time=leak["leak_peak_time"] * hydraulic_time_step))
+                else:
+                    leaks_data.append(
+                        AbruptLeakage(node_id=leak["node_id"], link_id=None,
+                                      diameter=leak["leak_diameter"],
+                                      start_time=leak["leak_start_time"] * hydraulic_time_step,
+                                      end_time=leak["leak_end_time"] * hydraulic_time_step))
+
+        leaks_all.append(leaks_data)
+
+    # Build final scenarios
+    return [ScenarioConfig(f_inp_in=f_inp_in, general_params=general_params,
+                           sensor_config=sensor_config, model_uncertainty=model_uncertainty,
+                           system_events=leaks)
+            for f_inp_in, leaks in zip(scenarios_inp, leaks_all)]
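For orientation, a minimal usage sketch of the new leakdb module follows. It is not part of the wheel; it assumes the package is installed as epyt_flow and that the benchmark downloads succeed, and the all-zeros baseline is purely illustrative:

    import numpy as np
    from epyt_flow.data.benchmarks.leakdb import load_data, compute_evaluation_score

    scenarios = [1, 2, 3]

    # Download (if necessary) and load three Net1 scenarios as (X, y) Numpy arrays
    data = load_data(scenarios, use_net1=True, return_X_y=True)

    # Trivial "no leak" baseline: predict label 0 at every time step of every scenario
    y_pred = [np.zeros(len(data[s_id][1])) for s_id in scenarios]

    # Official LeakDB scoring: f1-score, TPR, TNR, and early detection score
    print(compute_evaluation_score(scenarios, use_net1=True,
                                   y_pred_labels_per_scenario=y_pred))

Note that the early detection score rewards detections within the first 10 time steps after a leak starts: a detection delayed by t steps contributes 2 / (1 + exp(t / 2)), while missed or late leaks contribute 0.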