epyt-flow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. epyt_flow/EPANET/EPANET/SRC_engines/AUTHORS +28 -0
  2. epyt_flow/EPANET/EPANET/SRC_engines/LICENSE +21 -0
  3. epyt_flow/EPANET/EPANET/SRC_engines/Readme_SRC_Engines.txt +18 -0
  4. epyt_flow/EPANET/EPANET/SRC_engines/enumstxt.h +134 -0
  5. epyt_flow/EPANET/EPANET/SRC_engines/epanet.c +5578 -0
  6. epyt_flow/EPANET/EPANET/SRC_engines/epanet2.c +865 -0
  7. epyt_flow/EPANET/EPANET/SRC_engines/epanet2.def +131 -0
  8. epyt_flow/EPANET/EPANET/SRC_engines/errors.dat +73 -0
  9. epyt_flow/EPANET/EPANET/SRC_engines/funcs.h +193 -0
  10. epyt_flow/EPANET/EPANET/SRC_engines/genmmd.c +1000 -0
  11. epyt_flow/EPANET/EPANET/SRC_engines/hash.c +177 -0
  12. epyt_flow/EPANET/EPANET/SRC_engines/hash.h +28 -0
  13. epyt_flow/EPANET/EPANET/SRC_engines/hydcoeffs.c +1151 -0
  14. epyt_flow/EPANET/EPANET/SRC_engines/hydraul.c +1117 -0
  15. epyt_flow/EPANET/EPANET/SRC_engines/hydsolver.c +720 -0
  16. epyt_flow/EPANET/EPANET/SRC_engines/hydstatus.c +476 -0
  17. epyt_flow/EPANET/EPANET/SRC_engines/include/epanet2.h +431 -0
  18. epyt_flow/EPANET/EPANET/SRC_engines/include/epanet2_2.h +1786 -0
  19. epyt_flow/EPANET/EPANET/SRC_engines/include/epanet2_enums.h +468 -0
  20. epyt_flow/EPANET/EPANET/SRC_engines/inpfile.c +810 -0
  21. epyt_flow/EPANET/EPANET/SRC_engines/input1.c +707 -0
  22. epyt_flow/EPANET/EPANET/SRC_engines/input2.c +864 -0
  23. epyt_flow/EPANET/EPANET/SRC_engines/input3.c +2170 -0
  24. epyt_flow/EPANET/EPANET/SRC_engines/main.c +93 -0
  25. epyt_flow/EPANET/EPANET/SRC_engines/mempool.c +142 -0
  26. epyt_flow/EPANET/EPANET/SRC_engines/mempool.h +24 -0
  27. epyt_flow/EPANET/EPANET/SRC_engines/output.c +852 -0
  28. epyt_flow/EPANET/EPANET/SRC_engines/project.c +1359 -0
  29. epyt_flow/EPANET/EPANET/SRC_engines/quality.c +685 -0
  30. epyt_flow/EPANET/EPANET/SRC_engines/qualreact.c +743 -0
  31. epyt_flow/EPANET/EPANET/SRC_engines/qualroute.c +694 -0
  32. epyt_flow/EPANET/EPANET/SRC_engines/report.c +1489 -0
  33. epyt_flow/EPANET/EPANET/SRC_engines/rules.c +1362 -0
  34. epyt_flow/EPANET/EPANET/SRC_engines/smatrix.c +871 -0
  35. epyt_flow/EPANET/EPANET/SRC_engines/text.h +497 -0
  36. epyt_flow/EPANET/EPANET/SRC_engines/types.h +874 -0
  37. epyt_flow/EPANET/EPANET-MSX/MSX_Updates.txt +53 -0
  38. epyt_flow/EPANET/EPANET-MSX/Src/dispersion.h +27 -0
  39. epyt_flow/EPANET/EPANET-MSX/Src/hash.c +107 -0
  40. epyt_flow/EPANET/EPANET-MSX/Src/hash.h +28 -0
  41. epyt_flow/EPANET/EPANET-MSX/Src/include/epanetmsx.h +102 -0
  42. epyt_flow/EPANET/EPANET-MSX/Src/include/epanetmsx_export.h +42 -0
  43. epyt_flow/EPANET/EPANET-MSX/Src/mathexpr.c +937 -0
  44. epyt_flow/EPANET/EPANET-MSX/Src/mathexpr.h +39 -0
  45. epyt_flow/EPANET/EPANET-MSX/Src/mempool.c +204 -0
  46. epyt_flow/EPANET/EPANET-MSX/Src/mempool.h +24 -0
  47. epyt_flow/EPANET/EPANET-MSX/Src/msxchem.c +1285 -0
  48. epyt_flow/EPANET/EPANET-MSX/Src/msxcompiler.c +368 -0
  49. epyt_flow/EPANET/EPANET-MSX/Src/msxdict.h +42 -0
  50. epyt_flow/EPANET/EPANET-MSX/Src/msxdispersion.c +586 -0
  51. epyt_flow/EPANET/EPANET-MSX/Src/msxerr.c +116 -0
  52. epyt_flow/EPANET/EPANET-MSX/Src/msxfile.c +260 -0
  53. epyt_flow/EPANET/EPANET-MSX/Src/msxfuncs.c +175 -0
  54. epyt_flow/EPANET/EPANET-MSX/Src/msxfuncs.h +35 -0
  55. epyt_flow/EPANET/EPANET-MSX/Src/msxinp.c +1504 -0
  56. epyt_flow/EPANET/EPANET-MSX/Src/msxout.c +401 -0
  57. epyt_flow/EPANET/EPANET-MSX/Src/msxproj.c +791 -0
  58. epyt_flow/EPANET/EPANET-MSX/Src/msxqual.c +2010 -0
  59. epyt_flow/EPANET/EPANET-MSX/Src/msxrpt.c +400 -0
  60. epyt_flow/EPANET/EPANET-MSX/Src/msxtank.c +422 -0
  61. epyt_flow/EPANET/EPANET-MSX/Src/msxtoolkit.c +1164 -0
  62. epyt_flow/EPANET/EPANET-MSX/Src/msxtypes.h +551 -0
  63. epyt_flow/EPANET/EPANET-MSX/Src/msxutils.c +524 -0
  64. epyt_flow/EPANET/EPANET-MSX/Src/msxutils.h +56 -0
  65. epyt_flow/EPANET/EPANET-MSX/Src/newton.c +158 -0
  66. epyt_flow/EPANET/EPANET-MSX/Src/newton.h +34 -0
  67. epyt_flow/EPANET/EPANET-MSX/Src/rk5.c +287 -0
  68. epyt_flow/EPANET/EPANET-MSX/Src/rk5.h +39 -0
  69. epyt_flow/EPANET/EPANET-MSX/Src/ros2.c +293 -0
  70. epyt_flow/EPANET/EPANET-MSX/Src/ros2.h +35 -0
  71. epyt_flow/EPANET/EPANET-MSX/Src/smatrix.c +816 -0
  72. epyt_flow/EPANET/EPANET-MSX/Src/smatrix.h +29 -0
  73. epyt_flow/EPANET/EPANET-MSX/readme.txt +14 -0
  74. epyt_flow/EPANET/compile.sh +4 -0
  75. epyt_flow/VERSION +1 -0
  76. epyt_flow/__init__.py +24 -0
  77. epyt_flow/data/__init__.py +0 -0
  78. epyt_flow/data/benchmarks/__init__.py +11 -0
  79. epyt_flow/data/benchmarks/batadal.py +257 -0
  80. epyt_flow/data/benchmarks/batadal_data.py +28 -0
  81. epyt_flow/data/benchmarks/battledim.py +473 -0
  82. epyt_flow/data/benchmarks/battledim_data.py +51 -0
  83. epyt_flow/data/benchmarks/gecco_water_quality.py +267 -0
  84. epyt_flow/data/benchmarks/leakdb.py +592 -0
  85. epyt_flow/data/benchmarks/leakdb_data.py +18923 -0
  86. epyt_flow/data/benchmarks/water_usage.py +123 -0
  87. epyt_flow/data/networks.py +650 -0
  88. epyt_flow/gym/__init__.py +4 -0
  89. epyt_flow/gym/control_gyms.py +47 -0
  90. epyt_flow/gym/scenario_control_env.py +101 -0
  91. epyt_flow/metrics.py +404 -0
  92. epyt_flow/models/__init__.py +2 -0
  93. epyt_flow/models/event_detector.py +31 -0
  94. epyt_flow/models/sensor_interpolation_detector.py +118 -0
  95. epyt_flow/rest_api/__init__.py +4 -0
  96. epyt_flow/rest_api/base_handler.py +70 -0
  97. epyt_flow/rest_api/res_manager.py +95 -0
  98. epyt_flow/rest_api/scada_data_handler.py +476 -0
  99. epyt_flow/rest_api/scenario_handler.py +352 -0
  100. epyt_flow/rest_api/server.py +106 -0
  101. epyt_flow/serialization.py +438 -0
  102. epyt_flow/simulation/__init__.py +5 -0
  103. epyt_flow/simulation/events/__init__.py +6 -0
  104. epyt_flow/simulation/events/actuator_events.py +259 -0
  105. epyt_flow/simulation/events/event.py +81 -0
  106. epyt_flow/simulation/events/leakages.py +404 -0
  107. epyt_flow/simulation/events/sensor_faults.py +267 -0
  108. epyt_flow/simulation/events/sensor_reading_attack.py +185 -0
  109. epyt_flow/simulation/events/sensor_reading_event.py +170 -0
  110. epyt_flow/simulation/events/system_event.py +88 -0
  111. epyt_flow/simulation/parallel_simulation.py +147 -0
  112. epyt_flow/simulation/scada/__init__.py +3 -0
  113. epyt_flow/simulation/scada/advanced_control.py +134 -0
  114. epyt_flow/simulation/scada/scada_data.py +1589 -0
  115. epyt_flow/simulation/scada/scada_data_export.py +255 -0
  116. epyt_flow/simulation/scenario_config.py +608 -0
  117. epyt_flow/simulation/scenario_simulator.py +1897 -0
  118. epyt_flow/simulation/scenario_visualizer.py +61 -0
  119. epyt_flow/simulation/sensor_config.py +1289 -0
  120. epyt_flow/topology.py +290 -0
  121. epyt_flow/uncertainty/__init__.py +3 -0
  122. epyt_flow/uncertainty/model_uncertainty.py +302 -0
  123. epyt_flow/uncertainty/sensor_noise.py +73 -0
  124. epyt_flow/uncertainty/uncertainties.py +555 -0
  125. epyt_flow/uncertainty/utils.py +206 -0
  126. epyt_flow/utils.py +306 -0
  127. epyt_flow-0.1.0.dist-info/LICENSE +21 -0
  128. epyt_flow-0.1.0.dist-info/METADATA +139 -0
  129. epyt_flow-0.1.0.dist-info/RECORD +131 -0
  130. epyt_flow-0.1.0.dist-info/WHEEL +5 -0
  131. epyt_flow-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,592 @@
1
+ """
2
+ LeakDB (Leakage Diagnosis Benchmark) by Vrachimis, S. G., Kyriakou, M. S., Eliades, D. G.,
3
+ and Polycarpou, M. M. (2018), is a realistic leakage dataset for water distribution networks.
4
+ The dataset is comprised of 1000 artificially created but realistic leakage
5
+ scenarios, on different water distribution networks, under varying conditions.
6
+
7
+ See https://github.com/KIOS-Research/LeakDB/ for details.
8
+
9
+ This module provides functions for loading the original LeakDB data set
10
+ :func:`~epyt_flow.data.benchmarks.leakdb.load_data`, as well as methods for loading the scenarios
11
+ :func:`~epyt_flow.data.benchmarks.leakdb.load_scenarios` and pre-generated SCADA data
12
+ :func:`~epyt_flow.data.benchmarks.leakdb.load_scada_data`.
13
+ The official scoring/evaluation is implemented in
14
+ :func:`~epyt_flow.data.benchmarks.leakdb.compute_evaluation_score` -- i.e. those results can be
15
+ directly compared to the official paper.
16
+ Besides this, the user can choose to evaluate predictions using any other metric from
17
+ :mod:`~epyt_flow.metrics`.
18
+ """
19
+ import os
20
+ from typing import Union
21
+ import math
22
+ import json
23
+ import scipy
24
+ import numpy as np
25
+ import pandas as pd
26
+ from scipy.sparse import bsr_array
27
+
28
+ from ..networks import load_net1, load_hanoi
29
+ from .leakdb_data import NET1_LEAKAGES, HANOI_LEAKAGES
30
+ from ...utils import get_temp_folder, to_seconds, unpack_zip_archive, create_path_if_not_exist, \
31
+ download_if_necessary
32
+ from ...metrics import f1_score, true_positive_rate, true_negative_rate
33
+ from ...simulation import ScenarioSimulator
34
+ from ...simulation.events import AbruptLeakage, IncipientLeakage
35
+ from ...simulation import ScenarioConfig
36
+ from ...simulation.scada import ScadaData
37
+ from ...uncertainty import ModelUncertainty, UniformUncertainty
38
+
39
+
40
def __leak_time_to_idx(t: int, round_up: bool = False, hydraulic_time_step: int = 1800):
    """
    Converts a point in time (seconds) into a time step index w.r.t. the
    hydraulic time step -- rounding down by default, up if `round_up` is True.
    """
    rounding = math.ceil if round_up is True else math.floor
    return rounding(t / hydraulic_time_step)
45
+
46
+
47
def __get_leak_time_windows(s_id: int, leaks_info: dict,
                            hydraulic_time_step: int = 1800) -> list[tuple[int, int]]:
    """
    Returns the (start index, end index) time step window of every leak in the
    given scenario -- an empty list if the scenario contains no leaks.
    """
    scenario_leaks = leaks_info.get(str(s_id), [])
    return [(__leak_time_to_idx(leak["leak_start_time"] * hydraulic_time_step),
             __leak_time_to_idx(leak["leak_end_time"] * hydraulic_time_step, round_up=True))
            for leak in scenario_leaks]
59
+
60
+
61
def __create_labels(s_id: int, n_time_steps: int, nodes: list[str],
                    leaks_info: dict,
                    hydraulic_time_step: int = 1800) -> tuple[np.ndarray, scipy.sparse.bsr_array]:
    """
    Builds the ground-truth labels for one scenario: a binary time series
    (leak present at time step t) and a sparse (time step x node) matrix
    marking which node is leaking at which time step.
    """
    y = np.zeros(n_time_steps)

    # COO-style coordinates of all (time step, leaky node) entries
    rows = []
    cols = []
    for leak in leaks_info.get(str(s_id), []):
        t_start = __leak_time_to_idx(leak["leak_start_time"] * hydraulic_time_step)
        t_end = __leak_time_to_idx(leak["leak_end_time"] * hydraulic_time_step,
                                   round_up=True)
        node_idx = nodes.index(leak["node_id"])

        rows.extend(range(t_start, t_end))
        cols.extend([node_idx] * (t_end - t_start))

        y[t_start:t_end] = 1

    y_leak_locations = bsr_array(
        (np.ones(len(rows)), (rows, cols)),
        shape=(n_time_steps, len(nodes)))

    return y, y_leak_locations
87
+
88
+
89
def compute_evaluation_score(scenarios_id: list[int], use_net1: bool,
                             y_pred_labels_per_scenario: list[np.ndarray]) -> dict:
    """
    Evaluates the predictions (leakage detection) for a list of given scenarios.

    Parameters
    ----------
    scenarios_id : `list[int]`
        List of scenarios ID that are to be evaluated -- there is a total of 1000 scenarios.
    use_net1 : `bool`
        If True, Net1 LeakDB will be used for evaluation, otherwise the Hanoi LeakDB will be used.
    y_pred_labels_per_scenario : `list[numpy.ndarray]`
        Predicted binary labels (over time) for each scenario in `scenarios_id`.

    Returns
    -------
    `dict`
        Dictionary containing the f1-score, true positive rate, true negative rate,
        and early detection score.

    Raises
    ------
    ValueError
        If the number of predictions does not match the number of scenarios, or if
        a prediction's length does not match the scenario's number of time steps.
    """
    # Original MATLAB implementation: https://github.com/KIOS-Research/LeakDB/blob/master/CCWI-WDSA2018/Scoring%20Function/scoring_algorithm.m
    if len(scenarios_id) != len(y_pred_labels_per_scenario):
        raise ValueError("Number of scenarios does not match number of predictions -- " +
                         f"expected {len(scenarios_id)} but got {len(y_pred_labels_per_scenario)}")

    # Load ground truth
    if use_net1 is True:
        leaks_info = json.loads(NET1_LEAKAGES)
    else:
        leaks_info = json.loads(HANOI_LEAKAGES)

    network_config = load_net1() if use_net1 is True \
        else load_hanoi()
    nodes = network_config.sensor_config.nodes

    y_true = []
    for i, s_id in enumerate(scenarios_id):
        y, _ = __create_labels(s_id, len(y_pred_labels_per_scenario[i]), nodes, leaks_info)
        if len(y) != len(y_pred_labels_per_scenario[i]):
            # BUGFIX: report the *length* of the prediction instead of dumping
            # the entire prediction array into the error message
            raise ValueError("A prediction must be provided for each time step -- " +
                             f"mismatch for scenario {i}, expected {len(y)} but got " +
                             f"{len(y_pred_labels_per_scenario[i])}")
        y_true.append(y)

    y_true = np.stack(y_true, axis=0)
    y_pred = np.stack(y_pred_labels_per_scenario, axis=0)

    # Evaluate predictions
    f1 = f1_score(y_pred, y_true)
    tpr = true_positive_rate(y_pred, y_true)
    tnr = true_negative_rate(y_pred, y_true)

    # Early detection score: for every leak, reward detections that happen
    # close to the leak's start (sigmoid-shaped decay over a short window)
    early_detection_score = 0
    normalizing = []
    n_time_steps_tolerance = 10  # length (in time steps) of the detection window
    detection_threshold = .75    # fraction of the window that must be flagged
    for i, s_id in enumerate(scenarios_id):
        y_pred_i = y_pred_labels_per_scenario[i]
        leaks_time_window = __get_leak_time_windows(s_id, leaks_info)

        scores = []
        for t0, _ in leaks_time_window:
            normalizing.append(1.)

            y_pred_window = y_pred_i[t0:t0+n_time_steps_tolerance]
            if 1 in y_pred_window and \
                    np.sum(y_pred_window) / len(y_pred_window) > detection_threshold:
                # First flagged time step within the window (1-based)
                t_idx = np.argwhere(y_pred_window)[0] + 1
                scores.append(2. / (1 + np.exp((5. / n_time_steps_tolerance) * t_idx)))
            else:
                scores.append(0.)

        early_detection_score += np.sum(scores)

    # Guard against division by zero (NaN) when none of the requested
    # scenarios contains any leak -- there is nothing to detect early
    n_leaks_total = np.sum(normalizing)
    early_detection_score = early_detection_score / n_leaks_total \
        if n_leaks_total > 0 else 0.0

    return {"f1_score": f1, "true_positive_rate": tpr,
            "true_negative_rate": tnr, "early_detection_score": early_detection_score}
167
+
168
+
169
def load_data(scenarios_id: list[int], use_net1: bool, download_dir: str = None,
              return_X_y: bool = False, return_features_desc: bool = False,
              return_leak_locations: bool = False, verbose: bool = True) -> dict:
    """
    Loads the original LeakDB benchmark data set.

    .. warning::

        All scenarios together are a huge data set -- approx. 8GB for Net1 and 25GB for Hanoi.
        Downloading and loading might take some time! Also, a sufficient amount of hard disk
        memory is required.

    Parameters
    ----------
    scenarios_id : `list[int]`
        List of scenarios ID that are to be loaded -- there is a total of 1000 scenarios.
    use_net1 : `bool`
        If True, Net1 LeakDB will be loaded, otherwise the Hanoi LeakDB will be loaded.
    download_dir : `str`, optional
        Path to the data files -- if None, the temp folder will be used.
        If the path does not exist, the data files will be downloaded to the given path.

        The default is None.
    return_X_y : `bool`, optional
        If True, the data is returned together with the labels (presence of a leakage) as
        two Numpy arrays, otherwise, the data is returned as Pandas data frames.

        The default is False.
    return_features_desc : `bool`, optional
        If True and if `return_X_y` is True, the returned dictionary contains the
        features' descriptions (i.e. names) under the key "features_desc".

        The default is False.
    return_leak_locations : `bool`
        If True and if `return_X_y` is True, the leak locations are returned as well --
        as an instance of `scipy.sparse.bsr_array`.

        The default is False.
    verbose : `bool`, optional
        If True, a progress bar is shown while downloading files.

        The default is True.

    Returns
    -------
    `dict`
        Dictionary containing the scenario data sets. Data of each requested scenario
        can be accessed by using the scenario ID as a key.
    """
    # Remote folder differs per network ("CMH" suffix as published upstream)
    url_data = "https://filedn.com/lumBFq2P9S74PNoLPWtzxG4/EPyT-Flow/LeakDB-Original/" +\
        f"{'Net1_CMH/' if use_net1 is True else 'Hanoi_CMH/'}"

    # Ground-truth leak descriptions are shipped with the package as JSON strings
    if use_net1 is True:
        network_desc = "Net1"
        leaks_info = json.loads(NET1_LEAKAGES)
    else:
        network_desc = "Hanoi"
        leaks_info = json.loads(HANOI_LEAKAGES)

    # Scenario archives are stored in a per-network sub-folder of download_dir
    download_dir = download_dir if download_dir is not None else get_temp_folder()
    download_dir = os.path.join(download_dir, network_desc)
    create_path_if_not_exist(download_dir)

    results = {}
    for s_id in scenarios_id:
        scenario_data = f"Scenario-{s_id}.zip"
        scenario_data_url = url_data + scenario_data
        scenario_data_file_in = os.path.join(download_dir, scenario_data)
        scenario_data_folder_in = os.path.join(download_dir, f"Scenario-{s_id}")

        # Fetch (if missing) and unpack the scenario archive before parsing
        download_if_necessary(scenario_data_file_in, scenario_data_url, verbose)
        create_path_if_not_exist(scenario_data_folder_in)
        unpack_zip_archive(scenario_data_file_in, scenario_data_folder_in)

        # Load and parse data: one CSV per node under "Pressures/"
        pressure_files = list(filter(lambda d: d.endswith(".csv"),
                                     os.listdir(os.path.join(scenario_data_folder_in,
                                                             "Pressures"))))
        pressure_readings = {}
        # NOTE(review): all_nodes is populated but never used afterwards --
        # candidate for removal
        all_nodes = []
        for f_in in pressure_files:
            df = pd.read_csv(os.path.join(scenario_data_folder_in, "Pressures", f_in))
            node_id = f_in.replace(".csv", "")
            all_nodes.append(node_id)
            pressure_readings[f"Pressure-{node_id}"] = df["Value"]

        # One CSV per link under "Flows/"
        flow_files = list(filter(lambda d: d.endswith(".csv"),
                                 os.listdir(os.path.join(scenario_data_folder_in, "Flows"))))
        flow_readings = {}
        for f_in in flow_files:
            df = pd.read_csv(os.path.join(scenario_data_folder_in, "Flows", f_in))
            flow_readings[f"Flow-{f_in.replace('.csv', '')}"] = df["Value"]

        # Per-time-step leak labels and timestamps
        df_labels = pd.read_csv(os.path.join(scenario_data_folder_in, "Labels.csv"))
        labels = df_labels["Label"]

        df_timestamps = pd.read_csv(os.path.join(scenario_data_folder_in, "Timestamps.csv"))
        sensor_reading_times = df_timestamps["Timestamp"]

        # Merge all sensor readings, labels, and timestamps into one frame
        # ("|" dict union requires Python 3.9+)
        df_final = pd.DataFrame(pressure_readings | flow_readings |
                                {"labels": labels, "timestamps": sensor_reading_times})

        # Prepare final data
        if return_X_y is True:
            # Feature matrix: pressures first, then flows (column order matters
            # for "features_desc" below)
            X = df_final[list(pressure_readings.keys()) + list(flow_readings.keys())].to_numpy()
            y = labels.to_numpy()

            network_config = load_net1(download_dir) if use_net1 is True \
                else load_hanoi(download_dir)
            nodes = network_config.sensor_config.nodes
            # Only the sparse leak-location matrix is needed here; the binary
            # labels come from Labels.csv above
            _, y_leak_locations = __create_labels(s_id, X.shape[0], nodes, leaks_info)

            # Feature names are identical across scenarios -- store them once
            if return_features_desc is True and "features_desc" not in results:
                results["features_desc"] = list(pressure_readings.keys()) + \
                    list(flow_readings.keys())

            if return_leak_locations is True:
                results[s_id] = (X, y, y_leak_locations)
            else:
                results[s_id] = (X, y)
        else:
            results[s_id] = df_final

    return results
293
+
294
+
295
def load_scada_data(scenarios_id: list[int], use_net1: bool = True, download_dir: str = None,
                    return_X_y: bool = False, return_leak_locations: bool = False,
                    verbose: bool = True
                    ) -> Union[list[ScadaData], list[tuple[np.ndarray, np.ndarray]]]:
    """
    Loads the SCADA data of the simulated LeakDB benchmark scenarios -- see
    :func:`~epyt_flow.data.benchmarks.leakdb.load_scenarios`.

    .. note::
        Due to randomness in the demand creation as well as in the model uncertainties,
        this SCADA data differs from the original data set, which can be loaded by
        calling :func:`~epyt_flow.data.benchmarks.leakdb.load_data`.
        The leakages (i.e. location and profile), however, are consistent with the
        original data set.

    Parameters
    ----------
    scenarios_id : `list[int]`
        List of scenarios ID that are to be loaded -- there is a total of 1000 scenarios.
    use_net1 : `bool`, optional
        If True, Net1 LeakDB will be loaded, otherwise the Hanoi LeakDB will be loaded.

        The default is True.
    download_dir : `str`, optional
        Path to the data files -- if None, the temp folder will be used.
        If the path does not exist, the data files will be downloaded to the given path.

        The default is None.
    return_X_y : `bool`, optional
        If True, the data is returned together with the labels (presence of a leakage) as
        two Numpy arrays, otherwise, the data is returned as
        :class:`~epyt_flow.simulation.scada.scada_data.ScadaData` instances.

        The default is False.
    return_leak_locations : `bool`
        If True, the leak locations are returned as well --
        as an instance of `scipy.sparse.bsr_array`.

        The default is False.
    verbose : `bool`, optional
        If True, a progress bar is shown while downloading files.

        The default is True.

    Returns
    -------
    list[`:class:`~epyt_flow.simulation.scada.scada_data.ScadaData`] or `list[tuple[numpy.ndarray, numpy.ndarray]]`
        The simulated benchmark scenarios as either a list of
        :class:`~epyt_flow.simulation.scada.scada_data.ScadaData` instances or as a list of
        (X, y) Numpy arrays. If 'return_leak_locations' is True, the leak locations are
        included as an instance of `scipy.sparse.bsr_array` as well.
    """
    if download_dir is None:
        download_dir = get_temp_folder()

    base_url = "https://filedn.com/lumBFq2P9S74PNoLPWtzxG4/EPyT-Flow/LeakDB/" +\
        f"{'Net1/' if use_net1 is True else 'Hanoi/'}"

    # Ground-truth leak descriptions shipped with the package
    leaks_info = json.loads(NET1_LEAKAGES) if use_net1 is True \
        else json.loads(HANOI_LEAKAGES)

    results = []
    for s_id in scenarios_id:
        file_name = f"{'Net1_ID' if use_net1 is True else 'Hanoi_ID'}={s_id}.epytflow_scada_data"
        local_path = os.path.join(download_dir, file_name)

        download_if_necessary(local_path, base_url + file_name, verbose)
        scada = ScadaData.load_from_file(local_path)

        # Build labels (leak present per time step + sparse leak-location matrix)
        X = scada.get_data()
        y, y_leak_locations = __create_labels(s_id, X.shape[0], scada.sensor_config.nodes,
                                              leaks_info)

        # Assemble the requested output shape for this scenario
        if return_X_y is True:
            item = (X, y, y_leak_locations) if return_leak_locations is True else (X, y)
        else:
            item = (scada, y_leak_locations) if return_leak_locations is True else scada
        results.append(item)

    return results
380
+
381
+
382
def load_scenarios(scenarios_id: list[int], use_net1: bool = True,
                   download_dir: str = None, verbose: bool = True) -> list[ScenarioConfig]:
    """
    Creates and returns the LeakDB scenarios -- they can be either modified or
    passed directly to the simulator
    :class:`~epyt_flow.simulation.scenario_simulator.ScenarioSimulator`.

    .. note::
        Note that due to the randomness in the demand creation as well as in the model
        uncertainties, the simulation results will differ between different runs, and
        will also differ from the original data set
        (see :func:`~epyt_flow.data.benchmarks.leakdb.load_data`).
        However, the leakages (i.e. location and profile) will be always the same and be
        consistent with the original data set.

    Parameters
    ----------
    scenarios_id : `list[int]`
        List of scenarios ID that are to be loaded -- there is a total of 1000 scenarios.
    use_net1 : `bool`, optional
        If True, Net1 network will be used, otherwise the Hanoi network will be used.

        The default is True.
    download_dir : `str`, optional
        Path to the Net1.inp or Hanoi.inp file -- if None, the temp folder will be used.
        If the path does not exist, the .inp will be downloaded to the given path.

        The default is None.
    verbose : `bool`, optional
        If True, a progress bar is shown while downloading files.

        The default is True.

    Returns
    -------
    list[:class:`~epyt_flow.simulation.scenario_config.ScenarioConfig`]
        LeakDB scenarios.
    """
    scenarios_inp = []

    # Load the network
    load_network = load_net1 if use_net1 is True else load_hanoi
    download_dir = download_dir if download_dir is not None else get_temp_folder()
    network_config = load_network(download_dir)

    # Set simulation duration
    hydraulic_time_step = to_seconds(minutes=30)  # 30min time steps
    general_params = {"simulation_duration": to_seconds(days=365),  # One year
                      "hydraulic_time_step": hydraulic_time_step,
                      "reporting_time_step": hydraulic_time_step} | network_config.general_params

    # Add demand patterns
    def gen_dem(download_dir, use_net1):
        # Generates one random yearly demand pattern (list of floats) by
        # combining a yearly offset, a weekly pattern, and random noise.
        # Taken from https://github.com/KIOS-Research/LeakDB/blob/master/CCWI-WDSA2018/Dataset_Generator_Py3/demandGenerator.py
        # NOTE(review): relies on scipy.io being importable via the plain
        # "import scipy" at the top of this module -- verify on target scipy version
        week_pat = scipy.io.loadmat(os.path.join(download_dir, "weekPat_30min.mat"))
        a_w = week_pat['Aw']
        nw = week_pat['nw']
        year_offset = scipy.io.loadmat(os.path.join(download_dir, "yearOffset_30min.mat"))
        a_y = year_offset['Ay']
        ny = year_offset['ny']

        # Create yearly component (Fourier series with randomly perturbed coefficients)
        days = 365

        t = (288/6)*days  # one year period in 30min intervals (288 five-minute steps per day / 6)
        w = 2*np.pi/t
        k = np.arange(1, days*288/6+1, 1)  # number of time steps in time series
        n = ny[0][0]  # number of fourier coefficients
        h_y = [1]*len(k)

        for i in range(1, n+1):
            h_y = np.column_stack((h_y, np.sin(i*w*k), np.cos(i*w*k)))

        unc_y = 0.1  # +/-10% uniform perturbation of the yearly coefficients
        a_y_r = a_y*(1-unc_y + 2*unc_y*np.random.rand(int(a_y.shape[0]), int(a_y.shape[1])))
        year_offset = np.dot(h_y, a_y_r)

        # Create weekly component (same construction with a one-week period)
        t = (288/6)*7  # one week period
        w = 2*np.pi/t
        k = np.arange(1, days*288/6+1, 1)  # number of time steps in time series
        n = nw[0][0]  # number of fourier coefficients
        h_w = [1]*len(k)
        for i in range(1, n+1):
            h_w = np.column_stack((h_w, np.sin(i*w*k), np.cos(i*w*k)))

        unc_w = 0.1  # +/-10% uniform perturbation of the weekly coefficients
        a_w_r = a_w*(1-unc_w + 2*unc_w*np.random.rand(int(a_w.shape[0]), int(a_w.shape[1])))
        week_year_pat = np.dot(h_w, a_w_r)

        # Create random component (Gaussian noise; std derived from unc_r)
        unc_r = 0.05
        random = np.random.normal(0, (-unc_r+2*unc_r),
                                  (int(week_year_pat.shape[0]), int(week_year_pat.shape[1])))

        # Create demand
        if use_net1 is True:
            base = 1
        else:
            base = 0.3  # Avoid negative pressure in Hanoi
        variation = 0.75 + np.random.normal(0, 0.07)  # random per-node weekly amplitude
        dem = base * (year_offset+1) * (week_year_pat*variation+1) * (random+1)
        dem = dem.tolist()
        # Flatten the (n, 1) column vector into a plain list
        dem_final = []
        for d in dem:
            dem_final.append(d[0])

        return dem_final

    # .mat files holding the Fourier coefficients of the demand patterns
    week_pattern_url = "https://github.com/KIOS-Research/LeakDB/raw/master/CCWI-WDSA2018/" +\
        "Dataset_Generator_Py3/weekPat_30min.mat"
    year_offset_url = "https://github.com/KIOS-Research/LeakDB/raw/master/CCWI-WDSA2018/" +\
        "Dataset_Generator_Py3/yearOffset_30min.mat"

    download_if_necessary(os.path.join(download_dir, "weekPat_30min.mat"),
                          week_pattern_url, verbose)
    download_if_necessary(os.path.join(download_dir, "yearOffset_30min.mat"),
                          year_offset_url, verbose)

    for s_id in scenarios_id:  # Create new .inp files with demands if necessary
        f_inp_in = os.path.join(download_dir,
                                f"{'Net1' if use_net1 is True else 'Hanoi'}_LeakDB_ID={s_id}.inp")
        scenarios_inp.append(f_inp_in)

        # Skip scenarios whose .inp file was already generated by a previous call
        if not os.path.exists(f_inp_in):
            with ScenarioSimulator(f_inp_in=network_config.f_inp_in) as wdn:
                wdn.epanet_api.setTimeHydraulicStep(general_params["hydraulic_time_step"])
                wdn.epanet_api.setTimeSimulationDuration(general_params["simulation_duration"])
                wdn.epanet_api.setTimePatternStep(general_params["hydraulic_time_step"])

                # Remove existing patterns so only the generated ones remain
                wdn.epanet_api.deletePatternsAll()

                # Assign a freshly generated demand pattern to every junction
                # (tanks and reservoirs have no consumer demand)
                reservoir_nodes_id = wdn.epanet_api.getNodeReservoirNameID()
                for node_id in network_config.sensor_config.nodes:
                    if node_id in network_config.sensor_config.tanks or\
                            node_id in reservoir_nodes_id:
                        continue

                    node_idx = wdn.epanet_api.getNodeIndex(node_id)
                    base_demand = wdn.epanet_api.getNodeBaseDemands(node_idx)[1][0]

                    my_demand_pattern = np.array(gen_dem(download_dir, use_net1))

                    wdn.set_node_demand_pattern(node_id=node_id, base_demand=base_demand,
                                                demand_pattern_id=f"demand_{node_id}",
                                                demand_pattern=my_demand_pattern)

                wdn.epanet_api.saveInputFile(f_inp_in)

    # Create uncertainties
    class MyUniformUncertainty(UniformUncertainty):
        """
        Custom uniform uncertainty for LeakDB scenarios.
        """
        def __init__(self, **kwds):
            super().__init__(**kwds)

        def apply(self, data: float) -> float:
            # Perturb `data` uniformly within +/- (data * U(low, high))
            z = data * np.random.uniform(low=self.low, high=self.high)
            lower = data - z
            upper = data + z
            return lower + np.random.uniform() * (upper - lower)

    # Up to 25% relative uncertainty on pipe and demand parameters
    model_uncertainty = ModelUncertainty(pipe_length_uncertainty=MyUniformUncertainty(low=0,
                                                                                      high=0.25),
                                         pipe_diameter_uncertainty=MyUniformUncertainty(low=0,
                                                                                        high=0.25),
                                         pipe_roughness_uncertainty=MyUniformUncertainty(low=0,
                                                                                         high=0.25),
                                         demand_base_uncertainty=MyUniformUncertainty(low=0,
                                                                                      high=0.25))

    # Create sensor config (place pressure and flow sensors everywhere)
    sensor_config = network_config.sensor_config
    sensor_config.pressure_sensors = sensor_config.nodes
    sensor_config.flow_sensors = sensor_config.links

    # Add leakages
    leaks_all = []

    if use_net1 is True:
        leaks_info = json.loads(NET1_LEAKAGES)
    else:
        leaks_info = json.loads(HANOI_LEAKAGES)

    # Leak times in leaks_info are stored as time step indices -- convert to seconds
    for s_id in scenarios_id:
        leaks_data = []

        if str(s_id) in leaks_info:
            for leak in leaks_info[str(s_id)]:
                if leak["leak_type"] == "incipient":
                    leaks_data.append(
                        IncipientLeakage(node_id=leak["node_id"], link_id=None,
                                         diameter=leak["leak_diameter"],
                                         start_time=leak["leak_start_time"] * hydraulic_time_step,
                                         end_time=leak["leak_end_time"] * hydraulic_time_step,
                                         peak_time=leak["leak_peak_time"] * hydraulic_time_step))
                else:
                    leaks_data.append(
                        AbruptLeakage(node_id=leak["node_id"], link_id=None,
                                      diameter=leak["leak_diameter"],
                                      start_time=leak["leak_start_time"] * hydraulic_time_step,
                                      end_time=leak["leak_end_time"] * hydraulic_time_step))

        leaks_all.append(leaks_data)

    # Build final scenarios
    return [ScenarioConfig(f_inp_in=f_inp_in, general_params=general_params,
                           sensor_config=sensor_config, model_uncertainty=model_uncertainty,
                           system_events=leaks)
            for f_inp_in, leaks in zip(scenarios_inp, leaks_all)]