epyt-flow 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epyt_flow/EPANET/EPANET/SRC_engines/AUTHORS +28 -0
- epyt_flow/EPANET/EPANET/SRC_engines/LICENSE +21 -0
- epyt_flow/EPANET/EPANET/SRC_engines/Readme_SRC_Engines.txt +18 -0
- epyt_flow/EPANET/EPANET/SRC_engines/enumstxt.h +134 -0
- epyt_flow/EPANET/EPANET/SRC_engines/epanet.c +5578 -0
- epyt_flow/EPANET/EPANET/SRC_engines/epanet2.c +865 -0
- epyt_flow/EPANET/EPANET/SRC_engines/epanet2.def +131 -0
- epyt_flow/EPANET/EPANET/SRC_engines/errors.dat +73 -0
- epyt_flow/EPANET/EPANET/SRC_engines/funcs.h +193 -0
- epyt_flow/EPANET/EPANET/SRC_engines/genmmd.c +1000 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hash.c +177 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hash.h +28 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hydcoeffs.c +1151 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hydraul.c +1117 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hydsolver.c +720 -0
- epyt_flow/EPANET/EPANET/SRC_engines/hydstatus.c +476 -0
- epyt_flow/EPANET/EPANET/SRC_engines/include/epanet2.h +431 -0
- epyt_flow/EPANET/EPANET/SRC_engines/include/epanet2_2.h +1786 -0
- epyt_flow/EPANET/EPANET/SRC_engines/include/epanet2_enums.h +468 -0
- epyt_flow/EPANET/EPANET/SRC_engines/inpfile.c +810 -0
- epyt_flow/EPANET/EPANET/SRC_engines/input1.c +707 -0
- epyt_flow/EPANET/EPANET/SRC_engines/input2.c +864 -0
- epyt_flow/EPANET/EPANET/SRC_engines/input3.c +2170 -0
- epyt_flow/EPANET/EPANET/SRC_engines/main.c +93 -0
- epyt_flow/EPANET/EPANET/SRC_engines/mempool.c +142 -0
- epyt_flow/EPANET/EPANET/SRC_engines/mempool.h +24 -0
- epyt_flow/EPANET/EPANET/SRC_engines/output.c +852 -0
- epyt_flow/EPANET/EPANET/SRC_engines/project.c +1359 -0
- epyt_flow/EPANET/EPANET/SRC_engines/quality.c +685 -0
- epyt_flow/EPANET/EPANET/SRC_engines/qualreact.c +743 -0
- epyt_flow/EPANET/EPANET/SRC_engines/qualroute.c +694 -0
- epyt_flow/EPANET/EPANET/SRC_engines/report.c +1489 -0
- epyt_flow/EPANET/EPANET/SRC_engines/rules.c +1362 -0
- epyt_flow/EPANET/EPANET/SRC_engines/smatrix.c +871 -0
- epyt_flow/EPANET/EPANET/SRC_engines/text.h +497 -0
- epyt_flow/EPANET/EPANET/SRC_engines/types.h +874 -0
- epyt_flow/EPANET/EPANET-MSX/MSX_Updates.txt +53 -0
- epyt_flow/EPANET/EPANET-MSX/Src/dispersion.h +27 -0
- epyt_flow/EPANET/EPANET-MSX/Src/hash.c +107 -0
- epyt_flow/EPANET/EPANET-MSX/Src/hash.h +28 -0
- epyt_flow/EPANET/EPANET-MSX/Src/include/epanetmsx.h +102 -0
- epyt_flow/EPANET/EPANET-MSX/Src/include/epanetmsx_export.h +42 -0
- epyt_flow/EPANET/EPANET-MSX/Src/mathexpr.c +937 -0
- epyt_flow/EPANET/EPANET-MSX/Src/mathexpr.h +39 -0
- epyt_flow/EPANET/EPANET-MSX/Src/mempool.c +204 -0
- epyt_flow/EPANET/EPANET-MSX/Src/mempool.h +24 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxchem.c +1285 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxcompiler.c +368 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxdict.h +42 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxdispersion.c +586 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxerr.c +116 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxfile.c +260 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxfuncs.c +175 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxfuncs.h +35 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxinp.c +1504 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxout.c +401 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxproj.c +791 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxqual.c +2010 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxrpt.c +400 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxtank.c +422 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxtoolkit.c +1164 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxtypes.h +551 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxutils.c +524 -0
- epyt_flow/EPANET/EPANET-MSX/Src/msxutils.h +56 -0
- epyt_flow/EPANET/EPANET-MSX/Src/newton.c +158 -0
- epyt_flow/EPANET/EPANET-MSX/Src/newton.h +34 -0
- epyt_flow/EPANET/EPANET-MSX/Src/rk5.c +287 -0
- epyt_flow/EPANET/EPANET-MSX/Src/rk5.h +39 -0
- epyt_flow/EPANET/EPANET-MSX/Src/ros2.c +293 -0
- epyt_flow/EPANET/EPANET-MSX/Src/ros2.h +35 -0
- epyt_flow/EPANET/EPANET-MSX/Src/smatrix.c +816 -0
- epyt_flow/EPANET/EPANET-MSX/Src/smatrix.h +29 -0
- epyt_flow/EPANET/EPANET-MSX/readme.txt +14 -0
- epyt_flow/EPANET/compile.sh +4 -0
- epyt_flow/VERSION +1 -0
- epyt_flow/__init__.py +24 -0
- epyt_flow/data/__init__.py +0 -0
- epyt_flow/data/benchmarks/__init__.py +11 -0
- epyt_flow/data/benchmarks/batadal.py +257 -0
- epyt_flow/data/benchmarks/batadal_data.py +28 -0
- epyt_flow/data/benchmarks/battledim.py +473 -0
- epyt_flow/data/benchmarks/battledim_data.py +51 -0
- epyt_flow/data/benchmarks/gecco_water_quality.py +267 -0
- epyt_flow/data/benchmarks/leakdb.py +592 -0
- epyt_flow/data/benchmarks/leakdb_data.py +18923 -0
- epyt_flow/data/benchmarks/water_usage.py +123 -0
- epyt_flow/data/networks.py +650 -0
- epyt_flow/gym/__init__.py +4 -0
- epyt_flow/gym/control_gyms.py +47 -0
- epyt_flow/gym/scenario_control_env.py +101 -0
- epyt_flow/metrics.py +404 -0
- epyt_flow/models/__init__.py +2 -0
- epyt_flow/models/event_detector.py +31 -0
- epyt_flow/models/sensor_interpolation_detector.py +118 -0
- epyt_flow/rest_api/__init__.py +4 -0
- epyt_flow/rest_api/base_handler.py +70 -0
- epyt_flow/rest_api/res_manager.py +95 -0
- epyt_flow/rest_api/scada_data_handler.py +476 -0
- epyt_flow/rest_api/scenario_handler.py +352 -0
- epyt_flow/rest_api/server.py +106 -0
- epyt_flow/serialization.py +438 -0
- epyt_flow/simulation/__init__.py +5 -0
- epyt_flow/simulation/events/__init__.py +6 -0
- epyt_flow/simulation/events/actuator_events.py +259 -0
- epyt_flow/simulation/events/event.py +81 -0
- epyt_flow/simulation/events/leakages.py +404 -0
- epyt_flow/simulation/events/sensor_faults.py +267 -0
- epyt_flow/simulation/events/sensor_reading_attack.py +185 -0
- epyt_flow/simulation/events/sensor_reading_event.py +170 -0
- epyt_flow/simulation/events/system_event.py +88 -0
- epyt_flow/simulation/parallel_simulation.py +147 -0
- epyt_flow/simulation/scada/__init__.py +3 -0
- epyt_flow/simulation/scada/advanced_control.py +134 -0
- epyt_flow/simulation/scada/scada_data.py +1589 -0
- epyt_flow/simulation/scada/scada_data_export.py +255 -0
- epyt_flow/simulation/scenario_config.py +608 -0
- epyt_flow/simulation/scenario_simulator.py +1897 -0
- epyt_flow/simulation/scenario_visualizer.py +61 -0
- epyt_flow/simulation/sensor_config.py +1289 -0
- epyt_flow/topology.py +290 -0
- epyt_flow/uncertainty/__init__.py +3 -0
- epyt_flow/uncertainty/model_uncertainty.py +302 -0
- epyt_flow/uncertainty/sensor_noise.py +73 -0
- epyt_flow/uncertainty/uncertainties.py +555 -0
- epyt_flow/uncertainty/utils.py +206 -0
- epyt_flow/utils.py +306 -0
- epyt_flow-0.1.0.dist-info/LICENSE +21 -0
- epyt_flow-0.1.0.dist-info/METADATA +139 -0
- epyt_flow-0.1.0.dist-info/RECORD +131 -0
- epyt_flow-0.1.0.dist-info/WHEEL +5 -0
- epyt_flow-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module provides functions for loading different GECCO water quality data sets.
|
|
3
|
+
|
|
4
|
+
+------------------------------+---------------------------------------------------------------------------------------------+
|
|
5
|
+
| GECCO Water Quality 2017 | :func:`~epyt_flow.data.benchmarks.gecco_water_quality.load_gecco2017_water_quality_data` |
|
|
6
|
+
+------------------------------+---------------------------------------------------------------------------------------------+
|
|
7
|
+
| GECCO Water Quality 2018 | :func:`~epyt_flow.data.benchmarks.gecco_water_quality.load_gecco2018_water_quality_data` |
|
|
8
|
+
+------------------------------+---------------------------------------------------------------------------------------------+
|
|
9
|
+
| GECCO Water Quality 2019 | :func:`~epyt_flow.data.benchmarks.gecco_water_quality.load_gecco2019_water_quality_data` |
|
|
10
|
+
+------------------------------+---------------------------------------------------------------------------------------------+
|
|
11
|
+
|
|
12
|
+
Note that the scoring/evaluation algorithm is the same for all GECCO water quality benchmarks
|
|
13
|
+
and is implemented in
|
|
14
|
+
:func:`~epyt_flow.data.benchmarks.gecco_water_quality.compute_evaluation_score`.
|
|
15
|
+
"""
|
|
16
|
+
import os
|
|
17
|
+
from typing import Union
|
|
18
|
+
import numpy as np
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
21
|
+
from ...utils import get_temp_folder, download_if_necessary
|
|
22
|
+
from ...metrics import f1_score
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def compute_evaluation_score(y_pred: np.ndarray, y: np.ndarray) -> float:
    """
    Scores the predictions of an event detection method against the ground truth.

    .. note::
        The F1-score is the evaluation metric of all GECCO water quality challenges.

    Parameters
    ----------
    y_pred : `numpy.ndarray`
        Predicted event indication over time.
    y : `numpy.ndarray`
        Ground truth event indication over time.

    Returns
    -------
    `float`
        Evaluation score (F1-score).
    """
    score = f1_score(y_pred, y)
    return score
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def load_gecco2017_water_quality_data(download_dir: Union[str, None] = None,
                                      return_X_y: bool = True,
                                      verbose: bool = True
                                      ) -> Union[pd.DataFrame, tuple[np.ndarray, np.ndarray]]:
    """
    GECCO Industrial Challenge 2017 Dataset: A water quality dataset for the
    "Monitoring of drinking-water quality" competition organized by M. Friese, J. Stork,
    A. Fischbach, M. Rebolledo, T. Bartz-Beielstein at the Genetic and Evolutionary
    Computation Conference 2017, Berlin, Germany.

    This is a benchmark for anomaly detection algorithms on water quality. The data is provided by
    the "Thüringer Fernwasserversorgung" (Germany) and constitutes a real-world data set. In this
    data set, 9 numeric water quality features are given at a sampling rate of 1 min over approx.
    3 months. The goal is to predict the presence of an anomaly -- i.e. binary classification.

    More information can be found at https://zenodo.org/records/3884465 and
    http://www.spotseven.de/gecco-challenge/gecco-challenge-2017/

    .. note::

        Note that this is NOT a simulated scenario and therefore only the final
        data set is provided.

    Parameters
    ----------
    download_dir : `str`, optional
        Path to the data files -- if None, the temp folder will be used.
        If the path does not exist, the data files will be downloaded to the given path.

        The default is None.
    return_X_y : `bool`, optional
        If True, the data is returned together with the labels as two Numpy arrays,
        otherwise the data is returned as Pandas data frame.

        The default is True.
    verbose : `bool`, optional
        If True, a progress bar is shown while downloading files.

        The default is True.

    Returns
    -------
    `pandas.DataFrame` or `tuple[numpy.ndarray, numpy.ndarray]`
        The benchmark data set as either a Pandas data frame or as a pair of (X, y) Numpy arrays.
        In the latter case, y holds the "EVENT" column as `numpy.int8` and X holds the
        remaining columns without the "Time" column.
    """
    # Download data if necessary
    url_data = "https://zenodo.org/records/3884465/files/1_gecco2017_water_quality.csv?download=1"

    if download_dir is None:
        download_dir = get_temp_folder()
    f_in = os.path.join(download_dir, "gecco2017_water_quality.csv")

    download_if_necessary(f_in, url_data, verbose)

    # Load and return data
    df_data = pd.read_csv(f_in, index_col=0)

    # Truthiness test instead of `is False`: an identity test would send falsy
    # non-`bool` flags such as `numpy.False_` or `0` down the (X, y) branch.
    if not return_X_y:
        return df_data

    y = df_data["EVENT"].to_numpy().astype(np.int8)
    X = df_data.drop(columns=["EVENT", "Time"]).to_numpy()

    return X, y
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def load_gecco2018_water_quality_data(download_dir: Union[str, None] = None,
                                      return_X_y: bool = True,
                                      verbose: bool = True
                                      ) -> Union[pd.DataFrame, tuple[np.ndarray, np.ndarray]]:
    """
    GECCO Industrial Challenge 2018 Dataset: A water quality dataset for the
    "Internet of Things: Online Anomaly Detection for Drinking Water Quality" competition
    organized by F. Rehbach, M. Rebolledo, S. Moritz, S. Chandrasekaran, T. Bartz-Beielstein at
    the Genetic and Evolutionary Computation Conference 2018, Kyoto, Japan.

    This is a benchmark
    (based on
    :func:`~epyt_flow.data.benchmarks.gecco_water_quality.load_gecco2017_water_quality_data`)
    for anomaly detection algorithms on water quality. The data is provided by the
    "Thüringer Fernwasserversorgung" (Germany) and constitutes a real-world data set. In this
    data set, 9 numeric water quality features are given at a sampling rate of 1 min over approx.
    3 months. The goal is to predict the presence of an anomaly -- i.e. binary classification.

    More information can be found at https://zenodo.org/records/3884398 and
    http://www.spotseven.de/gecco/gecco-challenge/gecco-challenge-2018/

    .. note::

        Note that this is NOT a simulated scenario and therefore only the final
        data set is provided.

    Parameters
    ----------
    download_dir : `str`, optional
        Path to the data files -- if None, the temp folder will be used.
        If the path does not exist, the data files will be downloaded to the given path.

        The default is None.
    return_X_y : `bool`, optional
        If True, the data is returned together with the labels as two Numpy arrays,
        otherwise the data is returned as Pandas data frame.

        The default is True.
    verbose : `bool`, optional
        If True, a progress bar is shown while downloading files.

        The default is True.

    Returns
    -------
    `pandas.DataFrame` or `tuple[numpy.ndarray, numpy.ndarray]`
        The benchmark data set as either a Pandas data frame or as a pair of (X, y) Numpy arrays.
        In the latter case, y holds the "EVENT" column as `numpy.int8` and X holds the
        remaining columns without the "Time" column.
    """
    # Download data if necessary
    url_data = "https://zenodo.org/records/3884398/files/1_gecco2018_water_quality.csv?download=1"

    if download_dir is None:
        download_dir = get_temp_folder()
    f_in = os.path.join(download_dir, "gecco2018_water_quality.csv")

    download_if_necessary(f_in, url_data, verbose)

    # Load and return data
    df_data = pd.read_csv(f_in, index_col=0)

    # Truthiness test instead of `is False`: an identity test would send falsy
    # non-`bool` flags such as `numpy.False_` or `0` down the (X, y) branch.
    if not return_X_y:
        return df_data

    y = df_data["EVENT"].to_numpy().astype(np.int8)
    X = df_data.drop(columns=["EVENT", "Time"]).to_numpy()

    return X, y
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def load_gecco2019_water_quality_data(download_dir: Union[str, None] = None,
                                      return_X_y: bool = True,
                                      verbose: bool = True) -> dict:
    """
    GECCO Industrial Challenge 2019 Dataset: A water quality dataset for the "Internet of Things:
    Online Event Detection for Drinking Water Quality Control" competition organized by
    F. Rehbach, S. Moritz, T. Bartz-Beielstein at the Genetic and Evolutionary Computation
    Conference 2019, Prague, Czech Republic.

    This is a benchmark
    (based on
    :func:`~epyt_flow.data.benchmarks.gecco_water_quality.load_gecco2018_water_quality_data`)
    for anomaly detection algorithms on water quality. The data is provided by the
    "Thüringer Fernwasserversorgung" (Germany) and constitutes a real-world data set. In this
    data set, 6 numeric water quality features are given at a sampling rate of 1 min over approx.
    3 months. The goal is to predict the presence of an anomaly -- i.e. binary classification.
    The data set itself comes in three splits: A train set, a validation set, and a test set.

    More information can be found at https://zenodo.org/records/4304080 and
    https://www.th-koeln.de/informatik-und-ingenieurwissenschaften/gecco-challenge-2019_63244.php

    .. note::

        Note that this is NOT a simulated scenario and therefore only the final
        data set is provided.

    Parameters
    ----------
    download_dir : `str`, optional
        Path to the data files -- if None, the temp folder will be used.
        If the path does not exist, the data files will be downloaded to the given path.

        The default is None.
    return_X_y : `bool`, optional
        If True, every split is returned as a pair of (X, y) Numpy arrays,
        otherwise every split is returned as a Pandas data frame.

        The default is True.
    verbose : `bool`, optional
        If True, a progress bar is shown while downloading files.

        The default is True.

    Returns
    -------
    `dict`
        The data set as a dictionary with entries "train", "validation", and "test" containing
        the respective data -- each entry is either a Pandas data frame or a pair of (X, y)
        Numpy arrays, where y holds the "Event" column as `numpy.int8` and X holds the
        remaining columns without the "Time" column.
    """
    # Download data if necessary
    if download_dir is None:
        download_dir = get_temp_folder()

    # NOTE(review): the "qulity" spelling presumably mirrors the file name used in the
    # Zenodo record -- confirm before "correcting" it.
    base_url = "https://zenodo.org/records/4304080/files/"
    url_train_data = base_url + "7_gecco2019_train_water_quality.csv?download=1"
    url_valid_data = base_url + "8_gecco2019_valid_water_qulity.csv?download=1"
    url_test_data = base_url + "6_gecco2019_test_water_quality.csv?download=1"

    f_train_in = os.path.join(download_dir, "gecco2019_train_water_quality.csv")
    f_valid_in = os.path.join(download_dir, "gecco2019_valid_water_qulity.csv")
    f_test_in = os.path.join(download_dir, "gecco2019_test_water_quality.csv")

    download_if_necessary(f_train_in, url_train_data, verbose)
    download_if_necessary(f_valid_in, url_valid_data, verbose)
    download_if_necessary(f_test_in, url_test_data, verbose)

    # Load and return data
    splits = {"train": pd.read_csv(f_train_in, index_col=0),
              "validation": pd.read_csv(f_valid_in, index_col=0),
              "test": pd.read_csv(f_test_in, index_col=0)}

    # Truthiness test instead of `is False`: an identity test would send falsy
    # non-`bool` flags such as `numpy.False_` or `0` down the (X, y) branch.
    if not return_X_y:
        return splits

    # Split every data frame into features X (all columns except label and time stamp)
    # and labels y.
    return {name: (df_data.drop(columns=["Event", "Time"]).to_numpy(),
                   df_data["Event"].to_numpy().astype(np.int8))
            for name, df_data in splits.items()}
|