disdrodb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +64 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +139 -9
- disdrodb/api/configs.py +4 -2
- disdrodb/api/info.py +10 -10
- disdrodb/api/io.py +237 -18
- disdrodb/api/path.py +81 -75
- disdrodb/api/search.py +6 -6
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
- disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +145 -14
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +44 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +41 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +30 -30
- disdrodb/l0/l0b_nc_processing.py +108 -2
- disdrodb/l0/l0b_processing.py +4 -4
- disdrodb/l0/l0c_processing.py +5 -13
- disdrodb/l0/manuals/SWS250.pdf +0 -0
- disdrodb/l0/manuals/VPF730.pdf +0 -0
- disdrodb/l0/manuals/VPF750.pdf +0 -0
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +105 -0
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +128 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/{PARSIVEL → PARSIVEL2}/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → NCAR/FARM_PARSIVEL2.py} +43 -70
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +29 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +31 -14
- disdrodb/l0/routines.py +105 -14
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +45 -44
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1/routines.py +35 -42
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/event.py +27 -120
- disdrodb/l2/processing.py +267 -116
- disdrodb/l2/routines.py +618 -254
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +463 -144
- disdrodb/psd/models.py +8 -5
- disdrodb/routines.py +3 -3
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +56 -36
- disdrodb/scattering/permittivity.py +486 -0
- disdrodb/scattering/routines.py +701 -159
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4120 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +59 -8
- disdrodb/utils/dataframe.py +63 -9
- disdrodb/utils/directories.py +49 -17
- disdrodb/utils/encoding.py +33 -19
- disdrodb/utils/logger.py +13 -6
- disdrodb/utils/manipulations.py +71 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +165 -19
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +85 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +327 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/RECORD +127 -87
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
- disdrodb/l1/encoding_attrs.py +0 -635
- disdrodb/l2/processing_options.py +0 -213
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -----------------------------------------------------------------------------.
|
|
3
|
+
# Copyright (c) 2021-2023 DISDRODB developers
|
|
4
|
+
#
|
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
8
|
+
# (at your option) any later version.
|
|
9
|
+
#
|
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
# GNU General Public License for more details.
|
|
14
|
+
#
|
|
15
|
+
# You should have received a copy of the GNU General Public License
|
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
17
|
+
# -----------------------------------------------------------------------------.
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
|
|
21
|
+
from disdrodb.l0.l0a_processing import read_raw_text_file
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw text file made of ``date,time,<payload>`` lines.

    Each line is split into a date, a time and a payload. Payloads that are
    61 characters long are parsed as 11 comma-separated integral variables.
    Payloads that are 4545 characters long are parsed as the raw spectrum and
    merged onto the integral variables by timestamp. Lines with any other
    payload length are discarded.

    Parameters
    ----------
    filepath : str
        Path of the raw file; forwarded to ``read_raw_text_file``.
    logger : optional
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        One row per integral-variables line, with a ``time`` column and the
        three ``raw_drop_*`` string columns (left as ``""`` when no spectrum
        line shares the same timestamp).
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    # - Every line is read as a single string and parsed afterwards
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {
        # - One record per line: the newline is the only delimiter
        "delimiter": "\\n",
        # - Define encoding ("latin" is an alias of "ISO-8859-1")
        "encoding": "latin",
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # - Python engine is more feature-complete than the (faster) C engine
        "engine": "python",
        # - On-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # - Strings to recognize as NA/NaN, on top of the pandas defaults
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Split each line into date, time and payload, then define 'time'
    df = df["TO_PARSE"].str.split(",", n=2, expand=True)
    df.columns = ["date", "time", "TO_PARSE"]
    datetime_str = df["date"] + " " + df["time"]
    df["time"] = pd.to_datetime(datetime_str, format="%d.%m.%Y %H:%M:%S", errors="coerce")

    # The payload length discriminates the two kinds of record
    payload_length = df["TO_PARSE"].str.len()

    # Identify rows with integral variables, split them and assign column names
    df_vars = df[payload_length == 61]
    df_data = df_vars["TO_PARSE"].str.split(",", expand=True)
    df_data.columns = [
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "reflectivity_32bit",
        "mor_visibility",
        "laser_amplitude",
        "number_particles",
        "sensor_temperature",
        "sensor_heating_current",
        "sensor_battery_voltage",
    ]
    df_data["time"] = df_vars["time"]

    # Initialize empty arrays
    # --> 0 values array produced in L0B
    spectrum_columns = [
        "raw_drop_concentration",
        "raw_drop_average_velocity",
        "raw_drop_number",
    ]
    for column in spectrum_columns:
        df_data[column] = ""

    # Identify raw spectrum rows (taken with .loc so no view of `df` is mutated)
    df_raw_spectrum = df.loc[payload_length == 4545, ["time", "TO_PARSE"]]

    # Derive raw drop arrays
    def split_string(s):
        """Split a spectrum payload into its three comma-joined arrays."""
        vals = [v.strip() for v in s.split(",")]
        c1 = ",".join(vals[:32])
        c2 = ",".join(vals[32:64])
        # Field 64 holds the spectrum as '/'-separated values with 'r' markers
        c3 = ",".join(vals[64].replace("r", "").split("/"))
        return c1, c2, c3

    # Build the frame explicitly: Series.apply with a Series-returning function
    # yields a plain empty Series (no columns) when there are no spectrum rows,
    # which made the subsequent column lookups raise KeyError.
    df_spectrum = pd.DataFrame(
        [split_string(s) for s in df_raw_spectrum["TO_PARSE"]],
        index=df_raw_spectrum.index,
        columns=spectrum_columns,
    )
    df_spectrum["time"] = df_raw_spectrum["time"]

    # Add raw arrays to the integral variables, matching rows on 'time'
    df = df_data.set_index("time")
    df.update(df_spectrum.set_index("time"))

    # Set back time as column
    df = df.reset_index()

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -----------------------------------------------------------------------------.
|
|
3
|
+
# Copyright (c) 2021-2023 DISDRODB developers
|
|
4
|
+
#
|
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
8
|
+
# (at your option) any later version.
|
|
9
|
+
#
|
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
# GNU General Public License for more details.
|
|
14
|
+
#
|
|
15
|
+
# You should have received a copy of the GNU General Public License
|
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
17
|
+
# -----------------------------------------------------------------------------.
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
|
|
21
|
+
from disdrodb.l0.l0a_processing import read_raw_text_file
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw text file made of ``;``-separated telegram lines.

    Each line is split into 16 leading scalar fields plus a remainder. The
    remainder is split into the raw drop concentration (32 values), the raw
    drop average velocity (32 values), the raw drop number (1024 values), the
    rain kinetic energy and a final field that must be empty for the row to be
    kept.

    Parameters
    ----------
    filepath : str
        Path of the raw file; forwarded to ``read_raw_text_file``.
    logger : optional
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        One row per valid line, with a ``time`` column built from the sensor
        date and time fields.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    # - Every line is read as a single string and parsed afterwards
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {
        # - The file has no header row
        "header": None,
        # - Ignore bytes that cannot be decoded instead of failing on them
        "encoding_errors": "ignore",
        # - One record per line: the newline is the only delimiter
        "delimiter": "\\n",
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # - Python engine is more feature-complete than the (faster) C engine
        "engine": "python",
        # - On-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # - Strings to recognize as NA/NaN, on top of the pandas defaults
        "na_values": ["na", "", "error", "NA", "-.-"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Split the columns: 16 scalar fields, everything else kept in 'TO_SPLIT'
    df = df["TO_PARSE"].str.split(";", n=16, expand=True)

    # Assign column names
    df.columns = [
        "sensor_serial_number",
        "sensor_status",
        "laser_amplitude",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "dummy_date",
        "sensor_time",
        "sensor_date",
        "sensor_temperature",
        "number_particles",
        "rainfall_rate_32bit",
        "reflectivity_32bit",
        "rainfall_accumulated_16bit",
        "mor_visibility",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "TO_SPLIT",
    ]

    # Derive raw drop arrays
    array_columns = [
        "raw_drop_concentration",
        "raw_drop_average_velocity",
        "raw_drop_number",
        "rain_kinetic_energy",
        "CHECK_EMPTY",
    ]
    # 32 + 32 + 1024 array values, then kinetic energy and the trailing field
    n_required_values = 1090

    def split_string(s):
        """Split the remainder of a line into the five derived fields.

        Rows that are missing (non-string) or too short get ``None`` in every
        field, so the CHECK_EMPTY filter below discards them instead of one
        truncated line aborting the whole file.
        """
        if not isinstance(s, str):
            return (None,) * len(array_columns)
        vals = [v.strip() for v in s.split(";")]
        if len(vals) < n_required_values:
            return (None,) * len(array_columns)
        return (
            ";".join(vals[:32]),
            ";".join(vals[32:64]),
            ";".join(vals[64:1088]),
            vals[1088],
            vals[1089],
        )

    # Build the frame explicitly so that the columns exist even with no rows
    df_arrays = pd.DataFrame(
        [split_string(s) for s in df["TO_SPLIT"]],
        index=df.index,
        columns=array_columns,
    )
    for column in array_columns:
        df[column] = df_arrays[column]

    # Ensure valid observation (copy so that later assignments do not target a view)
    df = df[df["CHECK_EMPTY"] == ""].copy()

    # Add the time column
    time_str = df["sensor_date"] + "-" + df["sensor_time"]
    df["time"] = pd.to_datetime(time_str, format="%d.%m.%Y-%H:%M:%S", errors="coerce")

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "dummy_date",
        "sensor_date",
        "sensor_time",
        "sensor_serial_number",
        "rainfall_accumulated_16bit",  # unexpected format
        "CHECK_EMPTY",
        "TO_SPLIT",
    ]
    df = df.drop(columns=columns_to_drop)

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
|
|
@@ -17,12 +17,112 @@
|
|
|
17
17
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18
18
|
# -----------------------------------------------------------------------------.
|
|
19
19
|
"""DISDRODB reader for ENPC PWS100 raw text data."""
|
|
20
|
-
import zipfile
|
|
21
|
-
|
|
22
20
|
import pandas as pd
|
|
23
21
|
|
|
24
22
|
from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
|
|
25
|
-
from disdrodb.utils.logger import log_error, log_warning
|
|
23
|
+
from disdrodb.utils.logger import log_error # , log_warning
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def read_txt_file(file, filename, logger):
    r"""Parse a single txt file within the daily zip file.

    Only the first line of the file is read. Everything up to and including
    the first ``x020 `` marker and everything from the first ``x03`` marker
    onwards is discarded; the remaining space-separated tokens are mapped to
    the PWS100 message fields by position.

    Parameters
    ----------
    file : binary file-like object
        Object whose ``readline()`` returns the raw bytes of the message.
    filename : str
        Name of the file. Its last six ``_``-separated parts (without the
        ``.txt`` extension) are parsed as year, month, day, hour, minute and
        second to define the ``time`` column.
    logger : optional
        Logger forwarded to ``log_error``.

    Returns
    -------
    pandas.DataFrame or None
        A single-row dataframe, or ``None`` (after logging an error) when the
        file is unreadable, empty or contains fields that cannot be parsed.
    """
    # Read file
    try:
        txt = file.readline().decode("utf-8")
    except Exception:
        log_error(logger=logger, msg=f"{filename} is corrupted", verbose=False)
        return None

    # Check file is not empty
    if txt == "":
        log_error(logger=logger, msg=f"{filename} is empty", verbose=False)
        return None

    # Remove everything before the start marker
    # - A diagnostic message (e.g. "PSU voltage too low 13.3") can precede it
    txt = txt.split("x020 ", 1)[-1]

    # Remove everything after the end marker
    txt = txt.split("x03", 1)[0]

    # Clean up the line
    txt = txt.replace(" 00 ", " 0 0 ")
    # - An empty field shows up as two consecutive separators: fill it with 0.
    #   The pattern must be the double space; replacing every single space
    #   would interleave a spurious "0" between all tokens.
    txt = txt.replace("  ", " 0 ")

    # Split the cleaned line
    buf = txt.split(" ")

    # Helper to convert a list of integer tokens to a comma-separated string
    def int_list_to_str(lst):
        return ",".join(str(int(i)) for i in lst)

    # Try to get the drop_size distribution (message field 42)
    try:
        drop_size_distribution = int_list_to_str(buf[29:329])
    except ValueError:
        log_error(logger, msg=f"Corrupted drop_size_distribution field in {filename}", verbose=False)
        return None

    # Try to get raw_drop_number (message field 47) and peak_to_pedestal_hist (message field 48)
    try:
        raw_drop_number = int_list_to_str(buf[342:1498])
        peak_to_pedestal_hist = int_list_to_str(buf[1498:1548])
    except ValueError:
        log_error(
            logger,
            msg=f"Corrupted raw_drop_number or peak_to_pedestal_hist field in {filename}",
            verbose=False,
        )
        return None

    # Parse the remaining fields
    # - A truncated or malformed message is logged and skipped like the cases
    #   above, instead of raising out of the function.
    try:
        data = {
            "mor_visibility": float(buf[1]),  # Visibility Range (message field 20)
            "weather_code_synop_4680": float(buf[2]),  # Present Weather Code (WMO) (message field 21)
            "weather_code_metar_4678": buf[3],  # Present Weather Code (METAR) (message field 22)
            "weather_code_nws": buf[4],  # Present Weather Code (NWS) (message field 23)
            "alarms": int_list_to_str(buf[5:21]),  # Alarms (message field 24)
            "sensor_status": buf[21],  # Fault status of PWS100 (message field 25)
            "air_temperature": float(buf[22]),  # Temperature (°C) (message field 30)
            "relative_humidity": float(buf[23]),  # Sampled relative humidity (%) (message field 30)
            "wetbulb_temperature": float(buf[24]),  # Average wetbulb temperature (°C) (message field 30)
            "air_temperature_max": float(buf[25]),  # Maximum temperature (°C) (message field 31)
            "air_temperature_min": float(buf[26]),  # Minimum temperature (°C) (message field 31)
            "rainfall_rate": float(buf[27]),  # Precipitation rate (mm/h) (message field 40)
            # NOTE(review): accumulation unit is presumably mm (not mm/h) - confirm
            "rainfall_accumulated": float(buf[28]),  # Precipitation accumulation (message field 41)
            "drop_size_distribution": drop_size_distribution,  # Drop size distribution (message field 42)
            "average_drop_velocity": float(buf[329]),  # Average velocity (message field 43)
            # NOTE(review): size unit is presumably mm (not mm/h) - confirm
            "average_drop_size": float(buf[330]),  # Average size (message field 43)
            "type_distribution": int_list_to_str(buf[331:342]),  # Type distribution (message field 44)
            "raw_drop_number": raw_drop_number,  # Size/velocity spectrum (34*34) (message field 47)
            "peak_to_pedestal_hist": peak_to_pedestal_hist,  # Peak to pedestal ratio histogram (message field 48)
        }
    except (ValueError, IndexError):
        log_error(logger, msg=f"Corrupted message fields in {filename}", verbose=False)
        return None

    # Convert to single-row DataFrame
    df = pd.DataFrame([data])

    # Define datetime "time" column from filename
    datetime_str = " ".join(filename.replace(".txt", "").split("_")[-6:])
    df["time"] = pd.to_datetime(datetime_str, format="%Y %m %d %H %M %S")
    return df
|
|
26
126
|
|
|
27
127
|
|
|
28
128
|
@is_documented_by(reader_generic_docstring)
|
|
@@ -31,102 +131,9 @@ def reader(
|
|
|
31
131
|
logger=None,
|
|
32
132
|
):
|
|
33
133
|
"""Reader."""
|
|
134
|
+
import zipfile
|
|
34
135
|
|
|
35
|
-
|
|
36
|
-
#### Define function to read each txt file inside each daily zip file
|
|
37
|
-
def read_txt_file(file, filename, logger): # noqa PLR0911
|
|
38
|
-
"""Parse a single txt file within the daily zip file."""
|
|
39
|
-
# Read file
|
|
40
|
-
try:
|
|
41
|
-
txt = file.readline().decode("utf-8")
|
|
42
|
-
except Exception:
|
|
43
|
-
log_warning(logger=logger, msg=f"{filename} is corrupted", verbose=False)
|
|
44
|
-
return None
|
|
45
|
-
|
|
46
|
-
# Check file is not empty
|
|
47
|
-
if txt == "":
|
|
48
|
-
log_warning(logger=logger, msg=f"{filename} is empty", verbose=False)
|
|
49
|
-
return None
|
|
50
|
-
|
|
51
|
-
if "PSU voltage too low" in txt or "volt" in txt:
|
|
52
|
-
log_warning(logger=logger, msg=f"PSU voltage too low in {filename}", verbose=False)
|
|
53
|
-
return None
|
|
54
|
-
|
|
55
|
-
if "Error - message" in txt:
|
|
56
|
-
log_warning(logger=logger, msg=f"Error message in {filename}", verbose=False)
|
|
57
|
-
return None
|
|
58
|
-
|
|
59
|
-
# Clean up the line
|
|
60
|
-
txt = txt.replace(" 00 ", " 0 0 ")
|
|
61
|
-
txt = txt.replace(" ", " 0 ")
|
|
62
|
-
txt = txt[1:-8]
|
|
63
|
-
|
|
64
|
-
# Split the cleaned line
|
|
65
|
-
buf = txt.split(" ")
|
|
66
|
-
|
|
67
|
-
# Helper to convert list of floats to comma-separated string
|
|
68
|
-
def int_list_to_str(lst):
|
|
69
|
-
return ",".join(f"{int(i)}" for i in lst)
|
|
70
|
-
|
|
71
|
-
# Try to get the drop_size distribution:
|
|
72
|
-
try:
|
|
73
|
-
drop_size_distribution = int_list_to_str(buf[30:330]) # Drop size distribution (message field 42)
|
|
74
|
-
except Exception:
|
|
75
|
-
log_warning(logger, msg=f"Corrupted drop_size_distribution field in {filename}", verbose=False)
|
|
76
|
-
return None
|
|
77
|
-
|
|
78
|
-
# Try to get peak_to_pedestal_hist
|
|
79
|
-
try:
|
|
80
|
-
peak_to_pedestal_hist = int_list_to_str(buf[1499:1549])
|
|
81
|
-
except Exception:
|
|
82
|
-
log_warning(
|
|
83
|
-
logger,
|
|
84
|
-
msg=f"Corrupted raw_drop_number or peak_to_pedestal_hist field in {filename}",
|
|
85
|
-
verbose=False,
|
|
86
|
-
)
|
|
87
|
-
return None
|
|
88
|
-
# Parse fields
|
|
89
|
-
data = {
|
|
90
|
-
"mor_visibility": float(buf[2]), # Visibility Range (message field 20)
|
|
91
|
-
"weather_code_synop_4680": float(buf[3]), # Present Weather Code (WMO) (message field 21)
|
|
92
|
-
"weather_code_metar_4678": buf[4], # Present Weather Code (METAR) (message field 22)
|
|
93
|
-
"weather_code_nws": buf[5], # Present Weather Code (NWS) (message field 23)
|
|
94
|
-
"alarms": int_list_to_str(buf[6:22]), # Alarms (message field (24))
|
|
95
|
-
"sensor_status": buf[22], # Fault status of PWS100 (message field 25)
|
|
96
|
-
"air_temperature": float(buf[23]), # Temperature (°C) (message field 30)
|
|
97
|
-
"relative_humidity": float(buf[24]), # Sampled relative humidity (%) (message field 30)
|
|
98
|
-
"wetbulb_temperature": float(buf[25]), # Average wetbulb temperature (°C)(message field 30)
|
|
99
|
-
"air_temperature_max": float(buf[26]), # Maximum temperature (°C)(message field 31)
|
|
100
|
-
"air_temperature_min": float(buf[27]), # Minimum temperature (°C)(message field 31)
|
|
101
|
-
"rainfall_rate": float(buf[28]), # Precipitation rate (mm/h)(message field 40)
|
|
102
|
-
"rainfall_accumulated": float(buf[29]), # Precipitation accumulation (mm/h)(message field 41)
|
|
103
|
-
"drop_size_distribution": drop_size_distribution, # Drop size distribution (message field 42)
|
|
104
|
-
"average_drop_velocity": float(buf[330]), # Average velocity (mm/s)(message field 43)
|
|
105
|
-
"average_drop_size": float(buf[331]), # Average size (mm/h)(message field 43)
|
|
106
|
-
"type_distribution": int_list_to_str(buf[332:343]), # Type distribution (message field 44)
|
|
107
|
-
"raw_drop_number": int_list_to_str(buf[343:1499]), # Size/velocity spectrum (34*34) (message field 47)
|
|
108
|
-
"peak_to_pedestal_hist": (
|
|
109
|
-
peak_to_pedestal_hist # Peak to pedestal ratio distribution histogram (message field 48)
|
|
110
|
-
),
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
# Convert to single-row DataFrame
|
|
114
|
-
df = pd.DataFrame([data])
|
|
115
|
-
|
|
116
|
-
# Define datetime "time" column from filename
|
|
117
|
-
datetime_str = " ".join(filename.replace(".txt", "").split("_")[-6:])
|
|
118
|
-
df["time"] = pd.to_datetime(datetime_str, format="%Y %m %d %H %M %S")
|
|
119
|
-
|
|
120
|
-
# # Drop columns not agreeing with DISDRODB L0 standards
|
|
121
|
-
# columns_to_drop = [
|
|
122
|
-
# "peak_to_pedestal_hist",
|
|
123
|
-
# "type_distribution",
|
|
124
|
-
# ]
|
|
125
|
-
# df = df.drop(columns=columns_to_drop)
|
|
126
|
-
return df
|
|
127
|
-
|
|
128
|
-
# ---------------------------------------------------------------------.
|
|
129
|
-
#### Iterate over all files (aka timesteps) in the daily zip archive
|
|
136
|
+
# Iterate over all files (aka timesteps) in the daily zip archive
|
|
130
137
|
# - Each file contain a single timestep !
|
|
131
138
|
list_df = []
|
|
132
139
|
with zipfile.ZipFile(filepath, "r") as zip_ref:
|
|
@@ -146,5 +153,4 @@ def reader(
|
|
|
146
153
|
# Concatenate all dataframes into a single one
|
|
147
154
|
df = pd.concat(list_df)
|
|
148
155
|
|
|
149
|
-
# ---------------------------------------------------------------------.
|
|
150
156
|
return df
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------.
|
|
2
|
+
# Copyright (c) 2021-2023 DISDRODB developers
|
|
3
|
+
#
|
|
4
|
+
# This program is free software: you can redistribute it and/or modify
|
|
5
|
+
# it under the terms of the GNU General Public License as published by
|
|
6
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
7
|
+
# (at your option) any later version.
|
|
8
|
+
#
|
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
# GNU General Public License for more details.
|
|
13
|
+
#
|
|
14
|
+
# You should have received a copy of the GNU General Public License
|
|
15
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
|
+
# -----------------------------------------------------------------------------.
|
|
17
|
+
"""DISDRODB reader for ENPC PWS100 raw text data."""
|
|
18
|
+
import zipfile
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
|
|
22
|
+
from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
|
|
23
|
+
from disdrodb.utils.logger import log_error # , log_warning
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a daily zip archive of ENPC PWS100 text messages into a DataFrame.

    Every ``.txt`` member of the archive is parsed as one timestep. Members
    that cannot be parsed are logged through ``log_error`` and skipped.

    Parameters
    ----------
    filepath : str or path-like
        Path of the daily zip archive; passed to ``zipfile.ZipFile``.
    logger : optional
        Logger forwarded to ``log_error``. The default is ``None``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the single-row DataFrames of all parsed members,
        in sorted member-name order.

    Raises
    ------
    ValueError
        If no member of the archive could be parsed.
    """

    ##------------------------------------------------------------------------.
    #### Define function to read each txt file inside each daily zip file
    def read_txt_file(file, filename, logger):
        r"""Parse a single txt file within the daily zip file.

        The file starts with \x020 and ends with \x03\r\r\n.

        Returns a single-row DataFrame, or ``None`` (after logging an error)
        when the file cannot be decoded, is empty, or holds a corrupted
        array field.
        """
        # Read the message (only the first line of the file is used)
        try:
            txt = file.readline().decode("utf-8")
        except Exception:
            log_error(logger=logger, msg=f"{filename} is corrupted", verbose=False)
            return None

        # Check file is not empty
        if not txt:
            log_error(logger=logger, msg=f"{filename} is empty", verbose=False)
            return None

        # NOTE: messages containing "PSU voltage too low" or "Error - message"
        # are not filtered out here (that filtering was disabled upstream).

        # Clean up the line
        txt = txt.replace(" 00 ", " 0 0 ")
        # Two consecutive separators denote an empty field: fill it with 0.
        # NOTE(review): the literal is assumed to be a double space; a single
        # space would insert a 0 between every field and break the indices
        # used below. Confirm against the upstream file.
        txt = txt.replace("  ", " 0 ")
        # Drop the first character and the last 8 characters of the message
        txt = txt[1:-8]

        # Split the cleaned line into its space-separated fields
        buf = txt.split(" ")

        # Helper to convert a list of integer strings to a comma-separated string
        def int_list_to_str(lst):
            return ",".join(str(int(item)) for item in lst)

        # Try to get the drop_size distribution (message field 42, 300 values)
        try:
            drop_size_distribution = int_list_to_str(buf[30:330])
        except ValueError:
            log_error(
                logger=logger,
                msg=f"Corrupted drop_size_distribution field in {filename}",
                verbose=False,
            )
            return None

        # Try to get raw_drop_number and peak_to_pedestal_hist.
        # Both are parsed here so that the logged message matches what failed.
        try:
            raw_drop_number = int_list_to_str(buf[343:1499])
            peak_to_pedestal_hist = int_list_to_str(buf[1499:1549])
        except ValueError:
            log_error(
                logger=logger,
                msg=f"Corrupted raw_drop_number or peak_to_pedestal_hist field in {filename}",
                verbose=False,
            )
            return None

        # Parse fields.
        # A failing scalar conversion or a truncated message (IndexError)
        # propagates to the caller, which logs the error and skips the file.
        data = {
            "mor_visibility": float(buf[2]),  # Visibility Range (message field 20)
            "weather_code_synop_4680": float(buf[3]),  # Present Weather Code (WMO) (message field 21)
            "weather_code_metar_4678": buf[4],  # Present Weather Code (METAR) (message field 22)
            "weather_code_nws": buf[5],  # Present Weather Code (NWS) (message field 23)
            "alarms": int_list_to_str(buf[6:22]),  # Alarms (message field 24)
            "sensor_status": buf[22],  # Fault status of PWS100 (message field 25)
            "air_temperature": float(buf[23]),  # Temperature (°C) (message field 30)
            "relative_humidity": float(buf[24]),  # Sampled relative humidity (%) (message field 30)
            "wetbulb_temperature": float(buf[25]),  # Average wetbulb temperature (°C) (message field 30)
            "air_temperature_max": float(buf[26]),  # Maximum temperature (°C) (message field 31)
            "air_temperature_min": float(buf[27]),  # Minimum temperature (°C) (message field 31)
            "rainfall_rate": float(buf[28]),  # Precipitation rate (mm/h) (message field 40)
            # NOTE(review): the original comment gave mm/h for the next field; presumably mm - confirm
            "rainfall_accumulated": float(buf[29]),  # Precipitation accumulation (message field 41)
            "drop_size_distribution": drop_size_distribution,  # Drop size distribution (message field 42)
            "average_drop_velocity": float(buf[330]),  # Average velocity (mm/s) (message field 43)
            # NOTE(review): the original comment gave mm/h for the next field; presumably mm - confirm
            "average_drop_size": float(buf[331]),  # Average size (message field 43)
            "type_distribution": int_list_to_str(buf[332:343]),  # Type distribution (message field 44)
            "raw_drop_number": raw_drop_number,  # Size/velocity spectrum (34*34) (message field 47)
            # Peak to pedestal ratio distribution histogram (message field 48)
            "peak_to_pedestal_hist": peak_to_pedestal_hist,
        }

        # Convert to single-row DataFrame
        df = pd.DataFrame([data])

        # Define datetime "time" column from the last six "_"-separated
        # tokens of the filename (year, month, day, hour, minute, second)
        datetime_str = " ".join(filename.replace(".txt", "").split("_")[-6:])
        df["time"] = pd.to_datetime(datetime_str, format="%Y %m %d %H %M %S")

        # NOTE: "peak_to_pedestal_hist" and "type_distribution" are kept in
        # the output (dropping them was disabled upstream).
        return df

    # ---------------------------------------------------------------------.
    #### Iterate over all files (aka timesteps) in the daily zip archive
    # - Each file contains a single timestep !
    list_df = []
    with zipfile.ZipFile(filepath, "r") as zip_ref:
        for filename in sorted(zip_ref.namelist()):
            if not filename.endswith(".txt"):
                continue
            # Open file
            with zip_ref.open(filename) as f:
                try:
                    df = read_txt_file(file=f, filename=filename, logger=logger)
                except Exception as e:
                    msg = f"An error occurred while reading {filename}. The error is: {e}."
                    log_error(logger=logger, msg=msg, verbose=True)
                    continue
            if df is not None:
                list_df.append(df)

    # Fail with an explicit message instead of the opaque error that
    # pd.concat raises on an empty list (same exception type: ValueError)
    if not list_df:
        raise ValueError(f"No valid data could be read from {filepath}.")

    # Concatenate all dataframes into a single one
    df = pd.concat(list_df)

    # ---------------------------------------------------------------------.
    return df
|