disdrodb 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +1 -5
- disdrodb/_version.py +2 -2
- disdrodb/accessor/methods.py +22 -4
- disdrodb/api/checks.py +10 -0
- disdrodb/api/io.py +20 -18
- disdrodb/api/path.py +42 -77
- disdrodb/api/search.py +89 -23
- disdrodb/cli/disdrodb_create_summary.py +1 -1
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0a.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l1.py +1 -1
- disdrodb/cli/disdrodb_run_l2e.py +1 -1
- disdrodb/cli/disdrodb_run_l2m.py +1 -1
- disdrodb/configs.py +30 -83
- disdrodb/constants.py +4 -3
- disdrodb/data_transfer/download_data.py +4 -2
- disdrodb/docs.py +2 -2
- disdrodb/etc/products/L1/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/global.yaml +6 -0
- disdrodb/etc/products/L2E/10MIN.yaml +1 -12
- disdrodb/etc/products/L2E/global.yaml +1 -1
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +1 -1
- disdrodb/issue/checks.py +2 -2
- disdrodb/l0/check_configs.py +1 -1
- disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
- disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
- disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
- disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
- disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
- disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
- disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
- disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
- disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
- disdrodb/l0/l0_reader.py +2 -2
- disdrodb/l0/l0a_processing.py +6 -2
- disdrodb/l0/l0b_processing.py +26 -19
- disdrodb/l0/l0c_processing.py +17 -3
- disdrodb/l0/manuals/LPM_V0.pdf +0 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
- disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
- disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
- disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
- disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
- disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
- disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
- disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
- disdrodb/l0/readers/{PARSIVEL2 → PARSIVEL}/NASA/LPVEX.py +16 -28
- disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +3 -3
- disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
- disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
- disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
- disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/{GPM/GCPEX.py → NORWAY/UIB.py} +54 -29
- disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/{PANGASA.py → PAGASA.py} +6 -3
- disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
- disdrodb/l0/readers/{PARSIVEL/GPM/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
- disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +51 -24
- disdrodb/l0/readers/{PARSIVEL/GPM/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
- disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
- disdrodb/l1/beard_model.py +45 -1
- disdrodb/l1/fall_velocity.py +1 -6
- disdrodb/l1/filters.py +2 -0
- disdrodb/l1/processing.py +6 -5
- disdrodb/l1/resampling.py +101 -38
- disdrodb/l2/empirical_dsd.py +12 -8
- disdrodb/l2/processing.py +4 -3
- disdrodb/metadata/search.py +3 -4
- disdrodb/routines/l0.py +4 -4
- disdrodb/routines/l1.py +173 -60
- disdrodb/routines/l2.py +121 -269
- disdrodb/routines/options.py +347 -0
- disdrodb/routines/wrappers.py +9 -1
- disdrodb/scattering/axis_ratio.py +3 -0
- disdrodb/scattering/routines.py +1 -1
- disdrodb/summary/routines.py +765 -724
- disdrodb/utils/archiving.py +51 -44
- disdrodb/utils/attrs.py +1 -1
- disdrodb/utils/compression.py +4 -2
- disdrodb/utils/dask.py +35 -15
- disdrodb/utils/dict.py +33 -0
- disdrodb/utils/encoding.py +1 -1
- disdrodb/utils/manipulations.py +7 -1
- disdrodb/utils/routines.py +9 -8
- disdrodb/utils/time.py +9 -1
- disdrodb/viz/__init__.py +0 -13
- disdrodb/viz/plots.py +209 -0
- {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
- {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/RECORD +124 -95
- disdrodb/l0/readers/PARSIVEL/GPM/LPVEX.py +0 -85
- /disdrodb/etc/products/L2M/{GAMMA_GS_ND_MAE.yaml → MODELS/GAMMA_GS_ND_MAE.yaml} +0 -0
- /disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +0 -0
- /disdrodb/etc/products/L2M/{LOGNORMAL_GS_LOG_ND_MAE.yaml → MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml} +0 -0
- /disdrodb/etc/products/L2M/{LOGNORMAL_GS_ND_MAE.yaml → MODELS/LOGNORMAL_GS_ND_MAE.yaml} +0 -0
- /disdrodb/etc/products/L2M/{LOGNORMAL_ML.yaml → MODELS/LOGNORMAL_ML.yaml} +0 -0
- /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
- /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
- /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
- /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
- {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
- {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
disdrodb/l0/l0b_processing.py
CHANGED
@@ -91,7 +91,7 @@ def format_string_array(string: str, n_values: int) -> np.array:
 
     e.g. : format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
 
-    If empty string ("") --> Return an arrays of zeros
+    If empty string ("") or "0" --> Return an arrays of zeros
     If the list length is not n_values -> Return an arrays of np.nan
 
     The function strip potential delimiters at start and end before splitting.
@@ -108,31 +108,38 @@ def format_string_array(string: str, n_values: int) -> np.array:
     np.array
         array of float
     """
-    […3 removed lines not preserved in the source diff view…]
-    # -------------------------------------------------------------------------.
-    ## Assumptions !!!
-    # If empty list --> Assume no precipitation recorded. Return an arrays of zeros
-    if len(values) == 0:
+    # Check for empty string or "0" case
+    # - Assume no precipitation recorded. Return an arrays of zeros
+    if string in {"", "0"}:
         values = np.zeros(n_values)
         return values
 
-    #
+    # Check for NaN case
+    # - Assume no data available. Return an arrays of NaN
+    if string == "NaN":
+        values = np.zeros(n_values) * np.nan
+        return values
+
+    # Retrieve list of values
+    split_str = infer_split_str(string)
+    values = np.array(string.strip(split_str).split(split_str))
+
     # If the length is not as expected --> Assume data corruption
     # --> Return an array with nan
     if len(values) != n_values:
         values = np.zeros(n_values) * np.nan
-    […10 removed lines not preserved in the source diff view…]
+        return values
+
+    # Otherwise sanitize the list of value
+    # Ensure string type
+    values = values.astype("str")
+    # Replace '' with 0
+    values = replace_empty_strings_with_zeros(values)
+    # Replace "-9.999" with 0
+    values = np.char.replace(values, "-9.999", "0")
+    # Cast values to float type
+    # --> Note: the disk encoding is specified in the l0b_encodings.yml
+    values = values.astype(float)
     return values
 
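Read outside the diff: the new format_string_array flow special-cases the whole field string first ("" or "0" returns zeros, "NaN" returns NaN), splits only if those checks pass, and sanitizes afterwards. A minimal standalone sketch of that behavior (format_string_array_sketch is a hypothetical name; disdrodb's infer_split_str and replace_empty_strings_with_zeros helpers are approximated inline):

import numpy as np

def format_string_array_sketch(string: str, n_values: int) -> np.ndarray:
    # "" or "0" --> assume no precipitation recorded: all zeros
    if string in {"", "0"}:
        return np.zeros(n_values)
    # "NaN" --> assume no data available: all NaN
    if string == "NaN":
        return np.full(n_values, np.nan)
    # Simplified stand-in for disdrodb's infer_split_str helper
    split_str = ";" if ";" in string else ","
    values = np.array(string.strip(split_str).split(split_str))
    # Unexpected length --> assume data corruption: all NaN
    if len(values) != n_values:
        return np.full(n_values, np.nan)
    # Sanitize: empty fields and the "-9.999" sentinel become 0, then cast to float
    values[values == ""] = "0"
    values = np.char.replace(values, "-9.999", "0")
    return values.astype(float)

format_string_array_sketch("2,44,22,33", 4)  # array([ 2., 44., 22., 33.])
format_string_array_sketch("0", 4)           # array([0., 0., 0., 0.])
format_string_array_sketch("NaN", 4)         # array([nan, nan, nan, nan])
format_string_array_sketch("2,44", 4)        # array([nan, nan, nan, nan])

The early returns make the no-precipitation ("0") and no-data ("NaN") telegrams explicit instead of relying on the result of the split.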
disdrodb/l0/l0c_processing.py
CHANGED
@@ -117,7 +117,12 @@ def split_dataset_by_sampling_intervals(
 
     # If sample_interval is a dataset variable, use it to define dictionary of datasets
     if "sample_interval" in ds:
-        […1 removed line not preserved in the source diff view…]
+        dict_ds = {}
+        for interval in measurement_intervals:
+            ds_subset = ds.isel(time=ds["sample_interval"] == interval)
+            if ds_subset.sizes["time"] > 2:
+                dict_ds[int(interval)] = ds_subset
+        return dict_ds
 
     # ---------------------------------------------------------------------------------------.
     # Otherwise exploit difference between timesteps to identify change point
@@ -460,9 +465,8 @@ def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=True):
     # if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
     #     qc_flag[-1] = 0
 
-    #
+    # Add time quality flag variable
     ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
-    ds = ds.set_coords("time_qc")
 
     # Add CF attributes for time_qc
     ds["time_qc"].attrs = {
@@ -674,6 +678,16 @@ def create_l0c_datasets(
         log_info(logger=logger, msg=f"No data between {start_time} and {end_time}.", verbose=verbose)
         return {}
 
+    # If 1 or 2 timesteps per time block, return empty dictionary
+    n_timesteps = len(ds["time"])
+    if n_timesteps < 3:
+        log_info(
+            logger=logger,
+            msg=f"Only {n_timesteps} timesteps between {start_time} and {end_time}.",
+            verbose=verbose,
+        )
+        return {}
+
     # ---------------------------------------------------------------------------------------.
     # If sample interval is a dataset variable, drop timesteps with unexpected measurement intervals !
     if "sample_interval" in ds:
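The reworked split_dataset_by_sampling_intervals branch builds one dataset per declared measurement interval via boolean selection on the sample_interval variable, and both it and create_l0c_datasets now skip blocks with fewer than three timesteps. A toy run of the same selection logic on invented data:

import numpy as np
import pandas as pd
import xarray as xr

# Invented dataset mixing 30 s and 60 s sampling
time = pd.date_range("2024-01-01", periods=8, freq="30s")
ds = xr.Dataset(
    {"sample_interval": ("time", np.array([30, 30, 30, 60, 60, 60, 60, 60]))},
    coords={"time": time},
)

measurement_intervals = [30, 60]
dict_ds = {}
for interval in measurement_intervals:
    # Boolean selection along time, as in the new L0C code path
    ds_subset = ds.isel(time=ds["sample_interval"] == interval)
    if ds_subset.sizes["time"] > 2:  # drop blocks too short to process
        dict_ds[int(interval)] = ds_subset

print({k: v.sizes["time"] for k, v in dict_ds.items()})  # {30: 3, 60: 5}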
disdrodb/l0/manuals/LPM_V0.pdf
Binary file
disdrodb/l0/readers/LPM/ITALY/GID_LPM.py
CHANGED
@@ -31,7 +31,7 @@ def reader(
     """Reader."""
     ##------------------------------------------------------------------------.
     #### - Define raw data headers
-    column_names = ["
+    column_names = ["TO_PARSE"]
 
     ##------------------------------------------------------------------------.
     #### Define reader options
@@ -79,14 +79,22 @@ def reader(
 
     ##------------------------------------------------------------------------.
     #### Adapt the dataframe to adhere to DISDRODB L0 standards
-    #
-
+    # Raise error if empty file
+    if len(df) == 0:
+        raise ValueError(f"{filepath} is empty.")
+
+    # Select only rows with expected number of delimiters
+    df = df[df["TO_PARSE"].str.count(";").isin([519, 520])]
+
+    # Check there are still valid rows
+    if len(df) == 0:
+        raise ValueError(f"No valid rows in {filepath}.")
 
     # Split by ; delimiter (before raw drop number)
-    df = df["
+    df = df["TO_PARSE"].str.split(";", expand=True, n=79)
 
     # Assign column names
-    […1 removed line not preserved in the source diff view…]
+    names = [
         "start_identifier",
         "device_address",
         "sensor_serial_number",
@@ -168,10 +176,10 @@
         "number_particles_class_9_internal_data",
         "raw_drop_number",
     ]
-    df.columns =
+    df.columns = names
 
     # Remove checksum from raw_drop_number
-    df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=1, expand=True)[0]
+    df["raw_drop_number"] = df["raw_drop_number"].str.strip(";").str.rsplit(";", n=1, expand=True)[0]
 
     # Define datetime "time" column
     df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
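Two robustness fixes recur in this reader: rows are kept only when their ";" count matches the expected telegram layout, and a trailing ";" is stripped before the checksum is rsplit off raw_drop_number (without the strip, rsplit peels off an empty trailing field and the checksum stays behind). A toy demonstration with an invented telegram stub:

import pandas as pd

# One plausible row (519 ";" delimiters) and one truncated row
good = ";".join(["00", "00", "1234"] + ["0"] * 517)  # 520 fields -> 519 ";"
bad = "00;00;1234"
df = pd.DataFrame({"TO_PARSE": [good, bad]})

# Delimiter-count row filter, as in the updated reader
df = df[df["TO_PARSE"].str.count(";").isin([519, 520])]
print(len(df))  # 1

# Checksum removal from a field that ends with a trailing ";"
s = pd.Series(["1;2;3;99;"])
print(s.str.rsplit(";", n=1, expand=True)[0][0])                 # 1;2;3;99  (checksum kept!)
print(s.str.strip(";").str.rsplit(";", n=1, expand=True)[0][0])  # 1;2;3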
disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py
ADDED
@@ -0,0 +1,279 @@
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""DISDRODB reader for GID LPM sensor TC-PI with incorrect reported time."""
+import pandas as pd
+
+from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+from disdrodb.l0.l0a_processing import read_raw_text_file
+from disdrodb.utils.logger import log_error
+
+
+def read_txt_file(file, filename, logger):
+    """Parse for TC-PI LPM file."""
+    #### - Define raw data headers
+    column_names = ["TO_PARSE"]
+
+    ##------------------------------------------------------------------------.
+    #### Define reader options
+    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
+    reader_kwargs = {}
+
+    # - Define delimiter
+    reader_kwargs["delimiter"] = "\\n"
+
+    # - Avoid first column to become df index !!!
+    reader_kwargs["index_col"] = False
+
+    # Since column names are expected to be passed explicitly, header is set to None
+    reader_kwargs["header"] = None
+
+    # - Number of rows to be skipped at the beginning of the file
+    reader_kwargs["skiprows"] = 1
+
+    # - Define behaviour when encountering bad lines
+    reader_kwargs["on_bad_lines"] = "skip"
+
+    # - Define reader engine
+    # - C engine is faster
+    # - Python engine is more feature-complete
+    reader_kwargs["engine"] = "python"
+
+    # - Define on-the-fly decompression of on-disk data
+    # - Available: gzip, bz2, zip
+    reader_kwargs["compression"] = "infer"
+
+    # - Strings to recognize as NA/NaN and replace with standard NA flags
+    # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+    #   '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+    #   'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+    reader_kwargs["na_values"] = ["na", "", "error"]
+
+    ##------------------------------------------------------------------------.
+    #### Read the data
+    df = read_raw_text_file(
+        filepath=file,
+        column_names=column_names,
+        reader_kwargs=reader_kwargs,
+        logger=logger,
+    )
+
+    ##------------------------------------------------------------------------.
+    #### Adapt the dataframe to adhere to DISDRODB L0 standards
+    # Raise error if empty file
+    if len(df) == 0:
+        raise ValueError(f"{filename} is empty.")
+
+    # Select only rows with expected number of delimiters
+    df = df[df["TO_PARSE"].str.count(" ") == 526]
+
+    # Check there are still valid rows
+    if len(df) == 0:
+        raise ValueError(f"No valid rows in {filename}.")
+
+    # Split by ; delimiter (before raw drop number)
+    df = df["TO_PARSE"].str.split(" ", expand=True, n=82)
+
+    # Assign column names
+    names = [
+        "date",
+        "time",
+        "unknown",
+        "start_identifier",
+        "device_address",
+        "sensor_serial_number",
+        "sensor_date",
+        "sensor_time",
+        "weather_code_synop_4677_5min",
+        "weather_code_synop_4680_5min",
+        "weather_code_metar_4678_5min",
+        "precipitation_rate_5min",
+        "weather_code_synop_4677",
+        "weather_code_synop_4680",
+        "weather_code_metar_4678",
+        "precipitation_rate",
+        "rainfall_rate",
+        "snowfall_rate",
+        "precipitation_accumulated",
+        "mor_visibility",
+        "reflectivity",
+        "quality_index",
+        "max_hail_diameter",
+        "laser_status",
+        "static_signal_status",
+        "laser_temperature_analog_status",
+        "laser_temperature_digital_status",
+        "laser_current_analog_status",
+        "laser_current_digital_status",
+        "sensor_voltage_supply_status",
+        "current_heating_pane_transmitter_head_status",
+        "current_heating_pane_receiver_head_status",
+        "temperature_sensor_status",
+        "current_heating_voltage_supply_status",
+        "current_heating_house_status",
+        "current_heating_heads_status",
+        "current_heating_carriers_status",
+        "control_output_laser_power_status",
+        "reserved_status",
+        "temperature_interior",
+        "laser_temperature",
+        "laser_current_average",
+        "control_voltage",
+        "optical_control_voltage_output",
+        "sensor_voltage_supply",
+        "current_heating_pane_transmitter_head",
+        "current_heating_pane_receiver_head",
+        "temperature_ambient",
+        "current_heating_voltage_supply",
+        "current_heating_house",
+        "current_heating_heads",
+        "current_heating_carriers",
+        "number_particles",
+        "number_particles_internal_data",
+        "number_particles_min_speed",
+        "number_particles_min_speed_internal_data",
+        "number_particles_max_speed",
+        "number_particles_max_speed_internal_data",
+        "number_particles_min_diameter",
+        "number_particles_min_diameter_internal_data",
+        "number_particles_no_hydrometeor",
+        "number_particles_no_hydrometeor_internal_data",
+        "number_particles_unknown_classification",
+        "number_particles_unknown_classification_internal_data",
+        "number_particles_class_1",
+        "number_particles_class_1_internal_data",
+        "number_particles_class_2",
+        "number_particles_class_2_internal_data",
+        "number_particles_class_3",
+        "number_particles_class_3_internal_data",
+        "number_particles_class_4",
+        "number_particles_class_4_internal_data",
+        "number_particles_class_5",
+        "number_particles_class_5_internal_data",
+        "number_particles_class_6",
+        "number_particles_class_6_internal_data",
+        "number_particles_class_7",
+        "number_particles_class_7_internal_data",
+        "number_particles_class_8",
+        "number_particles_class_8_internal_data",
+        "number_particles_class_9",
+        "number_particles_class_9_internal_data",
+        "TO_BE_FURTHER_PROCESSED",
+    ]
+    df.columns = names
+
+    # Define datetime "time" column
+    df["time"] = df["date"] + " " + df["time"]
+    df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
+
+    # Drop row if start_identifier different than 00
+    df = df[df["start_identifier"].astype(str) == "00"]
+
+    # Extract the last variables remained in raw_drop_number
+    df_parsed = df["TO_BE_FURTHER_PROCESSED"].str.rsplit(" ", n=5, expand=True)
+    df_parsed.columns = [
+        "raw_drop_number",
+        "air_temperature",
+        "relative_humidity",
+        "wind_speed",
+        "wind_direction",
+        "checksum",
+    ]
+
+    # Assign columns to the original dataframe
+    df[df_parsed.columns] = df_parsed
+
+    # Drop rows with invalid raw_drop_number
+    # --> 440 value # 22x20
+    df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
+
+    # Drop columns not agreeing with DISDRODB L0 standards
+    columns_to_drop = [
+        "start_identifier",
+        "device_address",
+        "sensor_serial_number",
+        "sensor_date",
+        "sensor_time",
+        "date",
+        "unknown",
+        "TO_BE_FURTHER_PROCESSED",
+        "air_temperature",
+        "relative_humidity",
+        "wind_speed",
+        "wind_direction",
+        "checksum",
+    ]
+    df = df.drop(columns=columns_to_drop)
+    return df
+
+
+@is_documented_by(reader_generic_docstring)
+def reader(
+    filepath,
+    logger=None,
+):
+    """Reader."""
+    import zipfile
+
+    ##------------------------------------------------------------------------.
+    # filename = os.path.basename(filepath)
+    # return read_txt_file(file=filepath, filename=filename, logger=logger)
+
+    # ---------------------------------------------------------------------.
+    #### Iterate over all files (aka timesteps) in the daily zip archive
+    # - Each file contain a single timestep !
+    # list_df = []
+    # with tempfile.TemporaryDirectory() as temp_dir:
+    #     # Extract all files
+    #     unzip_file_on_terminal(filepath, temp_dir)
+
+    #     # Walk through extracted files
+    #     for root, _, files in os.walk(temp_dir):
+    #         for filename in sorted(files):
+    #             if filename.endswith(".txt"):
+    #                 full_path = os.path.join(root, filename)
+    #                 try:
+    #                     df = read_txt_file(file=full_path, filename=filename, logger=logger)
+    #                     if df is not None:
+    #                         list_df.append(df)
+    #                 except Exception as e:
+    #                     msg = f"An error occurred while reading {filename}: {e}"
+    #                     log_error(logger=logger, msg=msg, verbose=True)
+
+    list_df = []
+    with zipfile.ZipFile(filepath, "r") as zip_ref:
+        filenames = sorted(zip_ref.namelist())
+        for filename in filenames:
+            if filename.endswith(".txt"):
+                # Open file
+                with zip_ref.open(filename) as file:
+                    try:
+                        df = read_txt_file(file=file, filename=filename, logger=logger)
+                        if df is not None:
+                            list_df.append(df)
+                    except Exception as e:
+                        msg = f"An error occurred while reading {filename}. The error is: {e}"
+                        log_error(logger=logger, msg=msg, verbose=True)
+
+    # Check the zip file contains at least some non.empty files
+    if len(list_df) == 0:
+        raise ValueError(f"{filepath} contains only empty files!")
+
+    # Concatenate all dataframes into a single one
+    df = pd.concat(list_df)
+
+    # ---------------------------------------------------------------------.
+    return df
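The zip handling in this new reader is the part most likely to be reused: it iterates archive members in place with zipfile instead of extracting to a temporary directory (the commented-out tempfile variant kept above). A self-contained sketch of the same pattern; concat_txt_members and parse_one are illustrative names, not disdrodb API:

import io
import zipfile

import pandas as pd

def concat_txt_members(zip_path, parse_one):
    # Collect the per-timestep .txt members of a daily archive into one DataFrame
    frames = []
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        for member in sorted(zip_ref.namelist()):
            if member.endswith(".txt"):
                with zip_ref.open(member) as file:
                    try:
                        frames.append(parse_one(file))
                    except Exception as e:
                        print(f"Skipping {member}: {e}")
    if not frames:
        raise ValueError(f"{zip_path} contains only empty files!")
    return pd.concat(frames)

# Smoke test with an in-memory zip of two one-line files
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as z:
    z.writestr("a.txt", "1\n")
    z.writestr("b.txt", "2\n")
buf.seek(0)
print(len(concat_txt_members(buf, lambda f: pd.read_csv(f, header=None))))  # 2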