disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +1 -5
- disdrodb/_version.py +2 -2
- disdrodb/accessor/methods.py +14 -3
- disdrodb/api/checks.py +10 -0
- disdrodb/api/create_directories.py +0 -2
- disdrodb/api/io.py +14 -17
- disdrodb/api/path.py +42 -77
- disdrodb/api/search.py +89 -23
- disdrodb/cli/disdrodb_create_summary.py +11 -1
- disdrodb/cli/disdrodb_create_summary_station.py +10 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0a.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l1.py +1 -1
- disdrodb/cli/disdrodb_run_l2e.py +1 -1
- disdrodb/cli/disdrodb_run_l2m.py +1 -1
- disdrodb/configs.py +30 -83
- disdrodb/constants.py +4 -3
- disdrodb/data_transfer/download_data.py +4 -2
- disdrodb/docs.py +2 -2
- disdrodb/etc/products/L1/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/global.yaml +7 -1
- disdrodb/etc/products/L2E/10MIN.yaml +1 -12
- disdrodb/etc/products/L2E/5MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +1 -1
- disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +11 -3
- disdrodb/l0/check_configs.py +49 -16
- disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
- disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
- disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
- disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
- disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
- disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
- disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
- disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
- disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
- disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
- disdrodb/l0/l0_reader.py +2 -2
- disdrodb/l0/l0b_processing.py +70 -15
- disdrodb/l0/l0c_processing.py +7 -3
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
- disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
- disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
- disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
- disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
- disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
- disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
- disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
- disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
- disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
- disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
- disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
- disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
- disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
- disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
- disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
- disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
- disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
- disdrodb/l1/beard_model.py +31 -129
- disdrodb/l1/fall_velocity.py +136 -83
- disdrodb/l1/filters.py +25 -28
- disdrodb/l1/processing.py +16 -17
- disdrodb/l1/resampling.py +101 -38
- disdrodb/l1_env/routines.py +46 -17
- disdrodb/l2/empirical_dsd.py +6 -0
- disdrodb/l2/processing.py +6 -5
- disdrodb/metadata/geolocation.py +0 -2
- disdrodb/metadata/search.py +3 -4
- disdrodb/psd/fitting.py +16 -13
- disdrodb/routines/l0.py +2 -2
- disdrodb/routines/l1.py +173 -60
- disdrodb/routines/l2.py +148 -284
- disdrodb/routines/options.py +345 -0
- disdrodb/routines/wrappers.py +14 -1
- disdrodb/scattering/axis_ratio.py +90 -84
- disdrodb/scattering/permittivity.py +6 -0
- disdrodb/summary/routines.py +735 -670
- disdrodb/utils/archiving.py +51 -44
- disdrodb/utils/attrs.py +3 -1
- disdrodb/utils/dask.py +4 -4
- disdrodb/utils/dict.py +33 -0
- disdrodb/utils/encoding.py +6 -1
- disdrodb/utils/routines.py +9 -8
- disdrodb/utils/time.py +11 -3
- disdrodb/viz/__init__.py +0 -13
- disdrodb/viz/plots.py +231 -1
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
- /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
- /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
- /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
- /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
- /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
- /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
- /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
- /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
precipitation_rate:
|
|
2
|
+
n_digits: 6
|
|
3
|
+
n_characters: 7
|
|
4
|
+
n_decimals: 3
|
|
5
|
+
n_naturals: 3
|
|
6
|
+
data_range:
|
|
7
|
+
- 0
|
|
8
|
+
- 9999.999
|
|
9
|
+
nan_flags: null
|
|
10
|
+
precipitation_accumulated:
|
|
11
|
+
n_digits: 6
|
|
12
|
+
n_characters: 7
|
|
13
|
+
n_decimals: 2
|
|
14
|
+
n_naturals: 4
|
|
15
|
+
data_range:
|
|
16
|
+
- 0
|
|
17
|
+
- 9999.0
|
|
18
|
+
nan_flags: null
|
|
19
|
+
weather_code_synop_4680:
|
|
20
|
+
n_digits: 2
|
|
21
|
+
n_characters: 2
|
|
22
|
+
n_decimals: 0
|
|
23
|
+
n_naturals: 2
|
|
24
|
+
data_range:
|
|
25
|
+
- 0
|
|
26
|
+
- 89
|
|
27
|
+
nan_flags: null
|
|
28
|
+
weather_code_metar_4678:
|
|
29
|
+
n_digits: null
|
|
30
|
+
n_characters: null
|
|
31
|
+
n_decimals: null
|
|
32
|
+
n_naturals: null
|
|
33
|
+
data_range: null
|
|
34
|
+
nan_flags: null
|
|
35
|
+
past_weather1:
|
|
36
|
+
n_digits: null
|
|
37
|
+
n_characters: null
|
|
38
|
+
n_decimals: null
|
|
39
|
+
n_naturals: null
|
|
40
|
+
data_range: null
|
|
41
|
+
nan_flags: null
|
|
42
|
+
past_weather2:
|
|
43
|
+
n_digits: null
|
|
44
|
+
n_characters: null
|
|
45
|
+
n_decimals: null
|
|
46
|
+
n_naturals: null
|
|
47
|
+
data_range: null
|
|
48
|
+
nan_flags: null
|
|
49
|
+
mor_visibility_5min:
|
|
50
|
+
n_digits: null
|
|
51
|
+
n_characters: null
|
|
52
|
+
n_decimals: null
|
|
53
|
+
n_naturals: null
|
|
54
|
+
data_range: null
|
|
55
|
+
nan_flags: null
|
|
56
|
+
mor_visibility:
|
|
57
|
+
n_digits: null
|
|
58
|
+
n_characters: null
|
|
59
|
+
n_decimals: null
|
|
60
|
+
n_naturals: null
|
|
61
|
+
data_range: null
|
|
62
|
+
nan_flags: null
|
|
63
|
+
number_particles:
|
|
64
|
+
n_digits: 4
|
|
65
|
+
n_characters: 4
|
|
66
|
+
n_decimals: 0
|
|
67
|
+
n_naturals: 4
|
|
68
|
+
data_range:
|
|
69
|
+
- 0
|
|
70
|
+
- 9999
|
|
71
|
+
nan_flags: null
|
|
72
|
+
sensor_temperature:
|
|
73
|
+
n_digits: 4
|
|
74
|
+
n_characters: 6
|
|
75
|
+
n_decimals: 1
|
|
76
|
+
n_naturals: 3
|
|
77
|
+
data_range:
|
|
78
|
+
- -99
|
|
79
|
+
- 100
|
|
80
|
+
nan_flags: null
|
|
81
|
+
obstruction_status:
|
|
82
|
+
n_digits: null
|
|
83
|
+
n_characters: null
|
|
84
|
+
n_decimals: null
|
|
85
|
+
n_naturals: null
|
|
86
|
+
data_range: null
|
|
87
|
+
nan_flags: null
|
|
88
|
+
total_extinction_coefficient:
|
|
89
|
+
n_digits: 5
|
|
90
|
+
n_characters: 6
|
|
91
|
+
n_decimals: 2
|
|
92
|
+
n_naturals: 3
|
|
93
|
+
data_range:
|
|
94
|
+
- 0
|
|
95
|
+
- 999.99
|
|
96
|
+
nan_flags: null
|
|
97
|
+
transmissometer_extinction_coefficient:
|
|
98
|
+
n_digits: 5
|
|
99
|
+
n_characters: 6
|
|
100
|
+
n_decimals: 2
|
|
101
|
+
n_naturals: 3
|
|
102
|
+
data_range:
|
|
103
|
+
- 0
|
|
104
|
+
- 999.99
|
|
105
|
+
nan_flags: null
|
|
106
|
+
back_scatter_extinction_coefficient:
|
|
107
|
+
n_digits: 5
|
|
108
|
+
n_characters: 7
|
|
109
|
+
n_decimals: 2
|
|
110
|
+
n_naturals: 3
|
|
111
|
+
data_range:
|
|
112
|
+
- -999.99
|
|
113
|
+
- 999.99
|
|
114
|
+
nan_flags: null
|
|
115
|
+
ambient_light_sensor_signal:
|
|
116
|
+
n_digits: 5
|
|
117
|
+
n_characters: 5
|
|
118
|
+
n_decimals: 0
|
|
119
|
+
n_naturals: 5
|
|
120
|
+
data_range:
|
|
121
|
+
- 0
|
|
122
|
+
- 99998
|
|
123
|
+
nan_flags: 99999
|
|
124
|
+
sensor_status:
|
|
125
|
+
n_digits: null
|
|
126
|
+
n_characters: null
|
|
127
|
+
n_decimals: null
|
|
128
|
+
n_naturals: null
|
|
129
|
+
data_range: null
|
|
130
|
+
nan_flags: null
|
|
131
|
+
ambient_light_sensor_signal_status:
|
|
132
|
+
n_digits: null
|
|
133
|
+
n_characters: null
|
|
134
|
+
n_decimals: null
|
|
135
|
+
n_naturals: null
|
|
136
|
+
data_range: null
|
|
137
|
+
nan_flags: null
|
|
138
|
+
raw_drop_number:
|
|
139
|
+
n_digits: 0
|
|
140
|
+
n_characters: 4096
|
|
141
|
+
n_decimals: 0
|
|
142
|
+
n_naturals: 0
|
|
143
|
+
data_range: null
|
|
144
|
+
nan_flags: null
|
|
145
|
+
dimension_order:
|
|
146
|
+
- velocity_bin_center
|
|
147
|
+
- diameter_bin_center
|
|
148
|
+
n_values: 336
|
disdrodb/l0/l0_reader.py
CHANGED
|
@@ -35,9 +35,9 @@ logger = logging.getLogger(__name__)
|
|
|
35
35
|
|
|
36
36
|
def define_readers_directory(sensor_name="") -> str:
|
|
37
37
|
"""Returns the path to the ``disdrodb.l0.readers`` directory within the disdrodb package."""
|
|
38
|
-
from disdrodb import
|
|
38
|
+
from disdrodb import package_dir
|
|
39
39
|
|
|
40
|
-
reader_dir = os.path.join(
|
|
40
|
+
reader_dir = os.path.join(package_dir, "l0", "readers", sensor_name)
|
|
41
41
|
return reader_dir
|
|
42
42
|
|
|
43
43
|
|
disdrodb/l0/l0b_processing.py
CHANGED
|
@@ -80,15 +80,16 @@ def infer_split_str(string: str) -> str:
|
|
|
80
80
|
return split_str
|
|
81
81
|
|
|
82
82
|
|
|
83
|
-
def
|
|
83
|
+
def replace_empty_strings_with_zeros(values):
    """Set every zero-length entry of a string array to ``"0"``.

    The array is modified in place and the same object is returned.
    """
    # Boolean mask flagging the entries that hold an empty string
    is_empty = np.char.str_len(values) == 0
    values[is_empty] = "0"
    return values
|
|
86
87
|
|
|
87
88
|
|
|
88
|
-
def
|
|
89
|
+
def format_string_array(string: str, n_values: int) -> np.array:
|
|
89
90
|
"""Split a string with multiple numbers separated by a delimiter into an 1D array.
|
|
90
91
|
|
|
91
|
-
e.g. :
|
|
92
|
+
e.g. : format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
|
|
92
93
|
|
|
93
94
|
If empty string ("") --> Return an arrays of zeros
|
|
94
95
|
If the list length is not n_values -> Return an arrays of np.nan
|
|
@@ -126,7 +127,7 @@ def _format_string_array(string: str, n_values: int) -> np.array:
|
|
|
126
127
|
# Ensure string type
|
|
127
128
|
values = values.astype("str")
|
|
128
129
|
# Replace '' with 0
|
|
129
|
-
values =
|
|
130
|
+
values = replace_empty_strings_with_zeros(values)
|
|
130
131
|
# Replace "-9.999" with 0
|
|
131
132
|
values = np.char.replace(values, "-9.999", "0")
|
|
132
133
|
# Cast values to float type
|
|
@@ -135,7 +136,7 @@ def _format_string_array(string: str, n_values: int) -> np.array:
|
|
|
135
136
|
return values
|
|
136
137
|
|
|
137
138
|
|
|
138
|
-
def
|
|
139
|
+
def reshape_raw_spectrum(
|
|
139
140
|
arr: np.array,
|
|
140
141
|
dims_order: list,
|
|
141
142
|
dims_size_dict: dict,
|
|
@@ -243,17 +244,17 @@ def retrieve_l0b_arrays(
|
|
|
243
244
|
# Ensure is a string, get a numpy array for each row and then stack
|
|
244
245
|
# - Option 1: Clear but lot of copies
|
|
245
246
|
# df_series = df[key].astype(str)
|
|
246
|
-
# list_arr = df_series.apply(
|
|
247
|
+
# list_arr = df_series.apply(format_string_array, n_values=n_values)
|
|
247
248
|
# arr = np.stack(list_arr, axis=0)
|
|
248
249
|
|
|
249
250
|
# - Option 2: still copies
|
|
250
|
-
# arr = np.vstack(
|
|
251
|
+
# arr = np.vstack(format_string_array(s, n_values=n_values) for s in df_series.astype(str))
|
|
251
252
|
|
|
252
253
|
# - Option 3: more memory efficient
|
|
253
254
|
n_timesteps = len(df[key])
|
|
254
255
|
arr = np.empty((n_timesteps, n_values), dtype=float) # preallocates
|
|
255
256
|
for i, s in enumerate(df[key].astype(str)):
|
|
256
|
-
arr[i, :] =
|
|
257
|
+
arr[i, :] = format_string_array(s, n_values=n_values)
|
|
257
258
|
|
|
258
259
|
# Retrieve dimensions
|
|
259
260
|
dims_order = dims_order_dict[key]
|
|
@@ -263,7 +264,7 @@ def retrieve_l0b_arrays(
|
|
|
263
264
|
# - This applies i.e for PARSIVEL*, LPM, PWS100
|
|
264
265
|
# - This does not apply to RD80
|
|
265
266
|
if key == "raw_drop_number" and len(dims_order) == 2:
|
|
266
|
-
arr, dims =
|
|
267
|
+
arr, dims = reshape_raw_spectrum(
|
|
267
268
|
arr=arr,
|
|
268
269
|
dims_order=dims_order,
|
|
269
270
|
dims_size_dict=dims_size_dict,
|
|
@@ -288,7 +289,57 @@ def retrieve_l0b_arrays(
|
|
|
288
289
|
#### L0B Coords and attributes
|
|
289
290
|
|
|
290
291
|
|
|
291
|
-
def
|
|
292
|
+
def ensure_valid_geolocation(ds: xr.Dataset, coord: str, errors: str = "ignore") -> xr.Dataset:
    """Ensure a geolocation coordinate only holds values within its valid range.

    'altitude' must be >= 0, 'latitude' must be within [-90, 90] and
    'longitude' within [-180, 180].

    The check is applied element-wise, so it can deal with coordinates varying with time.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset containing the coordinate.
    coord : str
        Name of the coordinate variable to validate.
        Must be one of "altitude", "latitude" or "longitude".
    errors : {"ignore", "raise", "coerce"}, default "ignore"
        - "ignore": nothing is done.
        - "raise" : raise ValueError if invalid values are found.
        - "coerce": out-of-range values are replaced with NaN.

    Returns
    -------
    xr.Dataset
        Dataset with validated coordinate values.

    Raises
    ------
    ValueError
        If ``coord`` is not in ``ds`` or is not a geolocation coordinate,
        if ``errors`` is not one of the supported options, or
        if ``errors="raise"`` and out-of-range values are found.
    """
    # Define coordinates ranges
    valid_ranges = {
        "altitude": (0, np.inf),
        "latitude": (-90, 90),
        "longitude": (-180, 180),
    }
    valid_errors = ("ignore", "raise", "coerce")

    # Check coordinate is available and correctly defined
    if coord not in ds:
        raise ValueError(f"Coordinate '{coord}' not found in dataset.")
    if coord not in valid_ranges:
        raise ValueError(f"Valid geolocation coordinates are: {list(valid_ranges)}.")

    # Check the errors option, so that a typo is not silently treated as "ignore"
    if errors not in valid_errors:
        raise ValueError(f"Invalid errors='{errors}'. Valid options are: {list(valid_errors)}.")

    # Nothing to do: skip the (possibly lazy) computation of the invalid mask
    if errors == "ignore":
        return ds

    # Identify out-of-range values
    # - NaN values compare False on both sides and are therefore not flagged
    vmin, vmax = valid_ranges[coord]
    invalid = ((ds[coord] < vmin) | (ds[coord] > vmax)).compute()

    # Deal with invalid values
    if errors == "raise":
        if invalid.any():
            raise ValueError(f"{coord} out of range {vmin}-{vmax}.")
        return ds

    # errors == "coerce": mask out-of-range values
    ds[coord] = ds[coord].where(~invalid)
    return ds
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
|
|
292
343
|
"""Convert variables with ``object`` dtype to ``string``.
|
|
293
344
|
|
|
294
345
|
Parameters
|
|
@@ -307,7 +358,7 @@ def _convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
|
|
|
307
358
|
return ds
|
|
308
359
|
|
|
309
360
|
|
|
310
|
-
def
|
|
361
|
+
def set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
|
|
311
362
|
"""Set attributes to each ``xr.Dataset`` variable.
|
|
312
363
|
|
|
313
364
|
Parameters
|
|
@@ -353,7 +404,7 @@ def add_dataset_crs_coords(ds):
|
|
|
353
404
|
|
|
354
405
|
|
|
355
406
|
def _define_dataset_variables(df, sensor_name, logger=None, verbose=False):
|
|
356
|
-
"""Define DISDRODB L0B netCDF variables."""
|
|
407
|
+
"""Define DISDRODB L0B netCDF array variables."""
|
|
357
408
|
# Preprocess raw_spectrum, diameter and velocity arrays if available
|
|
358
409
|
raw_fields = ["raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"]
|
|
359
410
|
if np.any(np.isin(raw_fields, df.columns)):
|
|
@@ -436,7 +487,7 @@ def set_geolocation_coordinates(ds, metadata):
|
|
|
436
487
|
# If coordinate not present, add it from dictionary
|
|
437
488
|
if coord not in ds:
|
|
438
489
|
ds = ds.assign_coords({coord: metadata.pop(coord, np.nan)})
|
|
439
|
-
# Else
|
|
490
|
+
# Else ensure coord is a dataset coordinates
|
|
440
491
|
else:
|
|
441
492
|
ds = ds.set_coords(coord)
|
|
442
493
|
_ = metadata.pop(coord, None)
|
|
@@ -445,6 +496,10 @@ def set_geolocation_coordinates(ds, metadata):
|
|
|
445
496
|
for coord in coords:
|
|
446
497
|
ds[coord] = xr.where(ds[coord] == -9999, np.nan, ds[coord])
|
|
447
498
|
|
|
499
|
+
# Ensure valid geolocation coordinates
|
|
500
|
+
for coord in coords:
|
|
501
|
+
ds = ensure_valid_geolocation(ds=ds, coord=coord, errors="coerce")
|
|
502
|
+
|
|
448
503
|
# Set attributes without geolocation coordinates
|
|
449
504
|
ds.attrs = metadata
|
|
450
505
|
return ds
|
|
@@ -469,11 +524,11 @@ def finalize_dataset(ds, sensor_name, metadata):
|
|
|
469
524
|
ds = ds.transpose("time", "diameter_bin_center", ...)
|
|
470
525
|
|
|
471
526
|
# Ensure variables with dtype object are converted to string
|
|
472
|
-
ds =
|
|
527
|
+
ds = convert_object_variables_to_string(ds)
|
|
473
528
|
|
|
474
529
|
# Add netCDF variable and coordinate attributes
|
|
475
530
|
# - Add variable attributes: long_name, units, descriptions, valid_min, valid_max
|
|
476
|
-
ds =
|
|
531
|
+
ds = set_variable_attributes(ds=ds, sensor_name=sensor_name)
|
|
477
532
|
# - Add netCDF coordinate attributes
|
|
478
533
|
ds = set_coordinate_attributes(ds=ds)
|
|
479
534
|
# - Set DISDRODB global attributes
|
disdrodb/l0/l0c_processing.py
CHANGED
|
@@ -117,7 +117,12 @@ def split_dataset_by_sampling_intervals(
|
|
|
117
117
|
|
|
118
118
|
# If sample_interval is a dataset variable, use it to define dictionary of datasets
|
|
119
119
|
if "sample_interval" in ds:
|
|
120
|
-
|
|
120
|
+
dict_ds = {}
|
|
121
|
+
for interval in measurement_intervals:
|
|
122
|
+
ds_subset = ds.isel(time=ds["sample_interval"] == interval)
|
|
123
|
+
if ds_subset.sizes["time"] > 2:
|
|
124
|
+
dict_ds[int(interval)] = ds_subset
|
|
125
|
+
return dict_ds
|
|
121
126
|
|
|
122
127
|
# ---------------------------------------------------------------------------------------.
|
|
123
128
|
# Otherwise exploit difference between timesteps to identify change point
|
|
@@ -460,9 +465,8 @@ def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=Tru
|
|
|
460
465
|
# if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
|
|
461
466
|
# qc_flag[-1] = 0
|
|
462
467
|
|
|
463
|
-
#
|
|
468
|
+
# Add time quality flag variable
|
|
464
469
|
ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
|
|
465
|
-
ds = ds.set_coords("time_qc")
|
|
466
470
|
|
|
467
471
|
# Add CF attributes for time_qc
|
|
468
472
|
ds["time_qc"].attrs = {
|
|
@@ -69,7 +69,7 @@ def reader(
|
|
|
69
69
|
"quality_measurement": "quality_index",
|
|
70
70
|
"max_diameter_hail": "max_hail_diameter",
|
|
71
71
|
"laser_status": "laser_status",
|
|
72
|
-
"
|
|
72
|
+
"static_signal_status": "static_signal_status",
|
|
73
73
|
"interior_temperature": "temperature_interior",
|
|
74
74
|
"laser_temperature": "laser_temperature",
|
|
75
75
|
"laser_temperature_analog_status": "laser_temperature_analog_status",
|
|
@@ -137,7 +137,7 @@ def reader(
|
|
|
137
137
|
"quality_index",
|
|
138
138
|
"max_hail_diameter",
|
|
139
139
|
"laser_status",
|
|
140
|
-
"
|
|
140
|
+
"static_signal_status",
|
|
141
141
|
"laser_temperature_analog_status",
|
|
142
142
|
"laser_temperature_digital_status",
|
|
143
143
|
"laser_current_analog_status",
|
|
@@ -151,7 +151,7 @@ def reader(
|
|
|
151
151
|
"current_heating_heads_status",
|
|
152
152
|
"current_heating_carriers_status",
|
|
153
153
|
"control_output_laser_power_status",
|
|
154
|
-
"
|
|
154
|
+
"reserved_status",
|
|
155
155
|
"temperature_interior",
|
|
156
156
|
"laser_temperature",
|
|
157
157
|
"laser_current_average",
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# -----------------------------------------------------------------------------.
|
|
4
|
+
# Copyright (c) 2021-2023 DISDRODB developers
|
|
5
|
+
#
|
|
6
|
+
# This program is free software: you can redistribute it and/or modify
|
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
9
|
+
# (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This program is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
# GNU General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU General Public License
|
|
17
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18
|
+
# -----------------------------------------------------------------------------.
|
|
19
|
+
"""DISDRODB reader for ULIEGE LPM stations."""
|
|
20
|
+
|
|
21
|
+
import numpy as np
|
|
22
|
+
import pandas as pd
|
|
23
|
+
|
|
24
|
+
from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
|
|
25
|
+
from disdrodb.l0.l0a_processing import read_raw_text_file
|
|
26
|
+
from disdrodb.utils.logger import log_error, log_warning
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def read_txt_file(file, filename, logger):
    """Parse a ULIEGE LPM hourly file into a dataframe.

    Parameters
    ----------
    file : str or file object
        Path or open file object forwarded as ``filepath`` to ``read_raw_text_file``.
    filename : str
        Name of the hourly file (e.g. ``2024020200.txt``).
        The part of its base name before the first dot must have format ``YYYYMMDDHH``
        and defines the start time of the file.
        Any directory prefix (e.g. the member path inside a zip archive) is ignored.
    logger : logger object or None
        Logger forwarded to ``read_raw_text_file`` and ``log_warning``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with one row per retained timestep, a ``time`` column and
        the ``raw_drop_number`` string padded to 440 values.

    Raises
    ------
    ValueError
        If the file contains no row with the expected number of delimiters,
        or if it contains more than 60 timesteps.
    """
    import os

    #### - Define raw data headers
    # - Each line is read as a single string and split afterwards
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {
        # - Define delimiter
        "delimiter": "\\n",
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Since column names are expected to be passed explicitly, header is set to None
        "header": None,
        # - Number of rows to be skipped at the beginning of the file
        "skiprows": None,
        # - Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # - Define reader engine
        #   - C engine is faster
        #   - Python engine is more feature-complete
        "engine": "python",
        # - Define on-the-fly decompression of on-disk data
        #   - Available: gzip, bz2, zip
        "compression": "infer",
        # - Strings to recognize as NA/NaN and replace with standard NA flags
        #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
        #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
        #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=file,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Count number of delimiters to identify valid rows
    df = df[df["TO_PARSE"].str.count(";") == 442]

    # Check there are still valid rows
    if len(df) == 0:
        raise ValueError(f"No valid rows in {filename}.")

    # Split by ; delimiter (before raw drop number)
    # - n=43 yields 44 columns: the last one keeps the whole raw spectrum string
    df = df["TO_PARSE"].str.split(";", expand=True, n=43)

    # Assign column names
    column_names = [
        "id",
        "sample_interval",
        "weather_code_synop_4677_5min",  # or "weather_code_synop_4680_5min",
        "weather_code_metar_4678_5min",
        "precipitation_rate_5min",
        "weather_code_synop_4677",  # or "weather_code_synop_4680",
        "weather_code_metar_4678",
        "precipitation_rate",
        "precipitation_accumulated",
        "sensor_time",
        # "mor_visibility",
        # "reflectivity",
        # "quality_index",
        # "max_hail_diameter",
        # "laser_status",
        "dummy1",
        "dummy2",
        # "laser_temperature",
        "laser_current_average",
        "control_voltage",
        "optical_control_voltage_output",
        # "current_heating_house",
        # "current_heating_heads",
        # "current_heating_carriers",
        "number_particles",
        "number_particles_internal_data",
        "number_particles_min_speed",
        "number_particles_min_speed_internal_data",
        "number_particles_max_speed",
        "number_particles_max_speed_internal_data",
        "number_particles_min_diameter",
        "number_particles_min_diameter_internal_data",
        "number_particles_no_hydrometeor",
        "number_particles_no_hydrometeor_internal_data",
        # "number_particles_unknown_classification", # ????
        # "number_particles_unknown_classification_internal_data",
        "number_particles_class_1",
        "number_particles_class_1_internal_data",
        "number_particles_class_2",
        "number_particles_class_2_internal_data",
        "number_particles_class_3",
        "number_particles_class_3_internal_data",
        "number_particles_class_4",
        "number_particles_class_4_internal_data",
        "number_particles_class_5",
        "number_particles_class_5_internal_data",
        "number_particles_class_6",
        "number_particles_class_6_internal_data",
        "number_particles_class_7",
        "number_particles_class_7_internal_data",
        "number_particles_class_8",
        "number_particles_class_8_internal_data",
        "number_particles_class_9",
        "number_particles_class_9_internal_data",
        "raw_drop_number",
    ]
    df.columns = column_names

    # Deal with case if there are 61 timesteps
    # - Occurs sometimes when previous hourly file miss timesteps
    if len(df) == 61:
        log_warning(logger=logger, msg=f"{filename} contains 61 timesteps. Dropping the first.")
        df = df.iloc[1:]

    # Raise error if more than 60 timesteps/rows
    n_rows = len(df)
    if n_rows > 60:
        raise ValueError(f"The hourly file contains {n_rows} timesteps.")

    # Infer and define "time" column
    # - Use the base name, so that a directory prefix (e.g. 'subdir/2024020200.txt'
    #   when the file is a zip archive member) does not break the datetime parsing
    start_time_str = os.path.basename(filename).split(".")[0]  # '2024020200.txt' -> '2024020200'
    start_time = pd.to_datetime(start_time_str, format="%Y%m%d%H")

    # - Define timedelta based on sensor_time
    # - The offset is relative to the first retained row
    dt = pd.to_timedelta(df["sensor_time"] + ":00").to_numpy().astype("m8[s]")
    dt = dt - dt[0]

    # - Define approximate time
    df["time"] = start_time + dt

    # - Keep rows where time increment is between 00 and 59 minutes
    # NOTE(review): only the upper bound is enforced. A negative increment
    #   (e.g. sensor_time wrapping past midnight) is currently kept - confirm if intended.
    valid_rows = dt <= np.timedelta64(3540, "s")
    df = df[valid_rows]

    # Drop rows where sample interval is not 60 seconds
    df = df[df["sample_interval"] == "000060"]

    # Drop rows with invalid raw_drop_number
    # --> 440 value # 22x20
    # --> 400 here  # 20x20
    # - 1599 characters correspond to 400 three-character values plus 399 delimiters
    df = df[df["raw_drop_number"].astype(str).str.len() == 1599]

    # Deal with old LPM version 20x20 spectrum
    # - Append 40 '000' values (2 x 20 bins) to obtain the 440 values spectrum
    # NOTE(review): the original comment states these zeros fill the first two velocity bins,
    #   while the code appends them at the end of the string - confirm against the spectrum ordering.
    df["raw_drop_number"] = df["raw_drop_number"] + ";" + ";".join(["000"] * 40)

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "sample_interval",
        "sensor_time",
        "dummy1",
        "dummy2",
        "id",
    ]
    df = df.drop(columns=columns_to_drop)
    return df
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    import zipfile

    # ---------------------------------------------------------------------.
    #### Iterate over all text files in the zip archive
    # - Each '.txt' member is parsed with read_txt_file
    # - A member that fails to be parsed is logged and skipped
    parsed_frames = []
    with zipfile.ZipFile(filepath, "r") as archive:
        for filename in sorted(archive.namelist()):
            # Skip members which are not text files
            if not filename.endswith(".txt"):
                continue
            with archive.open(filename) as file:
                try:
                    member_df = read_txt_file(file=file, filename=filename, logger=logger)
                except Exception as e:
                    msg = f"An error occurred while reading {filename}. The error is: {e}"
                    log_error(logger=logger, msg=msg, verbose=True)
                    continue
                if member_df is not None:
                    parsed_frames.append(member_df)

    # Check at least one file of the zip archive has been successfully parsed
    if not parsed_frames:
        raise ValueError(f"{filepath} contains only empty files!")

    # ---------------------------------------------------------------------.
    # Concatenate all dataframes into a single one
    return pd.concat(parsed_frames)
|
|
@@ -96,7 +96,7 @@ def reader(
|
|
|
96
96
|
"quality_index",
|
|
97
97
|
"max_hail_diameter",
|
|
98
98
|
"laser_status",
|
|
99
|
-
"
|
|
99
|
+
"static_signal_status",
|
|
100
100
|
"laser_temperature_analog_status",
|
|
101
101
|
"laser_temperature_digital_status",
|
|
102
102
|
"laser_current_analog_status",
|
|
@@ -110,7 +110,7 @@ def reader(
|
|
|
110
110
|
"current_heating_heads_status",
|
|
111
111
|
"current_heating_carriers_status",
|
|
112
112
|
"control_output_laser_power_status",
|
|
113
|
-
"
|
|
113
|
+
"reserved_status",
|
|
114
114
|
"temperature_interior",
|
|
115
115
|
"laser_temperature",
|
|
116
116
|
"laser_current_average",
|
|
@@ -96,7 +96,7 @@ def reader(
|
|
|
96
96
|
"quality_index",
|
|
97
97
|
"max_hail_diameter",
|
|
98
98
|
"laser_status",
|
|
99
|
-
"
|
|
99
|
+
"static_signal_status",
|
|
100
100
|
"laser_temperature_analog_status",
|
|
101
101
|
"laser_temperature_digital_status",
|
|
102
102
|
"laser_current_analog_status",
|
|
@@ -110,7 +110,7 @@ def reader(
|
|
|
110
110
|
"current_heating_heads_status",
|
|
111
111
|
"current_heating_carriers_status",
|
|
112
112
|
"control_output_laser_power_status",
|
|
113
|
-
"
|
|
113
|
+
"reserved_status",
|
|
114
114
|
"temperature_interior",
|
|
115
115
|
"laser_temperature",
|
|
116
116
|
"laser_current_average",
|