disdrodb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +64 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +139 -9
- disdrodb/api/configs.py +4 -2
- disdrodb/api/info.py +10 -10
- disdrodb/api/io.py +237 -18
- disdrodb/api/path.py +81 -75
- disdrodb/api/search.py +6 -6
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
- disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +145 -14
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +44 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +41 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +30 -30
- disdrodb/l0/l0b_nc_processing.py +108 -2
- disdrodb/l0/l0b_processing.py +4 -4
- disdrodb/l0/l0c_processing.py +5 -13
- disdrodb/l0/manuals/SWS250.pdf +0 -0
- disdrodb/l0/manuals/VPF730.pdf +0 -0
- disdrodb/l0/manuals/VPF750.pdf +0 -0
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +105 -0
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +128 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/{PARSIVEL → PARSIVEL2}/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → NCAR/FARM_PARSIVEL2.py} +43 -70
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +29 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +31 -14
- disdrodb/l0/routines.py +105 -14
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +45 -44
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1/routines.py +35 -42
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/event.py +27 -120
- disdrodb/l2/processing.py +267 -116
- disdrodb/l2/routines.py +618 -254
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +463 -144
- disdrodb/psd/models.py +8 -5
- disdrodb/routines.py +3 -3
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +56 -36
- disdrodb/scattering/permittivity.py +486 -0
- disdrodb/scattering/routines.py +701 -159
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4120 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +59 -8
- disdrodb/utils/dataframe.py +63 -9
- disdrodb/utils/directories.py +49 -17
- disdrodb/utils/encoding.py +33 -19
- disdrodb/utils/logger.py +13 -6
- disdrodb/utils/manipulations.py +71 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +165 -19
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +85 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +327 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/RECORD +127 -87
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
- disdrodb/l1/encoding_attrs.py +0 -635
- disdrodb/l2/processing_options.py +0 -213
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/api/io.py
CHANGED

```diff
@@ -17,6 +17,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Routines to list and open DISDRODB products."""
+import datetime
 import os
 import shutil
 import subprocess
@@ -24,6 +25,14 @@ import sys
 from pathlib import Path
 from typing import Optional
 
+import numpy as np
+
+from disdrodb.api.checks import (
+    check_filepaths,
+    check_start_end_time,
+    get_current_utc_time,
+)
+from disdrodb.api.info import get_start_end_time_from_filepaths
 from disdrodb.api.path import (
     define_campaign_dir,
     define_data_dir,
@@ -48,6 +57,75 @@ def filter_filepaths(filepaths, debugging_mode):
     return filepaths
 
 
+def is_within_time_period(l_start_time, l_end_time, start_time, end_time):
+    """Assess which files are within the start and end time."""
+    # - Case 1
+    #     s               e
+    #     |               |
+    #   ---------> (-------->)
+    idx_select1 = np.logical_and(l_start_time <= start_time, l_end_time > start_time)
+    # - Case 2
+    #     s               e
+    #     |               |
+    #       ---------(-.)
+    idx_select2 = np.logical_and(l_start_time >= start_time, l_end_time <= end_time)
+    # - Case 3
+    #     s               e
+    #     |               |
+    #           -------------
+    idx_select3 = np.logical_and(l_start_time < end_time, l_end_time > end_time)
+    # - Get idx where one of the cases occurs
+    idx_select = np.logical_or.reduce([idx_select1, idx_select2, idx_select3])
+    return idx_select
+
+
+def filter_by_time(filepaths, start_time=None, end_time=None):
+    """Filter filepaths by start_time and end_time.
+
+    Parameters
+    ----------
+    filepaths : list
+        List of filepaths.
+    start_time : datetime.datetime
+        Start time.
+        If ``None``, it is set to 1978-01-01.
+    end_time : datetime.datetime
+        End time.
+        If ``None``, it is set to the current UTC time.
+
+    Returns
+    -------
+    filepaths : list
+        List of valid filepaths.
+        If there are no valid filepaths, an empty list is returned!
+
+    """
+    # -------------------------------------------------------------------------.
+    # Check filepaths
+    if isinstance(filepaths, type(None)):
+        return []
+    filepaths = check_filepaths(filepaths)
+    if len(filepaths) == 0:
+        return []
+
+    # -------------------------------------------------------------------------.
+    # Check start_time and end_time
+    if start_time is None:
+        start_time = datetime.datetime(1978, 1, 1, 0, 0, 0)  # Dummy start
+    if end_time is None:
+        end_time = get_current_utc_time()  # Current time
+    start_time, end_time = check_start_end_time(start_time, end_time)
+
+    # -------------------------------------------------------------------------.
+    # - Retrieve start_time and end_time of the files
+    l_start_time, l_end_time = get_start_end_time_from_filepaths(filepaths)
+
+    # -------------------------------------------------------------------------.
+    # Select files with data within the start and end time
+    idx_select = is_within_time_period(l_start_time, l_end_time, start_time=start_time, end_time=end_time)
+    return np.array(filepaths)[idx_select].tolist()
+
+
 def find_files(
     data_source,
     campaign_name,
```
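The three cases in `is_within_time_period` implement the standard interval-overlap test between each file's coverage and the query period. A minimal standalone sketch with synthetic coverages (all values are illustrative, not from the package):

```python
import numpy as np

# Three files covering 6 hours each (illustrative coverages)
l_start = np.array(["2024-01-01T00", "2024-01-01T06", "2024-01-01T12"], dtype="datetime64[h]")
l_end = l_start + np.timedelta64(6, "h")

# Query period 05:00-11:00
start, end = np.datetime64("2024-01-01T05"), np.datetime64("2024-01-01T11")

case1 = np.logical_and(l_start <= start, l_end > start)  # starts before, extends into the period
case2 = np.logical_and(l_start >= start, l_end <= end)   # fully contained in the period
case3 = np.logical_and(l_start < end, l_end > end)       # starts inside, extends past the end
print(np.logical_or.reduce([case1, case2, case3]))       # [ True  True False]
```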
```diff
@@ -55,7 +133,9 @@ def find_files(
     product,
     debugging_mode: bool = False,
     data_archive_dir: Optional[str] = None,
-    glob_pattern=
+    glob_pattern=None,
+    start_time=None,
+    end_time=None,
     **product_kwargs,
 ):
     """Retrieve DISDRODB product files for a given station.
@@ -100,6 +180,8 @@
         List of file paths.
 
     """
+    from disdrodb.metadata import read_station_metadata
+
     # Retrieve data directory
     data_dir = define_data_dir(
         data_archive_dir=data_archive_dir,
@@ -110,8 +192,16 @@
         # Product options
         **product_kwargs,
     )
-
-
+    # For the DISDRODB RAW product, retrieve glob_pattern from metadata if not specified
+    if product == "RAW" and glob_pattern is None:
+        metadata = read_station_metadata(
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+        )
+        glob_pattern = metadata.get("raw_data_glob_pattern", "")
+
+    # For the other DISDRODB products, define the correct glob pattern
     if product != "RAW":
         glob_pattern = "*.parquet" if product == "L0A" else "*.nc"
 
@@ -126,6 +216,13 @@
         msg = f"No {product} files are available in {data_dir}. Run {product} processing first."
         raise ValueError(msg)
 
+    # Filter files by start_time and end_time
+    if product != "RAW":
+        filepaths = filter_by_time(filepaths=filepaths, start_time=start_time, end_time=end_time)
+        if len(filepaths) == 0:
+            msg = f"No {product} files are available between {start_time} and {end_time}."
+            raise ValueError(msg)
+
     # Sort filepaths
     filepaths = sorted(filepaths)
     return filepaths
```
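With the new `start_time`/`end_time` keywords, a station's file listing can be restricted to a time window before anything is opened. A hedged usage sketch (data source, campaign and station names are placeholders):

```python
import datetime

from disdrodb.api.io import find_files

# List a station's L0C files for July 2009 only (identifiers are placeholders)
filepaths = find_files(
    data_source="EPFL",
    campaign_name="EPFL_2009",
    station_name="10",
    product="L0C",
    start_time=datetime.datetime(2009, 7, 1),
    end_time=datetime.datetime(2009, 7, 31),
)
```

Note that the time filter applies only to non-RAW products; RAW listings are returned unfiltered, presumably because raw filenames do not reliably encode their temporal coverage.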
```diff
@@ -133,6 +230,117 @@
 
 ####----------------------------------------------------------------------------------
 #### DISDRODB Open Product Files
+
+
+def open_raw_files(filepaths, data_source, campaign_name, station_name):
+    """Open raw files into DISDRODB L0A or L0B format.
+
+    Raw text files are opened into a DISDRODB L0A pandas DataFrame.
+    Raw netCDF files are opened into a DISDRODB L0B xarray Dataset.
+    """
+    from disdrodb.issue import read_station_issue
+    from disdrodb.l0 import generate_l0a, generate_l0b_from_nc, get_station_reader
+    from disdrodb.metadata import read_station_metadata
+
+    # Read station metadata
+    metadata = read_station_metadata(
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+    sensor_name = metadata["sensor_name"]
+
+    # Read station issue YAML file
+    try:
+        issue_dict = read_station_issue(
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+        )
+    except Exception:
+        issue_dict = None
+
+    # Get reader
+    reader = get_station_reader(
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+    # Return a DISDRODB L0A dataframe if raw text files
+    if metadata["raw_data_format"] == "txt":
+        df = generate_l0a(
+            filepaths=filepaths,
+            reader=reader,
+            sensor_name=sensor_name,
+            issue_dict=issue_dict,
+            verbose=False,
+        )
+        return df
+
+    # Return a DISDRODB L0B dataset if raw netCDF files
+    ds = generate_l0b_from_nc(
+        filepaths=filepaths,
+        reader=reader,
+        sensor_name=sensor_name,
+        metadata=metadata,
+        issue_dict=issue_dict,
+        verbose=False,
+    )
+    return ds
+
+
+def open_netcdf_files(
+    filepaths,
+    chunks=-1,
+    start_time=None,
+    end_time=None,
+    variables=None,
+    parallel=False,
+    compute=True,
+    **open_kwargs,
+):
+    """Open DISDRODB netCDF files using xarray."""
+    import xarray as xr
+
+    # Ensure variables is a list
+    if variables is not None and isinstance(variables, str):
+        variables = [variables]
+    # Define preprocessing function for parallel opening
+    preprocess = (lambda ds: ds[variables]) if parallel and variables is not None else None
+
+    # Open netCDF files
+    ds = xr.open_mfdataset(
+        filepaths,
+        chunks=chunks,
+        combine="nested",
+        concat_dim="time",
+        engine="netcdf4",
+        parallel=parallel,
+        preprocess=preprocess,
+        compat="no_conflicts",
+        combine_attrs="override",
+        coords="different",  # maybe minimal?
+        decode_timedelta=False,
+        cache=False,
+        autoclose=True,
+        **open_kwargs,
+    )
+    # - Subset variables
+    if variables is not None and preprocess is None:
+        ds = ds[variables]
+    # - Subset time
+    ds = ds.sel(time=slice(start_time, end_time))
+    # - If compute=True, load into memory and close connections to the files
+    if compute:
+        dataset = ds.compute()
+        ds.close()
+        dataset.close()
+        del ds
+    else:
+        dataset = ds
+    return dataset
+
+
 def open_dataset(
     data_source,
     campaign_name,
```
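`open_netcdf_files` centralizes the `xr.open_mfdataset` call that `open_dataset` previously inlined. When `parallel=True` and `variables` is given, the subsetting moves into the `preprocess` hook so each worker drops unneeded variables before concatenation; otherwise the subset is applied once on the combined dataset. A hedged sketch (file paths and the variable name are illustrative):

```python
from disdrodb.api.io import open_netcdf_files

ds = open_netcdf_files(
    filepaths=["20240101.nc", "20240102.nc"],
    variables="raw_drop_number",  # a plain string is promoted to a one-element list
    parallel=True,                # variables are then trimmed per file via `preprocess`
    compute=False,                # keep the result dask-backed
)
```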
```diff
@@ -141,7 +349,12 @@ def open_dataset(
     product_kwargs=None,
     debugging_mode: bool = False,
     data_archive_dir: Optional[str] = None,
+    chunks=-1,
     parallel=False,
+    compute=False,
+    start_time=None,
+    end_time=None,
+    variables=None,
     **open_kwargs,
 ):
     """Retrieve DISDRODB product files for a given station.
@@ -179,13 +392,8 @@
     xarray.Dataset
 
     """
-    import xarray as xr
-
     from disdrodb.l0.l0a_processing import read_l0a_dataframe
 
-    # Check product validity
-    if product == "RAW":
-        raise ValueError("It's not possible to open the raw data with this function.")
     product_kwargs = product_kwargs if product_kwargs else {}
 
     # List product files
@@ -196,25 +404,36 @@
         station_name=station_name,
         product=product,
         debugging_mode=debugging_mode,
+        start_time=start_time,
+        end_time=end_time,
         **product_kwargs,
     )
 
+    # Open RAW files
+    # - For raw text files, return a DISDRODB L0A dataframe
+    # - For raw netCDF files, return a DISDRODB L0B dataset
+    if product == "RAW":
+        obj = open_raw_files(
+            filepaths=filepaths,
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+        )
+        return obj
+
     # Open L0A Parquet files
     if product == "L0A":
         return read_l0a_dataframe(filepaths)
 
     # Open DISDRODB netCDF files using xarray
-
-
-
-
-
-
-        engine="netcdf4",
-        combine="nested",  # 'by_coords',
-        concat_dim="time",
-        decode_timedelta=False,
+    ds = open_netcdf_files(
+        filepaths=filepaths,
+        chunks=chunks,
+        start_time=start_time,
+        end_time=end_time,
+        variables=variables,
         parallel=parallel,
+        compute=compute,
         **open_kwargs,
     )
     return ds
```
disdrodb/api/path.py
CHANGED

```diff
@@ -20,11 +20,12 @@
 import os
 
 from disdrodb.configs import get_data_archive_dir, get_metadata_archive_dir
+from disdrodb.constants import ARCHIVE_VERSION
 from disdrodb.utils.directories import check_directory_exists
 from disdrodb.utils.time import (
     ensure_sample_interval_in_seconds,
     get_file_start_end_time,
-
+    seconds_to_temporal_resolution,
 )
 
 ####--------------------------------------------------------------------------.
@@ -68,8 +69,6 @@ def define_disdrodb_path(
     dir_path : str
         Directory path
     """
-    from disdrodb import ARCHIVE_VERSION
-
     if len(campaign_name) > 0 and len(data_source) == 0:
         raise ValueError("If campaign_name is specified, data_source must be specified.")
 
```
```diff
@@ -349,6 +348,55 @@ def define_config_dir(product):
 #### Directory/Filepaths L0A and L0B products
 
 
+def define_partitioning_tree(time, folder_partitioning):
+    """Define the time directory tree given a timestep.
+
+    Parameters
+    ----------
+    time : datetime.datetime
+        Timestep.
+    folder_partitioning : str or None
+        Define the subdirectory structure where saving files.
+        Allowed values are:
+        - None: Files are saved directly in data_dir.
+        - "year": Files are saved under a subdirectory for the year.
+        - "year/month": Files are saved under subdirectories for year and month.
+        - "year/month/day": Files are saved under subdirectories for year, month and day.
+        - "year/month_name": Files are stored under subdirectories by year and month name.
+        - "year/quarter": Files are saved under subdirectories for year and quarter.
+
+    Returns
+    -------
+    str
+        A time-partitioned directory tree.
+    """
+    if folder_partitioning == "":
+        return ""
+    if folder_partitioning == "year":
+        year = str(time.year)
+        return year
+    if folder_partitioning == "year/month":
+        year = str(time.year)
+        month = str(time.month).zfill(2)
+        return os.path.join(year, month)
+    if folder_partitioning == "year/month/day":
+        year = str(time.year)
+        month = str(time.month).zfill(2)
+        day = str(time.day).zfill(2)
+        return os.path.join(year, month, day)
+    if folder_partitioning == "year/month_name":
+        year = str(time.year)
+        month = str(time.month_name())
+        return os.path.join(year, month)
+    if folder_partitioning == "year/quarter":
+        year = str(time.year)
+        # Calculate quarter: months 1-3 => Q1, 4-6 => Q2, etc.
+        quarter = (time.month - 1) // 3 + 1
+        quarter_dir = f"Q{quarter}"
+        return os.path.join(year, quarter_dir)
+    raise NotImplementedError(f"Unrecognized '{folder_partitioning}' folder partitioning scheme.")
+
+
 def define_file_folder_path(obj, data_dir, folder_partitioning):
     """
     Define the folder path where saving a file based on the dataset's starting time.
```
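A quick sketch of the trees `define_partitioning_tree` produces. Two quirks worth noting: the docstring advertises `None` for a flat layout while the code tests the empty string, and the `year/month_name` branch calls `time.month_name()`, which exists on `pandas.Timestamp` but not on plain `datetime.datetime`:

```python
import pandas as pd

from disdrodb.api.path import define_partitioning_tree

time = pd.Timestamp("2024-05-17 12:30")
define_partitioning_tree(time, "year/month/day")   # "2024/05/17" (separator is OS-dependent)
define_partitioning_tree(time, "year/quarter")     # "2024/Q2"
define_partitioning_tree(time, "year/month_name")  # "2024/May"
```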
```diff
@@ -382,32 +430,9 @@ def define_file_folder_path(obj, data_dir, folder_partitioning):
     # Retrieve the starting time from the dataset.
     starting_time, _ = get_file_start_end_time(obj)
 
-    # Build the folder path based on the chosen partition scheme
-
-
-    if folder_partitioning == "year":
-        year = str(starting_time.year)
-        return os.path.join(data_dir, year)
-    if folder_partitioning == "year/month":
-        year = str(starting_time.year)
-        month = str(starting_time.month).zfill(2)
-        return os.path.join(data_dir, year, month)
-    if folder_partitioning == "year/month/day":
-        year = str(starting_time.year)
-        month = str(starting_time.month).zfill(2)
-        day = str(starting_time.day).zfill(2)
-        return os.path.join(data_dir, year, month, day)
-    if folder_partitioning == "year/month_name":
-        year = str(starting_time.year)
-        month = str(starting_time.month_name())
-        return os.path.join(data_dir, year, month)
-    if folder_partitioning == "year/quarter":
-        year = str(starting_time.year)
-        # Calculate quarter: months 1-3 => Q1, 4-6 => Q2, etc.
-        quarter = (starting_time.month - 1) // 3 + 1
-        quarter_dir = f"Q{quarter}"
-        return os.path.join(data_dir, year, quarter_dir)
-    raise NotImplementedError(f"Unrecognized '{folder_partitioning}' folder partitioning scheme.")
+    # Build the folder path based on the chosen partition scheme
+    partitioning_tree = define_partitioning_tree(time=starting_time, folder_partitioning=folder_partitioning)
+    return os.path.join(data_dir, partitioning_tree)
 
 
 def define_product_dir_tree(
```
```diff
@@ -448,16 +473,16 @@ def define_product_dir_tree(
         sample_interval = product_kwargs.get("sample_interval")
         check_rolling(rolling)
         check_sample_interval(sample_interval)
-
-        return os.path.join(
+        temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
+        return os.path.join(temporal_resolution)
     if product == "L2M":
         rolling = product_kwargs.get("rolling")
         sample_interval = product_kwargs.get("sample_interval")
         model_name = product_kwargs.get("model_name")
         check_rolling(rolling)
         check_sample_interval(sample_interval)
-
-        return os.path.join(model_name,
+        temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
+        return os.path.join(model_name, temporal_resolution)
     raise ValueError(f"The product {product} is not defined.")
 
 
```
```diff
@@ -629,15 +654,15 @@
 #### Filenames for DISDRODB products
 
 
-def define_accumulation_acronym(seconds, rolling):
-    """Define the
+def define_temporal_resolution(seconds, rolling):
+    """Define the DISDRODB product temporal resolution.
 
-    Prefix the
+    Prefix the measurement interval with ROLL if rolling=True.
     """
-
+    temporal_resolution = seconds_to_temporal_resolution(seconds)
     if rolling:
-
-        return
+        temporal_resolution = f"ROLL{temporal_resolution}"
+    return temporal_resolution
 
 
 ####--------------------------------------------------------------------------.
```
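Assuming `seconds_to_temporal_resolution` renders 600 s as "10MIN" (consistent with the `L2E/10MIN.yaml` product configs shipped in this release), the rolling variant only gains a `ROLL` prefix, and the acronym then lands in the product filenames built below:

```python
from disdrodb.api.path import define_temporal_resolution

define_temporal_resolution(seconds=600, rolling=False)  # "10MIN" (assumed rendering)
define_temporal_resolution(seconds=600, rolling=True)   # "ROLL10MIN"

# Resulting L2E filename pattern (campaign, station and version are placeholders):
# L2E.ROLL10MIN.EPFL_2009.10.s20090707000000.e20090708000000.V0.nc
```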
```diff
@@ -685,32 +710,31 @@ def define_filename(
     str
         L0B file name.
     """
-    from disdrodb import ARCHIVE_VERSION
     from disdrodb.api.checks import check_product, check_product_kwargs
 
     product = check_product(product)
     product_kwargs = check_product_kwargs(product, product_kwargs)
 
     # -----------------------------------------.
-    # TODO: Define
-    # - ADD
-    # - Add
+    # TODO: Define temporal_resolution
+    # - ADD temporal_resolution also to L0A and L0B
+    # - Add temporal_resolution also to L0C and L1
 
     # -----------------------------------------.
-    # Define product
-
+    # Define product name
+    product_name = f"{product}"
     if product in ["L2E", "L2M"]:
         rolling = product_kwargs.get("rolling")
         sample_interval = product_kwargs.get("sample_interval")
-
-
+        temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
+        product_name = f"L2E.{temporal_resolution}"
     if product in ["L2M"]:
         model_name = product_kwargs.get("model_name")
-
+        product_name = f"L2M_{model_name}.{temporal_resolution}"
 
     # -----------------------------------------.
     # Define base filename
-    filename = f"{
+    filename = f"{product_name}.{campaign_name}.{station_name}"
 
     # -----------------------------------------.
     # Add prefix
```
```diff
@@ -759,8 +783,6 @@ def define_l0a_filename(df, campaign_name: str, station_name: str) -> str:
     str
         L0A file name.
     """
-    from disdrodb import ARCHIVE_VERSION
-
     starting_time, ending_time = get_file_start_end_time(df)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
@@ -786,8 +808,6 @@ def define_l0b_filename(ds, campaign_name: str, station_name: str) -> str:
     str
         L0B file name.
     """
-    from disdrodb import ARCHIVE_VERSION
-
     starting_time, ending_time = get_file_start_end_time(ds)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
@@ -813,18 +833,14 @@ def define_l0c_filename(ds, campaign_name: str, station_name: str) -> str:
     str
         L0B file name.
     """
-    from disdrodb import ARCHIVE_VERSION
-
     # TODO: add sample_interval as argument
     sample_interval = int(ensure_sample_interval_in_seconds(ds["sample_interval"]).data.item())
-
+    temporal_resolution = define_temporal_resolution(sample_interval, rolling=False)
     starting_time, ending_time = get_file_start_end_time(ds)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
     version = ARCHIVE_VERSION
-    filename = (
-        f"L0C.{sample_interval_acronym}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
-    )
+    filename = f"L0C.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
     return filename
 
 
@@ -845,18 +861,14 @@ def define_l1_filename(ds, campaign_name, station_name: str) -> str:
     str
         L1 file name.
     """
-    from disdrodb import ARCHIVE_VERSION
-
     # TODO: add sample_interval as argument
     sample_interval = int(ensure_sample_interval_in_seconds(ds["sample_interval"]).data.item())
-
+    temporal_resolution = define_temporal_resolution(sample_interval, rolling=False)
     starting_time, ending_time = get_file_start_end_time(ds)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
     version = ARCHIVE_VERSION
-    filename = (
-        f"L1.{sample_interval_acronym}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
-    )
+    filename = f"L1.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
     return filename
 
 
@@ -877,16 +889,12 @@ def define_l2e_filename(ds, campaign_name: str, station_name: str, sample_interv
     str
         L0B file name.
     """
-
-
-    sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
+    temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
     starting_time, ending_time = get_file_start_end_time(ds)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
     version = ARCHIVE_VERSION
-    filename = (
-        f"L2E.{sample_interval_acronym}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
-    )
+    filename = f"L2E.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
     return filename
 
 
@@ -914,15 +922,13 @@ def define_l2m_filename(
     str
         L0B file name.
     """
-
-
-    sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
+    temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
     starting_time, ending_time = get_file_start_end_time(ds)
     starting_time = starting_time.strftime("%Y%m%d%H%M%S")
     ending_time = ending_time.strftime("%Y%m%d%H%M%S")
     version = ARCHIVE_VERSION
     filename = (
-        f"L2M_{model_name}.{
+        f"L2M_{model_name}.{temporal_resolution}.{campaign_name}."
         + f"{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
     )
     return filename
```
disdrodb/api/search.py
CHANGED

```diff
@@ -16,14 +16,13 @@ from disdrodb.api.path import (
     define_station_dir,
 )
 from disdrodb.configs import get_data_archive_dir, get_metadata_archive_dir
-from disdrodb.
+from disdrodb.constants import PRODUCTS_REQUIREMENTS
+from disdrodb.utils.directories import contains_files, contains_netcdf_or_parquet_files, list_directories, list_files
 from disdrodb.utils.yaml import read_yaml
 
 
 def get_required_product(product):
     """Determine the required product for input product processing."""
-    from disdrodb import PRODUCTS_REQUIREMENTS
-
     # Check input
     check_product(product)
     # Determine required product
@@ -37,7 +36,8 @@
 
 def list_data_sources(metadata_archive_dir, data_sources=None, invalid_fields_policy="raise"):
     """List data sources names in the DISDRODB Metadata Archive."""
-
+    path = os.path.join(metadata_archive_dir, "METADATA")
+    available_data_sources = sorted(list_directories(path, return_paths=False))
     # Filter by optionally specified data_sources
     if data_sources is not None:
         available_data_sources = check_valid_fields(
@@ -52,7 +52,7 @@ def list_data_sources(metadata_archive_dir, data_sources=None, invalid_fields_po
 
 def _list_campaign_names(metadata_archive_dir, data_source):
     data_source_dir = define_data_source_dir(metadata_archive_dir, product="METADATA", data_source=data_source)
-    campaign_names =
+    campaign_names = sorted(list_directories(data_source_dir, return_paths=False))
     return campaign_names
 
 
@@ -109,7 +109,7 @@ def _list_station_names(metadata_archive_dir, data_source, campaign_name):
         data_source=data_source,
         campaign_name=campaign_name,
     )
-    metadata_filenames =
+    metadata_filenames = sorted(list_files(metadata_dir, glob_pattern="*.yml", return_paths=False))
     station_names = [fname.replace(".yml", "").replace(".yaml", "") for fname in metadata_filenames]
     return station_names
 
```
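The module-level import of `PRODUCTS_REQUIREMENTS` (moved out of `get_required_product`) makes the product chain explicit. A minimal sketch of how such a requirements mapping resolves the input product for a processing step; the entries below are assumptions for illustration, the real mapping lives in `disdrodb.constants`:

```python
# Hypothetical product-dependency mapping (the real one is disdrodb.constants.PRODUCTS_REQUIREMENTS)
PRODUCTS_REQUIREMENTS = {
    "L0A": "RAW",
    "L0B": "L0A",
    "L0C": "L0B",
    "L1": "L0C",
    "L2E": "L1",
    "L2M": "L2E",
}


def get_required_product(product: str) -> str:
    """Return the product that must exist before `product` can be generated."""
    return PRODUCTS_REQUIREMENTS[product]


print(get_required_product("L2E"))  # L1
```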