disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +68 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +177 -24
- disdrodb/api/configs.py +3 -3
- disdrodb/api/info.py +13 -13
- disdrodb/api/io.py +281 -22
- disdrodb/api/path.py +184 -195
- disdrodb/api/search.py +18 -9
- disdrodb/cli/disdrodb_create_summary.py +103 -0
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
- disdrodb/cli/disdrodb_run_l1_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +127 -11
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/issue/writer.py +2 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +37 -32
- disdrodb/l0/l0b_nc_processing.py +118 -8
- disdrodb/l0/l0b_processing.py +30 -65
- disdrodb/l0/l0c_processing.py +369 -259
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
- disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/fall_velocity.py +46 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +46 -45
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/processing.py +268 -117
- disdrodb/metadata/checks.py +132 -125
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +631 -345
- disdrodb/psd/models.py +9 -6
- disdrodb/routines/__init__.py +54 -0
- disdrodb/{l0/routines.py → routines/l0.py} +316 -355
- disdrodb/{l1/routines.py → routines/l1.py} +76 -116
- disdrodb/routines/l2.py +1019 -0
- disdrodb/{routines.py → routines/wrappers.py} +98 -10
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +61 -37
- disdrodb/scattering/permittivity.py +504 -0
- disdrodb/scattering/routines.py +746 -184
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4196 -0
- disdrodb/utils/archiving.py +434 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/cli.py +5 -5
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +121 -9
- disdrodb/utils/dataframe.py +61 -7
- disdrodb/utils/decorators.py +31 -0
- disdrodb/utils/directories.py +35 -15
- disdrodb/utils/encoding.py +37 -19
- disdrodb/{l2 → utils}/event.py +15 -173
- disdrodb/utils/logger.py +14 -7
- disdrodb/utils/manipulations.py +81 -0
- disdrodb/utils/routines.py +166 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +35 -177
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +5 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +398 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
- disdrodb/l1/encoding_attrs.py +0 -642
- disdrodb/l2/processing_options.py +0 -213
- disdrodb/l2/routines.py +0 -868
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
disdrodb/api/io.py
CHANGED
```diff
@@ -17,6 +17,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Routines to list and open DISDRODB products."""
+import datetime
 import os
 import shutil
 import subprocess
@@ -24,6 +25,14 @@ import sys
 from pathlib import Path
 from typing import Optional
 
+import numpy as np
+
+from disdrodb.api.checks import (
+    check_filepaths,
+    check_start_end_time,
+    get_current_utc_time,
+)
+from disdrodb.api.info import get_start_end_time_from_filepaths
 from disdrodb.api.path import (
     define_campaign_dir,
     define_data_dir,
@@ -48,6 +57,75 @@ def filter_filepaths(filepaths, debugging_mode):
     return filepaths
 
 
+def is_within_time_period(l_start_time, l_end_time, start_time, end_time):
+    """Assess which files are within the start and end time."""
+    # - Case 1
+    #     s               e
+    #     |               |
+    #   ---------> (-------->)
+    idx_select1 = np.logical_and(l_start_time <= start_time, l_end_time > start_time)
+    # - Case 2
+    #     s               e
+    #     |               |
+    #       ---------(-.)
+    idx_select2 = np.logical_and(l_start_time >= start_time, l_end_time <= end_time)
+    # - Case 3
+    #     s               e
+    #     |               |
+    #          -------------
+    idx_select3 = np.logical_and(l_start_time < end_time, l_end_time > end_time)
+    # - Get idx where one of the cases occur
+    idx_select = np.logical_or.reduce([idx_select1, idx_select2, idx_select3])
+    return idx_select
+
+
+def filter_by_time(filepaths, start_time=None, end_time=None):
+    """Filter filepaths by start_time and end_time.
+
+    Parameters
+    ----------
+    filepaths : list
+        List of filepaths.
+    start_time : datetime.datetime
+        Start time.
+        If ``None``, will be set to 1997-01-01.
+    end_time : datetime.datetime
+        End time.
+        If ``None`` will be set to current UTC time.
+
+    Returns
+    -------
+    filepaths : list
+        List of valid filepaths.
+        If no valid filepaths, returns an empty list !
+
+    """
+    # -------------------------------------------------------------------------.
+    # Check filepaths
+    if isinstance(filepaths, type(None)):
+        return []
+    filepaths = check_filepaths(filepaths)
+    if len(filepaths) == 0:
+        return []
+
+    # -------------------------------------------------------------------------.
+    # Check start_time and end_time
+    if start_time is None:
+        start_time = datetime.datetime(1978, 1, 1, 0, 0, 0)  # Dummy start
+    if end_time is None:
+        end_time = get_current_utc_time()  # Current time
+    start_time, end_time = check_start_end_time(start_time, end_time)
+
+    # -------------------------------------------------------------------------.
+    # - Retrieve start_time and end_time of GPM granules
+    l_start_time, l_end_time = get_start_end_time_from_filepaths(filepaths)
+
+    # -------------------------------------------------------------------------.
+    # Select granules with data within the start and end time
+    idx_select = is_within_time_period(l_start_time, l_end_time, start_time=start_time, end_time=end_time)
+    return np.array(filepaths)[idx_select].tolist()
+
+
 def find_files(
     data_source,
     campaign_name,
@@ -55,7 +133,10 @@ def find_files(
     product,
     debugging_mode: bool = False,
     data_archive_dir: Optional[str] = None,
-
+    metadata_archive_dir: Optional[str] = None,
+    glob_pattern=None,
+    start_time=None,
+    end_time=None,
     **product_kwargs,
 ):
     """Retrieve DISDRODB product files for a give station.
@@ -100,6 +181,8 @@ def find_files(
         List of file paths.
 
     """
+    from disdrodb.metadata import read_station_metadata
+
     # Retrieve data directory
     data_dir = define_data_dir(
         data_archive_dir=data_archive_dir,
@@ -110,8 +193,17 @@ def find_files(
         # Product options
         **product_kwargs,
     )
-
-
+    # For the DISDRODB RAW product, retrieve glob_pattern from metadata if not specified
+    if product == "RAW" and glob_pattern is None:
+        metadata = read_station_metadata(
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            metadata_archive_dir=metadata_archive_dir,
+        )
+        glob_pattern = metadata.get("raw_data_glob_pattern", "")
+
+    # For the others DISDRODB products, define the correct glob pattern
     if product != "RAW":
         glob_pattern = "*.parquet" if product == "L0A" else "*.nc"
 
@@ -126,6 +218,13 @@ def find_files(
         msg = f"No {product} files are available in {data_dir}. Run {product} processing first."
         raise ValueError(msg)
 
+    # Filter files by start_time and end_time
+    if product != "RAW":
+        filepaths = filter_by_time(filepaths=filepaths, start_time=start_time, end_time=end_time)
+        if len(filepaths) == 0:
+            msg = f"No {product} files are available between {start_time} and {end_time}."
+            raise ValueError(msg)
+
     # Sort filepaths
     filepaths = sorted(filepaths)
     return filepaths
@@ -133,6 +232,154 @@ def find_files(
 
 ####----------------------------------------------------------------------------------
 #### DISDRODB Open Product Files
+
+
+def _open_raw_files(filepaths, data_source, campaign_name, station_name, metadata_archive_dir):
+    """Open raw files to DISDRODB L0A or L0B format.
+
+    Raw text files are opened into a DISDRODB L0A pandas Dataframe.
+    Raw netCDF files are opened into a DISDRODB L0B xarray Dataset.
+    """
+    from disdrodb.issue import read_station_issue
+    from disdrodb.l0 import generate_l0a, generate_l0b_from_nc, get_station_reader
+    from disdrodb.metadata import read_station_metadata
+
+    # Read station metadata
+    metadata = read_station_metadata(
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+        metadata_archive_dir=metadata_archive_dir,
+    )
+    sensor_name = metadata["sensor_name"]
+
+    # Read station issue YAML file
+    try:
+        issue_dict = read_station_issue(
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            metadata_archive_dir=metadata_archive_dir,
+        )
+    except Exception:
+        issue_dict = None
+
+    # Get reader
+    reader = get_station_reader(
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+        metadata_archive_dir=metadata_archive_dir,
+    )
+    # Return DISDRODB L0A dataframe if raw text files
+    if metadata["raw_data_format"] == "txt":
+        df = generate_l0a(
+            filepaths=filepaths,
+            reader=reader,
+            sensor_name=sensor_name,
+            issue_dict=issue_dict,
+            verbose=False,
+        )
+        return df
+
+    # Return DISDRODB L0B dataframe if raw netCDF files
+    ds = generate_l0b_from_nc(
+        filepaths=filepaths,
+        reader=reader,
+        sensor_name=sensor_name,
+        metadata=metadata,
+        issue_dict=issue_dict,
+        verbose=False,
+    )
+    return ds
+
+
+def filter_dataset_by_time(ds, start_time=None, end_time=None):
+    """Subset an xarray.Dataset by time, robust to duplicated/non-monotonic indices.
+
+    NOTE: ds.sel(time=slice(start_time, end_time)) fails in presence of duplicated
+    timesteps because time 'index is not monotonic increasing or decreasing'.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Dataset with a `time` coordinate.
+    start_time : np.datetime64 or None
+        Inclusive start bound. If None, no lower bound is applied.
+    end_time : np.datetime64 or None
+        Inclusive end bound. If None, no upper bound is applied.
+
+    Returns
+    -------
+    xr.Dataset
+        Subset dataset with the same ordering of timesteps (duplicates preserved).
+    """
+    time = ds["time"].to_numpy()
+    mask = np.ones(time.shape, dtype=bool)
+    if start_time is not None:
+        mask &= time >= np.array(start_time, dtype="datetime64[ns]")
+    if end_time is not None:
+        mask &= time <= np.array(end_time, dtype="datetime64[ns]")
+    return ds.isel(time=np.where(mask)[0])
+
+
+def open_netcdf_files(
+    filepaths,
+    chunks=-1,
+    start_time=None,
+    end_time=None,
+    variables=None,
+    parallel=False,
+    compute=True,
+    **open_kwargs,
+):
+    """Open DISDRODB netCDF files using xarray.
+
+    Using combine="nested" and join="outer" ensure that duplicated timesteps are not overwritten!
+    """
+    import xarray as xr
+
+    # Ensure variables is a list
+    if variables is not None and isinstance(variables, str):
+        variables = [variables]
+    # Define preprocessing function for parallel opening
+    preprocess = (lambda ds: ds[variables]) if parallel and variables is not None else None
+
+    # Open netcdf
+    ds = xr.open_mfdataset(
+        filepaths,
+        chunks=chunks,
+        combine="nested",
+        join="outer",
+        concat_dim="time",
+        engine="netcdf4",
+        parallel=parallel,
+        preprocess=preprocess,
+        compat="no_conflicts",
+        combine_attrs="override",
+        coords="different",  # maybe minimal?
+        decode_timedelta=False,
+        cache=False,
+        autoclose=True,
+        **open_kwargs,
+    )
+    # - Subset variables
+    if variables is not None and preprocess is None:
+        ds = ds[variables]
+    # - Subset time
+    if start_time is not None or end_time is not None:
+        ds = filter_dataset_by_time(ds, start_time=start_time, end_time=end_time)
+    # - If compute=True, load in memory and close connections to files
+    if compute:
+        dataset = ds.compute()
+        ds.close()
+        dataset.close()
+        del ds
+    else:
+        dataset = ds
+    return dataset
+
+
 def open_dataset(
     data_source,
     campaign_name,
@@ -141,7 +388,13 @@ def open_dataset(
     product_kwargs=None,
     debugging_mode: bool = False,
     data_archive_dir: Optional[str] = None,
+    metadata_archive_dir: Optional[str] = None,
+    chunks=-1,
     parallel=False,
+    compute=False,
+    start_time=None,
+    end_time=None,
+    variables=None,
     **open_kwargs,
 ):
     """Retrieve DISDRODB product files for a give station.
@@ -179,42 +432,50 @@ def open_dataset(
     xarray.Dataset
 
     """
-    import xarray as xr
-
     from disdrodb.l0.l0a_processing import read_l0a_dataframe
 
-    # Check product validity
-    if product == "RAW":
-        raise ValueError("It's not possible to open the raw data with this function.")
     product_kwargs = product_kwargs if product_kwargs else {}
 
     # List product files
    filepaths = find_files(
         data_archive_dir=data_archive_dir,
+        metadata_archive_dir=metadata_archive_dir,
         data_source=data_source,
         campaign_name=campaign_name,
         station_name=station_name,
         product=product,
         debugging_mode=debugging_mode,
+        start_time=start_time,
+        end_time=end_time,
         **product_kwargs,
     )
 
+    # Open RAW files
+    # - For raw txt files return DISDRODB L0A dataframe
+    # - For raw netCDF files return DISDRODB L0B dataframe
+    if product == "RAW":
+        obj = _open_raw_files(
+            filepaths=filepaths,
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            metadata_archive_dir=metadata_archive_dir,
+        )
+        return obj
+
     # Open L0A Parquet files
     if product == "L0A":
         return read_l0a_dataframe(filepaths)
 
     # Open DISDRODB netCDF files using xarray
-
-
-
-
-
-
-        engine="netcdf4",
-        combine="nested",  # 'by_coords',
-        concat_dim="time",
-        decode_timedelta=False,
+    ds = open_netcdf_files(
+        filepaths=filepaths,
+        chunks=chunks,
+        start_time=start_time,
+        end_time=end_time,
+        variables=variables,
         parallel=parallel,
+        compute=compute,
         **open_kwargs,
     )
     return ds
@@ -245,11 +506,9 @@ def remove_product(
         station_name=station_name,
         **product_kwargs,
     )
-
-    log_info(logger=logger, msg="Removal of {product} files started.", verbose=verbose)
+    log_info(logger=logger, msg="Removal of {product} files started.", verbose=verbose)
     shutil.rmtree(data_dir)
-
-    log_info(logger=logger, msg="Removal of {product} files ended.", verbose=verbose)
+    log_info(logger=logger, msg="Removal of {product} files ended.", verbose=verbose)
 
 
 ####--------------------------------------------------------------------------.
```
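To make the new time filtering concrete, here is a minimal sketch of the overlap test introduced above. `is_within_time_period` is the function added to `disdrodb/api/io.py` in this diff; the file coverage intervals and the query window below are invented for illustration.

```python
import numpy as np

from disdrodb.api.io import is_within_time_period

# Hypothetical file coverage intervals: [00:00-01:00], [01:00-02:00], [02:00-03:00]
l_start = np.array(["2024-01-01T00:00", "2024-01-01T01:00", "2024-01-01T02:00"], dtype="datetime64[s]")
l_end = np.array(["2024-01-01T01:00", "2024-01-01T02:00", "2024-01-01T03:00"], dtype="datetime64[s]")

# A query window of 00:30-01:30 overlaps only the first two files
mask = is_within_time_period(
    l_start,
    l_end,
    start_time=np.datetime64("2024-01-01T00:30"),
    end_time=np.datetime64("2024-01-01T01:30"),
)
print(mask)  # [ True  True False]
```

Similarly, `open_dataset` now forwards `start_time`, `end_time`, and `variables` to `find_files` and `open_netcdf_files`, and `compute=True` loads the subset into memory before closing the underlying file handles. A minimal usage sketch, assuming a station already present in the local DISDRODB archive (the data source, campaign, station, and variable names are placeholders, not values taken from this release):

```python
import datetime

from disdrodb.api.io import open_dataset

ds = open_dataset(
    data_source="MY_DATA_SOURCE",   # placeholder identifiers
    campaign_name="MY_CAMPAIGN",
    station_name="MY_STATION",
    product="L0B",
    start_time=datetime.datetime(2024, 1, 1),
    end_time=datetime.datetime(2024, 1, 7),
    variables=["raw_drop_number"],  # placeholder variable name
    compute=True,
)
```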