disdrodb 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +4 -0
- disdrodb/_version.py +2 -2
- disdrodb/accessor/methods.py +14 -0
- disdrodb/api/checks.py +8 -7
- disdrodb/api/io.py +81 -29
- disdrodb/api/path.py +17 -14
- disdrodb/api/search.py +15 -18
- disdrodb/cli/disdrodb_open_products_options.py +38 -0
- disdrodb/cli/disdrodb_run.py +2 -2
- disdrodb/cli/disdrodb_run_station.py +4 -4
- disdrodb/configs.py +1 -1
- disdrodb/data_transfer/download_data.py +70 -1
- disdrodb/etc/configs/attributes.yaml +62 -8
- disdrodb/etc/configs/encodings.yaml +28 -0
- disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/GAMMA_ML.yaml +1 -1
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +1 -1
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/global.yaml +4 -4
- disdrodb/fall_velocity/graupel.py +8 -8
- disdrodb/fall_velocity/hail.py +2 -2
- disdrodb/fall_velocity/rain.py +33 -5
- disdrodb/issue/checks.py +1 -1
- disdrodb/l0/l0_reader.py +1 -1
- disdrodb/l0/l0a_processing.py +2 -2
- disdrodb/l0/l0b_nc_processing.py +5 -5
- disdrodb/l0/l0b_processing.py +20 -24
- disdrodb/l0/l0c_processing.py +18 -13
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/VIETNAM/IGE_PARSIVEL2.py +239 -0
- disdrodb/l0/template_tools.py +13 -13
- disdrodb/l1/classification.py +10 -6
- disdrodb/l2/empirical_dsd.py +25 -15
- disdrodb/l2/processing.py +32 -14
- disdrodb/metadata/download.py +1 -1
- disdrodb/metadata/geolocation.py +4 -4
- disdrodb/metadata/reader.py +3 -3
- disdrodb/metadata/search.py +10 -8
- disdrodb/psd/__init__.py +4 -0
- disdrodb/psd/fitting.py +2660 -592
- disdrodb/psd/gof_metrics.py +389 -0
- disdrodb/psd/grid_search.py +1066 -0
- disdrodb/psd/models.py +1281 -145
- disdrodb/routines/l2.py +6 -6
- disdrodb/routines/options_validation.py +8 -8
- disdrodb/scattering/axis_ratio.py +70 -2
- disdrodb/scattering/permittivity.py +13 -10
- disdrodb/scattering/routines.py +10 -10
- disdrodb/summary/routines.py +23 -20
- disdrodb/utils/archiving.py +29 -22
- disdrodb/utils/attrs.py +6 -4
- disdrodb/utils/dataframe.py +4 -4
- disdrodb/utils/encoding.py +3 -1
- disdrodb/utils/event.py +9 -9
- disdrodb/utils/logger.py +4 -7
- disdrodb/utils/manipulations.py +2 -2
- disdrodb/utils/subsetting.py +1 -1
- disdrodb/utils/time.py +8 -7
- disdrodb/viz/plots.py +25 -17
- {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/METADATA +44 -33
- {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/RECORD +68 -66
- {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/entry_points.txt +1 -0
- disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_Z_MAE.yaml +0 -6
- {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/WHEEL +0 -0
- {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/top_level.txt +0 -0
disdrodb/__init__.py
CHANGED
|
@@ -78,6 +78,8 @@ from disdrodb.routines import (
|
|
|
78
78
|
run_l2e_station,
|
|
79
79
|
run_l2m,
|
|
80
80
|
run_l2m_station,
|
|
81
|
+
run,
|
|
82
|
+
run_station,
|
|
81
83
|
)
|
|
82
84
|
from disdrodb.utils.manipulations import convert_from_decibel as idecibel
|
|
83
85
|
from disdrodb.utils.manipulations import convert_to_decibel as decibel
|
|
@@ -158,6 +160,7 @@ __all__ = [
|
|
|
158
160
|
"read_metadata_archive",
|
|
159
161
|
"read_station_metadata",
|
|
160
162
|
# Functions invoking the disdrodb_run_* scripts in the terminals
|
|
163
|
+
"run",
|
|
161
164
|
"run_l0",
|
|
162
165
|
"run_l0_station",
|
|
163
166
|
"run_l0a",
|
|
@@ -172,6 +175,7 @@ __all__ = [
|
|
|
172
175
|
"run_l2e_station",
|
|
173
176
|
"run_l2m",
|
|
174
177
|
"run_l2m_station",
|
|
178
|
+
"run_station",
|
|
175
179
|
]
|
|
176
180
|
|
|
177
181
|
|
disdrodb/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.5.
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 5,
|
|
31
|
+
__version__ = version = '0.5.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 5, 1)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
disdrodb/accessor/methods.py
CHANGED
|
@@ -135,6 +135,20 @@ class DISDRODB_Dataset_Accessor(DISDRODB_Base_Accessor):
|
|
|
135
135
|
|
|
136
136
|
return plot_raw_and_filtered_spectra(self._obj, **kwargs)
|
|
137
137
|
|
|
138
|
+
@property
|
|
139
|
+
def psd(self):
|
|
140
|
+
"""Return PSD class from DISDRODB L2M product."""
|
|
141
|
+
from disdrodb.psd.models import create_psd_from_dataset
|
|
142
|
+
|
|
143
|
+
return create_psd_from_dataset(self._obj)
|
|
144
|
+
|
|
145
|
+
@property
|
|
146
|
+
def psd_parameters(self):
|
|
147
|
+
"""Return PSD parameters from DISDRODB L2M product."""
|
|
148
|
+
from disdrodb.psd.models import get_parameters_from_dataset
|
|
149
|
+
|
|
150
|
+
return get_parameters_from_dataset(self._obj)
|
|
151
|
+
|
|
138
152
|
|
|
139
153
|
@xr.register_dataarray_accessor("disdrodb")
|
|
140
154
|
class DISDRODB_DataArray_Accessor(DISDRODB_Base_Accessor):
|
disdrodb/api/checks.py
CHANGED
|
@@ -171,16 +171,17 @@ def check_folder_partitioning(folder_partitioning):
|
|
|
171
171
|
folder_partitioning : str or None
|
|
172
172
|
Defines the subdirectory structure based on the dataset's start time.
|
|
173
173
|
Allowed values are:
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
174
|
+
|
|
175
|
+
- "" or None: No additional subdirectories, files are saved directly in dir.
|
|
176
|
+
- "year": Files are stored under a subdirectory for the year (<dir>/2025).
|
|
177
|
+
- "year/month": Files are stored under subdirectories by year and month (<dir>/2025/04).
|
|
178
|
+
- "year/month/day": Files are stored under subdirectories by year, month and day (<dir>/2025/04/01).
|
|
179
|
+
- "year/month_name": Files are stored under subdirectories by year and month name (<dir>/2025/April).
|
|
180
|
+
- "year/quarter": Files are stored under subdirectories by year and quarter (<dir>/2025/Q2).
|
|
180
181
|
|
|
181
182
|
Returns
|
|
182
183
|
-------
|
|
183
|
-
|
|
184
|
+
str
|
|
184
185
|
The verified folder partitioning scheme.
|
|
185
186
|
"""
|
|
186
187
|
valid_options = ["", "year", "year/month", "year/month/day", "year/month_name", "year/quarter"]
|
disdrodb/api/io.py
CHANGED
|
@@ -335,24 +335,24 @@ def filter_dataset_by_time(ds, start_time=None, end_time=None):
|
|
|
335
335
|
|
|
336
336
|
Parameters
|
|
337
337
|
----------
|
|
338
|
-
ds :
|
|
338
|
+
ds : xarray.Dataset
|
|
339
339
|
Dataset with a `time` coordinate.
|
|
340
|
-
start_time :
|
|
340
|
+
start_time : str, numpy.datetime64 or None
|
|
341
341
|
Inclusive start bound. If None, no lower bound is applied.
|
|
342
|
-
end_time :
|
|
342
|
+
end_time : str, numpy.datetime64 or None
|
|
343
343
|
Inclusive end bound. If None, no upper bound is applied.
|
|
344
344
|
|
|
345
345
|
Returns
|
|
346
346
|
-------
|
|
347
|
-
|
|
347
|
+
xarray.Dataset
|
|
348
348
|
Subset dataset with the same ordering of timesteps (duplicates preserved).
|
|
349
349
|
"""
|
|
350
350
|
time = ds["time"].to_numpy()
|
|
351
351
|
mask = np.ones(time.shape, dtype=bool)
|
|
352
352
|
if start_time is not None:
|
|
353
|
-
mask &= time >= np.array(start_time, dtype="datetime64[ns]")
|
|
353
|
+
mask &= time >= np.array(check_time(start_time), dtype="datetime64[ns]")
|
|
354
354
|
if end_time is not None:
|
|
355
|
-
mask &= time <= np.array(end_time, dtype="datetime64[ns]")
|
|
355
|
+
mask &= time <= np.array(check_time(end_time), dtype="datetime64[ns]")
|
|
356
356
|
return ds.isel(time=np.where(mask)[0])
|
|
357
357
|
|
|
358
358
|
|
|
@@ -445,30 +445,73 @@ def open_netcdf_files(
|
|
|
445
445
|
engine="netcdf4",
|
|
446
446
|
**open_kwargs,
|
|
447
447
|
):
|
|
448
|
-
"""Open DISDRODB
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
448
|
+
"""Open DISDRODB NetCDF files using xarray.
|
|
449
|
+
|
|
450
|
+
This function opens and concatenates multiple NetCDF files using
|
|
451
|
+
``xarray.open_mfdataset`` with settings optimized for time-based
|
|
452
|
+
concatenation and minimal variable checking.
|
|
453
|
+
|
|
454
|
+
The function uses ``data_vars="minimal"``, ``coords="minimal"``,
|
|
455
|
+
and ``compat="override"`` to:
|
|
456
|
+
|
|
457
|
+
- Concatenate only variables that depend on the time dimension.
|
|
458
|
+
- Skip consistency checks for variables without a time dimension, taking them from the first file instead.
|
|
459
|
+
|
|
460
|
+
See: https://github.com/pydata/xarray/issues/1385#issuecomment-1958761334
|
|
461
|
+
|
|
462
|
+
Using ``combine="nested"`` and ``join="outer"`` ensures that duplicated
|
|
463
|
+
timesteps are preserved and not overwritten.
|
|
464
|
+
|
|
465
|
+
Behavior depends on ``decode_cf``:
|
|
466
|
+
|
|
467
|
+
- If ``decode_cf=False``: ``lat`` and ``lon`` are treated as data variables and concatenated without validation.
|
|
468
|
+
- If ``decode_cf=True``: ``lat`` and ``lon`` are promoted to coordinates and checked for equality across files.
|
|
469
|
+
|
|
470
|
+
Special handling of ``sample_interval``:
|
|
471
|
+
|
|
472
|
+
- For L0B products, if ``sample_interval`` varies with time, it is safely concatenated.
|
|
473
|
+
- For L0C products, if ``sample_interval`` differs across files, only the value from the first file is retained.
|
|
474
|
+
- For L1 and L2 processing, only files with identical ``sample_interval`` values should be passed to this function.
|
|
475
|
+
|
|
476
|
+
Parameters
|
|
477
|
+
----------
|
|
478
|
+
filepaths : str or sequence of str
|
|
479
|
+
Path(s) to NetCDF files to open.
|
|
480
|
+
chunks : int, dict, or None, optional
|
|
481
|
+
Chunking strategy passed to xarray for dask-backed arrays.
|
|
482
|
+
Use ``-1`` to load data into a single chunk (default).
|
|
483
|
+
start_time : str or datetime-like or None, optional
|
|
484
|
+
Start time for temporal subsetting.
|
|
485
|
+
end_time : str or datetime-like or None, optional
|
|
486
|
+
End time for temporal subsetting.
|
|
487
|
+
variables : sequence of str or None, optional
|
|
488
|
+
Subset of variables to retain.
|
|
489
|
+
parallel : bool, optional
|
|
490
|
+
Whether to open files in parallel using dask.
|
|
491
|
+
The default is ``False``.
|
|
492
|
+
compute : bool, optional
|
|
493
|
+
Whether to immediately compute the dataset when using dask.
|
|
494
|
+
The default is ``True``.
|
|
495
|
+
engine : str, optional
|
|
496
|
+
Backend engine used by xarray to read NetCDF files.
|
|
497
|
+
The default is "netcdf4".
|
|
498
|
+
**open_kwargs
|
|
499
|
+
Additional keyword arguments passed to
|
|
500
|
+
``xarray.open_mfdataset``.
|
|
501
|
+
|
|
502
|
+
Returns
|
|
503
|
+
-------
|
|
504
|
+
xarray.Dataset
|
|
505
|
+
The opened and concatenated dataset.
|
|
471
506
|
|
|
507
|
+
See Also
|
|
508
|
+
--------
|
|
509
|
+
xarray.open_mfdataset
|
|
510
|
+
|
|
511
|
+
Notes
|
|
512
|
+
-----
|
|
513
|
+
This function is decorated with ``ensure_safe_open_mfdataset`` to
|
|
514
|
+
protect against unsafe or incompatible combinations of arguments.
|
|
472
515
|
"""
|
|
473
516
|
import xarray as xr
|
|
474
517
|
|
|
@@ -804,6 +847,15 @@ def open_readers_directory():
|
|
|
804
847
|
open_file_explorer(readers_directory)
|
|
805
848
|
|
|
806
849
|
|
|
850
|
+
def open_products_options():
|
|
851
|
+
"""Open the disdrodb product options directory."""
|
|
852
|
+
from disdrodb.configs import get_products_configs_dir
|
|
853
|
+
|
|
854
|
+
products_configs_dir = get_products_configs_dir()
|
|
855
|
+
|
|
856
|
+
open_file_explorer(products_configs_dir)
|
|
857
|
+
|
|
858
|
+
|
|
807
859
|
def open_metadata_archive(
|
|
808
860
|
metadata_archive_dir=None,
|
|
809
861
|
):
|
disdrodb/api/path.py
CHANGED
|
@@ -358,12 +358,13 @@ def define_partitioning_tree(time, folder_partitioning):
|
|
|
358
358
|
folder_partitioning : str or None
|
|
359
359
|
Define the subdirectory structure where saving files.
|
|
360
360
|
Allowed values are:
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
361
|
+
|
|
362
|
+
- None: Files are saved directly in data_dir.
|
|
363
|
+
- "year": Files are saved under a subdirectory for the year.
|
|
364
|
+
- "year/month": Files are saved under subdirectories for year and month.
|
|
365
|
+
- "year/month/day": Files are saved under subdirectories for year, month and day
|
|
366
|
+
- "year/month_name": Files are stored under subdirectories by year and month name
|
|
367
|
+
- "year/quarter": Files are saved under subdirectories for year and quarter.
|
|
367
368
|
|
|
368
369
|
Returns
|
|
369
370
|
-------
|
|
@@ -411,12 +412,14 @@ def define_file_folder_path(obj, dir_path, folder_partitioning):
|
|
|
411
412
|
folder_partitioning : str or None
|
|
412
413
|
Define the subdirectory structure where saving files.
|
|
413
414
|
Allowed values are:
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
415
|
+
|
|
416
|
+
- None or "": Files are saved directly in data_dir.
|
|
417
|
+
- "year": Files are saved under a subdirectory for the year.
|
|
418
|
+
- "year/month": Files are saved under subdirectories for year and month.
|
|
419
|
+
- "year/month/day": Files are saved under subdirectories for year, month and day
|
|
420
|
+
- "year/month_name": Files are stored under subdirectories by year and month name
|
|
421
|
+
- "year/quarter": Files are saved under subdirectories for year and quarter.
|
|
422
|
+
|
|
420
423
|
|
|
421
424
|
Returns
|
|
422
425
|
-------
|
|
@@ -682,10 +685,10 @@ def define_filename(
|
|
|
682
685
|
Name of the campaign.
|
|
683
686
|
station_name : str
|
|
684
687
|
Name of the station.
|
|
685
|
-
start_time : datetime.
|
|
688
|
+
start_time : datetime.datetime, optional
|
|
686
689
|
Start time.
|
|
687
690
|
Required if add_time_period = True.
|
|
688
|
-
end_time : datetime.
|
|
691
|
+
end_time : datetime.datetime, optional
|
|
689
692
|
End time.
|
|
690
693
|
Required if add_time_period = True.
|
|
691
694
|
temporal_resolution : str, optional
|
disdrodb/api/search.py
CHANGED
|
@@ -310,8 +310,7 @@ def available_stations(
|
|
|
310
310
|
metadata_archive_dir=None,
|
|
311
311
|
**filter_kwargs,
|
|
312
312
|
):
|
|
313
|
-
"""
|
|
314
|
-
Return stations information for which metadata or product data are available on disk.
|
|
313
|
+
"""Return stations information for which metadata or product data are available on disk.
|
|
315
314
|
|
|
316
315
|
This function queries the DISDRODB Metadata Archive and, optionally, the
|
|
317
316
|
local DISDRODB Data Archive to identify stations that satisfy the specified
|
|
@@ -334,31 +333,29 @@ def available_stations(
|
|
|
334
333
|
it lists the stations present in the local DISDRODB Data Archive given the specified filtering criteria.
|
|
335
334
|
The default is None.
|
|
336
335
|
|
|
337
|
-
data_sources : str or
|
|
336
|
+
data_sources : str or list of str, optional
|
|
338
337
|
One or more data source identifiers to filter stations by.
|
|
339
338
|
The name(s) must be UPPER CASE.
|
|
340
339
|
If None, no filtering on data source is applied. The default is ``None``.
|
|
341
|
-
campaign_names : str or
|
|
340
|
+
campaign_names : str or list of str, optional
|
|
342
341
|
One or more campaign names to filter stations by.
|
|
343
342
|
The name(s) must be UPPER CASE.
|
|
344
343
|
If None, no filtering on campaign is applied. The default is ``None``.
|
|
345
|
-
station_names : str or
|
|
344
|
+
station_names : str or list of str, optional
|
|
346
345
|
One or more station names to include.
|
|
347
346
|
If None, all stations matching other filters are considered. The default is ``None``.
|
|
348
347
|
available_data : bool, optional
|
|
349
348
|
|
|
350
349
|
If ``product`` is not specified:
|
|
351
350
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
online DISDRODB Decentralized Data Archive (i.e., stations with the disdrodb_data_url in the metadata).
|
|
351
|
+
- if ``available_data=False``, return stations present in the DISDRODB Metadata Archive.
|
|
352
|
+
- if ``available_data=True``, return stations with data available on the online DISDRODB Decentralized Data Archive (i.e., stations with the disdrodb_data_url in the metadata).
|
|
355
353
|
|
|
356
354
|
If ``product`` is specified:
|
|
357
355
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
in the local DISDRODB Data Archive.
|
|
356
|
+
- if ``available_data=False``, return stations where the product directory exists in the local DISDRODB Data Archive.
|
|
357
|
+
- if ``available_data=True``, return stations where product data exists in the local DISDRODB Data Archive.
|
|
358
|
+
|
|
362
359
|
The default is False.
|
|
363
360
|
|
|
364
361
|
return_tuple : bool, optional
|
|
@@ -368,17 +365,17 @@ def available_stations(
|
|
|
368
365
|
raise_error_if_empty : bool, optional
|
|
369
366
|
If True and no stations satisfy the criteria, raise a ``ValueError``.
|
|
370
367
|
If False, return an empty list/tuple. The default is False.
|
|
371
|
-
invalid_fields_policy :
|
|
368
|
+
invalid_fields_policy : str, optional
|
|
372
369
|
How to handle invalid filter values for ``data_sources``, ``campaign_names``,
|
|
373
370
|
or ``station_names`` that are not present in the metadata archive:
|
|
374
371
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
372
|
+
- 'raise' : raise a ``ValueError`` (default)
|
|
373
|
+
- 'warn' : emit a warning, then ignore invalid entries
|
|
374
|
+
- 'ignore': silently drop invalid entries
|
|
378
375
|
|
|
379
376
|
data_archive_dir : str or Path-like, optional
|
|
380
377
|
Path to the root of the local DISDRODB Data Archive.
|
|
381
|
-
Required only if ``product``is specified.
|
|
378
|
+
Required only if ``product`` is specified.
|
|
382
379
|
If None, the default data archive base directory is used. Default is None.
|
|
383
380
|
metadata_archive_dir : str or Path-like, optional
|
|
384
381
|
Path to the root of the DISDRODB Metadata Archive.
|
|
@@ -405,7 +402,7 @@ def available_stations(
|
|
|
405
402
|
>>> raw_stations = available_stations(product="RAW", available_data=True)
|
|
406
403
|
>>> # List stations of specific data sources
|
|
407
404
|
>>> stations = available_stations(data_sources=["NASA", "EPFL"])
|
|
408
|
-
"""
|
|
405
|
+
""" # noqa: E501
|
|
409
406
|
# Retrieve DISDRODB Data and Metadata Archive directories
|
|
410
407
|
metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)
|
|
411
408
|
product = check_product(product) if product is not None else None
|
|
disdrodb/cli/disdrodb_open_products_options.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------.
|
|
2
|
+
# Copyright (c) 2021-2026 DISDRODB developers
|
|
3
|
+
#
|
|
4
|
+
# This program is free software: you can redistribute it and/or modify
|
|
5
|
+
# it under the terms of the GNU General Public License as published by
|
|
6
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
7
|
+
# (at your option) any later version.
|
|
8
|
+
#
|
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
# GNU General Public License for more details.
|
|
13
|
+
#
|
|
14
|
+
# You should have received a copy of the GNU General Public License
|
|
15
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
|
+
# -----------------------------------------------------------------------------.
|
|
17
|
+
"""Routine to open the DISDRODB Products Options."""
|
|
18
|
+
|
|
19
|
+
import sys
|
|
20
|
+
|
|
21
|
+
import click
|
|
22
|
+
|
|
23
|
+
sys.tracebacklimit = 0 # avoid full traceback error if occur
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@click.command()
|
|
27
|
+
def disdrodb_open_products_options():
|
|
28
|
+
"""Open the DISDRODB Products Configuration Directory in the system file explorer.
|
|
29
|
+
|
|
30
|
+
Examples
|
|
31
|
+
--------
|
|
32
|
+
# Open the DISDRODB products configuration directory in the file explorer
|
|
33
|
+
disdrodb_open_products_options
|
|
34
|
+
|
|
35
|
+
"""
|
|
36
|
+
from disdrodb.api.io import open_products_options
|
|
37
|
+
|
|
38
|
+
open_products_options()
|
disdrodb/cli/disdrodb_run.py
CHANGED
|
@@ -57,7 +57,7 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
|
|
|
57
57
|
"--l2m_processing",
|
|
58
58
|
type=bool,
|
|
59
59
|
show_default=True,
|
|
60
|
-
default=
|
|
60
|
+
default=False,
|
|
61
61
|
help="Run L2M processing.",
|
|
62
62
|
)
|
|
63
63
|
@click_processing_options
|
|
@@ -77,7 +77,7 @@ def disdrodb_run(
|
|
|
77
77
|
# Higher level processing options
|
|
78
78
|
l1_processing: bool = True,
|
|
79
79
|
l2e_processing: bool = True,
|
|
80
|
-
l2m_processing: bool =
|
|
80
|
+
l2m_processing: bool = False,
|
|
81
81
|
# Processing options
|
|
82
82
|
force: bool = False,
|
|
83
83
|
verbose: bool = True,
|
|
disdrodb/cli/disdrodb_run_station.py
CHANGED
|
@@ -59,7 +59,7 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
|
|
|
59
59
|
"--l2m_processing",
|
|
60
60
|
type=bool,
|
|
61
61
|
show_default=True,
|
|
62
|
-
default=
|
|
62
|
+
default=False,
|
|
63
63
|
help="Run L2M processing.",
|
|
64
64
|
)
|
|
65
65
|
@click_data_archive_dir_option
|
|
@@ -78,7 +78,7 @@ def disdrodb_run_station(
|
|
|
78
78
|
# Higher level processing options
|
|
79
79
|
l1_processing: bool = True,
|
|
80
80
|
l2e_processing: bool = True,
|
|
81
|
-
l2m_processing: bool =
|
|
81
|
+
l2m_processing: bool = False,
|
|
82
82
|
# Processing options
|
|
83
83
|
force: bool = False,
|
|
84
84
|
verbose: bool = True,
|
|
@@ -131,8 +131,8 @@ def disdrodb_run_station(
|
|
|
131
131
|
# Process station with debugging mode and custom workers
|
|
132
132
|
DASK_NUM_WORKERS=4 disdrodb_run_station NETHERLANDS DELFT PAR001_Cabauw --debugging_mode True
|
|
133
133
|
|
|
134
|
-
# Process station,
|
|
135
|
-
disdrodb_run_station FRANCE ENPC_CARNOT Carnot_Pars1 --l2m_processing
|
|
134
|
+
# Process station, create L2M product (disabled by default)
|
|
135
|
+
disdrodb_run_station FRANCE ENPC_CARNOT Carnot_Pars1 --l2m_processing True
|
|
136
136
|
|
|
137
137
|
# Force overwrite existing files with verbose output
|
|
138
138
|
disdrodb_run_station EPFL HYMEX_2012 10 --force True --verbose True
|
disdrodb/configs.py
CHANGED
|
disdrodb/data_transfer/download_data.py
CHANGED
|
@@ -24,6 +24,7 @@ import urllib.parse
|
|
|
24
24
|
|
|
25
25
|
import click
|
|
26
26
|
import pooch
|
|
27
|
+
import requests
|
|
27
28
|
import tqdm
|
|
28
29
|
|
|
29
30
|
from disdrodb.api.path import define_metadata_filepath, define_station_dir
|
|
@@ -405,6 +406,8 @@ def build_webserver_wget_command(url: str, cut_dirs: int, dst_dir: str, verbose:
|
|
|
405
406
|
f"--cut-dirs={cut_dirs}",
|
|
406
407
|
# Downloads just new data without re-downloading existing files
|
|
407
408
|
"--timestamping", # -N
|
|
409
|
+
# Specify agent
|
|
410
|
+
"--user-agent=disdrodb (+https://github.com/ltelab/disdrodb)",
|
|
408
411
|
]
|
|
409
412
|
|
|
410
413
|
# Define source and destination directory
|
|
@@ -541,15 +544,81 @@ def _download_file_from_url(url: str, dst_dir: str) -> str:
|
|
|
541
544
|
|
|
542
545
|
os.makedirs(dst_dir, exist_ok=True)
|
|
543
546
|
|
|
547
|
+
# Check if it can be downloaded
|
|
548
|
+
if not is_programmatic_downloadable(url):
|
|
549
|
+
raise RuntimeError(
|
|
550
|
+
f"Cannot download data programmatically from '{url}' right now. "
|
|
551
|
+
"The server requires a web browser (e.g. WAF / anti-bot protection).",
|
|
552
|
+
)
|
|
553
|
+
|
|
544
554
|
# Grab Pooch's logger and remember its current level
|
|
545
555
|
logger = pooch.get_logger()
|
|
546
556
|
orig_level = logger.level
|
|
547
557
|
# Silence INFO messages (including the SHA256 print)
|
|
548
558
|
logger.setLevel(logging.WARNING)
|
|
559
|
+
|
|
549
560
|
# Define pooch downloader
|
|
550
|
-
|
|
561
|
+
headers = {"User-Agent": "disdrodb (+https://github.com/ltelab/disdrodb)"}
|
|
562
|
+
# "Accept": "*/*"}
|
|
563
|
+
downloader = pooch.HTTPDownloader(progressbar=True, headers=headers)
|
|
551
564
|
# Download the file
|
|
552
565
|
pooch.retrieve(url=url, known_hash=None, path=dst_dir, fname=dst_filename, downloader=downloader, progressbar=tqdm)
|
|
553
566
|
# Restore the previous logging level
|
|
554
567
|
logger.setLevel(orig_level)
|
|
568
|
+
|
|
569
|
+
# Check file has been downloaded
|
|
570
|
+
# Note: `pooch.retrieve` will raise an exception if the download fails, so we do not
|
|
571
|
+
# perform an additional explicit file existence/size check here.
|
|
572
|
+
# if not os.path.isfile(dst_filepath) or os.path.getsize(dst_filepath) == 0:
|
|
573
|
+
# raise RuntimeError(f"URL {url} likely unreachable. Try manually.")
|
|
574
|
+
|
|
555
575
|
return dst_filepath
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def is_programmatic_downloadable(url, timeout=5):
|
|
579
|
+
"""Check whether a URL is programmatically downloadable.
|
|
580
|
+
|
|
581
|
+
WITHOUT downloading the file.
|
|
582
|
+
|
|
583
|
+
Returns
|
|
584
|
+
-------
|
|
585
|
+
bool
|
|
586
|
+
True -> safe for pooch / requests
|
|
587
|
+
False -> browser-only / blocked
|
|
588
|
+
"""
|
|
589
|
+
headers = {
|
|
590
|
+
# Range prevents full download (0-0 = 1 byte max)
|
|
591
|
+
"Range": "bytes=0-0",
|
|
592
|
+
# Explicitly non-browser
|
|
593
|
+
"User-Agent": "python-downloader/1.0",
|
|
594
|
+
}
|
|
595
|
+
# Open stream and close it after check terminated
|
|
596
|
+
try:
|
|
597
|
+
with requests.get(
|
|
598
|
+
url,
|
|
599
|
+
headers=headers,
|
|
600
|
+
allow_redirects=True,
|
|
601
|
+
timeout=timeout,
|
|
602
|
+
stream=True,
|
|
603
|
+
) as r:
|
|
604
|
+
|
|
605
|
+
# --- Hard fail signals ---
|
|
606
|
+
if r.status_code in (202, 403, 401):
|
|
607
|
+
return False
|
|
608
|
+
|
|
609
|
+
if r.headers.get("x-amzn-waf-action") == "challenge":
|
|
610
|
+
return False
|
|
611
|
+
|
|
612
|
+
content_type = r.headers.get("Content-Type", "").lower()
|
|
613
|
+
if "text/html" in content_type:
|
|
614
|
+
return False
|
|
615
|
+
|
|
616
|
+
content_length = r.headers.get("Content-Length")
|
|
617
|
+
if content_length == "0":
|
|
618
|
+
return False
|
|
619
|
+
|
|
620
|
+
# If server honored Range, we are good
|
|
621
|
+
return r.status_code in (200, 206)
|
|
622
|
+
|
|
623
|
+
except requests.RequestException:
|
|
624
|
+
return False
|