disdrodb 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -0
  4. disdrodb/api/checks.py +8 -7
  5. disdrodb/api/io.py +81 -29
  6. disdrodb/api/path.py +17 -14
  7. disdrodb/api/search.py +15 -18
  8. disdrodb/cli/disdrodb_open_products_options.py +38 -0
  9. disdrodb/cli/disdrodb_run.py +2 -2
  10. disdrodb/cli/disdrodb_run_station.py +4 -4
  11. disdrodb/configs.py +1 -1
  12. disdrodb/data_transfer/download_data.py +70 -1
  13. disdrodb/etc/configs/attributes.yaml +62 -8
  14. disdrodb/etc/configs/encodings.yaml +28 -0
  15. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_SSE.yaml +8 -0
  16. disdrodb/etc/products/L2M/MODELS/GAMMA_ML.yaml +1 -1
  17. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_SSE.yaml +8 -0
  18. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_SSE.yaml +8 -0
  19. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +1 -1
  20. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_SSE.yaml +8 -0
  21. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_SSE.yaml +8 -0
  22. disdrodb/etc/products/L2M/global.yaml +4 -4
  23. disdrodb/fall_velocity/graupel.py +8 -8
  24. disdrodb/fall_velocity/hail.py +2 -2
  25. disdrodb/fall_velocity/rain.py +33 -5
  26. disdrodb/issue/checks.py +1 -1
  27. disdrodb/l0/l0_reader.py +1 -1
  28. disdrodb/l0/l0a_processing.py +2 -2
  29. disdrodb/l0/l0b_nc_processing.py +5 -5
  30. disdrodb/l0/l0b_processing.py +20 -24
  31. disdrodb/l0/l0c_processing.py +18 -13
  32. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +4 -0
  33. disdrodb/l0/readers/PARSIVEL2/VIETNAM/IGE_PARSIVEL2.py +239 -0
  34. disdrodb/l0/template_tools.py +13 -13
  35. disdrodb/l1/classification.py +10 -6
  36. disdrodb/l2/empirical_dsd.py +25 -15
  37. disdrodb/l2/processing.py +32 -14
  38. disdrodb/metadata/download.py +1 -1
  39. disdrodb/metadata/geolocation.py +4 -4
  40. disdrodb/metadata/reader.py +3 -3
  41. disdrodb/metadata/search.py +10 -8
  42. disdrodb/psd/__init__.py +4 -0
  43. disdrodb/psd/fitting.py +2660 -592
  44. disdrodb/psd/gof_metrics.py +389 -0
  45. disdrodb/psd/grid_search.py +1066 -0
  46. disdrodb/psd/models.py +1281 -145
  47. disdrodb/routines/l2.py +6 -6
  48. disdrodb/routines/options_validation.py +8 -8
  49. disdrodb/scattering/axis_ratio.py +70 -2
  50. disdrodb/scattering/permittivity.py +13 -10
  51. disdrodb/scattering/routines.py +10 -10
  52. disdrodb/summary/routines.py +23 -20
  53. disdrodb/utils/archiving.py +29 -22
  54. disdrodb/utils/attrs.py +6 -4
  55. disdrodb/utils/dataframe.py +4 -4
  56. disdrodb/utils/encoding.py +3 -1
  57. disdrodb/utils/event.py +9 -9
  58. disdrodb/utils/logger.py +4 -7
  59. disdrodb/utils/manipulations.py +2 -2
  60. disdrodb/utils/subsetting.py +1 -1
  61. disdrodb/utils/time.py +8 -7
  62. disdrodb/viz/plots.py +25 -17
  63. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/METADATA +44 -33
  64. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/RECORD +68 -66
  65. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/WHEEL +1 -1
  66. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/entry_points.txt +1 -0
  67. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +0 -6
  68. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +0 -6
  69. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +0 -6
  70. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_MAE.yaml +0 -6
  71. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_MAE.yaml +0 -6
  72. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +0 -6
  73. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_Z_MAE.yaml +0 -6
  74. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/licenses/LICENSE +0 -0
  75. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/top_level.txt +0 -0
disdrodb/__init__.py CHANGED
@@ -78,6 +78,8 @@ from disdrodb.routines import (
78
78
  run_l2e_station,
79
79
  run_l2m,
80
80
  run_l2m_station,
81
+ run,
82
+ run_station,
81
83
  )
82
84
  from disdrodb.utils.manipulations import convert_from_decibel as idecibel
83
85
  from disdrodb.utils.manipulations import convert_to_decibel as decibel
@@ -158,6 +160,7 @@ __all__ = [
158
160
  "read_metadata_archive",
159
161
  "read_station_metadata",
160
162
  # Functions invoking the disdrodb_run_* scripts in the terminals
163
+ "run",
161
164
  "run_l0",
162
165
  "run_l0_station",
163
166
  "run_l0a",
@@ -172,6 +175,7 @@ __all__ = [
172
175
  "run_l2e_station",
173
176
  "run_l2m",
174
177
  "run_l2m_station",
178
+ "run_station",
175
179
  ]
176
180
 
177
181
 
disdrodb/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.4.0'
32
- __version_tuple__ = version_tuple = (0, 4, 0)
31
+ __version__ = version = '0.5.1'
32
+ __version_tuple__ = version_tuple = (0, 5, 1)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -135,6 +135,20 @@ class DISDRODB_Dataset_Accessor(DISDRODB_Base_Accessor):
135
135
 
136
136
  return plot_raw_and_filtered_spectra(self._obj, **kwargs)
137
137
 
138
+ @property
139
+ def psd(self):
140
+ """Return PSD class from DISDRODB L2M product."""
141
+ from disdrodb.psd.models import create_psd_from_dataset
142
+
143
+ return create_psd_from_dataset(self._obj)
144
+
145
+ @property
146
+ def psd_parameters(self):
147
+ """Return PSD parameters from DISDRODB L2M product."""
148
+ from disdrodb.psd.models import get_parameters_from_dataset
149
+
150
+ return get_parameters_from_dataset(self._obj)
151
+
138
152
 
139
153
  @xr.register_dataarray_accessor("disdrodb")
140
154
  class DISDRODB_DataArray_Accessor(DISDRODB_Base_Accessor):
disdrodb/api/checks.py CHANGED
@@ -171,16 +171,17 @@ def check_folder_partitioning(folder_partitioning):
171
171
  folder_partitioning : str or None
172
172
  Defines the subdirectory structure based on the dataset's start time.
173
173
  Allowed values are:
174
- - "" or None: No additional subdirectories, files are saved directly in dir.
175
- - "year": Files are stored under a subdirectory for the year (<dir>/2025).
176
- - "year/month": Files are stored under subdirectories by year and month (<dir>/2025/04).
177
- - "year/month/day": Files are stored under subdirectories by year, month and day (<dir>/2025/04/01).
178
- - "year/month_name": Files are stored under subdirectories by year and month name (<dir>/2025/April).
179
- - "year/quarter": Files are stored under subdirectories by year and quarter (<dir>/2025/Q2).
174
+
175
+ - "" or None: No additional subdirectories, files are saved directly in dir.
176
+ - "year": Files are stored under a subdirectory for the year (<dir>/2025).
177
+ - "year/month": Files are stored under subdirectories by year and month (<dir>/2025/04).
178
+ - "year/month/day": Files are stored under subdirectories by year, month and day (<dir>/2025/04/01).
179
+ - "year/month_name": Files are stored under subdirectories by year and month name (<dir>/2025/April).
180
+ - "year/quarter": Files are stored under subdirectories by year and quarter (<dir>/2025/Q2).
180
181
 
181
182
  Returns
182
183
  -------
183
- folder_partitioning
184
+ str
184
185
  The verified folder partitioning scheme.
185
186
  """
186
187
  valid_options = ["", "year", "year/month", "year/month/day", "year/month_name", "year/quarter"]
disdrodb/api/io.py CHANGED
@@ -335,24 +335,24 @@ def filter_dataset_by_time(ds, start_time=None, end_time=None):
335
335
 
336
336
  Parameters
337
337
  ----------
338
- ds : xr.Dataset
338
+ ds : xarray.Dataset
339
339
  Dataset with a `time` coordinate.
340
- start_time : np.datetime64 or None
340
+ start_time : str, numpy.datetime64 or None
341
341
  Inclusive start bound. If None, no lower bound is applied.
342
- end_time : np.datetime64 or None
342
+ end_time : str, numpy.datetime64 or None
343
343
  Inclusive end bound. If None, no upper bound is applied.
344
344
 
345
345
  Returns
346
346
  -------
347
- xr.Dataset
347
+ xarray.Dataset
348
348
  Subset dataset with the same ordering of timesteps (duplicates preserved).
349
349
  """
350
350
  time = ds["time"].to_numpy()
351
351
  mask = np.ones(time.shape, dtype=bool)
352
352
  if start_time is not None:
353
- mask &= time >= np.array(start_time, dtype="datetime64[ns]")
353
+ mask &= time >= np.array(check_time(start_time), dtype="datetime64[ns]")
354
354
  if end_time is not None:
355
- mask &= time <= np.array(end_time, dtype="datetime64[ns]")
355
+ mask &= time <= np.array(check_time(end_time), dtype="datetime64[ns]")
356
356
  return ds.isel(time=np.where(mask)[0])
357
357
 
358
358
 
@@ -445,30 +445,73 @@ def open_netcdf_files(
445
445
  engine="netcdf4",
446
446
  **open_kwargs,
447
447
  ):
448
- """Open DISDRODB netCDF files using xarray.
449
-
450
- Using data_vars="minimal", coords="minimal", compat="override"
451
- --> will only concatenate those variables with the time dimension,
452
- --> will skip any checking for variables that don't have a time dimension
453
- (simply pick the variable from the first file).
454
- https://github.com/pydata/xarray/issues/1385#issuecomment-1958761334
455
-
456
- Using combine="nested" and join="outer" ensure that duplicated timesteps
457
- are not overwritten!
458
-
459
- When decode_cf=False
460
- --> lat,lon are data_vars and get concatenated without any checking or reading
461
- When decode_cf=True
462
- --> lat, lon are promoted to coords, then get checked for equality across all files
463
-
464
- For L0B product, if sample_interval variable is present and varies with time,
465
- this function concatenate the variable over time without problems.
466
- For L0C product, if sample_interval changes across listed files,
467
- only sample_interval of first file is reported.
468
- --> open_dataset take care of just providing filepaths of files with same sample interval.
469
- In L1 and L2 processing, only filepaths of files with same sample interval
470
- must be passed to this function.
448
+ """Open DISDRODB NetCDF files using xarray.
449
+
450
+ This function opens and concatenates multiple NetCDF files using
451
+ ``xarray.open_mfdataset`` with settings optimized for time-based
452
+ concatenation and minimal variable checking.
453
+
454
+ The function uses ``data_vars="minimal"``, ``coords="minimal"``,
455
+ and ``compat="override"`` to:
456
+
457
+ - Concatenate only variables that depend on the time dimension.
458
+ - Skip consistency checks for variables without a time dimension, taking them from the first file instead.
459
+
460
+ See: https://github.com/pydata/xarray/issues/1385#issuecomment-1958761334
461
+
462
+ Using ``combine="nested"`` and ``join="outer"`` ensures that duplicated
463
+ timesteps are preserved and not overwritten.
464
+
465
+ Behavior depends on ``decode_cf``:
466
+
467
+ - If ``decode_cf=False``: ``lat`` and ``lon`` are treated as data variables and concatenated without validation.
468
+ - If ``decode_cf=True``: ``lat`` and ``lon`` are promoted to coordinates and checked for equality across files.
469
+
470
+ Special handling of ``sample_interval``:
471
+
472
+ - For L0B products, if ``sample_interval`` varies with time, it is safely concatenated.
473
+ - For L0C products, if ``sample_interval`` differs across files, only the value from the first file is retained.
474
+ - For L1 and L2 processing, only files with identical ``sample_interval`` values should be passed to this function.
475
+
476
+ Parameters
477
+ ----------
478
+ filepaths : str or sequence of str
479
+ Path(s) to NetCDF files to open.
480
+ chunks : int, dict, or None, optional
481
+ Chunking strategy passed to xarray for dask-backed arrays.
482
+ Use ``-1`` to load data into a single chunk (default).
483
+ start_time : str or datetime-like or None, optional
484
+ Start time for temporal subsetting.
485
+ end_time : str or datetime-like or None, optional
486
+ End time for temporal subsetting.
487
+ variables : sequence of str or None, optional
488
+ Subset of variables to retain.
489
+ parallel : bool, optional
490
+ Whether to open files in parallel using dask.
491
+ The default is ``False``.
492
+ compute : bool, optional
493
+ Whether to immediately compute the dataset when using dask.
494
+ The default is ``True``.
495
+ engine : str, optional
496
+ Backend engine used by xarray to read NetCDF files.
497
+ The default is "netcdf4".
498
+ **open_kwargs
499
+ Additional keyword arguments passed to
500
+ ``xarray.open_mfdataset``.
501
+
502
+ Returns
503
+ -------
504
+ xarray.Dataset
505
+ The opened and concatenated dataset.
471
506
 
507
+ See Also
508
+ --------
509
+ xarray.open_mfdataset
510
+
511
+ Notes
512
+ -----
513
+ This function is decorated with ``ensure_safe_open_mfdataset`` to
514
+ protect against unsafe or incompatible combinations of arguments.
472
515
  """
473
516
  import xarray as xr
474
517
 
@@ -804,6 +847,15 @@ def open_readers_directory():
804
847
  open_file_explorer(readers_directory)
805
848
 
806
849
 
850
+ def open_products_options():
851
+ """Open the disdrodb product options directory."""
852
+ from disdrodb.configs import get_products_configs_dir
853
+
854
+ products_configs_dir = get_products_configs_dir()
855
+
856
+ open_file_explorer(products_configs_dir)
857
+
858
+
807
859
  def open_metadata_archive(
808
860
  metadata_archive_dir=None,
809
861
  ):
disdrodb/api/path.py CHANGED
@@ -358,12 +358,13 @@ def define_partitioning_tree(time, folder_partitioning):
358
358
  folder_partitioning : str or None
359
359
  Define the subdirectory structure where saving files.
360
360
  Allowed values are:
361
- - None: Files are saved directly in data_dir.
362
- - "year": Files are saved under a subdirectory for the year.
363
- - "year/month": Files are saved under subdirectories for year and month.
364
- - "year/month/day": Files are saved under subdirectories for year, month and day
365
- - "year/month_name": Files are stored under subdirectories by year and month name
366
- - "year/quarter": Files are saved under subdirectories for year and quarter.
361
+
362
+ - None: Files are saved directly in data_dir.
363
+ - "year": Files are saved under a subdirectory for the year.
364
+ - "year/month": Files are saved under subdirectories for year and month.
365
+ - "year/month/day": Files are saved under subdirectories for year, month and day
366
+ - "year/month_name": Files are stored under subdirectories by year and month name
367
+ - "year/quarter": Files are saved under subdirectories for year and quarter.
367
368
 
368
369
  Returns
369
370
  -------
@@ -411,12 +412,14 @@ def define_file_folder_path(obj, dir_path, folder_partitioning):
411
412
  folder_partitioning : str or None
412
413
  Define the subdirectory structure where saving files.
413
414
  Allowed values are:
414
- - None or "": Files are saved directly in data_dir.
415
- - "year": Files are saved under a subdirectory for the year.
416
- - "year/month": Files are saved under subdirectories for year and month.
417
- - "year/month/day": Files are saved under subdirectories for year, month and day
418
- - "year/month_name": Files are stored under subdirectories by year and month name
419
- - "year/quarter": Files are saved under subdirectories for year and quarter.
415
+
416
+ - None or "": Files are saved directly in data_dir.
417
+ - "year": Files are saved under a subdirectory for the year.
418
+ - "year/month": Files are saved under subdirectories for year and month.
419
+ - "year/month/day": Files are saved under subdirectories for year, month and day
420
+ - "year/month_name": Files are stored under subdirectories by year and month name
421
+ - "year/quarter": Files are saved under subdirectories for year and quarter.
422
+
420
423
 
421
424
  Returns
422
425
  -------
@@ -682,10 +685,10 @@ def define_filename(
682
685
  Name of the campaign.
683
686
  station_name : str
684
687
  Name of the station.
685
- start_time : datetime.datatime, optional
688
+ start_time : datetime.datetime, optional
686
689
  Start time.
687
690
  Required if add_time_period = True.
688
- end_time : datetime.datatime, optional
691
+ end_time : datetime.datetime, optional
689
692
  End time.
690
693
  Required if add_time_period = True.
691
694
  temporal_resolution : str, optional
disdrodb/api/search.py CHANGED
@@ -310,8 +310,7 @@ def available_stations(
310
310
  metadata_archive_dir=None,
311
311
  **filter_kwargs,
312
312
  ):
313
- """
314
- Return stations information for which metadata or product data are available on disk.
313
+ """Return stations information for which metadata or product data are available on disk.
315
314
 
316
315
  This function queries the DISDRODB Metadata Archive and, optionally, the
317
316
  local DISDRODB Data Archive to identify stations that satisfy the specified
@@ -334,31 +333,29 @@ def available_stations(
334
333
  it lists the stations present in the local DISDRODB Data Archive given the specified filtering criteria.
335
334
  The default is None.
336
335
 
337
- data_sources : str or sequence of str, optional
336
+ data_sources : str or list of str, optional
338
337
  One or more data source identifiers to filter stations by.
339
338
  The name(s) must be UPPER CASE.
340
339
  If None, no filtering on data source is applied. The default is ``None``.
341
- campaign_names : str or sequence of str, optional
340
+ campaign_names : str or list of str, optional
342
341
  One or more campaign names to filter stations by.
343
342
  The name(s) must be UPPER CASE.
344
343
  If None, no filtering on campaign is applied. The default is ``None``.
345
- station_names : str or sequence of str, optional
344
+ station_names : str or list of str, optional
346
345
  One or more station names to include.
347
346
  If None, all stations matching other filters are considered. The default is ``None``.
348
347
  available_data : bool, optional
349
348
 
350
349
  If ``product`` is not specified:
351
350
 
352
- - if available_data is False, return stations present in the DISDRODB Metadata Archive
353
- - if available_data is True, return stations with data available on the
354
- online DISDRODB Decentralized Data Archive (i.e., stations with the disdrodb_data_url in the metadata).
351
+ - if ``available_data=False``, return stations present in the DISDRODB Metadata Archive.
352
+ - if ``available_data=True``, return stations with data available on the online DISDRODB Decentralized Data Archive (i.e., stations with the disdrodb_data_url in the metadata).
355
353
 
356
354
  If ``product`` is specified:
357
355
 
358
- - if available_data is False, return stations where the product directory exists in the
359
- in the local DISDRODB Data Archive
360
- - if available_data is True, return stations where product data exists in the
361
- in the local DISDRODB Data Archive.
356
+ - if ``available_data=False``, return stations where the product directory exists in the local DISDRODB Data Archive
357
+ - if ``available_data=True``, return stations where product data exists in the local DISDRODB Data Archive.
358
+
362
359
  The default is False.
363
360
 
364
361
  return_tuple : bool, optional
@@ -368,17 +365,17 @@ def available_stations(
368
365
  raise_error_if_empty : bool, optional
369
366
  If True and no stations satisfy the criteria, raise a ``ValueError``.
370
367
  If False, return an empty list/tuple. The default is False.
371
- invalid_fields_policy : {'raise', 'warn', 'ignore'}, optional
368
+ invalid_fields_policy : str, optional
372
369
  How to handle invalid filter values for ``data_sources``, ``campaign_names``,
373
370
  or ``station_names`` that are not present in the metadata archive:
374
371
 
375
- - 'raise' : raise a ``ValueError`` (default)
376
- - 'warn' : emit a warning, then ignore invalid entries
377
- - 'ignore': silently drop invalid entries
372
+ - 'raise' : raise a ``ValueError`` (default)
373
+ - 'warn' : emit a warning, then ignore invalid entries
374
+ - 'ignore': silently drop invalid entries
378
375
 
379
376
  data_archive_dir : str or Path-like, optional
380
377
  Path to the root of the local DISDRODB Data Archive.
381
- Required only if ``product``is specified.
378
+ Required only if ``product`` is specified.
382
379
  If None, the default data archive base directory is used. Default is None.
383
380
  metadata_archive_dir : str or Path-like, optional
384
381
  Path to the root of the DISDRODB Metadata Archive.
@@ -405,7 +402,7 @@ def available_stations(
405
402
  >>> raw_stations = available_stations(product="RAW", available_data=True)
406
403
  >>> # List stations of specific data sources
407
404
  >>> stations = available_stations(data_sources=["NASA", "EPFL"])
408
- """
405
+ """ # noqa: E501
409
406
  # Retrieve DISDRODB Data and Metadata Archive directories
410
407
  metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)
411
408
  product = check_product(product) if product is not None else None
@@ -0,0 +1,38 @@
1
+ # -----------------------------------------------------------------------------.
2
+ # Copyright (c) 2021-2026 DISDRODB developers
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ # -----------------------------------------------------------------------------.
17
+ """Routine to open the DISDRODB Products Options."""
18
+
19
+ import sys
20
+
21
+ import click
22
+
23
+ sys.tracebacklimit = 0 # avoid full traceback error if occur
24
+
25
+
26
+ @click.command()
27
+ def disdrodb_open_products_options():
28
+ """Open the DISDRODB Products Configuration Directory in the system file explorer.
29
+
30
+ Examples
31
+ --------
32
+ # Open the data archive from active configuration
33
+ disdrodb_open_products_options
34
+
35
+ """
36
+ from disdrodb.api.io import open_products_options
37
+
38
+ open_products_options()
@@ -57,7 +57,7 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
57
57
  "--l2m_processing",
58
58
  type=bool,
59
59
  show_default=True,
60
- default=True,
60
+ default=False,
61
61
  help="Run L2M processing.",
62
62
  )
63
63
  @click_processing_options
@@ -77,7 +77,7 @@ def disdrodb_run(
77
77
  # Higher level processing options
78
78
  l1_processing: bool = True,
79
79
  l2e_processing: bool = True,
80
- l2m_processing: bool = True,
80
+ l2m_processing: bool = False,
81
81
  # Processing options
82
82
  force: bool = False,
83
83
  verbose: bool = True,
@@ -59,7 +59,7 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
59
59
  "--l2m_processing",
60
60
  type=bool,
61
61
  show_default=True,
62
- default=True,
62
+ default=False,
63
63
  help="Run L2M processing.",
64
64
  )
65
65
  @click_data_archive_dir_option
@@ -78,7 +78,7 @@ def disdrodb_run_station(
78
78
  # Higher level processing options
79
79
  l1_processing: bool = True,
80
80
  l2e_processing: bool = True,
81
- l2m_processing: bool = True,
81
+ l2m_processing: bool = False,
82
82
  # Processing options
83
83
  force: bool = False,
84
84
  verbose: bool = True,
@@ -131,8 +131,8 @@ def disdrodb_run_station(
131
131
  # Process station with debugging mode and custom workers
132
132
  DASK_NUM_WORKERS=4 disdrodb_run_station NETHERLANDS DELFT PAR001_Cabauw --debugging_mode True
133
133
 
134
- # Process station, skip final L2M level
135
- disdrodb_run_station FRANCE ENPC_CARNOT Carnot_Pars1 --l2m_processing False
134
+ # Process station, create L2M product (disabled by default)
135
+ disdrodb_run_station FRANCE ENPC_CARNOT Carnot_Pars1 --l2m_processing True
136
136
 
137
137
  # Force overwrite existing files with verbose output
138
138
  disdrodb_run_station EPFL HYMEX_2012 10 --force True --verbose True
disdrodb/configs.py CHANGED
@@ -282,7 +282,7 @@ def copy_default_products_configs(products_configs_dir):
282
282
 
283
283
  Returns
284
284
  -------
285
- products_configs_dir
285
+ str
286
286
  The path to the newly created custom product configuration directory.
287
287
 
288
288
  """
@@ -24,6 +24,7 @@ import urllib.parse
24
24
 
25
25
  import click
26
26
  import pooch
27
+ import requests
27
28
  import tqdm
28
29
 
29
30
  from disdrodb.api.path import define_metadata_filepath, define_station_dir
@@ -405,6 +406,8 @@ def build_webserver_wget_command(url: str, cut_dirs: int, dst_dir: str, verbose:
405
406
  f"--cut-dirs={cut_dirs}",
406
407
  # Downloads just new data without re-downloading existing files
407
408
  "--timestamping", # -N
409
+ # Specify agent
410
+ "--user-agent=disdrodb (+https://github.com/ltelab/disdrodb)",
408
411
  ]
409
412
 
410
413
  # Define source and destination directory
@@ -541,15 +544,81 @@ def _download_file_from_url(url: str, dst_dir: str) -> str:
541
544
 
542
545
  os.makedirs(dst_dir, exist_ok=True)
543
546
 
547
+ # Check if it can be downloaded
548
+ if not is_programmatic_downloadable(url):
549
+ raise RuntimeError(
550
+ f"Cannot download data programmatically from '{url}' right now. "
551
+ "The server requires a web browser (e.g. WAF / anti-bot protection).",
552
+ )
553
+
544
554
  # Grab Pooch's logger and remember its current level
545
555
  logger = pooch.get_logger()
546
556
  orig_level = logger.level
547
557
  # Silence INFO messages (including the SHA256 print)
548
558
  logger.setLevel(logging.WARNING)
559
+
549
560
  # Define pooch downloader
550
- downloader = pooch.HTTPDownloader(progressbar=True)
561
+ headers = {"User-Agent": "disdrodb (+https://github.com/ltelab/disdrodb)"}
562
+ # "Accept": "*/*"}
563
+ downloader = pooch.HTTPDownloader(progressbar=True, headers=headers)
551
564
  # Download the file
552
565
  pooch.retrieve(url=url, known_hash=None, path=dst_dir, fname=dst_filename, downloader=downloader, progressbar=tqdm)
553
566
  # Restore the previous logging level
554
567
  logger.setLevel(orig_level)
568
+
569
+ # Check file has been downloaded
570
+ # Note: `pooch.retrieve` will raise an exception if the download fails, so we do not
571
+ # perform an additional explicit file existence/size check here.
572
+ # if not os.path.isfile(dst_filepath) or os.path.getsize(dst_filepath) == 0:
573
+ # raise RuntimeError(f"URL {url} likely unreachable. Try manually.")
574
+
555
575
  return dst_filepath
576
+
577
+
578
+ def is_programmatic_downloadable(url, timeout=5):
579
+ """Check whether a URL is programmatically downloadable.
580
+
581
+ WITHOUT downloading the file.
582
+
583
+ Returns
584
+ -------
585
+ bool
586
+ True -> safe for pooch / requests
587
+ False -> browser-only / blocked
588
+ """
589
+ headers = {
590
+ # Range prevents full download (0-0 = 1 byte max)
591
+ "Range": "bytes=0-0",
592
+ # Explicitly non-browser
593
+ "User-Agent": "python-downloader/1.0",
594
+ }
595
+ # Open stream and close it after check terminated
596
+ try:
597
+ with requests.get(
598
+ url,
599
+ headers=headers,
600
+ allow_redirects=True,
601
+ timeout=timeout,
602
+ stream=True,
603
+ ) as r:
604
+
605
+ # --- Hard fail signals ---
606
+ if r.status_code in (202, 403, 401):
607
+ return False
608
+
609
+ if r.headers.get("x-amzn-waf-action") == "challenge":
610
+ return False
611
+
612
+ content_type = r.headers.get("Content-Type", "").lower()
613
+ if "text/html" in content_type:
614
+ return False
615
+
616
+ content_length = r.headers.get("Content-Length")
617
+ if content_length == "0":
618
+ return False
619
+
620
+ # If server honored Range, we are good
621
+ return r.status_code in (200, 206)
622
+
623
+ except requests.RequestException:
624
+ return False