disdrodb 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/info.py +3 -3
  7. disdrodb/api/io.py +48 -8
  8. disdrodb/api/path.py +116 -133
  9. disdrodb/api/search.py +12 -3
  10. disdrodb/cli/disdrodb_create_summary.py +113 -0
  11. disdrodb/cli/disdrodb_create_summary_station.py +11 -1
  12. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  17. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  18. disdrodb/constants.py +1 -1
  19. disdrodb/data_transfer/download_data.py +123 -7
  20. disdrodb/etc/products/L1/global.yaml +1 -1
  21. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  22. disdrodb/etc/products/L2E/global.yaml +1 -1
  23. disdrodb/etc/products/L2M/GAMMA_GS_ND_MAE.yaml +6 -0
  24. disdrodb/etc/products/L2M/GAMMA_ML.yaml +1 -1
  25. disdrodb/etc/products/L2M/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  26. disdrodb/etc/products/L2M/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  27. disdrodb/etc/products/L2M/LOGNORMAL_ML.yaml +8 -0
  28. disdrodb/etc/products/L2M/global.yaml +11 -3
  29. disdrodb/issue/writer.py +2 -0
  30. disdrodb/l0/check_configs.py +49 -16
  31. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  32. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  33. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  34. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  35. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  36. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  37. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  38. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  39. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  40. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  41. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  42. disdrodb/l0/l0a_processing.py +10 -5
  43. disdrodb/l0/l0b_nc_processing.py +10 -6
  44. disdrodb/l0/l0b_processing.py +92 -72
  45. disdrodb/l0/l0c_processing.py +369 -251
  46. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +8 -1
  47. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  48. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  49. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  50. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  51. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  52. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  54. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  55. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  56. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  58. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  59. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +5 -14
  60. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  61. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  62. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  63. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  66. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  67. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  70. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  73. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  79. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  80. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PANGASA.py +232 -0
  81. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  82. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +120 -0
  83. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +7 -25
  84. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  85. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  86. disdrodb/l1/beard_model.py +31 -129
  87. disdrodb/l1/fall_velocity.py +156 -57
  88. disdrodb/l1/filters.py +25 -28
  89. disdrodb/l1/processing.py +12 -14
  90. disdrodb/l1_env/routines.py +46 -17
  91. disdrodb/l2/empirical_dsd.py +6 -0
  92. disdrodb/l2/processing.py +3 -3
  93. disdrodb/metadata/checks.py +132 -125
  94. disdrodb/metadata/geolocation.py +0 -2
  95. disdrodb/psd/fitting.py +180 -210
  96. disdrodb/psd/models.py +1 -1
  97. disdrodb/routines/__init__.py +54 -0
  98. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  99. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  100. disdrodb/{l2/routines.py → routines/l2.py} +284 -485
  101. disdrodb/{routines.py → routines/wrappers.py} +100 -7
  102. disdrodb/scattering/axis_ratio.py +95 -85
  103. disdrodb/scattering/permittivity.py +24 -0
  104. disdrodb/scattering/routines.py +56 -36
  105. disdrodb/summary/routines.py +147 -45
  106. disdrodb/utils/archiving.py +434 -0
  107. disdrodb/utils/attrs.py +2 -0
  108. disdrodb/utils/cli.py +5 -5
  109. disdrodb/utils/dask.py +62 -1
  110. disdrodb/utils/decorators.py +31 -0
  111. disdrodb/utils/encoding.py +10 -1
  112. disdrodb/{l2 → utils}/event.py +1 -66
  113. disdrodb/utils/logger.py +1 -1
  114. disdrodb/utils/manipulations.py +22 -12
  115. disdrodb/utils/routines.py +166 -0
  116. disdrodb/utils/time.py +5 -293
  117. disdrodb/utils/xarray.py +3 -0
  118. disdrodb/viz/plots.py +109 -15
  119. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/METADATA +3 -2
  120. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/RECORD +124 -96
  121. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/entry_points.txt +1 -0
  122. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/licenses/LICENSE +0 -0
  124. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/top_level.txt +0 -0
@@ -19,7 +19,6 @@
19
19
  """Functions to process DISDRODB L0A files into DISDRODB L0B netCDF files."""
20
20
 
21
21
  import logging
22
- import os
23
22
 
24
23
  import numpy as np
25
24
  import pandas as pd
@@ -43,13 +42,8 @@ from disdrodb.utils.attrs import (
43
42
  set_coordinate_attributes,
44
43
  set_disdrodb_attrs,
45
44
  )
46
- from disdrodb.utils.directories import create_directory, remove_if_exists
47
45
  from disdrodb.utils.encoding import set_encodings
48
- from disdrodb.utils.logger import (
49
- # log_warning,
50
- # log_debug,
51
- log_info,
52
- )
46
+ from disdrodb.utils.logger import log_info
53
47
  from disdrodb.utils.time import ensure_sorted_by_time
54
48
 
55
49
  logger = logging.getLogger(__name__)
@@ -86,15 +80,16 @@ def infer_split_str(string: str) -> str:
86
80
  return split_str
87
81
 
88
82
 
89
- def _replace_empty_strings_with_zeros(values):
83
+ def replace_empty_strings_with_zeros(values):
84
+ """Replace empty comma separated strings with '0'."""
90
85
  values[np.char.str_len(values) == 0] = "0"
91
86
  return values
92
87
 
93
88
 
94
- def _format_string_array(string: str, n_values: int) -> np.array:
89
+ def format_string_array(string: str, n_values: int) -> np.array:
95
90
  """Split a string with multiple numbers separated by a delimiter into an 1D array.
96
91
 
97
- e.g. : _format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
92
+ e.g. : format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
98
93
 
99
94
  If empty string ("") --> Return an arrays of zeros
100
95
  If the list length is not n_values -> Return an arrays of np.nan
@@ -132,7 +127,7 @@ def _format_string_array(string: str, n_values: int) -> np.array:
132
127
  # Ensure string type
133
128
  values = values.astype("str")
134
129
  # Replace '' with 0
135
- values = _replace_empty_strings_with_zeros(values)
130
+ values = replace_empty_strings_with_zeros(values)
136
131
  # Replace "-9.999" with 0
137
132
  values = np.char.replace(values, "-9.999", "0")
138
133
  # Cast values to float type
@@ -141,7 +136,7 @@ def _format_string_array(string: str, n_values: int) -> np.array:
141
136
  return values
142
137
 
143
138
 
144
- def _reshape_raw_spectrum(
139
+ def reshape_raw_spectrum(
145
140
  arr: np.array,
146
141
  dims_order: list,
147
142
  dims_size_dict: dict,
@@ -246,12 +241,20 @@ def retrieve_l0b_arrays(
246
241
  unavailable_keys.append(key)
247
242
  continue
248
243
 
249
- # Ensure is a string
250
- df_series = df[key].astype(str)
244
+ # Ensure is a string, get a numpy array for each row and then stack
245
+ # - Option 1: Clear but lot of copies
246
+ # df_series = df[key].astype(str)
247
+ # list_arr = df_series.apply(format_string_array, n_values=n_values)
248
+ # arr = np.stack(list_arr, axis=0)
249
+
250
+ # - Option 2: still copies
251
+ # arr = np.vstack(format_string_array(s, n_values=n_values) for s in df_series.astype(str))
251
252
 
252
- # Get a numpy array for each row and then stack
253
- list_arr = df_series.apply(_format_string_array, n_values=n_values)
254
- arr = np.stack(list_arr, axis=0)
253
+ # - Option 3: more memory efficient
254
+ n_timesteps = len(df[key])
255
+ arr = np.empty((n_timesteps, n_values), dtype=float) # preallocates
256
+ for i, s in enumerate(df[key].astype(str)):
257
+ arr[i, :] = format_string_array(s, n_values=n_values)
255
258
 
256
259
  # Retrieve dimensions
257
260
  dims_order = dims_order_dict[key]
@@ -261,7 +264,7 @@ def retrieve_l0b_arrays(
261
264
  # - This applies i.e for PARSIVEL*, LPM, PWS100
262
265
  # - This does not apply to RD80
263
266
  if key == "raw_drop_number" and len(dims_order) == 2:
264
- arr, dims = _reshape_raw_spectrum(
267
+ arr, dims = reshape_raw_spectrum(
265
268
  arr=arr,
266
269
  dims_order=dims_order,
267
270
  dims_size_dict=dims_size_dict,
@@ -286,7 +289,57 @@ def retrieve_l0b_arrays(
286
289
  #### L0B Coords and attributes
287
290
 
288
291
 
289
- def _convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
292
+ def ensure_valid_geolocation(ds: xr.Dataset, coord: str, errors: str = "ignore") -> xr.Dataset:
293
+ """Ensure valid geolocation coordinates.
294
+
295
+ 'altitude' must be >= 0, 'latitude' must be within [-90, 90] and
296
+ 'longitude' within [-180, 180].
297
+
298
+ It can deal with coordinates varying with time.
299
+
300
+ Parameters
301
+ ----------
302
+ ds : xarray.Dataset
303
+ Dataset containing the coordinate.
304
+ coord : str
305
+ Name of the coordinate variable to validate.
306
+ errors : {"ignore", "raise", "coerce"}, default "ignore"
307
+ - "ignore": nothing is done.
308
+ - "raise" : raise ValueError if invalid values are found.
309
+ - "coerce": out-of-range values are replaced with NaN.
310
+
311
+ Returns
312
+ -------
313
+ xr.Dataset
314
+ Dataset with validated coordinate values.
315
+ """
316
+ # Define coordinates ranges
317
+ ranges = {
318
+ "altitude": (0, np.inf),
319
+ "latitude": (-90, 90),
320
+ "longitude": (-180, 180), # used only for "raise"/"coerce"
321
+ }
322
+
323
+ # Check coordinate is available and correctly defined.
324
+ if coord not in ds:
325
+ raise ValueError(f"Coordinate '{coord}' not found in dataset.")
326
+ if coord not in list(ranges):
327
+ raise ValueError(f"Valid geolocation coordinates are: {list(ranges)}.")
328
+
329
+ # Validate coordinate
330
+ vmin, vmax = ranges[coord]
331
+ invalid = (ds[coord] < vmin) | (ds[coord] > vmax)
332
+ invalid = invalid.compute()
333
+
334
+ # Deal with invalid values
335
+ if errors == "raise" and invalid.any():
336
+ raise ValueError(f"{coord} out of range {vmin}-{vmax}.")
337
+ if errors == "coerce":
338
+ ds[coord] = ds[coord].where(~invalid)
339
+ return ds
340
+
341
+
342
+ def convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
290
343
  """Convert variables with ``object`` dtype to ``string``.
291
344
 
292
345
  Parameters
@@ -305,7 +358,7 @@ def _convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
305
358
  return ds
306
359
 
307
360
 
308
- def _set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
361
+ def set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
309
362
  """Set attributes to each ``xr.Dataset`` variable.
310
363
 
311
364
  Parameters
@@ -333,18 +386,6 @@ def _set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
333
386
  return ds
334
387
 
335
388
 
336
- def _set_dataset_attrs(ds, sensor_name):
337
- """Set variable and coordinates attributes."""
338
- # - Add netCDF variable attributes
339
- # --> Attributes: long_name, units, descriptions, valid_min, valid_max
340
- ds = _set_variable_attributes(ds=ds, sensor_name=sensor_name)
341
- # - Add netCDF coordinate attributes
342
- ds = set_coordinate_attributes(ds=ds)
343
- # - Set DISDRODB global attributes
344
- ds = set_disdrodb_attrs(ds=ds, product="L0B")
345
- return ds
346
-
347
-
348
389
  def add_dataset_crs_coords(ds):
349
390
  """Add the CRS coordinate to the xr.Dataset."""
350
391
  # TODO: define CF-compliant CRS !
@@ -363,7 +404,7 @@ def add_dataset_crs_coords(ds):
363
404
 
364
405
 
365
406
  def _define_dataset_variables(df, sensor_name, logger=None, verbose=False):
366
- """Define DISDRODB L0B netCDF variables."""
407
+ """Define DISDRODB L0B netCDF array variables."""
367
408
  # Preprocess raw_spectrum, diameter and velocity arrays if available
368
409
  raw_fields = ["raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"]
369
410
  if np.any(np.isin(raw_fields, df.columns)):
@@ -446,7 +487,7 @@ def set_geolocation_coordinates(ds, metadata):
446
487
  # If coordinate not present, add it from dictionary
447
488
  if coord not in ds:
448
489
  ds = ds.assign_coords({coord: metadata.pop(coord, np.nan)})
449
- # Else if set coordinates the variable in the dataset (present in the raw data)
490
+ # Else ensure coord is a dataset coordinate
450
491
  else:
451
492
  ds = ds.set_coords(coord)
452
493
  _ = metadata.pop(coord, None)
@@ -455,6 +496,10 @@ def set_geolocation_coordinates(ds, metadata):
455
496
  for coord in coords:
456
497
  ds[coord] = xr.where(ds[coord] == -9999, np.nan, ds[coord])
457
498
 
499
+ # Ensure valid geolocation coordinates
500
+ for coord in coords:
501
+ ds = ensure_valid_geolocation(ds=ds, coord=coord, errors="coerce")
502
+
458
503
  # Set attributes without geolocation coordinates
459
504
  ds.attrs = metadata
460
505
  return ds
@@ -475,16 +520,25 @@ def finalize_dataset(ds, sensor_name, metadata):
475
520
  ds = add_dataset_crs_coords(ds)
476
521
 
477
522
  # Set netCDF dimension order
523
+ # --> Required for correct encoding !
478
524
  ds = ds.transpose("time", "diameter_bin_center", ...)
479
525
 
480
- # Add netCDF variable and coordinate attributes
481
- ds = _set_dataset_attrs(ds, sensor_name)
482
-
483
526
  # Ensure variables with dtype object are converted to string
484
- ds = _convert_object_variables_to_string(ds)
527
+ ds = convert_object_variables_to_string(ds)
528
+
529
+ # Add netCDF variable and coordinate attributes
530
+ # - Add variable attributes: long_name, units, descriptions, valid_min, valid_max
531
+ ds = set_variable_attributes(ds=ds, sensor_name=sensor_name)
532
+ # - Add netCDF coordinate attributes
533
+ ds = set_coordinate_attributes(ds=ds)
534
+ # - Set DISDRODB global attributes
535
+ ds = set_disdrodb_attrs(ds=ds, product="L0B")
485
536
 
486
537
  # Check L0B standards
487
538
  check_l0b_standards(ds)
539
+
540
+ # Set L0B encodings
541
+ ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
488
542
  return ds
489
543
 
490
544
 
@@ -508,38 +562,4 @@ def set_l0b_encodings(ds: xr.Dataset, sensor_name: str):
508
562
  return ds
509
563
 
510
564
 
511
- def write_l0b(ds: xr.Dataset, filepath: str, force=False) -> None:
512
- """Save the xarray dataset into a NetCDF file.
513
-
514
- Parameters
515
- ----------
516
- ds : xarray.Dataset
517
- Input xarray dataset.
518
- filepath : str
519
- Output file path.
520
- sensor_name : str
521
- Name of the sensor.
522
- force : bool, optional
523
- Whether to overwrite existing data.
524
- If ``True``, overwrite existing data into destination directories.
525
- If ``False``, raise an error if there are already data into destination directories. This is the default.
526
- """
527
- # Create station directory if does not exist
528
- create_directory(os.path.dirname(filepath))
529
-
530
- # Check if the file already exists
531
- # - If force=True --> Remove it
532
- # - If force=False --> Raise error
533
- remove_if_exists(filepath, force=force)
534
-
535
- # Get sensor name from dataset
536
- sensor_name = ds.attrs.get("sensor_name")
537
-
538
- # Set encodings
539
- ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
540
-
541
- # Write netcdf
542
- ds.to_netcdf(filepath, engine="netcdf4")
543
-
544
-
545
565
  ####--------------------------------------------------------------------------.