disdrodb 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -0
  4. disdrodb/api/checks.py +8 -7
  5. disdrodb/api/io.py +81 -29
  6. disdrodb/api/path.py +17 -14
  7. disdrodb/api/search.py +15 -18
  8. disdrodb/cli/disdrodb_open_products_options.py +38 -0
  9. disdrodb/cli/disdrodb_run.py +2 -2
  10. disdrodb/cli/disdrodb_run_station.py +4 -4
  11. disdrodb/configs.py +1 -1
  12. disdrodb/data_transfer/download_data.py +70 -1
  13. disdrodb/etc/configs/attributes.yaml +62 -8
  14. disdrodb/etc/configs/encodings.yaml +28 -0
  15. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_SSE.yaml +8 -0
  16. disdrodb/etc/products/L2M/MODELS/GAMMA_ML.yaml +1 -1
  17. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_SSE.yaml +8 -0
  18. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_SSE.yaml +8 -0
  19. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +1 -1
  20. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_SSE.yaml +8 -0
  21. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_SSE.yaml +8 -0
  22. disdrodb/etc/products/L2M/global.yaml +4 -4
  23. disdrodb/fall_velocity/graupel.py +8 -8
  24. disdrodb/fall_velocity/hail.py +2 -2
  25. disdrodb/fall_velocity/rain.py +33 -5
  26. disdrodb/issue/checks.py +1 -1
  27. disdrodb/l0/l0_reader.py +1 -1
  28. disdrodb/l0/l0a_processing.py +2 -2
  29. disdrodb/l0/l0b_nc_processing.py +5 -5
  30. disdrodb/l0/l0b_processing.py +20 -24
  31. disdrodb/l0/l0c_processing.py +18 -13
  32. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +4 -0
  33. disdrodb/l0/readers/PARSIVEL2/VIETNAM/IGE_PARSIVEL2.py +239 -0
  34. disdrodb/l0/template_tools.py +13 -13
  35. disdrodb/l1/classification.py +10 -6
  36. disdrodb/l2/empirical_dsd.py +25 -15
  37. disdrodb/l2/processing.py +32 -14
  38. disdrodb/metadata/download.py +1 -1
  39. disdrodb/metadata/geolocation.py +4 -4
  40. disdrodb/metadata/reader.py +3 -3
  41. disdrodb/metadata/search.py +10 -8
  42. disdrodb/psd/__init__.py +4 -0
  43. disdrodb/psd/fitting.py +2660 -592
  44. disdrodb/psd/gof_metrics.py +389 -0
  45. disdrodb/psd/grid_search.py +1066 -0
  46. disdrodb/psd/models.py +1281 -145
  47. disdrodb/routines/l2.py +6 -6
  48. disdrodb/routines/options_validation.py +8 -8
  49. disdrodb/scattering/axis_ratio.py +70 -2
  50. disdrodb/scattering/permittivity.py +13 -10
  51. disdrodb/scattering/routines.py +10 -10
  52. disdrodb/summary/routines.py +23 -20
  53. disdrodb/utils/archiving.py +29 -22
  54. disdrodb/utils/attrs.py +6 -4
  55. disdrodb/utils/dataframe.py +4 -4
  56. disdrodb/utils/encoding.py +3 -1
  57. disdrodb/utils/event.py +9 -9
  58. disdrodb/utils/logger.py +4 -7
  59. disdrodb/utils/manipulations.py +2 -2
  60. disdrodb/utils/subsetting.py +1 -1
  61. disdrodb/utils/time.py +8 -7
  62. disdrodb/viz/plots.py +25 -17
  63. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/METADATA +44 -33
  64. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/RECORD +68 -66
  65. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/entry_points.txt +1 -0
  66. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +0 -6
  67. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +0 -6
  68. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +0 -6
  69. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_MAE.yaml +0 -6
  70. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_MAE.yaml +0 -6
  71. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +0 -6
  72. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_Z_MAE.yaml +0 -6
  73. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/WHEEL +0 -0
  74. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/licenses/LICENSE +0 -0
  75. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/top_level.txt +0 -0
@@ -97,6 +97,18 @@ n_large_hail:
97
97
  units: "1"
98
98
  description: "Count of hail particles larger than 8 mm in diameter."
99
99
  valid_min: 0
100
+ n_margin_fallers:
101
+ long_name: "number of margin fallers"
102
+ standard_name: "number_of_margin_fallers"
103
+ units: "1"
104
+ description: "Count of particles falling at velocities significantly above the expected terminal velocity for their diameter. These may indicate measurement artifacts or non-hydrometeor particles."
105
+ valid_min: 0
106
+ n_splashing:
107
+ long_name: "number of splashing drops"
108
+ standard_name: "number_of_splashing_drops"
109
+ units: "1"
110
+ description: "Count of splashing drops detected. Splashing drops are identified as particles with diameter between 0 and 6 mm and velocity <= 0.6 m/s."
111
+ valid_min: 0
100
112
  drop_number:
101
113
  description: Counts of drops per diameter and velocity class
102
114
  long_name: Drop counts per diameter and velocity class
@@ -138,20 +150,20 @@ Nremoved:
138
150
  long_name: Total number of discarded drops
139
151
  units: ""
140
152
  Nbins:
141
- description: Number of diameter bins with drops
142
- long_name: Number of diameter bins with drops
153
+ description: Number of diameter bins with non-zero particle counts
154
+ long_name: Number of diameter bins with non-zero particle counts
143
155
  units: ""
144
156
  Nbins_missing:
145
- description: Number of diameter bins with no drops
146
- long_name: Number of diameter bins with no drops
157
+ description: Number of diameter bins with zero particle counts between the first and last non-zero bins
158
+ long_name: Number of diameter bins with zero particle counts
147
159
  units: ""
148
160
  Nbins_missing_fraction:
149
- description: Fraction of diameter bins with no drops
150
- long_name: Fraction of diameter bins with no drops
161
+ description: Fraction of diameter bins with zero particle counts between the first and last non-zero bins
162
+ long_name: Fraction of diameter bins with zero particle counts
151
163
  units: ""
152
164
  Nbins_missing_consecutive:
153
- description: Maximum number of consecutive diameter bins with no drops
154
- long_name: Maximum number of consecutive diameter bins with no drops
165
+ description: Maximum number of consecutive diameter bins with zero particle counts between the first and last non-zero bins
166
+ long_name: Maximum number of consecutive diameter bins with zero particle counts
155
167
  units: ""
156
168
  drop_number_concentration:
157
169
  description: Number concentration of drops per diameter class per unit volume
@@ -371,6 +383,30 @@ KLDiv:
371
383
  description: A distance measure of how the modelled distribution diverges from the observed N(D) one.
372
384
  units: ""
373
385
  valid_range: [0, inf]
386
+ JSD:
387
+ long_name: Jensen-Shannon Distance
388
+ standard_name: jensen_shannon_distance
389
+ description: Jensen-Shannon distance between observed and modelled N(D).
390
+ units: ""
391
+ valid_range: [0, inf]
392
+ WD:
393
+ long_name: Wasserstein Distance
394
+ standard_name: wasserstein_distance
395
+ description: Wasserstein-1 distance between observed and modelled N(D).
396
+ units: ""
397
+ valid_range: [0, inf]
398
+ KS:
399
+ long_name: Kolmogorov-Smirnov Statistic
400
+ standard_name: kolmogorov_smirnov_statistic
401
+ description: Kolmogorov-Smirnov test statistic between observed and modelled N(D).
402
+ units: ""
403
+ valid_range: [0, 1]
404
+ KS_pvalue:
405
+ long_name: Kolmogorov-Smirnov Test p-value
406
+ standard_name: kolmogorov_smirnov_pvalue
407
+ description: Kolmogorov-Smirnov test p-value for the comparison between observed and modelled N(D).
408
+ units: ""
409
+ valid_range: [0, 1]
374
410
  air_temperature:
375
411
  description: "Air temperature in degrees Celsius (C)"
376
412
  long_name: Air temperature
@@ -387,3 +423,21 @@ wind_direction:
387
423
  description: "Wind direction in degrees (0-360)"
388
424
  long_name: Wind direction
389
425
  units: "degrees"
426
+ flag_splashing:
427
+ long_name: "Splashing detection flag"
428
+ standard_name: "flag_splashing"
429
+ units: "1"
430
+ description: "Quality control flag indicating the presence of splashing drops. Flag is set to 1 when fraction_splashing (n_splashing/n_particles) exceeds 0.1 during no precipitation periods. Splashing drops are identified as particles with diameter_lower >= 0.0 mm, diameter_upper <= 6 mm, and velocity_upper <= 0.6 m/s."
431
+ valid_min: 0
432
+ valid_max: 1
433
+ flag_values: [0, 1]
434
+ flag_meanings: "no_splashing splashing_detected"
435
+ flag_wind_artefacts:
436
+ long_name: "Wind artefacts detection flag"
437
+ standard_name: "flag_wind_artefacts"
438
+ units: "1"
439
+ description: "Quality control flag for strong wind artefacts in heavy rainfall. Wind artefacts are detected using criteria from Friedrich et al. (2013): particles with diameter_lower >= 5 mm and velocity_upper < 1 m/s."
440
+ valid_min: 0
441
+ valid_max: 1
442
+ flag_values: [0, 1]
443
+ flag_meanings: "no_wind_artefacts wind_artefacts_detected"
@@ -478,6 +478,34 @@ KLDiv:
478
478
  shuffle: true
479
479
  fletcher32: false
480
480
  contiguous: false
481
+ JSD:
482
+ dtype: float32
483
+ zlib: true
484
+ complevel: 3
485
+ shuffle: true
486
+ fletcher32: false
487
+ contiguous: false
488
+ WD:
489
+ dtype: float32
490
+ zlib: true
491
+ complevel: 3
492
+ shuffle: true
493
+ fletcher32: false
494
+ contiguous: false
495
+ KS:
496
+ dtype: float32
497
+ zlib: true
498
+ complevel: 3
499
+ shuffle: true
500
+ fletcher32: false
501
+ contiguous: false
502
+ KS_pvalue:
503
+ dtype: float32
504
+ zlib: true
505
+ complevel: 3
506
+ shuffle: true
507
+ fletcher32: false
508
+ contiguous: false
481
509
  air_temperature:
482
510
  dtype: uint16
483
511
  scale_factor: 0.1
@@ -0,0 +1,8 @@
1
+ psd_model: "GammaPSD"
2
+ optimization: "GS"
3
+ optimization_settings:
4
+ objectives:
5
+ - target: "N(D)"
6
+ transformation: "identity"
7
+ censoring: "none"
8
+ loss: "SSE"
@@ -1,6 +1,6 @@
1
1
  psd_model: "GammaPSD"
2
2
  optimization: "ML"
3
- optimization_kwargs:
3
+ optimization_settings:
4
4
  init_method: "None"
5
5
  probability_method: "cdf"
6
6
  likelihood: "multinomial"
@@ -0,0 +1,8 @@
1
+ psd_model: "LognormalPSD"
2
+ optimization: "GS"
3
+ optimization_settings:
4
+ objectives:
5
+ - target: "N(D)"
6
+ transformation: "log"
7
+ censoring: "none"
8
+ loss: "SSE"
@@ -0,0 +1,8 @@
1
+ psd_model: "LognormalPSD"
2
+ optimization: "GS"
3
+ optimization_settings:
4
+ objectives:
5
+ - target: "N(D)"
6
+ transformation: "identity"
7
+ censoring: "none"
8
+ loss: "SSE"
@@ -1,6 +1,6 @@
1
1
  psd_model: "LognormalPSD"
2
2
  optimization: "ML"
3
- optimization_kwargs:
3
+ optimization_settings:
4
4
  init_method: "None"
5
5
  probability_method: "cdf"
6
6
  likelihood: "multinomial"
@@ -0,0 +1,8 @@
1
+ psd_model: "NormalizedGammaPSD"
2
+ optimization: "GS"
3
+ optimization_settings:
4
+ objectives:
5
+ - target: "N(D)"
6
+ transformation: "log"
7
+ censoring: "none"
8
+ loss: "SSE"
@@ -0,0 +1,8 @@
1
+ psd_model: "NormalizedGammaPSD"
2
+ optimization: "GS"
3
+ optimization_settings:
4
+ objectives:
5
+ - target: "N(D)"
6
+ transformation: "identity"
7
+ censoring: "none"
8
+ loss: "SSE"
@@ -2,11 +2,11 @@ temporal_resolutions: ["1MIN", "5MIN", "10MIN"]
2
2
  models:
3
3
  [
4
4
  "GAMMA_ML",
5
- "GAMMA_GS_ND_MAE",
6
- "NGAMMA_GS_LOG_ND_MAE",
7
- "NGAMMA_GS_ND_MAE",
5
+ "GAMMA_GS_ND_SSE",
6
+ "NGAMMA_GS_LOG_ND_SSE",
7
+ "NGAMMA_GS_ND_SSE",
8
8
  "LOGNORMAL_ML",
9
- "LOGNORMAL_GS_ND_MAE",
9
+ "LOGNORMAL_GS_ND_SSE",
10
10
  ]
11
11
  archive_options:
12
12
  strategy: time_block
@@ -66,7 +66,7 @@ def get_fall_velocity_lee_2015(diameter):
66
66
 
67
67
  Returns
68
68
  -------
69
- fall_velocity : ndarray or xarray.DataArray
69
+ fall_velocity : numpy.ndarray or xarray.DataArray
70
70
  Terminal fall velocity [m s⁻¹].
71
71
  """
72
72
  fall_velocity = 1.10 * diameter**0.28
@@ -86,7 +86,7 @@ def get_fall_velocity_locatelli_1974_lump(diameter):
86
86
 
87
87
  Returns
88
88
  -------
89
- fall_velocity : ndarray or xarray.DataArray
89
+ fall_velocity : numpy.ndarray or xarray.DataArray
90
90
  Terminal fall velocity [m s⁻¹].
91
91
 
92
92
  Reference
@@ -114,7 +114,7 @@ def get_fall_velocity_locatelli_1974_conical(diameter):
114
114
 
115
115
  Returns
116
116
  -------
117
- fall_velocity : ndarray or xarray.DataArray
117
+ fall_velocity : numpy.ndarray or xarray.DataArray
118
118
  Terminal fall velocity [m s⁻¹].
119
119
 
120
120
  Reference
@@ -140,7 +140,7 @@ def get_fall_velocity_locatelli_1974_hexagonal(diameter):
140
140
 
141
141
  Returns
142
142
  -------
143
- fall_velocity : ndarray or xarray.DataArray
143
+ fall_velocity : numpy.ndarray or xarray.DataArray
144
144
  Terminal fall velocity [m s⁻¹].
145
145
 
146
146
  Reference
@@ -166,7 +166,7 @@ def get_fall_velocity_heymsfield_2014(diameter):
166
166
 
167
167
  Returns
168
168
  -------
169
- fall_velocity : ndarray or xarray.DataArray
169
+ fall_velocity : numpy.ndarray or xarray.DataArray
170
170
  Terminal fall velocity [m s⁻¹].
171
171
  Reference
172
172
  ---------
@@ -319,7 +319,7 @@ def retrieve_graupel_heymsfield2014_fall_velocity(
319
319
  ----------
320
320
  diameter : array-like
321
321
  Diameter of the graupel particles in millimeters.
322
- ds_env : xr.Dataset
322
+ ds_env : xarray.Dataset
323
323
  A dataset containing the following environmental variables:
324
324
  - 'altitude' : Altitude in meters (m).
325
325
  - 'latitude' : Latitude in degrees.
@@ -414,7 +414,7 @@ def get_graupel_fall_velocity(diameter, model, ds_env=None, minimum_diameter=0.5
414
414
  The model to use for calculating the graupel fall velocity. Must be one of the following:
415
415
  'Lee2015', 'Locatelli1974Lump', 'Locatelli1974Conical', 'Locatelli1974Hexagonal',
416
416
  'Heymsfield2014', 'Heymsfield2018'.
417
- ds_env : xr.Dataset, optional
417
+ ds_env : xarray.Dataset, optional
418
418
  A dataset containing the following environmental variables:
419
419
  - 'altitude' (m)
420
420
  - 'latitude' (°)
@@ -426,7 +426,7 @@ def get_graupel_fall_velocity(diameter, model, ds_env=None, minimum_diameter=0.5
426
426
 
427
427
  Returns
428
428
  -------
429
- fall_velocity : xr.DataArray
429
+ fall_velocity : xarray.DataArray
430
430
  The calculated graupel fall velocities per diameter.
431
431
 
432
432
  Notes
@@ -229,7 +229,7 @@ def get_hail_fall_velocity(diameter, model, ds_env=None, minimum_diameter=4):
229
229
  model : str
230
230
  The model to use for calculating the hail fall velocity. Must be one of the following:
231
231
  'Laurie1960', 'Knight1983LD', 'Knight1983HD', 'Heymsfield2014', 'Heymsfield2018', 'Fehlmann2020'.
232
- ds_env : xr.Dataset, optional
232
+ ds_env : xarray.Dataset, optional
233
233
  A dataset containing the following environmental variables:
234
234
  - 'altitude' (m)
235
235
  - 'latitude' (°)
@@ -241,7 +241,7 @@ def get_hail_fall_velocity(diameter, model, ds_env=None, minimum_diameter=4):
241
241
 
242
242
  Returns
243
243
  -------
244
- fall_velocity : xr.DataArray
244
+ fall_velocity : xarray.DataArray
245
245
  The calculated hail fall velocities per diameter.
246
246
 
247
247
  """
@@ -58,6 +58,33 @@ def get_fall_velocity_atlas_1973(diameter):
58
58
  return fall_velocity
59
59
 
60
60
 
61
+ def get_fall_velocity_lhermitte1988(diameter):
62
+ """
63
+ Compute the fall velocity of raindrops using the Lhermitte et al. (1988) relationship.
64
+
65
+ Parameters
66
+ ----------
67
+ diameter : array-like
68
+ Diameter of the raindrops in millimeters.
69
+
70
+ Returns
71
+ -------
72
+ fall_velocity : array-like
73
+ Fall velocities corresponding to the input diameters, in meters per second.
74
+
75
+ References
76
+ ----------
77
+ Roger M. Lhermitte, 1988.
78
+ Observation of rain at vertical incidence with a 94 GHz Doppler radar: An insight on Mie scattering.
79
+ Geophysical Research Letters, 15(10), 1125-1128.
80
+ https://doi.org/10.1029/GL015i010p01125
81
+ """
82
+ fall_velocity = 9.25 * (1 - np.exp(-(0.068 * diameter**2 + 0.488 * diameter))) # Ladino 2025
83
+ # fall_velocity = 9.25 * (1 - np.exp(-(6.8 * (diameter*10)**2 + 4.88*(diameter*10)))) # Lhermitte 1988 formula wrong
84
+ fall_velocity = fall_velocity.clip(min=0, max=None)
85
+ return fall_velocity
86
+
87
+
61
88
  def get_fall_velocity_brandes_2002(diameter):
62
89
  """
63
90
  Compute the fall velocity of raindrops using the Brandes et al. (2002) relationship.
@@ -298,7 +325,7 @@ def retrieve_raindrop_beard_fall_velocity(
298
325
  ----------
299
326
  diameter : array-like
300
327
  Diameter of the raindrops in millimeters.
301
- ds_env : xr.Dataset
328
+ ds_env : xarray.Dataset
302
329
  A dataset containing the following environmental variables:
303
330
  - 'altitude' : Altitude in meters (m).
304
331
  - 'latitude' : Latitude in degrees.
@@ -395,8 +422,9 @@ def retrieve_raindrop_beard_fall_velocity(
395
422
  RAIN_FALL_VELOCITY_MODELS = {
396
423
  "Atlas1973": get_fall_velocity_atlas_1973,
397
424
  "Beard1976": retrieve_raindrop_beard_fall_velocity,
398
- "Brandes2002": get_fall_velocity_brandes_2002,
399
425
  "Uplinger1981": get_fall_velocity_uplinger_1981,
426
+ "Lhermitte1988": get_fall_velocity_lhermitte1988,
427
+ "Brandes2002": get_fall_velocity_brandes_2002,
400
428
  "VanDijk2002": get_fall_velocity_van_dijk_2002,
401
429
  }
402
430
 
@@ -448,7 +476,7 @@ def get_rain_fall_velocity(diameter, model, ds_env=None):
448
476
  model : str
449
477
  The model to use for calculating the raindrop fall velocity. Must be one of the following:
450
478
  'Atlas1973', 'Beard1976', 'Brandes2002', 'Lhermitte1988', 'Uplinger1981', 'VanDijk2002'.
451
- ds_env : xr.Dataset, optional
479
+ ds_env : xarray.Dataset, optional
452
480
  Only required if model is 'Beard1976'.
453
481
  A dataset containing the following environmental variables:
454
482
  - 'altitude' (m)
@@ -461,7 +489,7 @@ def get_rain_fall_velocity(diameter, model, ds_env=None):
461
489
 
462
490
  Returns
463
491
  -------
464
- fall_velocity : xr.DataArray
492
+ fall_velocity : xarray.DataArray
465
493
  The calculated raindrop fall velocities per diameter.
466
494
 
467
495
  Notes
@@ -532,7 +560,7 @@ def get_rain_fall_velocity_from_ds(ds, ds_env=None, model="Beard1976", diameter=
532
560
  model : str, optional
533
561
  Model to compute rain drop fall velocity.
534
562
  The default model is ``"Beard1976"``.
535
- ds_env : xr.Dataset, optional
563
+ ds_env : xarray.Dataset, optional
536
564
  Only required if model is 'Beard1976'.
537
565
  A dataset containing the following environmental variables:
538
566
  - 'temperature' : Temperature in degrees Kelvin (K).
disdrodb/issue/checks.py CHANGED
@@ -176,7 +176,7 @@ def _get_issue_timesteps(issue_dict):
176
176
  # Check validity
177
177
  timesteps = check_timesteps(timesteps)
178
178
  # Sort
179
- timesteps.sort()
179
+ timesteps = np.sort(timesteps)
180
180
  return timesteps
181
181
 
182
182
 
disdrodb/l0/l0_reader.py CHANGED
@@ -294,7 +294,7 @@ def is_documented_by(original):
294
294
 
295
295
  Parameters
296
296
  ----------
297
- original : function
297
+ original : callable
298
298
  Function to take the docstring from.
299
299
  """
300
300
 
@@ -119,8 +119,8 @@ def read_raw_text_file(
119
119
  # Preprocess reader_kwargs
120
120
  reader_kwargs = preprocess_reader_kwargs(reader_kwargs)
121
121
 
122
- # Enforce all raw files columns with dtype = 'object'
123
- dtype = "object"
122
+ # Enforce all raw files columns with dtype = 'str'
123
+ dtype = "str"
124
124
 
125
125
  # Try to read the data
126
126
  try:
@@ -186,7 +186,7 @@ def replace_custom_nan_flags(ds, dict_nan_flags, logger=None, verbose=False):
186
186
 
187
187
  Parameters
188
188
  ----------
189
- df : xarray.Dataset
189
+ ds : xarray.Dataset
190
190
  Input xarray dataset
191
191
  dict_nan_flags : dict
192
192
  Dictionary with nan flags value to set as ``np.nan``.
@@ -221,7 +221,7 @@ def replace_nan_flags(ds, sensor_name, verbose, logger=None):
221
221
 
222
222
  Parameters
223
223
  ----------
224
- ds : xarray.Dataset
224
+ ds : xarray.Dataset
225
225
  Input xarray dataset
226
226
  dict_nan_flags : dict
227
227
  Dictionary with nan flags value to set as np.nan
@@ -245,7 +245,7 @@ def set_nan_outside_data_range(ds, sensor_name, verbose, logger=None):
245
245
 
246
246
  Parameters
247
247
  ----------
248
- ds : xarray.Dataset
248
+ ds : xarray.Dataset
249
249
  Input xarray dataset
250
250
  sensor_name : str
251
251
  Name of the sensor.
@@ -283,7 +283,7 @@ def set_nan_invalid_values(ds, sensor_name, verbose, logger=None):
283
283
 
284
284
  Parameters
285
285
  ----------
286
- ds : xarray.Dataset
286
+ ds : xarray.Dataset
287
287
  Input xarray dataset
288
288
  sensor_name : str
289
289
  Name of the sensor.
@@ -404,7 +404,7 @@ def remove_issue_timesteps(
404
404
  issue_dict : dict
405
405
  Dictionary with optional keys 'timesteps' (list of datetimes) and
406
406
  'time_periods' (list of (start, end) tuples).
407
- logger : any, optional
407
+ logger : optional
408
408
  Logger instance to record dropped steps, by default None.
409
409
  verbose : bool, optional
410
410
  Whether to log informational messages, by default False.
@@ -85,10 +85,10 @@ def replace_empty_strings_with_zeros(values):
85
85
  return values
86
86
 
87
87
 
88
- def format_string_array(string: str, n_values: int) -> np.array:
88
+ def format_string_array(string: str, n_values: int):
89
89
  """Split a string with multiple numbers separated by a delimiter into an 1D array.
90
90
 
91
- e.g. : format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
91
+ format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
92
92
 
93
93
  If empty string ("") or "" --> Return an array of zeros
94
94
  If the list length is not n_values -> Return an array of np.nan
@@ -104,7 +104,7 @@ def format_string_array(string: str, n_values: int) -> np.array:
104
104
 
105
105
  Returns
106
106
  -------
107
- np.array
107
+ numpy.ndarray
108
108
  array of float
109
109
  """
110
110
  # Check for empty string or "0" case
@@ -143,42 +143,36 @@ def format_string_array(string: str, n_values: int) -> np.array:
143
143
 
144
144
 
145
145
  def reshape_raw_spectrum(
146
- arr: np.array,
146
+ arr,
147
147
  dims_order: list,
148
148
  dims_size_dict: dict,
149
149
  n_timesteps: int,
150
- ) -> np.array:
150
+ ):
151
151
  """Reshape the raw spectrum to a 2D+time array.
152
152
 
153
153
  The array has dimensions ["time"] + dims_order
154
154
 
155
155
  Parameters
156
156
  ----------
157
- arr : np.array
157
+ arr : numpy.ndarray
158
158
  Input array.
159
159
  dims_order : list
160
160
  The order of dimension in the raw spectrum.
161
-
162
- Examples
163
- --------
164
- - OTT PARSIVEL spectrum [v1d1 ... v1d32, v2d1, ..., v2d32]
165
- --> dims_order = ["diameter_bin_center", "velocity_bin_center"]
166
- - Thies LPM spectrum [v1d1 ... v20d1, v1d2, ..., v20d2]
167
- --> dims_order = ["velocity_bin_center", "diameter_bin_center"]
161
+ For OTT PARSIVEL spectrum [v1d1 ... v1d32, v2d1, ..., v2d32], thus
162
+ ``dims_order = ["diameter_bin_center", "velocity_bin_center"]``
163
+ For Thies LPM spectrum [v1d1 ... v20d1, v1d2, ..., v20d2], thus
164
+ ``dims_order = ["velocity_bin_center", "diameter_bin_center"]``
168
165
  dims_size_dict : dict
169
166
  Dictionary with the number of bins for each dimension.
170
- For PARSIVEL and PARSIVEL2:
171
- {"diameter_bin_center": 32, "velocity_bin_center": 32}
172
- For LPM
173
- {"diameter_bin_center": 22, "velocity_bin_center": 20}
174
- For PWS100
175
- {"diameter_bin_center": 34, "velocity_bin_center": 34}
167
+ For PARSIVEL and PARSIVEL2: ``{"diameter_bin_center": 32, "velocity_bin_center": 32}``
168
+ For LPM: ``{"diameter_bin_center": 22, "velocity_bin_center": 20}``
169
+ For PWS100: ``{"diameter_bin_center": 34, "velocity_bin_center": 34}``
176
170
  n_timesteps : int
177
171
  Number of timesteps.
178
172
 
179
173
  Returns
180
174
  -------
181
- np.array
175
+ numpy.ndarray
182
176
  Output array.
183
177
 
184
178
  Raises
@@ -309,14 +303,16 @@ def ensure_valid_geolocation(ds: xr.Dataset, coord: str, errors: str = "ignore")
309
303
  Dataset containing the coordinate.
310
304
  coord : str
311
305
  Name of the coordinate variable to validate.
312
- errors : {"ignore", "raise", "coerce"}, default "ignore"
306
+ errors : str, optional
307
+ How to handle invalid values. Options are:
308
+
313
309
  - "ignore": nothing is done.
314
310
  - "raise" : raise ValueError if invalid values are found.
315
311
  - "coerce": out-of-range values are replaced with NaN.
316
312
 
317
313
  Returns
318
314
  -------
319
- xr.Dataset
315
+ xarray.Dataset
320
316
  Dataset with validated coordinate values.
321
317
  """
322
318
  # Define coordinates ranges
@@ -376,8 +372,8 @@ def set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
376
372
 
377
373
  Returns
378
374
  -------
379
- ds
380
- xr.Dataset.
375
+ xarray.Dataset
376
+ Dataset with variable attributes.
381
377
  """
382
378
  # Retrieve attributes dictionaries
383
379
  cf_attrs_dict = get_l0b_cf_attrs_dict(sensor_name)
@@ -70,16 +70,6 @@ def split_dataset_by_sampling_intervals(
70
70
  """
71
71
  Split a dataset into subsets where each subset has a consistent sampling interval.
72
72
 
73
- Notes
74
- -----
75
- - Does not modify timesteps (regularization is left to `regularize_timesteps`).
76
- - Assumes no duplicated timesteps in the dataset.
77
- - If only one measurement interval is specified, no timestep-diff checks are performed.
78
- - If multiple measurement intervals are specified:
79
- * Raises an error if *none* of the expected intervals appear.
80
- * Splits where interval changes.
81
- - Segments shorter than `min_block_size` are discarded.
82
-
83
73
  Parameters
84
74
  ----------
85
75
  ds : xarray.Dataset
@@ -97,9 +87,24 @@ def split_dataset_by_sampling_intervals(
97
87
  Whether time refers to the end of the measurement interval.
98
88
  The default is True.
99
89
 
90
+ Notes
91
+ -----
92
+ Does not modify timesteps (regularization is left to `regularize_timesteps`).
93
+
94
+ Assumes no duplicated timesteps in the dataset.
95
+
96
+ If only one measurement interval is specified, no timestep-diff checks are performed.
97
+
98
+ If multiple measurement intervals are specified:
99
+
100
+ - Raises an error if *none* of the expected intervals appear.
101
+ - Splits where interval changes.
102
+
103
+ Segments shorter than `min_block_size` are discarded.
104
+
100
105
  Returns
101
106
  -------
102
- dict[int, xr.Dataset]
107
+ dict[int, xarray.Dataset]
103
108
  A dictionary where keys are the identified sampling intervals (in seconds),
104
109
  and values are xarray.Datasets containing only data from those sampling intervals.
105
110
  """
@@ -594,7 +599,7 @@ def check_timesteps_regularity(ds, sample_interval, verbose=False, logger=None):
594
599
  #### Wrapper
595
600
 
596
601
 
597
- def _finalize_l0c_dataset(ds, sample_interval, sensor_name, verbose=True, logger=None):
602
+ def finalize_l0c_dataset(ds, sample_interval, sensor_name, verbose=True, logger=None):
598
603
  """Finalize a L0C dataset with unique sampling interval.
599
604
 
600
605
  It adds the sampling_interval coordinate and it regularizes the timesteps for trailing seconds.
@@ -756,7 +761,7 @@ def create_l0c_datasets(
756
761
  # - Add and ensure sample_interval coordinate has just 1 value (not varying with time)
757
762
  # - Regularize timesteps for trailing seconds
758
763
  dict_ds = {
759
- sample_interval: _finalize_l0c_dataset(
764
+ sample_interval: finalize_l0c_dataset(
760
765
  ds=ds,
761
766
  sample_interval=sample_interval,
762
767
  sensor_name=sensor_name,
@@ -257,6 +257,10 @@ def read_SM05_telegram(
257
257
  # time_str = df["time"].str.extract(r"(\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2})")[0]
258
258
  df["time"] = pd.to_datetime(time_str, format="%d/%m/%Y %H:%M:%S", errors="coerce")
259
259
 
260
+ # Remove rows where time year is 1999
261
+ # - Timesteps dated 1999-11-30 sometimes appear when the sensor fails
262
+ df = df[df["time"].dt.year != 1999]
263
+
260
264
  # Remove checksum from raw_drop_number
261
265
  df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]
262
266