disdrodb 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/info.py +3 -3
  7. disdrodb/api/io.py +48 -8
  8. disdrodb/api/path.py +116 -133
  9. disdrodb/api/search.py +12 -3
  10. disdrodb/cli/disdrodb_create_summary.py +113 -0
  11. disdrodb/cli/disdrodb_create_summary_station.py +11 -1
  12. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  17. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  18. disdrodb/constants.py +1 -1
  19. disdrodb/data_transfer/download_data.py +123 -7
  20. disdrodb/etc/products/L1/global.yaml +1 -1
  21. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  22. disdrodb/etc/products/L2E/global.yaml +1 -1
  23. disdrodb/etc/products/L2M/GAMMA_GS_ND_MAE.yaml +6 -0
  24. disdrodb/etc/products/L2M/GAMMA_ML.yaml +1 -1
  25. disdrodb/etc/products/L2M/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  26. disdrodb/etc/products/L2M/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  27. disdrodb/etc/products/L2M/LOGNORMAL_ML.yaml +8 -0
  28. disdrodb/etc/products/L2M/global.yaml +11 -3
  29. disdrodb/issue/writer.py +2 -0
  30. disdrodb/l0/check_configs.py +49 -16
  31. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  32. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  33. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  34. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  35. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  36. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  37. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  38. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  39. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  40. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  41. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  42. disdrodb/l0/l0a_processing.py +10 -5
  43. disdrodb/l0/l0b_nc_processing.py +10 -6
  44. disdrodb/l0/l0b_processing.py +92 -72
  45. disdrodb/l0/l0c_processing.py +369 -251
  46. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +8 -1
  47. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  48. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  49. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  50. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  51. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  52. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  54. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  55. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  56. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  58. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  59. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +5 -14
  60. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  61. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  62. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  63. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  66. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  67. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  70. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  73. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  79. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  80. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PANGASA.py +232 -0
  81. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  82. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +120 -0
  83. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +7 -25
  84. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  85. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  86. disdrodb/l1/beard_model.py +31 -129
  87. disdrodb/l1/fall_velocity.py +156 -57
  88. disdrodb/l1/filters.py +25 -28
  89. disdrodb/l1/processing.py +12 -14
  90. disdrodb/l1_env/routines.py +46 -17
  91. disdrodb/l2/empirical_dsd.py +6 -0
  92. disdrodb/l2/processing.py +3 -3
  93. disdrodb/metadata/checks.py +132 -125
  94. disdrodb/metadata/geolocation.py +0 -2
  95. disdrodb/psd/fitting.py +180 -210
  96. disdrodb/psd/models.py +1 -1
  97. disdrodb/routines/__init__.py +54 -0
  98. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  99. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  100. disdrodb/{l2/routines.py → routines/l2.py} +284 -485
  101. disdrodb/{routines.py → routines/wrappers.py} +100 -7
  102. disdrodb/scattering/axis_ratio.py +95 -85
  103. disdrodb/scattering/permittivity.py +24 -0
  104. disdrodb/scattering/routines.py +56 -36
  105. disdrodb/summary/routines.py +147 -45
  106. disdrodb/utils/archiving.py +434 -0
  107. disdrodb/utils/attrs.py +2 -0
  108. disdrodb/utils/cli.py +5 -5
  109. disdrodb/utils/dask.py +62 -1
  110. disdrodb/utils/decorators.py +31 -0
  111. disdrodb/utils/encoding.py +10 -1
  112. disdrodb/{l2 → utils}/event.py +1 -66
  113. disdrodb/utils/logger.py +1 -1
  114. disdrodb/utils/manipulations.py +22 -12
  115. disdrodb/utils/routines.py +166 -0
  116. disdrodb/utils/time.py +5 -293
  117. disdrodb/utils/xarray.py +3 -0
  118. disdrodb/viz/plots.py +109 -15
  119. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/METADATA +3 -2
  120. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/RECORD +124 -96
  121. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/entry_points.txt +1 -0
  122. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/licenses/LICENSE +0 -0
  124. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/top_level.txt +0 -0
@@ -15,10 +15,12 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Utilities to create summary statistics."""
18
+ import gc
19
+ import importlib
18
20
  import os
21
+ import shutil
19
22
  import subprocess
20
23
  import tempfile
21
- from shutil import which
22
24
 
23
25
  import matplotlib.lines as mlines
24
26
  import matplotlib.pyplot as plt
@@ -33,14 +35,15 @@ import disdrodb
33
35
  from disdrodb.api.path import define_station_dir
34
36
  from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
35
37
  from disdrodb.l2.empirical_dsd import get_drop_average_velocity
36
- from disdrodb.l2.event import group_timesteps_into_event
37
38
  from disdrodb.scattering import RADAR_OPTIONS
38
39
  from disdrodb.utils.dataframe import compute_2d_histogram, log_arange
40
+ from disdrodb.utils.event import group_timesteps_into_event
39
41
  from disdrodb.utils.manipulations import (
40
42
  get_diameter_bin_edges,
41
43
  resample_drop_number_concentration,
42
44
  unstack_radar_variables,
43
45
  )
46
+ from disdrodb.utils.time import get_sampling_information
44
47
  from disdrodb.utils.warnings import suppress_warnings
45
48
  from disdrodb.utils.yaml import write_yaml
46
49
  from disdrodb.viz import compute_dense_lines, max_blend_images, to_rgba
@@ -58,7 +61,7 @@ def is_latex_engine_available() -> bool:
58
61
  bool
59
62
  True if tectonic is found, False otherwise.
60
63
  """
61
- return which("tectonic") is not None
64
+ return shutil.which("tectonic") is not None
62
65
 
63
66
 
64
67
  def save_table_to_pdf(
@@ -142,7 +145,7 @@ def save_table_to_pdf(
142
145
  check=True,
143
146
  )
144
147
  # Move result
145
- os.replace(os.path.join(td, "table.pdf"), filepath)
148
+ shutil.move(os.path.join(td, "table.pdf"), filepath)
146
149
 
147
150
 
148
151
  ####-----------------------------------------------------------------
@@ -245,8 +248,9 @@ def create_table_dsd_summary(df):
245
248
  df_stats["SKEWNESS"] = df_subset.skew()
246
249
  df_stats["KURTOSIS"] = df_subset.kurt()
247
250
 
248
- # Round statistics
249
- df_stats = df_stats.astype(float).round(2)
251
+ # Round float columns to 2 decimals, leave ints unchanged
252
+ float_cols = df_stats.select_dtypes(include=["float"]).columns
253
+ df_stats[float_cols] = df_stats[float_cols].astype(float).round(decimals=2)
250
254
  return df_stats
251
255
 
252
256
 
@@ -325,15 +329,19 @@ def create_table_events_summary(df):
325
329
  events_stats.append(event_stats)
326
330
 
327
331
  df_events = pd.DataFrame.from_records(events_stats)
332
+
333
+ # Round float columns to 2 decimals, leave ints unchanged
334
+ float_cols = df_events.select_dtypes(include=["float"]).columns
335
+ df_events[float_cols] = df_events[float_cols].astype(float).round(decimals=2)
328
336
  return df_events
329
337
 
330
338
 
331
339
  def prepare_latex_table_dsd_summary(df):
332
340
  """Prepare a DataFrame with DSD statistics for LaTeX table output."""
333
341
  df = df.copy()
334
- # Round float columns to nearest integer, leave ints unchanged
335
- float_cols = df.select_dtypes(include=["float"]).columns
336
- df[float_cols] = df[float_cols].astype(float).round(decimals=2).astype(str)
342
+ # Cast numeric columns to string
343
+ numeric_cols = df.select_dtypes(include=["float", "int"]).columns
344
+ df[numeric_cols] = df[numeric_cols].astype(str)
337
345
  # Rename
338
346
  rename_dict = {
339
347
  "W": r"$W\,[\mathrm{g}\,\mathrm{m}^{-3}]$", # [g/m3]
@@ -358,9 +366,9 @@ def prepare_latex_table_events_summary(df):
358
366
  # Round datetime to minutes
359
367
  df["start_time"] = df["start_time"].dt.strftime("%Y-%m-%d %H:%M")
360
368
  df["end_time"] = df["end_time"].dt.strftime("%Y-%m-%d %H:%M")
361
- # Round float columns to nearest integer, leave ints unchanged
362
- float_cols = df.select_dtypes(include=["float"]).columns
363
- df[float_cols] = df[float_cols].astype(float).round(decimals=2).astype(str)
369
+ # Cast numeric columns to string
370
+ numeric_cols = df.select_dtypes(include=["float", "int"]).columns
371
+ df[numeric_cols] = df[numeric_cols].astype(str)
364
372
  # Rename
365
373
  rename_dict = {
366
374
  "start_time": r"Start",
@@ -400,14 +408,35 @@ def prepare_latex_table_events_summary(df):
400
408
  #### Powerlaw routines
401
409
 
402
410
 
403
- def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False):
411
+ def fit_powerlaw_with_ransac(x, y):
412
+ """Fit powerlaw relationship with RANSAC algorithm."""
413
+ from sklearn.linear_model import LinearRegression, RANSACRegressor
414
+
415
+ x = np.asanyarray(x)
416
+ y = np.asanyarray(y)
417
+ X = np.log10(x).reshape(-1, 1)
418
+ Y = np.log10(y)
419
+ ransac = RANSACRegressor(
420
+ estimator=LinearRegression(),
421
+ min_samples=0.5,
422
+ residual_threshold=0.3,
423
+ random_state=42,
424
+ )
425
+ ransac.fit(X, Y)
426
+ b = ransac.estimator_.coef_[0] # slope
427
+ loga = ransac.estimator_.intercept_ # intercept
428
+ a = 10**loga
429
+ return a, b
430
+
431
+
432
+ def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False, use_ransac=True):
404
433
  """
405
434
  Fit a power-law relationship ``y = a * x**b`` to binned median values.
406
435
 
407
436
  This function bins ``x`` into intervals defined by ``xbins``, computes the
408
437
  median of ``y`` in each bin (robust to outliers), and fits a power-law model
409
- using the Levenberg-Marquardt algorithm. Optionally, ``x`` can be converted
410
- from decibel units to linear scale automatically before fitting.
438
+ using the RANSAC or Levenberg-Marquardt algorithm.
439
+ Optionally, ``x`` can be converted from decibel units to linear scale automatically before fitting.
411
440
 
412
441
  Parameters
413
442
  ----------
@@ -424,6 +453,11 @@ def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False):
424
453
  x_in_db : bool, optional
425
454
  If True, converts ``x`` values from decibels (dB) to linear scale using
426
455
  :func:`disdrodb.idecibel`. Default is False.
456
+ use_ransac : bool, optional
457
+ Whether to fit the powerlaw using the Random Sample Consensus (RANSAC)
458
+ algorithm or using the Levenberg-Marquardt algorithm.
459
+ The default is True.
460
+ To fit with RANSAC, scikit-learn must be installed.
427
461
 
428
462
  Returns
429
463
  -------
@@ -432,6 +466,8 @@ def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False):
432
466
  params_std : tuple of float
433
467
  One standard deviation uncertainties ``(a_std, b_std)`` estimated from
434
468
  the covariance matrix of the fit.
469
+ The standard deviations are currently not available
470
+ (returned as ``None``) when fitting with the RANSAC algorithm.
435
471
 
436
472
  Notes
437
473
  -----
@@ -458,6 +494,11 @@ def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False):
458
494
  if os.environ.get("PYTEST_CURRENT_TEST"):
459
495
  min_counts = 0
460
496
 
497
+ # Check if RANSAC algorithm is available
498
+ sklearn_available = importlib.util.find_spec("sklearn") is not None
499
+ if use_ransac and not sklearn_available:
500
+ use_ransac = False
501
+
461
502
  # Ensure numpy array
462
503
  x = np.asanyarray(x)
463
504
  y = np.asanyarray(y)
@@ -512,19 +553,27 @@ def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False):
512
553
 
513
554
  # Fit the data
514
555
  with suppress_warnings():
515
- (a, b), pcov = curve_fit(
516
- lambda x, a, b: a * np.power(x, b),
517
- df_agg["x"],
518
- df_agg["y"],
519
- method="lm",
520
- sigma=sigma,
521
- absolute_sigma=True,
522
- maxfev=10_000, # max n iterations
523
- )
524
- (a_std, b_std) = np.sqrt(np.diag(pcov))
556
+ if use_ransac:
557
+ a, b = fit_powerlaw_with_ransac(x=df_agg["x"], y=df_agg["y"])
558
+ a_std = None
559
+ b_std = None
560
+ else:
561
+
562
+ (a, b), pcov = curve_fit(
563
+ lambda x, a, b: a * np.power(x, b),
564
+ df_agg["x"],
565
+ df_agg["y"],
566
+ method="lm",
567
+ sigma=sigma,
568
+ absolute_sigma=True,
569
+ maxfev=10_000, # max n iterations
570
+ )
571
+ (a_std, b_std) = np.sqrt(np.diag(pcov))
572
+ a_std = float(a_std)
573
+ b_std = float(b_std)
525
574
 
526
575
  # Return the parameters and their standard deviation
527
- return (float(a), float(b)), (float(a_std), float(b_std))
576
+ return (float(a), float(b)), (a_std, b_std)
528
577
 
529
578
 
530
579
  def predict_from_powerlaw(x, a, b):
@@ -611,8 +660,10 @@ def plot_drop_spectrum(drop_number, norm=None, add_colorbar=True, title="Drop Sp
611
660
  """Plot the drop spectrum."""
612
661
  cmap = plt.get_cmap("Spectral_r").copy()
613
662
  cmap.set_under("none")
663
+ if "time" in drop_number.dims:
664
+ drop_number = drop_number.sum(dim="time")
614
665
  if norm is None:
615
- norm = LogNorm(vmin=1, vmax=None)
666
+ norm = LogNorm(vmin=1, vmax=None) if drop_number.sum() > 0 else None
616
667
 
617
668
  p = drop_number.plot.pcolormesh(
618
669
  x=DIAMETER_DIMENSION,
@@ -623,8 +674,6 @@ def plot_drop_spectrum(drop_number, norm=None, add_colorbar=True, title="Drop Sp
623
674
  add_colorbar=add_colorbar,
624
675
  cbar_kwargs={"label": "Number of particles"},
625
676
  )
626
- p.axes.set_yticks([])
627
- p.axes.set_yticklabels([])
628
677
  p.axes.set_xlabel("Diamenter [mm]")
629
678
  p.axes.set_ylabel("Fall velocity [m/s]")
630
679
  p.axes.set_title(title)
@@ -645,6 +694,13 @@ def plot_raw_and_filtered_spectrums(
645
694
  cmap = plt.get_cmap("Spectral_r").copy()
646
695
  cmap.set_under("none")
647
696
 
697
+ if "time" in drop_number.dims:
698
+ drop_number = drop_number.sum(dim="time")
699
+ if "time" in raw_drop_number.dims:
700
+ raw_drop_number = raw_drop_number.sum(dim="time")
701
+ if "time" in theoretical_average_velocity.dims:
702
+ theoretical_average_velocity = theoretical_average_velocity.mean(dim="time")
703
+
648
704
  if norm is None:
649
705
  norm = LogNorm(1, None)
650
706
 
@@ -782,7 +838,7 @@ def plot_dsd_density(df_nd, diameter_bin_edges, figsize=(8, 8), dpi=300):
782
838
  return p
783
839
 
784
840
 
785
- def plot_dsd_with_dense_lines(ds, figsize=(8, 8), dpi=300):
841
+ def plot_dsd_with_dense_lines(drop_number_concentration, r, figsize=(8, 8), dpi=300):
786
842
  """Plot N(D) ~ D using dense lines."""
787
843
  # Define intervals for rain rates
788
844
  r_bins = [0, 2, 5, 10, 50, 100, 500]
@@ -794,13 +850,13 @@ def plot_dsd_with_dense_lines(ds, figsize=(8, 8), dpi=300):
794
850
  # Resample N(D) to high resolution !
795
851
  # - quadratic, pchip
796
852
  da = resample_drop_number_concentration(
797
- ds["drop_number_concentration"],
853
+ drop_number_concentration.compute(),
798
854
  diameter_bin_edges=diameter_bin_edges,
799
855
  method="linear",
800
856
  )
801
857
  ds_resampled = xr.Dataset(
802
858
  {
803
- "R": ds["R"],
859
+ "R": r.compute(),
804
860
  "drop_number_concentration": da,
805
861
  },
806
862
  )
@@ -822,9 +878,12 @@ def plot_dsd_with_dense_lines(ds, figsize=(8, 8), dpi=300):
822
878
  # Compute dense lines
823
879
  dict_rgb = {}
824
880
  for i in range(0, len(r_bins) - 1):
881
+
825
882
  # Define dataset subset
826
883
  idx_rain_interval = np.logical_and(ds_resampled["R"] >= r_bins[i], ds_resampled["R"] < r_bins[i + 1])
827
884
  da = ds_resampled.isel(time=idx_rain_interval)["drop_number_concentration"]
885
+ if da.sizes["time"] == 0:
886
+ continue
828
887
 
829
888
  # Retrieve dense lines
830
889
  da_dense_lines = compute_dense_lines(
@@ -834,14 +893,17 @@ def plot_dsd_with_dense_lines(ds, figsize=(8, 8), dpi=300):
834
893
  y_bins=y_bins,
835
894
  normalization="max",
836
895
  )
896
+
837
897
  # Define cmap
838
898
  cmap = cmap_list[i]
899
+
839
900
  # Map colors and transparency
840
901
  # da_rgb = to_rgba(da_dense_lines, cmap=cmap, scaling="linear")
841
902
  # da_rgb = to_rgba(da_dense_lines, cmap=cmap, scaling="exp")
842
903
  # da_rgb = to_rgba(da_dense_lines, cmap=cmap, scaling="log")
843
904
  da_rgb = to_rgba(da_dense_lines, cmap=cmap, scaling="sqrt")
844
905
 
906
+ # Add to dictionary
845
907
  dict_rgb[i] = da_rgb
846
908
 
847
909
  # Blend images with max-alpha
@@ -1347,7 +1409,7 @@ def plot_dsd_params_density(df, log_dm=False, lwc=True, log_normalize=False, fig
1347
1409
 
1348
1410
  # Nt and Nw range
1349
1411
  nt_bins = log_arange(1, 100_000, log_step=log_step, base=10)
1350
- nw_bins = log_arange(1, 100_000, log_step=log_step, base=10)
1412
+ nw_bins = log_arange(1, 1_000_000, log_step=log_step, base=10)
1351
1413
  nw_lim = (10, 1_000_000)
1352
1414
  nt_lim = (1, 100_000)
1353
1415
 
@@ -3680,8 +3742,9 @@ def define_filename(prefix, extension, data_source, campaign_name, station_name)
3680
3742
 
3681
3743
  def create_l2_dataframe(ds):
3682
3744
  """Create pandas Dataframe for L2 analysis."""
3745
+ dims_to_drop = set(ds.dims).intersection({DIAMETER_DIMENSION, VELOCITY_DIMENSION})
3683
3746
  # - Drop array variables and convert to pandas
3684
- df = ds.drop_dims([DIAMETER_DIMENSION, VELOCITY_DIMENSION]).to_pandas()
3747
+ df = ds.drop_dims(dims_to_drop).to_pandas()
3685
3748
  # - Drop coordinates
3686
3749
  coords_to_drop = ["velocity_method", "sample_interval", *RADAR_OPTIONS]
3687
3750
  df = df.drop(columns=coords_to_drop, errors="ignore")
@@ -3710,13 +3773,20 @@ def prepare_summary_dataset(ds, velocity_method="fall_velocity", source="drop_nu
3710
3773
 
3711
3774
  # Select only timesteps with R > 0
3712
3775
  # - We save R with 2 decimals accuracy ... so 0.01 is the smallest value
3713
- rainy_timesteps = np.logical_and(ds["Rm"].compute() >= 0.01, ds["R"].compute() >= 0.01)
3714
- ds = ds.isel(time=ds["Rm"].compute() >= rainy_timesteps)
3776
+ if "Rm" in ds: # in L2E
3777
+ rainy_timesteps = np.logical_and(ds["Rm"].compute() >= 0.01, ds["R"].compute() >= 0.01)
3778
+ else: # L2M without Rm
3779
+ rainy_timesteps = ds["R"].compute() >= 0.01
3780
+
3781
+ ds = ds.isel(time=rainy_timesteps)
3715
3782
  return ds
3716
3783
 
3717
3784
 
3718
3785
  def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, station_name):
3719
3786
  """Generate station summary using L2E dataset."""
3787
+ # Create the summary directory if it does not exist
3788
+ os.makedirs(summary_dir_path, exist_ok=True)
3789
+
3720
3790
  ####---------------------------------------------------------------------.
3721
3791
  #### Prepare dataset
3722
3792
  ds = prepare_summary_dataset(ds)
@@ -3724,6 +3794,10 @@ def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, s
3724
3794
  # Ensure all data are in memory
3725
3795
  ds = ds.compute()
3726
3796
 
3797
+ # Keep only timesteps with at least 3 Nbins to remove noise
3798
+ valid_idx = np.where(ds["Nbins"] >= 3)[0]
3799
+ ds = ds.isel(time=valid_idx)
3800
+
3727
3801
  ####---------------------------------------------------------------------.
3728
3802
  #### Create drop spectrum figures and statistics
3729
3803
  # Compute sum of raw and filtered spectrum over time
@@ -4033,6 +4107,11 @@ def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, s
4033
4107
  #### Create L2E QC summary plots
4034
4108
  # TODO:
4035
4109
 
4110
+ ####------------------------------------------------------------------------.
4111
+ #### Free space - Remove df from memory
4112
+ del df
4113
+ gc.collect()
4114
+
4036
4115
  ####------------------------------------------------------------------------.
4037
4116
  #### Create N(D) densities
4038
4117
  df_nd = create_nd_dataframe(ds)
@@ -4049,27 +4128,38 @@ def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, s
4049
4128
  p.figure.savefig(os.path.join(summary_dir_path, filename))
4050
4129
  plt.close()
4051
4130
 
4052
- #### - Plot N(D) vs D with dense lines
4131
+ #### - Plot N(D)/Nw vs D/Dm
4053
4132
  filename = define_filename(
4054
- prefix="N(D)_DenseLines",
4133
+ prefix="N(D)_Normalized",
4055
4134
  extension="png",
4056
4135
  data_source=data_source,
4057
4136
  campaign_name=campaign_name,
4058
4137
  station_name=station_name,
4059
4138
  )
4060
- p = plot_dsd_with_dense_lines(ds)
4139
+ p = plot_normalized_dsd_density(df_nd)
4061
4140
  p.figure.savefig(os.path.join(summary_dir_path, filename))
4062
4141
  plt.close()
4063
4142
 
4064
- #### - Plot N(D)/Nw vs D/Dm
4143
+ #### Free space - Remove df_nd from memory
4144
+ del df_nd
4145
+ gc.collect()
4146
+
4147
+ #### - Plot N(D) vs D with DenseLines
4148
+ # Extract required variables and free memory
4149
+ drop_number_concentration = ds["drop_number_concentration"].compute().copy()
4150
+ r = ds["R"].compute().copy()
4151
+ del ds
4152
+ gc.collect()
4153
+
4154
+ # Create figure
4065
4155
  filename = define_filename(
4066
- prefix="N(D)_Normalized",
4156
+ prefix="N(D)_DenseLines",
4067
4157
  extension="png",
4068
4158
  data_source=data_source,
4069
4159
  campaign_name=campaign_name,
4070
4160
  station_name=station_name,
4071
4161
  )
4072
- p = plot_normalized_dsd_density(df_nd)
4162
+ p = plot_dsd_with_dense_lines(drop_number_concentration=drop_number_concentration, r=r)
4073
4163
  p.figure.savefig(os.path.join(summary_dir_path, filename))
4074
4164
  plt.close()
4075
4165
 
@@ -4078,8 +4168,15 @@ def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, s
4078
4168
  #### Wrappers
4079
4169
 
4080
4170
 
4081
- def create_station_summary(data_source, campaign_name, station_name, parallel=False, data_archive_dir=None):
4082
- """Create summary figures and tables for a disdrometer station."""
4171
+ def create_station_summary(
4172
+ data_source,
4173
+ campaign_name,
4174
+ station_name,
4175
+ parallel=False,
4176
+ data_archive_dir=None,
4177
+ temporal_resolution="1MIN",
4178
+ ):
4179
+ """Create summary figures and tables for a DISDRODB station."""
4083
4180
  # Print processing info
4084
4181
  print(f"Creation of station summary for {data_source} {campaign_name} {station_name} has started.")
4085
4182
 
@@ -4094,6 +4191,10 @@ def create_station_summary(data_source, campaign_name, station_name, parallel=Fa
4094
4191
  )
4095
4192
  os.makedirs(summary_dir_path, exist_ok=True)
4096
4193
 
4194
+ # Define product_kwargs
4195
+ sample_interval, rolling = get_sampling_information(temporal_resolution)
4196
+ product_kwargs = {"rolling": rolling, "sample_interval": sample_interval}
4197
+
4097
4198
  # Load L2E dataset at the requested temporal resolution
4098
4199
  ds = disdrodb.open_dataset(
4099
4200
  data_archive_dir=data_archive_dir,
@@ -4101,9 +4202,10 @@ def create_station_summary(data_source, campaign_name, station_name, parallel=Fa
4101
4202
  campaign_name=campaign_name,
4102
4203
  station_name=station_name,
4103
4204
  product="L2E",
4104
- product_kwargs={"rolling": False, "sample_interval": 60},
4205
+ product_kwargs=product_kwargs,
4105
4206
  parallel=parallel,
4106
4207
  chunks=-1,
4208
+ compute=True,
4107
4209
  )
4108
4210
 
4109
4211
  # Generate station summary figures and table