disdrodb 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/info.py +3 -3
  6. disdrodb/api/io.py +48 -8
  7. disdrodb/api/path.py +116 -133
  8. disdrodb/api/search.py +12 -3
  9. disdrodb/cli/disdrodb_create_summary.py +103 -0
  10. disdrodb/cli/disdrodb_create_summary_station.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  13. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  17. disdrodb/data_transfer/download_data.py +123 -7
  18. disdrodb/issue/writer.py +2 -0
  19. disdrodb/l0/l0a_processing.py +10 -5
  20. disdrodb/l0/l0b_nc_processing.py +10 -6
  21. disdrodb/l0/l0b_processing.py +26 -61
  22. disdrodb/l0/l0c_processing.py +369 -251
  23. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  24. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  25. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  26. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  27. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  28. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  29. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  30. disdrodb/l1/fall_velocity.py +46 -0
  31. disdrodb/l1/processing.py +1 -1
  32. disdrodb/l2/processing.py +1 -1
  33. disdrodb/metadata/checks.py +132 -125
  34. disdrodb/psd/fitting.py +172 -205
  35. disdrodb/psd/models.py +1 -1
  36. disdrodb/routines/__init__.py +54 -0
  37. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  38. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  39. disdrodb/{l2/routines.py → routines/l2.py} +249 -462
  40. disdrodb/{routines.py → routines/wrappers.py} +95 -7
  41. disdrodb/scattering/axis_ratio.py +5 -1
  42. disdrodb/scattering/permittivity.py +18 -0
  43. disdrodb/scattering/routines.py +56 -36
  44. disdrodb/summary/routines.py +110 -34
  45. disdrodb/utils/archiving.py +434 -0
  46. disdrodb/utils/cli.py +5 -5
  47. disdrodb/utils/dask.py +62 -1
  48. disdrodb/utils/decorators.py +31 -0
  49. disdrodb/utils/encoding.py +5 -1
  50. disdrodb/{l2 → utils}/event.py +1 -66
  51. disdrodb/utils/logger.py +1 -1
  52. disdrodb/utils/manipulations.py +22 -12
  53. disdrodb/utils/routines.py +166 -0
  54. disdrodb/utils/time.py +3 -291
  55. disdrodb/utils/xarray.py +3 -0
  56. disdrodb/viz/plots.py +85 -14
  57. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/METADATA +2 -2
  58. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/RECORD +62 -54
  59. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +1 -0
  60. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  61. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  62. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
@@ -15,10 +15,12 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Utilities to create summary statistics."""
18
+ import gc
19
+ import importlib
18
20
  import os
21
+ import shutil
19
22
  import subprocess
20
23
  import tempfile
21
- from shutil import which
22
24
 
23
25
  import matplotlib.lines as mlines
24
26
  import matplotlib.pyplot as plt
@@ -33,9 +35,9 @@ import disdrodb
33
35
  from disdrodb.api.path import define_station_dir
34
36
  from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
35
37
  from disdrodb.l2.empirical_dsd import get_drop_average_velocity
36
- from disdrodb.l2.event import group_timesteps_into_event
37
38
  from disdrodb.scattering import RADAR_OPTIONS
38
39
  from disdrodb.utils.dataframe import compute_2d_histogram, log_arange
40
+ from disdrodb.utils.event import group_timesteps_into_event
39
41
  from disdrodb.utils.manipulations import (
40
42
  get_diameter_bin_edges,
41
43
  resample_drop_number_concentration,
@@ -58,7 +60,7 @@ def is_latex_engine_available() -> bool:
58
60
  bool
59
61
  True if tectonic is found, False otherwise.
60
62
  """
61
- return which("tectonic") is not None
63
+ return shutil.which("tectonic") is not None
62
64
 
63
65
 
64
66
  def save_table_to_pdf(
@@ -142,7 +144,7 @@ def save_table_to_pdf(
142
144
  check=True,
143
145
  )
144
146
  # Move result
145
- os.replace(os.path.join(td, "table.pdf"), filepath)
147
+ shutil.move(os.path.join(td, "table.pdf"), filepath)
146
148
 
147
149
 
148
150
  ####-----------------------------------------------------------------
@@ -400,14 +402,35 @@ def prepare_latex_table_events_summary(df):
400
402
  #### Powerlaw routines
401
403
 
402
404
 
403
- def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False):
405
+ def fit_powerlaw_with_ransac(x, y):
406
+ """Fit powerlaw relationship with RANSAC algorithm."""
407
+ from sklearn.linear_model import LinearRegression, RANSACRegressor
408
+
409
+ x = np.asanyarray(x)
410
+ y = np.asanyarray(y)
411
+ X = np.log10(x).reshape(-1, 1)
412
+ Y = np.log10(y)
413
+ ransac = RANSACRegressor(
414
+ estimator=LinearRegression(),
415
+ min_samples=0.5,
416
+ residual_threshold=0.3,
417
+ random_state=42,
418
+ )
419
+ ransac.fit(X, Y)
420
+ b = ransac.estimator_.coef_[0] # slope
421
+ loga = ransac.estimator_.intercept_ # intercept
422
+ a = 10**loga
423
+ return a, b
424
+
425
+
426
+ def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False, use_ransac=True):
404
427
  """
405
428
  Fit a power-law relationship ``y = a * x**b`` to binned median values.
406
429
 
407
430
  This function bins ``x`` into intervals defined by ``xbins``, computes the
408
431
  median of ``y`` in each bin (robust to outliers), and fits a power-law model
409
- using the Levenberg-Marquardt algorithm. Optionally, ``x`` can be converted
410
- from decibel units to linear scale automatically before fitting.
432
+ using the RANSAC or Levenberg-Marquardt algorithm.
433
+ Optionally, ``x`` can be converted from decibel units to linear scale automatically before fitting.
411
434
 
412
435
  Parameters
413
436
  ----------
@@ -424,6 +447,11 @@ def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False):
424
447
  x_in_db : bool, optional
425
448
  If True, converts ``x`` values from decibels (dB) to linear scale using
426
449
  :func:`disdrodb.idecibel`. Default is False.
450
+ use_ransac: bool, optional
451
+ Whether to fit the powerlaw using the Random Sample Consensus (RANSAC)
452
+ algorithm or using the Levenberg-Marquardt algorithm.
453
+ The default is True.
454
+ To fit with RANSAC, scikit-learn must be installed.
427
455
 
428
456
  Returns
429
457
  -------
@@ -432,6 +460,8 @@ def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False):
432
460
  params_std : tuple of float
433
461
  One standard deviation uncertainties ``(a_std, b_std)`` estimated from
434
462
  the covariance matrix of the fit.
463
+ Parameters standard deviation is currently
464
+ not available if fitting with the RANSAC algorithm.
435
465
 
436
466
  Notes
437
467
  -----
@@ -458,6 +488,11 @@ def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False):
458
488
  if os.environ.get("PYTEST_CURRENT_TEST"):
459
489
  min_counts = 0
460
490
 
491
+ # Check if RANSAC algorithm is available
492
+ sklearn_available = importlib.util.find_spec("sklearn") is not None
493
+ if use_ransac and not sklearn_available:
494
+ use_ransac = False
495
+
461
496
  # Ensure numpy array
462
497
  x = np.asanyarray(x)
463
498
  y = np.asanyarray(y)
@@ -512,19 +547,27 @@ def fit_powerlaw(x, y, xbins, quantile=0.5, min_counts=10, x_in_db=False):
512
547
 
513
548
  # Fit the data
514
549
  with suppress_warnings():
515
- (a, b), pcov = curve_fit(
516
- lambda x, a, b: a * np.power(x, b),
517
- df_agg["x"],
518
- df_agg["y"],
519
- method="lm",
520
- sigma=sigma,
521
- absolute_sigma=True,
522
- maxfev=10_000, # max n iterations
523
- )
524
- (a_std, b_std) = np.sqrt(np.diag(pcov))
550
+ if use_ransac:
551
+ a, b = fit_powerlaw_with_ransac(x=df_agg["x"], y=df_agg["y"])
552
+ a_std = None
553
+ b_std = None
554
+ else:
555
+
556
+ (a, b), pcov = curve_fit(
557
+ lambda x, a, b: a * np.power(x, b),
558
+ df_agg["x"],
559
+ df_agg["y"],
560
+ method="lm",
561
+ sigma=sigma,
562
+ absolute_sigma=True,
563
+ maxfev=10_000, # max n iterations
564
+ )
565
+ (a_std, b_std) = np.sqrt(np.diag(pcov))
566
+ a_std = float(a_std)
567
+ b_std = float(b_std)
525
568
 
526
569
  # Return the parameters and their standard deviation
527
- return (float(a), float(b)), (float(a_std), float(b_std))
570
+ return (float(a), float(b)), (a_std, b_std)
528
571
 
529
572
 
530
573
  def predict_from_powerlaw(x, a, b):
@@ -611,8 +654,10 @@ def plot_drop_spectrum(drop_number, norm=None, add_colorbar=True, title="Drop Sp
611
654
  """Plot the drop spectrum."""
612
655
  cmap = plt.get_cmap("Spectral_r").copy()
613
656
  cmap.set_under("none")
657
+ if "time" in drop_number.dims:
658
+ drop_number = drop_number.sum(dim="time")
614
659
  if norm is None:
615
- norm = LogNorm(vmin=1, vmax=None)
660
+ norm = LogNorm(vmin=1, vmax=None) if drop_number.sum() > 0 else None
616
661
 
617
662
  p = drop_number.plot.pcolormesh(
618
663
  x=DIAMETER_DIMENSION,
@@ -623,8 +668,6 @@ def plot_drop_spectrum(drop_number, norm=None, add_colorbar=True, title="Drop Sp
623
668
  add_colorbar=add_colorbar,
624
669
  cbar_kwargs={"label": "Number of particles"},
625
670
  )
626
- p.axes.set_yticks([])
627
- p.axes.set_yticklabels([])
628
671
  p.axes.set_xlabel("Diamenter [mm]")
629
672
  p.axes.set_ylabel("Fall velocity [m/s]")
630
673
  p.axes.set_title(title)
@@ -782,7 +825,7 @@ def plot_dsd_density(df_nd, diameter_bin_edges, figsize=(8, 8), dpi=300):
782
825
  return p
783
826
 
784
827
 
785
- def plot_dsd_with_dense_lines(ds, figsize=(8, 8), dpi=300):
828
+ def plot_dsd_with_dense_lines(drop_number_concentration, r, figsize=(8, 8), dpi=300):
786
829
  """Plot N(D) ~ D using dense lines."""
787
830
  # Define intervals for rain rates
788
831
  r_bins = [0, 2, 5, 10, 50, 100, 500]
@@ -794,13 +837,13 @@ def plot_dsd_with_dense_lines(ds, figsize=(8, 8), dpi=300):
794
837
  # Resample N(D) to high resolution !
795
838
  # - quadratic, pchip
796
839
  da = resample_drop_number_concentration(
797
- ds["drop_number_concentration"],
840
+ drop_number_concentration.compute(),
798
841
  diameter_bin_edges=diameter_bin_edges,
799
842
  method="linear",
800
843
  )
801
844
  ds_resampled = xr.Dataset(
802
845
  {
803
- "R": ds["R"],
846
+ "R": r.compute(),
804
847
  "drop_number_concentration": da,
805
848
  },
806
849
  )
@@ -822,9 +865,12 @@ def plot_dsd_with_dense_lines(ds, figsize=(8, 8), dpi=300):
822
865
  # Compute dense lines
823
866
  dict_rgb = {}
824
867
  for i in range(0, len(r_bins) - 1):
868
+
825
869
  # Define dataset subset
826
870
  idx_rain_interval = np.logical_and(ds_resampled["R"] >= r_bins[i], ds_resampled["R"] < r_bins[i + 1])
827
871
  da = ds_resampled.isel(time=idx_rain_interval)["drop_number_concentration"]
872
+ if da.sizes["time"] == 0:
873
+ continue
828
874
 
829
875
  # Retrieve dense lines
830
876
  da_dense_lines = compute_dense_lines(
@@ -834,14 +880,17 @@ def plot_dsd_with_dense_lines(ds, figsize=(8, 8), dpi=300):
834
880
  y_bins=y_bins,
835
881
  normalization="max",
836
882
  )
883
+
837
884
  # Define cmap
838
885
  cmap = cmap_list[i]
886
+
839
887
  # Map colors and transparency
840
888
  # da_rgb = to_rgba(da_dense_lines, cmap=cmap, scaling="linear")
841
889
  # da_rgb = to_rgba(da_dense_lines, cmap=cmap, scaling="exp")
842
890
  # da_rgb = to_rgba(da_dense_lines, cmap=cmap, scaling="log")
843
891
  da_rgb = to_rgba(da_dense_lines, cmap=cmap, scaling="sqrt")
844
892
 
893
+ # Add to dictionary
845
894
  dict_rgb[i] = da_rgb
846
895
 
847
896
  # Blend images with max-alpha
@@ -1347,7 +1396,7 @@ def plot_dsd_params_density(df, log_dm=False, lwc=True, log_normalize=False, fig
1347
1396
 
1348
1397
  # Nt and Nw range
1349
1398
  nt_bins = log_arange(1, 100_000, log_step=log_step, base=10)
1350
- nw_bins = log_arange(1, 100_000, log_step=log_step, base=10)
1399
+ nw_bins = log_arange(1, 1_000_000, log_step=log_step, base=10)
1351
1400
  nw_lim = (10, 1_000_000)
1352
1401
  nt_lim = (1, 100_000)
1353
1402
 
@@ -3711,12 +3760,15 @@ def prepare_summary_dataset(ds, velocity_method="fall_velocity", source="drop_nu
3711
3760
  # Select only timesteps with R > 0
3712
3761
  # - We save R with 2 decimals accuracy ... so 0.01 is the smallest value
3713
3762
  rainy_timesteps = np.logical_and(ds["Rm"].compute() >= 0.01, ds["R"].compute() >= 0.01)
3714
- ds = ds.isel(time=ds["Rm"].compute() >= rainy_timesteps)
3763
+ ds = ds.isel(time=rainy_timesteps)
3715
3764
  return ds
3716
3765
 
3717
3766
 
3718
3767
  def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, station_name):
3719
3768
  """Generate station summary using L2E dataset."""
3769
+ # Create summary directory if does not exist
3770
+ os.makedirs(summary_dir_path, exist_ok=True)
3771
+
3720
3772
  ####---------------------------------------------------------------------.
3721
3773
  #### Prepare dataset
3722
3774
  ds = prepare_summary_dataset(ds)
@@ -3727,6 +3779,7 @@ def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, s
3727
3779
  ####---------------------------------------------------------------------.
3728
3780
  #### Create drop spectrum figures and statistics
3729
3781
  # Compute sum of raw and filtered spectrum over time
3782
+
3730
3783
  raw_drop_number = ds["raw_drop_number"].sum(dim="time")
3731
3784
  drop_number = ds["drop_number"].sum(dim="time")
3732
3785
 
@@ -4033,6 +4086,11 @@ def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, s
4033
4086
  #### Create L2E QC summary plots
4034
4087
  # TODO:
4035
4088
 
4089
+ ####------------------------------------------------------------------------.
4090
+ #### Free space - Remove df from memory
4091
+ del df
4092
+ gc.collect()
4093
+
4036
4094
  ####------------------------------------------------------------------------.
4037
4095
  #### Create N(D) densities
4038
4096
  df_nd = create_nd_dataframe(ds)
@@ -4049,27 +4107,38 @@ def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, s
4049
4107
  p.figure.savefig(os.path.join(summary_dir_path, filename))
4050
4108
  plt.close()
4051
4109
 
4052
- #### - Plot N(D) vs D with dense lines
4110
+ #### - Plot N(D)/Nw vs D/Dm
4053
4111
  filename = define_filename(
4054
- prefix="N(D)_DenseLines",
4112
+ prefix="N(D)_Normalized",
4055
4113
  extension="png",
4056
4114
  data_source=data_source,
4057
4115
  campaign_name=campaign_name,
4058
4116
  station_name=station_name,
4059
4117
  )
4060
- p = plot_dsd_with_dense_lines(ds)
4118
+ p = plot_normalized_dsd_density(df_nd)
4061
4119
  p.figure.savefig(os.path.join(summary_dir_path, filename))
4062
4120
  plt.close()
4063
4121
 
4064
- #### - Plot N(D)/Nw vs D/Dm
4122
+ #### Free space - Remove df_nd from memory
4123
+ del df_nd
4124
+ gc.collect()
4125
+
4126
+ #### - Plot N(D) vs D with DenseLines
4127
+ # Extract required variables and free memory
4128
+ drop_number_concentration = ds["drop_number_concentration"].compute().copy()
4129
+ r = ds["R"].compute().copy()
4130
+ del ds
4131
+ gc.collect()
4132
+
4133
+ # Create figure
4065
4134
  filename = define_filename(
4066
- prefix="N(D)_Normalized",
4135
+ prefix="N(D)_DenseLines",
4067
4136
  extension="png",
4068
4137
  data_source=data_source,
4069
4138
  campaign_name=campaign_name,
4070
4139
  station_name=station_name,
4071
4140
  )
4072
- p = plot_normalized_dsd_density(df_nd)
4141
+ p = plot_dsd_with_dense_lines(drop_number_concentration=drop_number_concentration, r=r)
4073
4142
  p.figure.savefig(os.path.join(summary_dir_path, filename))
4074
4143
  plt.close()
4075
4144
 
@@ -4078,8 +4147,14 @@ def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, s
4078
4147
  #### Wrappers
4079
4148
 
4080
4149
 
4081
- def create_station_summary(data_source, campaign_name, station_name, parallel=False, data_archive_dir=None):
4082
- """Create summary figures and tables for a disdrometer station."""
4150
+ def create_station_summary(
4151
+ data_source,
4152
+ campaign_name,
4153
+ station_name,
4154
+ parallel=False,
4155
+ data_archive_dir=None,
4156
+ ):
4157
+ """Create summary figures and tables for a DISDRODB station."""
4083
4158
  # Print processing info
4084
4159
  print(f"Creation of station summary for {data_source} {campaign_name} {station_name} has started.")
4085
4160
 
@@ -4104,6 +4179,7 @@ def create_station_summary(data_source, campaign_name, station_name, parallel=Fa
4104
4179
  product_kwargs={"rolling": False, "sample_interval": 60},
4105
4180
  parallel=parallel,
4106
4181
  chunks=-1,
4182
+ compute=True,
4107
4183
  )
4108
4184
 
4109
4185
  # Generate station summary figures and table