disdrodb 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. disdrodb/_version.py +2 -2
  2. disdrodb/accessor/methods.py +10 -3
  3. disdrodb/api/checks.py +1 -1
  4. disdrodb/api/io.py +6 -1
  5. disdrodb/constants.py +1 -1
  6. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  7. disdrodb/etc/products/L1/global.yaml +1 -1
  8. disdrodb/etc/products/L2E/global.yaml +1 -1
  9. disdrodb/etc/products/L2M/global.yaml +1 -1
  10. disdrodb/issue/checks.py +2 -2
  11. disdrodb/l0/check_configs.py +1 -1
  12. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  13. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  14. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  15. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  16. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  17. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  18. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  19. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  20. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  21. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  22. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  23. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  24. disdrodb/l0/l0a_processing.py +6 -2
  25. disdrodb/l0/l0b_processing.py +26 -19
  26. disdrodb/l0/l0c_processing.py +10 -0
  27. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  28. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  29. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  30. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  31. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  32. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  33. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  34. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  35. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  36. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  37. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  38. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  39. disdrodb/l0/readers/PARSIVEL/NASA/LPVEX.py +25 -13
  40. disdrodb/l0/readers/PARSIVEL/NASA/MC3E.py +1 -1
  41. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +1 -1
  42. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  43. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  44. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  45. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  46. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  47. disdrodb/l0/readers/PARSIVEL2/{NASA/GCPEX.py → NORWAY/UIB.py} +54 -29
  48. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +6 -3
  49. disdrodb/l0/readers/{PARSIVEL/NASA/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  50. disdrodb/l0/readers/PARSIVEL2/USA/CW3E.py +48 -21
  51. disdrodb/l0/readers/{PARSIVEL/NASA/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  52. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  53. disdrodb/l1/beard_model.py +45 -1
  54. disdrodb/l1/fall_velocity.py +1 -6
  55. disdrodb/l1/filters.py +2 -0
  56. disdrodb/l2/empirical_dsd.py +12 -8
  57. disdrodb/routines/l0.py +2 -2
  58. disdrodb/routines/options.py +2 -0
  59. disdrodb/scattering/axis_ratio.py +3 -0
  60. disdrodb/scattering/routines.py +1 -1
  61. disdrodb/summary/routines.py +63 -61
  62. disdrodb/utils/compression.py +4 -2
  63. disdrodb/utils/dask.py +31 -11
  64. disdrodb/utils/manipulations.py +7 -1
  65. disdrodb/viz/plots.py +5 -3
  66. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  67. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/RECORD +71 -54
  68. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  69. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  70. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  71. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
disdrodb/routines/l0.py CHANGED
@@ -696,7 +696,7 @@ def run_l0b_station(
696
696
  # -----------------------------------------------------------------.
697
697
  # Start L0B processing
698
698
  t_i = time.time()
699
- msg = f"{product} processing of station_name {station_name} has started."
699
+ msg = f"{product} processing of station {station_name} has started."
700
700
  log_info(logger=logger, msg=msg, verbose=verbose)
701
701
 
702
702
  # -----------------------------------------------------------------.
@@ -774,7 +774,7 @@ def run_l0b_station(
774
774
  # -----------------------------------------------------------------.
775
775
  # End L0B processing
776
776
  timedelta_str = str(datetime.timedelta(seconds=round(time.time() - t_i)))
777
- msg = f"{product} processing of station_name {station_name} completed in {timedelta_str}"
777
+ msg = f"{product} processing of station {station_name} completed in {timedelta_str}"
778
778
  log_info(logger=logger, msg=msg, verbose=verbose)
779
779
 
780
780
  # -----------------------------------------------------------------.
@@ -35,6 +35,8 @@ from disdrodb.utils.yaml import read_yaml
35
35
  # get_product_options(product="L1", temporal_resolution="1MIN")
36
36
  # get_product_options(product="L1", temporal_resolution="1MIN", sensor_name="PARSIVEL")
37
37
 
38
+ # test temporal_resolutions are unique
39
+
38
40
  # TODO: test return list
39
41
  # get_product_temporal_resolutions(product="L1")
40
42
  # get_product_temporal_resolutions(product="L2E")
@@ -83,6 +83,9 @@ def get_axis_ratio_battaglia_2010(diameter):
83
83
  """
84
84
  Compute the axis ratio of raindrops using the Battaglia et al. (2010) model.
85
85
 
86
+ This axis ratio is assumed by OTT Parsivel sensors internally to compute the
87
+ reported particle size (Deq).
88
+
86
89
  Parameters
87
90
  ----------
88
91
  diameter : array-like
@@ -973,7 +973,7 @@ def get_radar_parameters(
973
973
  list_ds = [func(ds_subset, **params) for params in list_params]
974
974
 
975
975
  # Merge into a single dataset
976
- ds_radar = xr.merge(list_ds)
976
+ ds_radar = xr.merge(list_ds, compat="no_conflicts", join="outer")
977
977
 
978
978
  # Order frequency from lowest to highest
979
979
  # --> ['S', 'C', 'X', 'Ku', 'K', 'Ka', 'W']
@@ -716,7 +716,7 @@ def create_nd_dataframe(ds, variables=None):
716
716
  "sample_interval",
717
717
  *RADAR_OPTIONS,
718
718
  ]
719
- df_nd = ds_stack.to_dataframe().drop(columns=coords_to_drop, errors="ignore")
719
+ df_nd = ds_stack.to_dask_dataframe().drop(columns=coords_to_drop, errors="ignore").compute()
720
720
  df_nd["D"] = df_nd["diameter_bin_center"]
721
721
  df_nd["N(D)"] = df_nd["drop_number_concentration"]
722
722
  df_nd = df_nd[df_nd["R"] != 0]
@@ -3789,70 +3789,72 @@ def generate_station_summary(ds, summary_dir_path, data_source, campaign_name, s
3789
3789
 
3790
3790
  ####---------------------------------------------------------------------.
3791
3791
  #### Create drop spectrum figures and statistics
3792
- # Compute sum of raw and filtered spectrum over time
3793
- raw_drop_number = ds["raw_drop_number"].sum(dim="time")
3794
- drop_number = ds["drop_number"].sum(dim="time")
3795
-
3796
- # Define theoretical and measured average velocity
3797
- theoretical_average_velocity = ds["fall_velocity"].mean(dim="time")
3798
- measured_average_velocity = get_drop_average_velocity(drop_number)
3799
-
3800
- # Save raw and filtered spectrum over time & theoretical and measured average fall velocity
3801
- ds_stats = xr.Dataset()
3802
- ds_stats["raw_drop_number"] = raw_drop_number
3803
- ds_stats["drop_number"] = raw_drop_number
3804
- ds_stats["theoretical_average_velocity"] = theoretical_average_velocity
3805
- ds_stats["measured_average_velocity"] = measured_average_velocity
3806
- filename = define_filename(
3807
- prefix="SpectrumStats",
3808
- extension="nc",
3809
- data_source=data_source,
3810
- campaign_name=campaign_name,
3811
- station_name=station_name,
3812
- temporal_resolution=temporal_resolution,
3813
- )
3814
- ds_stats.to_netcdf(os.path.join(summary_dir_path, filename))
3792
+ if VELOCITY_DIMENSION in ds.dims:
3793
+ # Compute sum of raw and filtered spectrum over time
3794
+ raw_drop_number = ds["raw_drop_number"].sum(dim="time")
3795
+ drop_number = ds["drop_number"].sum(dim="time")
3796
+
3797
+ # Define theoretical and measured average velocity
3798
+ theoretical_average_velocity = ds["fall_velocity"].mean(dim="time")
3799
+ measured_average_velocity = get_drop_average_velocity(drop_number)
3800
+
3801
+ # Save raw and filtered spectrum over time & theoretical and measured average fall velocity
3802
+ ds_stats = xr.Dataset()
3803
+ ds_stats["raw_drop_number"] = raw_drop_number
3804
+ ds_stats["drop_number"] = raw_drop_number
3805
+ ds_stats["theoretical_average_velocity"] = theoretical_average_velocity
3806
+ if measured_average_velocity is not None:
3807
+ ds_stats["measured_average_velocity"] = measured_average_velocity
3808
+ filename = define_filename(
3809
+ prefix="SpectrumStats",
3810
+ extension="nc",
3811
+ data_source=data_source,
3812
+ campaign_name=campaign_name,
3813
+ station_name=station_name,
3814
+ temporal_resolution=temporal_resolution,
3815
+ )
3816
+ ds_stats.to_netcdf(os.path.join(summary_dir_path, filename))
3815
3817
 
3816
- # Create figures with raw and filtered spectrum
3817
- # - Raw
3818
- filename = define_filename(
3819
- prefix="SpectrumRaw",
3820
- extension="png",
3821
- data_source=data_source,
3822
- campaign_name=campaign_name,
3823
- station_name=station_name,
3824
- temporal_resolution=temporal_resolution,
3825
- )
3826
- p = plot_spectrum(raw_drop_number, title="Raw Drop Spectrum")
3827
- p.figure.savefig(os.path.join(summary_dir_path, filename))
3828
- plt.close()
3818
+ # Create figures with raw and filtered spectrum
3819
+ # - Raw
3820
+ filename = define_filename(
3821
+ prefix="SpectrumRaw",
3822
+ extension="png",
3823
+ data_source=data_source,
3824
+ campaign_name=campaign_name,
3825
+ station_name=station_name,
3826
+ temporal_resolution=temporal_resolution,
3827
+ )
3828
+ p = plot_spectrum(raw_drop_number, title="Raw Drop Spectrum")
3829
+ p.figure.savefig(os.path.join(summary_dir_path, filename))
3830
+ plt.close()
3829
3831
 
3830
- # - Filtered
3831
- filename = define_filename(
3832
- prefix="SpectrumFiltered",
3833
- extension="png",
3834
- data_source=data_source,
3835
- campaign_name=campaign_name,
3836
- station_name=station_name,
3837
- temporal_resolution=temporal_resolution,
3838
- )
3839
- p = plot_spectrum(drop_number, title="Filtered Drop Spectrum")
3840
- p.figure.savefig(os.path.join(summary_dir_path, filename))
3841
- plt.close()
3832
+ # - Filtered
3833
+ filename = define_filename(
3834
+ prefix="SpectrumFiltered",
3835
+ extension="png",
3836
+ data_source=data_source,
3837
+ campaign_name=campaign_name,
3838
+ station_name=station_name,
3839
+ temporal_resolution=temporal_resolution,
3840
+ )
3841
+ p = plot_spectrum(drop_number, title="Filtered Drop Spectrum")
3842
+ p.figure.savefig(os.path.join(summary_dir_path, filename))
3843
+ plt.close()
3842
3844
 
3843
- # Create figure comparing raw and filtered spectrum
3844
- filename = define_filename(
3845
- prefix="SpectrumSummary",
3846
- extension="png",
3847
- data_source=data_source,
3848
- campaign_name=campaign_name,
3849
- station_name=station_name,
3850
- temporal_resolution=temporal_resolution,
3851
- )
3845
+ # Create figure comparing raw and filtered spectrum
3846
+ filename = define_filename(
3847
+ prefix="SpectrumSummary",
3848
+ extension="png",
3849
+ data_source=data_source,
3850
+ campaign_name=campaign_name,
3851
+ station_name=station_name,
3852
+ temporal_resolution=temporal_resolution,
3853
+ )
3852
3854
 
3853
- fig = plot_raw_and_filtered_spectra(ds)
3854
- fig.savefig(os.path.join(summary_dir_path, filename))
3855
- plt.close()
3855
+ fig = plot_raw_and_filtered_spectra(ds)
3856
+ fig.savefig(os.path.join(summary_dir_path, filename))
3857
+ plt.close()
3856
3858
 
3857
3859
  ####---------------------------------------------------------------------.
3858
3860
  #### Create L2E dataframe
@@ -82,7 +82,7 @@ def unzip_file_on_terminal(filepath: str, dest_path: str) -> str:
82
82
  subprocess.run(cmd, check=True)
83
83
 
84
84
 
85
- def _zip_dir(dir_path: str) -> str:
85
+ def _zip_dir(dir_path: str, dst_dir=None) -> str:
86
86
  """Zip a directory into a file located in the same directory.
87
87
 
88
88
  Parameters
@@ -95,7 +95,9 @@ def _zip_dir(dir_path: str) -> str:
95
95
  str
96
96
  Path of the zip archive.
97
97
  """
98
- output_path_without_extension = os.path.join(tempfile.gettempdir(), os.path.basename(dir_path))
98
+ if dst_dir is None:
99
+ dst_dir = tempfile.gettempdir()
100
+ output_path_without_extension = os.path.join(dst_dir, os.path.basename(dir_path))
99
101
  output_path = output_path_without_extension + ".zip"
100
102
  shutil.make_archive(output_path_without_extension, "zip", dir_path)
101
103
  return output_path
disdrodb/utils/dask.py CHANGED
@@ -113,7 +113,13 @@ def close_dask_cluster(cluster, client):
113
113
  logger.setLevel(original_level)
114
114
 
115
115
 
116
- def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
116
+ def _batch_iterable(iterable, n):
117
+ """Yield successive n-sized chunks from iterable."""
118
+ for i in range(0, len(iterable), n):
119
+ yield iterable[i : i + n]
120
+
121
+
122
+ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str, max_tasks_per_batch=5_000):
117
123
  """
118
124
  Execute Dask tasks and skip failed ones.
119
125
 
@@ -125,6 +131,9 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
125
131
  Whether to execute in parallel with Dask or not.
126
132
  logs_dir : str
127
133
  Directory to store FAILED_TASKS.log.
134
+ max_tasks_per_batch : int or None, optional
135
+ Maximum number of tasks to submit to `client.compute()` at once.
136
+ The default is 5000. Dask struggles if more than 10_000 tasks are submitted.
128
137
 
129
138
  Returns
130
139
  -------
@@ -150,18 +159,29 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
150
159
  except ValueError:
151
160
  raise ValueError("No Dask Distributed Client found.")
152
161
 
153
- # Compute tasks (all concurrently)
154
- # - Runs tasks == num_workers * threads_per_worker (which is 1 for DISDRODB)
155
- # - If errors occurs in some, skip it
156
- futures = client.compute(list_tasks)
157
- results = client.gather(futures, errors="skip")
162
+ all_results = []
163
+ failed_futures = []
164
+
165
+ # Batch execution
166
+ task_batches = list(_batch_iterable(list_tasks, max_tasks_per_batch)) if max_tasks_per_batch else [list_tasks]
167
+
168
+ for batch in task_batches:
169
+ # Compute tasks (all concurrently)
170
+ # - Runs tasks == num_workers * threads_per_worker (which is 1 for DISDRODB)
171
+ # - If errors occur in some tasks, skip them
172
+ futures = client.compute(batch)
173
+ results = client.gather(futures, errors="skip")
174
+
175
+ # Identify and collect failed futures
176
+ batch_failed = [f for f in futures if f.status != "finished"]
177
+ failed_futures.extend(batch_failed)
158
178
 
159
- # Collect failed futures
160
- failed_futures = [f for f in futures if f.status != "finished"] # "error"
179
+ # Collect results from successful tasks
180
+ all_results.extend(results)
161
181
 
162
182
  # If no tasks failed, return results
163
183
  if not failed_futures:
164
- return results
184
+ return all_results
165
185
 
166
186
  # Otherwise define log file listing failed tasks
167
187
  with open(failed_log_path, "w") as f:
@@ -170,5 +190,5 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
170
190
  f.write(f"ERROR - DASK TASK FAILURE - Task {fut.key} failed: {err}\n")
171
191
 
172
192
  # Append to list of log filepaths (results) the dask failing log
173
- results.append(failed_log_path)
174
- return results
193
+ all_results.append(failed_log_path)
194
+ return all_results
@@ -26,7 +26,13 @@ from disdrodb.utils.xarray import unstack_datarray_dimension
26
26
 
27
27
  def get_diameter_bin_edges(ds):
28
28
  """Retrieve diameter bin edges."""
29
- bin_edges = np.append(ds["diameter_bin_lower"].compute().data, ds["diameter_bin_upper"].compute().data[-1])
29
+ bin_edges = np.append(ds["diameter_bin_lower"].to_numpy(), ds["diameter_bin_upper"].to_numpy()[-1])
30
+ return bin_edges
31
+
32
+
33
+ def get_velocity_bin_edges(ds):
34
+ """Retrieve velocity bin edges."""
35
+ bin_edges = np.append(ds["velocity_bin_lower"].to_numpy(), ds["velocity_bin_upper"].to_numpy()[-1])
30
36
  return bin_edges
31
37
 
32
38
 
disdrodb/viz/plots.py CHANGED
@@ -96,6 +96,8 @@ def _check_has_diameter_and_velocity_dims(da):
96
96
  def _get_spectrum_variable(xr_obj, variable):
97
97
  if not isinstance(xr_obj, (xr.Dataset, xr.DataArray)):
98
98
  raise TypeError("Expecting xarray object as input.")
99
+ if VELOCITY_DIMENSION not in xr_obj.dims:
100
+ raise ValueError("2D spectrum not available.")
99
101
  if isinstance(xr_obj, xr.Dataset):
100
102
  if variable not in xr_obj:
101
103
  raise ValueError(f"The dataset do not include {variable=}.")
@@ -229,7 +231,7 @@ def plot_raw_and_filtered_spectra(
229
231
  theoretical_average_velocity = ds["fall_velocity"]
230
232
  if "time" in theoretical_average_velocity.dims:
231
233
  theoretical_average_velocity = theoretical_average_velocity.mean(dim="time")
232
- if add_measured_average_velocity:
234
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
233
235
  measured_average_velocity = get_drop_average_velocity(drop_number)
234
236
 
235
237
  # Define norm if not specified
@@ -248,7 +250,7 @@ def plot_raw_and_filtered_spectra(
248
250
  # Add velocities if asked
249
251
  if add_theoretical_average_velocity:
250
252
  theoretical_average_velocity.plot(ax=ax1, c="k", linestyle="dashed")
251
- if add_measured_average_velocity:
253
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
252
254
  measured_average_velocity.plot(ax=ax1, c="k", linestyle="dotted")
253
255
 
254
256
  # Improve plot appearance
@@ -262,7 +264,7 @@ def plot_raw_and_filtered_spectra(
262
264
  # Add velocities if asked
263
265
  if add_theoretical_average_velocity:
264
266
  theoretical_average_velocity.plot(ax=ax2, c="k", linestyle="dashed", label="Theoretical velocity")
265
- if add_measured_average_velocity:
267
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
266
268
  measured_average_velocity.plot(ax=ax2, c="k", linestyle="dotted", label="Measured average velocity")
267
269
 
268
270
  # Improve plot appearance
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: disdrodb
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: disdrodb provides tools to download, standardize, share and analyze global disdrometer data.
5
5
  Author: Gionata Ghiggi
6
6
  Project-URL: homepage, https://github.com/ltelab/disdrodb