disdrodb 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -0
  4. disdrodb/api/checks.py +8 -7
  5. disdrodb/api/io.py +81 -29
  6. disdrodb/api/path.py +17 -14
  7. disdrodb/api/search.py +15 -18
  8. disdrodb/cli/disdrodb_open_products_options.py +38 -0
  9. disdrodb/cli/disdrodb_run.py +2 -2
  10. disdrodb/cli/disdrodb_run_station.py +4 -4
  11. disdrodb/configs.py +1 -1
  12. disdrodb/data_transfer/download_data.py +70 -1
  13. disdrodb/etc/configs/attributes.yaml +62 -8
  14. disdrodb/etc/configs/encodings.yaml +28 -0
  15. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_SSE.yaml +8 -0
  16. disdrodb/etc/products/L2M/MODELS/GAMMA_ML.yaml +1 -1
  17. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_SSE.yaml +8 -0
  18. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_SSE.yaml +8 -0
  19. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +1 -1
  20. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_SSE.yaml +8 -0
  21. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_SSE.yaml +8 -0
  22. disdrodb/etc/products/L2M/global.yaml +4 -4
  23. disdrodb/fall_velocity/graupel.py +8 -8
  24. disdrodb/fall_velocity/hail.py +2 -2
  25. disdrodb/fall_velocity/rain.py +33 -5
  26. disdrodb/issue/checks.py +1 -1
  27. disdrodb/l0/l0_reader.py +1 -1
  28. disdrodb/l0/l0a_processing.py +2 -2
  29. disdrodb/l0/l0b_nc_processing.py +5 -5
  30. disdrodb/l0/l0b_processing.py +20 -24
  31. disdrodb/l0/l0c_processing.py +18 -13
  32. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +4 -0
  33. disdrodb/l0/readers/PARSIVEL2/VIETNAM/IGE_PARSIVEL2.py +239 -0
  34. disdrodb/l0/template_tools.py +13 -13
  35. disdrodb/l1/classification.py +10 -6
  36. disdrodb/l2/empirical_dsd.py +25 -15
  37. disdrodb/l2/processing.py +32 -14
  38. disdrodb/metadata/download.py +1 -1
  39. disdrodb/metadata/geolocation.py +4 -4
  40. disdrodb/metadata/reader.py +3 -3
  41. disdrodb/metadata/search.py +10 -8
  42. disdrodb/psd/__init__.py +4 -0
  43. disdrodb/psd/fitting.py +2660 -592
  44. disdrodb/psd/gof_metrics.py +389 -0
  45. disdrodb/psd/grid_search.py +1066 -0
  46. disdrodb/psd/models.py +1281 -145
  47. disdrodb/routines/l2.py +6 -6
  48. disdrodb/routines/options_validation.py +8 -8
  49. disdrodb/scattering/axis_ratio.py +70 -2
  50. disdrodb/scattering/permittivity.py +13 -10
  51. disdrodb/scattering/routines.py +10 -10
  52. disdrodb/summary/routines.py +23 -20
  53. disdrodb/utils/archiving.py +29 -22
  54. disdrodb/utils/attrs.py +6 -4
  55. disdrodb/utils/dataframe.py +4 -4
  56. disdrodb/utils/encoding.py +3 -1
  57. disdrodb/utils/event.py +9 -9
  58. disdrodb/utils/logger.py +4 -7
  59. disdrodb/utils/manipulations.py +2 -2
  60. disdrodb/utils/subsetting.py +1 -1
  61. disdrodb/utils/time.py +8 -7
  62. disdrodb/viz/plots.py +25 -17
  63. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/METADATA +44 -33
  64. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/RECORD +68 -66
  65. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/WHEEL +1 -1
  66. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/entry_points.txt +1 -0
  67. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +0 -6
  68. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +0 -6
  69. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +0 -6
  70. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_MAE.yaml +0 -6
  71. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_MAE.yaml +0 -6
  72. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +0 -6
  73. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_Z_MAE.yaml +0 -6
  74. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/licenses/LICENSE +0 -0
  75. {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/top_level.txt +0 -0
disdrodb/l0/readers/PARSIVEL2/VIETNAM/IGE_PARSIVEL2.py ADDED
@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2026 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+import os
+
+import pandas as pd
+
+from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+from disdrodb.l0.l0a_processing import read_raw_text_file
+
+
+def reader_parsivel(filepath, logger):
+    """Reader for Parsivel CR1000 Data Logger file."""
+    ##------------------------------------------------------------------------.
+    #### Define reader options
+    reader_kwargs = {}
+    # - Define delimiter
+    reader_kwargs["delimiter"] = "\\n"
+    # - Skip first row as columns names
+    reader_kwargs["header"] = None
+    # - Skip first 3 rows
+    reader_kwargs["skiprows"] = 0
+    # - Define encoding
+    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
+    # - Avoid first column to become df index !!!
+    reader_kwargs["index_col"] = False
+    # - Define behaviour when encountering bad lines
+    reader_kwargs["on_bad_lines"] = "skip"
+    # - Define reader engine
+    #   - C engine is faster
+    #   - Python engine is more feature-complete
+    reader_kwargs["engine"] = "python"
+    # - Define on-the-fly decompression of on-disk data
+    #   - Available: gzip, bz2, zip
+    reader_kwargs["compression"] = "infer"
+    # - Strings to recognize as NA/NaN and replace with standard NA flags
+    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+    reader_kwargs["na_values"] = ["na", "", "error"]
+
+    ##------------------------------------------------------------------------.
+    #### Read the data
+    df_raw = read_raw_text_file(
+        filepath=filepath,
+        column_names=["TO_PARSE"],
+        reader_kwargs=reader_kwargs,
+        logger=logger,
+    )
+
+    # Retrieve header, number of columns and starting rows
+    # - Search in the first 3 rows where "TIMESTAMP" occurs
+    # - Once identified the row, strip away everything before TIMESTAMP
+    # - Then identify start_row_idx as the row where "TIMESTAMP" occurs + 2
+    for i in range(3):
+        line = df_raw.iloc[i]["TO_PARSE"]
+        if "TIMESTAMP" in line:
+            # Remove double and single quotes
+            line = line.replace('""', '"').replace('"', "")
+            # Define header
+            timestamp_idx = line.find("TIMESTAMP")
+            header_str = line[timestamp_idx:]
+            header = header_str.split(",")
+            # Define number of columns
+            n_columns = len(header)
+            # Define start row with data
+            start_row_idx = i + 3
+            break
+    else:
+        # start_row_idx = 0
+        # n_columns = len(df_raw["TO_PARSE"].iloc[0].split(","))
+        raise ValueError("Could not find 'TIMESTAMP' in the first 3 rows of the file.")
+
+    # Retrieve rows with actual data
+    df = df_raw.iloc[start_row_idx:]
+
+    # Expand dataframe
+    df = df["TO_PARSE"].str.split(",", expand=True, n=n_columns - 1)
+
+    #### Define column names
+    column_names = [
+        "time",
+        "RECORD",
+        "rainfall_rate_32bit",
+        "rainfall_accumulated_32bit",
+        "weather_code_synop_4680",
+        "weather_code_synop_4677",
+        "reflectivity_32bit",
+        "mor_visibility",
+        "laser_amplitude",
+        "number_particles",
+        "sensor_temperature",
+        "sensor_heating_current",
+        "sensor_battery_voltage",
+        "sample_interval",
+        "sensor_status",
+        "rain_kinetic_energy",
+        "sensor_temperature_receiver",
+        "sensor_temperature_trasmitter",
+        "V_Batt_Min",
+    ]
+
+    ##------------------------------------------------------------------------.
+    #### Assign column names
+    df.columns = column_names
+
+    ##------------------------------------------------------------------------.
+    #### Adapt the dataframe to adhere to DISDRODB L0 standards
+    # Define time as datetime column
+    df["time"] = pd.to_datetime(df["time"].str.strip('"'), format="%Y-%m-%d %H:%M:%S", errors="coerce")
+
+    # Drop columns not agreeing with DISDRODB L0 standards
+    columns_to_drop = [
+        "RECORD",
+        "V_Batt_Min",
+    ]
+    df = df.drop(columns=columns_to_drop, errors="ignore")
+    return df
+
+
+def reader_spectrum(filepath, logger):
+    """Reader for Spectrum CR1000 Data Logger file."""
+    ##------------------------------------------------------------------------.
+    #### Define column names
+    column_names = ["TO_PARSE"]
+
+    ##------------------------------------------------------------------------.
+    #### Define reader options
+    reader_kwargs = {}
+    # - Define delimiter
+    reader_kwargs["delimiter"] = "\\n"
+    # - Skip first row as columns names
+    reader_kwargs["header"] = None
+    # - Skip first 3 rows
+    reader_kwargs["skiprows"] = 4
+    # - Define encoding
+    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
+    # - Avoid first column to become df index !!!
+    reader_kwargs["index_col"] = False
+    # - Define behaviour when encountering bad lines
+    reader_kwargs["on_bad_lines"] = "skip"
+    # - Define reader engine
+    #   - C engine is faster
+    #   - Python engine is more feature-complete
+    reader_kwargs["engine"] = "python"
+    # - Define on-the-fly decompression of on-disk data
+    #   - Available: gzip, bz2, zip
+    reader_kwargs["compression"] = "infer"
+    # - Strings to recognize as NA/NaN and replace with standard NA flags
+    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+    reader_kwargs["na_values"] = ["na", "", "error"]
+
+    ##------------------------------------------------------------------------.
+    #### Read the data
+    df = read_raw_text_file(
+        filepath=filepath,
+        column_names=column_names,
+        reader_kwargs=reader_kwargs,
+        logger=logger,
+    )
+
+    ##------------------------------------------------------------------------.
+    #### Adapt the dataframe to adhere to DISDRODB L0 standards
+    # Split and assign columns
+    df = df["TO_PARSE"].str.split(",", n=2, expand=True)
+    df.columns = ["time", "RECORD", "TO_PARSE"]
+
+    # Define time in datetime format
+    df["time"] = pd.to_datetime(df["time"].str.strip('"'), format="%Y-%m-%d %H:%M:%S", errors="coerce")
+
+    # Keep only rows with valid number of values
+    df = df[df["TO_PARSE"].str.count(",") == 1085]
+
+    # Retrieve arrays
+    df_split = df["TO_PARSE"].str.split(",", expand=True)
+    raw_drop_concentration = df_split.iloc[:, :32].agg(",".join, axis=1).str.replace("-10", "0")
+    raw_drop_average_velocity = "0,0," + df_split.iloc[:, 32:62].agg(",".join, axis=1)
+    raw_drop_number = df_split.iloc[:, 62:].agg(",".join, axis=1)
+    df["raw_drop_concentration"] = raw_drop_concentration
+    df["raw_drop_average_velocity"] = raw_drop_average_velocity
+    df["raw_drop_number"] = raw_drop_number
+
+    # Drop columns not agreeing with DISDRODB L0 standards
+    df = df.drop(columns=["TO_PARSE", "RECORD"])
+    return df
+
+
+@is_documented_by(reader_generic_docstring)
+def reader(
+    filepath,
+    logger=None,
+):
+    """Reader."""
+    # Retrieve spectrum filepath
+    spectrum_filepath = filepath.replace("parsivel", "spectre")
+
+    # Read integral variables
+    df = reader_parsivel(filepath, logger=logger)
+
+    # Drop duplicates timesteps
+    df = df.drop_duplicates(subset="time", keep="first")
+
+    # Initialize empty arrays
+    # --> 0 values array produced in L0B
+    arrays_columns = ["raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"]
+    for c in arrays_columns:
+        if c not in df:
+            df[c] = ""
+
+    # Add raw spectrum if available
+    if os.path.exists(spectrum_filepath):
+        # Read raw spectrum for corresponding timesteps
+        df_raw_spectrum = reader_spectrum(spectrum_filepath, logger=logger)
+        df_raw_spectrum = df_raw_spectrum.drop_duplicates(subset="time", keep="first")
+        # Add raw array to df
+        df = df.set_index("time")
+        df_raw_spectrum = df_raw_spectrum.set_index("time")
+        df.update(df_raw_spectrum)
+        # Set back time as column
+        df = df.reset_index()
+
+    # Return the dataframe adhering to DISDRODB L0 standards
+    return df
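
For illustration, the header-detection step of reader_parsivel can be exercised on its own. The CR1000-style rows below are invented for the example; the quote stripping, header split, and start_row_idx = i + 3 offset mirror the reader above.

    rows = [
        '"TOA5","IGE_station","CR1000"',               # station/program line
        '"TIMESTAMP","RECORD","rainfall_rate_32bit"',  # header row
        '"TS","RN","mm/h"',                            # units row
        '"","","Smp"',                                 # aggregation row
        '"2024-01-01 00:00:00",1,0.00',                # first data row
    ]

    for i, line in enumerate(rows[:3]):
        if "TIMESTAMP" in line:
            line = line.replace('""', '"').replace('"', "")    # strip quotes
            header = line[line.find("TIMESTAMP"):].split(",")  # column names
            n_columns = len(header)
            start_row_idx = i + 3  # skip the header, units and aggregation rows
            break
    else:
        raise ValueError("Could not find 'TIMESTAMP' in the first 3 rows of the file.")

    print(header, n_columns, start_row_idx)  # ['TIMESTAMP', 'RECORD', 'rainfall_rate_32bit'] 3 4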
disdrodb/l0/template_tools.py CHANGED
@@ -162,10 +162,11 @@ def _print_df_summary(df, indices, columns, print_column_names):
     df_summary = df_summary.loc[summary_stats]
     # Print summary stats
     for i, column in zip(indices, columns, strict=True):
-        tmp_df = df_summary[[column]]
-        tmp_df.columns = [""]
-        _print_column_index(i, column_name=column, print_column_names=print_column_names)
-        _print_value(tmp_df)
+        if column in df_summary:
+            tmp_df = df_summary[[column]]
+            tmp_df.columns = [""]
+            _print_column_index(i, column_name=column, print_column_names=print_column_names)
+            _print_value(tmp_df)
 
 
 def print_df_summary_stats(
@@ -192,19 +193,18 @@ def print_df_summary_stats(
     """
     # Define columns of interest
     _, columns_of_interest = _get_selected_column_names(df, column_indices)
-    # Remove columns of dtype object or string
-    indices_to_remove = np.where((df.dtypes == type(object)) | (df.dtypes == str))  # noqa
-    indices = np.arange(0, len(df.columns))
-    indices = indices[np.isin(indices, indices_to_remove, invert=True)]
-    columns = df.columns[indices]
+    # Select only numeric columns (remove columns of dtype object or string)
+    columns = df.select_dtypes(include="number").columns
+    indices = df.columns.get_indexer(columns)
     if len(columns) == 0:
         raise ValueError("No numeric columns in the dataframe.")
     # Select only columns of interest
-    idx_of_interest = np.where(np.isin(columns, columns_of_interest))[0]
-    if len(idx_of_interest) == 0:
+    mask = columns.isin(columns_of_interest)
+    columns = columns[mask]
+    indices = indices[mask]
+    if len(columns) == 0:
         raise ValueError("No numeric columns at the specified column_indices.")
-    columns = columns[idx_of_interest]
-    indices = indices[idx_of_interest]
+
     # Print summary stats
     _print_df_summary(df=df, indices=indices, columns=columns, print_column_names=print_column_names)
 
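
The refactor above swaps manual dtype index arithmetic for pandas built-ins; a standalone illustration of the new selection pattern:

    import pandas as pd

    df = pd.DataFrame({"station": ["A", "B"], "rain_rate": [1.2, 0.4], "n_drops": [10, 3]})

    # Keep only numeric columns and recover their positional indices
    columns = df.select_dtypes(include="number").columns
    indices = df.columns.get_indexer(columns)

    print(list(columns))  # ['rain_rate', 'n_drops']
    print(indices)        # [1 2]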
disdrodb/l1/classification.py CHANGED
@@ -144,7 +144,7 @@ def qc_spikes_isolated_precip(hydrometeor_type, sample_interval):
 
     Parameters
     ----------
-    hydrometeor_type: xr.DataArray
+    hydrometeor_type: xarray.DataArray
         Hydrometeor type classification array with a ``time`` coordinate.
         Precipitation presence is defined where ``hydrometeor_type >= 1``.
     sample_interval : float or int
@@ -153,7 +153,7 @@ def qc_spikes_isolated_precip(hydrometeor_type, sample_interval):
 
     Returns
     -------
-    flag_spikes : xr.DataArray of int
+    flag_spikes : xarray.DataArray of int
         Binary QC flag array (same dimensions as input) with:
         * 0 : no spike detected
         * 1 : isolated precipitation spike
@@ -762,9 +762,13 @@ def classify_raw_spectrum(
 
     # ------------------------------------------------------------------------.
     #### Define precipitation type variable
-    precipitation_type = xr.ones_like(ds["time"], dtype=float) * -1
-    precipitation_type = xr.where(hydrometeor_type.isin([0]), 0, precipitation_type)
-    precipitation_type = xr.where(hydrometeor_type.isin([1, 2, 3]), 0, precipitation_type)
+    precipitation_type = xr.ones_like(ds["time"], dtype=float) * -2
+    precipitation_type = xr.where(hydrometeor_type.isin([0]), -1, precipitation_type)
+    precipitation_type = xr.where(
+        hydrometeor_type.isin([1, 2, 3, 9]),
+        0,
+        precipitation_type,
+    )  # 9 hail in rainfall class currently
     precipitation_type = xr.where(hydrometeor_type.isin([5, 6, 7, 8]), 1, precipitation_type)
     precipitation_type = xr.where(hydrometeor_type.isin([4]), 2, precipitation_type)
     precipitation_type.attrs.update(
@@ -837,7 +841,7 @@ def classify_raw_spectrum(
     # ------------------------------------------------------------------------
     #### Define QC splashing, strong_wind, margin_fallers, spikes
     # FUTURE: flag_spikes can be used for non hydrometeor classification,
-    # --> But caution because observing the below show true rainfall signature
+    # --> But caution because observing the below code show some true rainfall signature
     # --> raw_spectrum.isel(time=(flag_spikes == 0) & (precipitation_type == 0)).disdrodb.plot_spectrum()
 
     flag_splashing = xr.where((precipitation_type == 0) & (fraction_splash >= 0.1), 1, 0)
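
The precipitation_type variable above is built by successively overwriting a fill value with xr.where, so later assignments take precedence. A standalone toy illustration (the integer class values come from the hunk; their meteorological meanings are not spelled out here):

    import numpy as np
    import xarray as xr

    hydrometeor_type = xr.DataArray(np.array([0, 1, 4, 6, 9]), dims="time")

    # Start from -2 and overwrite per class group, as in classify_raw_spectrum
    precipitation_type = xr.ones_like(hydrometeor_type, dtype=float) * -2
    precipitation_type = xr.where(hydrometeor_type.isin([0]), -1, precipitation_type)
    precipitation_type = xr.where(hydrometeor_type.isin([1, 2, 3, 9]), 0, precipitation_type)
    precipitation_type = xr.where(hydrometeor_type.isin([5, 6, 7, 8]), 1, precipitation_type)
    precipitation_type = xr.where(hydrometeor_type.isin([4]), 2, precipitation_type)

    print(precipitation_type.values)  # [-1.  0.  2.  1.  0.]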
disdrodb/l2/empirical_dsd.py CHANGED
@@ -106,7 +106,7 @@ def count_bins_with_drops(ds):
 
 def _compute_qc_bins_metrics(arr):
     # Find indices of non-zero elements
-    arr = arr.copy()
+    arr = np.asarray(arr).copy()
     arr[np.isnan(arr)] = 0
     non_zero_indices = np.nonzero(arr)[0]
     if non_zero_indices.size == 0:
@@ -117,13 +117,16 @@ def _compute_qc_bins_metrics(arr):
     segment = arr[start_idx : end_idx + 1]
 
     # Compute number of bins with drops
-    total_bins = segment.size
+    total_bins = len(non_zero_indices)
+
+    # Compute number of bins in the segment
+    segment_bins = segment.size
 
     # Compute number of missing bins (zeros)
     n_missing_bins = int(np.sum(segment == 0))
 
     # Compute fraction of bins with missing drops
-    fraction_missing = n_missing_bins / total_bins
+    fraction_missing = n_missing_bins / segment_bins
 
     # Identify longest run of consecutive zeros
     zero_mask = (segment == 0).astype(int)
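
In plain numpy, the per-timestep metrics that _compute_qc_bins_metrics derives can be sketched as follows (toy drop-count vector; the variable names are illustrative):

    import numpy as np

    arr = np.array([0.0, 3.0, 0.0, 0.0, 2.0, 1.0, 0.0])  # drop counts per diameter bin
    arr[np.isnan(arr)] = 0
    non_zero = np.nonzero(arr)[0]                         # bins 1, 4, 5

    segment = arr[non_zero[0] : non_zero[-1] + 1]         # between first and last non-zero bin
    n_bins = len(non_zero)                                # Nbins -> 3
    n_missing = int(np.sum(segment == 0))                 # Nbins_missing -> 2
    fraction_missing = n_missing / segment.size           # Nbins_missing_fraction -> 0.4

    # Longest run of consecutive empty bins inside the segment
    zero_mask = np.concatenate(([0], (segment == 0).astype(int), [0]))
    edges = np.flatnonzero(np.diff(zero_mask)).reshape(-1, 2)
    n_consecutive = int((edges[:, 1] - edges[:, 0]).max()) if edges.size else 0  # -> 2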
@@ -152,14 +155,14 @@ def compute_qc_bins_metrics(ds):
     optionally collapses over velocity methods and the velocity dimension, then
     computes four metrics per time step:
 
-    1. Nbins: total number of diameter bins between the first and last non-zero count
-    2. Nbins_missing: number of bins with zero or NaN counts in that interval
-    3. Nbins_missing_fraction: fraction of missing bins (zeros) in the interval
+    1. Nbins: total number of diameter bins with non-zero count
+    2. Nbins_missing: number of bins with zero or NaN counts between the first and last non-zero count
+    3. Nbins_missing_fraction: fraction of missing bins (zeros) between the first and last non-zero count
     4. Nbins_missing_consecutive: maximum length of consecutive missing bins
 
     Parameters
     ----------
-    ds : xr.Dataset
+    ds : xarray.Dataset
         Input dataset containing one of the following variables:
         'drop_counts', 'drop_number_concentration', or 'drop_number'.
         If a 'velocity_method' dimension exists, only the first method is used.
@@ -167,7 +170,7 @@ def compute_qc_bins_metrics(ds):
 
     Returns
     -------
-    xr.Dataset
+    xarray.Dataset
         Dataset with a new 'metric' dimension of size 4 and coordinates:
         ['Nbins', 'Nbins_missing', 'Nbins_missing_fraction', 'Nbins_missing_consecutive'],
         indexed by 'time'.
@@ -298,7 +301,7 @@ def get_drop_number_concentration(drop_number, velocity, diameter_bin_width, sample_interval):
 
     Returns
     -------
-    drop_number_concentration : xarray.DataArray or ndarray
+    drop_number_concentration : xarray.DataArray or numpy.ndarray
         Array of drop number concentrations \\( N(D) \\) in m⁻³·mm⁻¹, representing
         the number of drops per unit volume per unit diameter interval.
 
@@ -355,7 +358,7 @@ def get_total_number_concentration(drop_number_concentration, diameter_bin_width):
 
     Returns
     -------
-    total_number_concentration : xarray.DataArray or ndarray
+    total_number_concentration : xarray.DataArray or numpy.ndarray
         Total number concentration \\( N_t \\) in m⁻³, representing the total number
         of drops per unit volume.
 
@@ -692,8 +695,10 @@ def get_equivalent_reflectivity_factor(drop_number_concentration, diameter, diameter_bin_width):
         dim=DIAMETER_DIMENSION,
         skipna=False,
     )
+    # Set to NaN where z <= 0
     invalid_mask = z > 0
     z = z.where(invalid_mask)
+
     # Compute equivalent reflectivity factor in dBZ
     # - np.log10(np.nan) returns -Inf !
     # --> We mask again after the log
@@ -741,8 +746,11 @@ def get_equivalent_reflectivity_spectrum(drop_number_concentration, diameter):
     """
     # Compute reflectivity in mm⁶·m⁻³
     z = drop_number_concentration * ((diameter * 1000) ** 6)
+
+    # Set to NaN where z <= 0
     invalid_mask = z > 0
     z = z.where(invalid_mask)
+
     # Compute equivalent reflectivity factor in dBZ
     # - np.log10(np.nan) returns -Inf !
     # --> We mask again after the log
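
Both reflectivity hunks follow the same masking discipline, sketched here with toy values: non-positive linear reflectivity (mm⁶ m⁻³) is set to NaN before taking the logarithm, and the dBZ result is masked again so no -Inf survives:

    import numpy as np
    import xarray as xr

    z = xr.DataArray([2.0e4, 0.0, 5.0e2], dims="time")  # toy reflectivity in mm^6 m^-3

    z = z.where(z > 0)                       # set to NaN where z <= 0
    z_dbz = 10 * np.log10(z)                 # convert to dBZ
    z_dbz = z_dbz.where(np.isfinite(z_dbz))  # mask again after the log

    print(z_dbz.values)  # [43.0103     nan 26.9897]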
@@ -930,6 +938,8 @@ def get_min_max_diameter(drop_counts):
     max_drop_diameter : xarray.DataArray
         Maximum diameter where drop_counts is non-zero, for each time step.
     """
+    # TODO: maybe use lower bound for minimum, and upper bound for maximum
+
     # Create a boolean mask where drop_counts is non-zero
     non_zero_mask = drop_counts > 0
 
@@ -1500,7 +1510,7 @@ def get_kinetic_energy_spectrum(
 
     Returns
     -------
-    xr.DataArray
+    xarray.DataArray
         Kinetic Energy Spectrum [J/m2/mm]
     """
     KE_spectrum = (
@@ -1718,9 +1728,9 @@ def compute_integral_parameters(
 
     Parameters
     ----------
-    drop_number_concentration : xr.DataArray
+    drop_number_concentration : xarray.DataArray
         Drop number concentration in each diameter bin [#/m3/mm].
-    velocity : xr.DataArray
+    velocity : xarray.DataArray
         Fall velocity of drops in each diameter bin [m/s].
         The presence of a velocity_method dimension enable to compute the parameters
         with different velocity estimates.
@@ -1873,9 +1883,9 @@ def compute_spectrum_parameters(
 
     Parameters
     ----------
-    drop_number_concentration : xr.DataArray
+    drop_number_concentration : xarray.DataArray
         Drop number concentration in each diameter bin [#/m3/mm].
-    velocity : xr.DataArray
+    velocity : xarray.DataArray
         Fall velocity of drops in each diameter bin [m/s].
         The presence of a velocity_method dimension enable to compute the parameters
         with different velocity estimates.
disdrodb/l2/processing.py CHANGED
@@ -36,7 +36,7 @@ from disdrodb.l2.empirical_dsd import (
     get_rain_rate_from_drop_number,
 )
 from disdrodb.psd import create_psd, estimate_model_parameters
-from disdrodb.psd.fitting import compute_gof_stats
+from disdrodb.psd.gof_metrics import compute_gof_stats
 from disdrodb.utils.decorators import check_pytmatrix_availability
 from disdrodb.utils.manipulations import (
     define_diameter_array,
@@ -364,10 +364,12 @@ def generate_l2e(
     ds : xarray.Dataset
         DISDRODB L1 dataset.
         Alternatively, a xarray dataset with at least:
-        - variables: raw_drop_number
-        - dimension: DIAMETER_DIMENSION
-        - coordinates: diameter_bin_center, diameter_bin_width, sample_interval
-        - attributes: sensor_name
+
+        - variables: raw_drop_number
+        - dimension: DIAMETER_DIMENSION
+        - coordinates: diameter_bin_center, diameter_bin_width, sample_interval
+        - attributes: sensor_name
+
     ds_env : xarray.Dataset, optional
         Environmental dataset used for fall velocity and water density estimates.
         If None, a default environment dataset will be loaded.
@@ -642,10 +644,12 @@ def generate_l2e(
 def _get_default_optimization(psd_model):
     """PSD model defaults."""
     defaults = {
-        "ExponentialPSD": "ML",
-        "GammaPSD": "ML",
-        "LognormalPSD": "ML",
+        "ExponentialPSD": "GS",
+        "GammaPSD": "GS",
+        "LognormalPSD": "GS",
         "NormalizedGammaPSD": "GS",
+        "GeneralizedGammaPSD": "GS",
+        "NormalizedGeneralizedGammaPSD": "GS",
     }
     optimization = defaults[psd_model]
     return optimization
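
Every PSD model now defaults to grid search. A minimal sketch of how a None optimization argument would fall back to these defaults (the resolve_optimization helper is hypothetical, inferred from the generate_l2m(..., optimization=None, ...) signature below):

    DEFAULTS = {
        "ExponentialPSD": "GS",
        "GammaPSD": "GS",
        "LognormalPSD": "GS",
        "NormalizedGammaPSD": "GS",
        "GeneralizedGammaPSD": "GS",
        "NormalizedGeneralizedGammaPSD": "GS",
    }

    def resolve_optimization(psd_model, optimization=None):
        # Hypothetical helper: fall back to the per-model default when
        # no fitting method is requested explicitly.
        return optimization if optimization is not None else DEFAULTS[psd_model]

    print(resolve_optimization("GammaPSD"))        # GS
    print(resolve_optimization("GammaPSD", "ML"))  # ML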
@@ -695,7 +699,7 @@ def generate_l2m(
     psd_model,
     # Fitting options
     optimization=None,
-    optimization_kwargs=None,
+    optimization_settings=None,
     # PSD discretization
     diameter_min=0,
     diameter_max=10,
@@ -735,7 +739,7 @@ def generate_l2m(
     optimization : str, optional
         The fitting optimization procedure. Either "GS" (Grid Search), "ML (Maximum Likelihood)
         or "MOM" (Method of Moments).
-    optimization_kwargs : dict, optional
+    optimization_settings : dict, optional
         Dictionary with arguments to customize the fitting procedure.
         minimum_nbins: int
             Minimum number of bins with drops required to fit the PSD model.
@@ -786,7 +790,7 @@ def generate_l2m(
         ds=ds,
         psd_model=psd_model,
         optimization=optimization,
-        optimization_kwargs=optimization_kwargs,
+        optimization_settings=optimization_settings,
     )
     psd_fitting_attrs = ds_psd_params.attrs
 
@@ -841,6 +845,20 @@ def generate_l2m(
         "drop_number_concentration",
         "fall_velocity",
         "N",
+        # L0C QC
+        "qc_time",
+        # L1 flags and variables
+        "qc_resampling",
+        "n_margin_fallers",
+        "n_splashing",
+        "flag_graupel",
+        "flag_hail",
+        "flag_spikes",
+        "flag_splashing",
+        "flag_wind_artefacts",
+        # L2E drop statistics
+        "Dmin",
+        "Dmax",
         *METEOROLOGICAL_VARIABLES,
     ]
     variables = [var for var in variables if var in ds]
@@ -885,12 +903,12 @@ def generate_l2_radar(
     ----------
     ds : xarray.Dataset
         Dataset containing the drop number concentration variable or the PSD parameters.
-    frequency : str, float, or list of str and float, optional
+    frequency : str, float, or list of str or float, optional
         Frequencies in GHz for which to compute the radar parameters.
         Alternatively, also strings can be used to specify common radar frequencies.
         If ``None``, the common radar frequencies will be used.
         See ``disdrodb.scattering.available_radar_bands()``.
-    num_points: int or list of integer, optional
+    num_points: int or list of int, optional
        Number of bins into which discretize the PSD.
     diameter_max : float or list of float, optional
         Maximum diameter. The default value is 10 mm.
@@ -899,7 +917,7 @@ def generate_l2_radar(
     axis_ratio_model : str or list of str, optional
         Models to compute the axis ratio. The default model is ``Thurai2007``.
         See available models with ``disdrodb.scattering.available_axis_ratio_models()``.
-    permittivity_model : str str or list of str, optional
+    permittivity_model : str or list of str, optional
         Permittivity model to use to compute the refractive index and the
         rayleigh_dielectric_factor. The default is ``Turner2016``.
         See available models with ``disdrodb.scattering.available_permittivity_models()``.
disdrodb/metadata/download.py CHANGED
@@ -38,7 +38,7 @@ def download_metadata_archive(directory_path, force=False):
 
     Returns
     -------
-    metadata_archive_dir
+    str
         The DISDRODB Metadata Archive directory path.
     """
     # Define DISDRODB Metadata Archive GitHub URL
disdrodb/metadata/geolocation.py CHANGED
@@ -97,10 +97,10 @@ def infer_altitudes(lats, lons, dem="aster30m"):
 
     Notes
     -----
-    - The OpenTopoData API has a limit of 1000 calls per day.
-    - Each request can include up to 100 locations.
-    - The API allows a maximum of 1 call per second.
-    - The API requests are made in blocks of up to 100 coordinates,
+    The OpenTopoData API has a limit of 1000 calls per day.
+    Each request can include up to 100 locations.
+    The API allows a maximum of 1 call per second.
+    The API requests are made in blocks of up to 100 coordinates,
     with a 2-second delay between requests.
     """
     # Check that lats and lons have the same length
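
As a hedged illustration of the request pattern these notes describe (not the library's own code), coordinates can be chunked into blocks of up to 100 and queried with a delay; OpenTopoData accepts locations as lat,lon pairs separated by |:

    import time

    def iter_location_blocks(lats, lons, block_size=100):
        # Yield OpenTopoData 'locations' strings for blocks of up to 100 points
        for i in range(0, len(lats), block_size):
            block = zip(lats[i : i + block_size], lons[i : i + block_size])
            yield "|".join(f"{lat},{lon}" for lat, lon in block)

    lats, lons = [46.52, 46.20], [6.63, 6.15]
    for locations in iter_location_blocks(lats, lons):
        url = f"https://api.opentopodata.org/v1/aster30m?locations={locations}"
        print(url)     # e.g. fetch with requests.get(url).json()["results"]
        time.sleep(2)  # 2-second delay between requests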
disdrodb/metadata/reader.py CHANGED
@@ -73,13 +73,13 @@ def read_metadata_archive(
     metadata_archive_dir : str or Path-like, optional
         Path to the root of the DISDRODB Metadata Archive. If None, the
         default metadata base directory is used. Default is None.
-    data_sources : str or sequence of str, optional
+    data_sources : str or list of str, optional
         One or more data source identifiers to filter stations by. If None,
         no filtering on data source is applied. The default is is None.
-    campaign_names : str or sequence of str, optional
+    campaign_names : str or list of str, optional
         One or more campaign names to filter stations by. If None, no filtering
         on campaign is applied. The default is is None.
-    station_names : str or sequence of str, optional
+    station_names : str or list of str, optional
         One or more station names to include. If None, all stations matching
         other filters are considered. The default is is None.
     available_data: bool, optional
@@ -59,40 +59,42 @@ def get_list_metadata(
59
59
 
60
60
  - if available_data is False, return metadata filepaths of stations present in the DISDRODB Metadata Archive
61
61
  - if available_data is True, return metadata filepaths of stations with data available on the
62
- online DISDRODB Decentralized Data Archive (i.e., stations with the disdrodb_data_url in the metadata).
62
+ online DISDRODB Decentralized Data Archive (i.e., stations with the disdrodb_data_url in the metadata).
63
63
 
64
64
  If ``product`` is specified:
65
65
 
66
66
  - if available_data is False, return metadata filepaths of stations where
67
- the product directory exists in the in the local DISDRODB Data Archive
67
+ the product directory exists in the in the local DISDRODB Data Archive
68
68
  - if available_data is True, return metadata filepaths of stations where product data exists in the
69
69
  in the local DISDRODB Data Archive.
70
+
70
71
  The default is is False.
71
72
 
72
- data_sources : str or sequence of str, optional
73
+ data_sources : str or list of str, optional
73
74
  One or more data source identifiers to filter stations by.
74
75
  The name(s) must be UPPER CASE.
75
76
  If None, no filtering on data source is applied. The default is is ``None``.
76
- campaign_names : str or sequence of str, optional
77
+ campaign_names : str or list of str, optional
77
78
  One or more campaign names to filter stations by.
78
79
  The name(s) must be UPPER CASE.
79
80
  If None, no filtering on campaign is applied. The default is is ``None``.
80
- station_names : str or sequence of str, optional
81
+ station_names : str or list of str, optional
81
82
  One or more station names to include.
82
83
  If None, all stations matching other filters are considered. The default is is ``None``.
83
84
  raise_error_if_empty : bool, optional
84
85
  If True and no stations satisfy the criteria, raise a ``ValueError``.
85
86
  If False, return an empty list/tuple. The default is False.
86
- invalid_fields_policy : {'raise', 'warn', 'ignore'}, optional
87
+ invalid_fields_policy : str, optional
87
88
  How to handle invalid filter values for ``data_sources``, ``campaign_names``,
88
- or ``station_names`` that are not present in the metadata archive:
89
+ or ``station_names`` that are not present in the metadata archive. Valid options are:
89
90
 
90
91
  - 'raise' : raise a ``ValueError`` (default)
91
92
  - 'warn' : emit a warning, then ignore invalid entries
92
93
  - 'ignore': silently drop invalid entries
94
+
93
95
  data_archive_dir : str or Path-like, optional
94
96
  Path to the root of the local DISDRODB Data Archive. Format: ``<...>/DISDRODB``
95
- Required only if ``product``is specified.
97
+ Required only if ``product`` is specified.
96
98
  If None, the``data_archive_dir`` path specified in the DISDRODB active configuration file is used.
97
99
  The default is None.
98
100
  metadata_archive_dir : str or Path-like, optional