disdrodb 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +22 -4
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/io.py +20 -18
  6. disdrodb/api/path.py +42 -77
  7. disdrodb/api/search.py +89 -23
  8. disdrodb/cli/disdrodb_create_summary.py +1 -1
  9. disdrodb/cli/disdrodb_run_l0.py +1 -1
  10. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  13. disdrodb/cli/disdrodb_run_l1.py +1 -1
  14. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  15. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  16. disdrodb/configs.py +30 -83
  17. disdrodb/constants.py +4 -3
  18. disdrodb/data_transfer/download_data.py +4 -2
  19. disdrodb/docs.py +2 -2
  20. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  21. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  22. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/global.yaml +6 -0
  29. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  30. disdrodb/etc/products/L2E/global.yaml +1 -1
  31. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/global.yaml +1 -1
  33. disdrodb/issue/checks.py +2 -2
  34. disdrodb/l0/check_configs.py +1 -1
  35. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  37. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  38. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  39. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  40. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  41. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  42. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  43. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  44. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  48. disdrodb/l0/l0_reader.py +2 -2
  49. disdrodb/l0/l0a_processing.py +6 -2
  50. disdrodb/l0/l0b_processing.py +26 -19
  51. disdrodb/l0/l0c_processing.py +17 -3
  52. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  54. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  55. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  56. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  57. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  58. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  59. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  60. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  61. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  62. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  63. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  64. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  65. disdrodb/l0/readers/{PARSIVEL2 → PARSIVEL}/NASA/LPVEX.py +16 -28
  66. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +3 -3
  68. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  69. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  73. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  75. disdrodb/l0/readers/PARSIVEL2/{GPM/GCPEX.py → NORWAY/UIB.py} +54 -29
  76. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/{PANGASA.py → PAGASA.py} +6 -3
  77. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +1 -1
  78. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  79. disdrodb/l0/readers/{PARSIVEL/GPM/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  80. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +51 -24
  81. disdrodb/l0/readers/{PARSIVEL/GPM/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  82. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  83. disdrodb/l1/beard_model.py +45 -1
  84. disdrodb/l1/fall_velocity.py +1 -6
  85. disdrodb/l1/filters.py +2 -0
  86. disdrodb/l1/processing.py +6 -5
  87. disdrodb/l1/resampling.py +101 -38
  88. disdrodb/l2/empirical_dsd.py +12 -8
  89. disdrodb/l2/processing.py +4 -3
  90. disdrodb/metadata/search.py +3 -4
  91. disdrodb/routines/l0.py +4 -4
  92. disdrodb/routines/l1.py +173 -60
  93. disdrodb/routines/l2.py +121 -269
  94. disdrodb/routines/options.py +347 -0
  95. disdrodb/routines/wrappers.py +9 -1
  96. disdrodb/scattering/axis_ratio.py +3 -0
  97. disdrodb/scattering/routines.py +1 -1
  98. disdrodb/summary/routines.py +765 -724
  99. disdrodb/utils/archiving.py +51 -44
  100. disdrodb/utils/attrs.py +1 -1
  101. disdrodb/utils/compression.py +4 -2
  102. disdrodb/utils/dask.py +35 -15
  103. disdrodb/utils/dict.py +33 -0
  104. disdrodb/utils/encoding.py +1 -1
  105. disdrodb/utils/manipulations.py +7 -1
  106. disdrodb/utils/routines.py +9 -8
  107. disdrodb/utils/time.py +9 -1
  108. disdrodb/viz/__init__.py +0 -13
  109. disdrodb/viz/plots.py +209 -0
  110. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  111. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/RECORD +124 -95
  112. disdrodb/l0/readers/PARSIVEL/GPM/LPVEX.py +0 -85
  113. /disdrodb/etc/products/L2M/{GAMMA_GS_ND_MAE.yaml → MODELS/GAMMA_GS_ND_MAE.yaml} +0 -0
  114. /disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +0 -0
  115. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_LOG_ND_MAE.yaml → MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml} +0 -0
  116. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_ND_MAE.yaml → MODELS/LOGNORMAL_GS_ND_MAE.yaml} +0 -0
  117. /disdrodb/etc/products/L2M/{LOGNORMAL_ML.yaml → MODELS/LOGNORMAL_ML.yaml} +0 -0
  118. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  119. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  120. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  121. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  122. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  124. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  125. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+
 # -----------------------------------------------------------------------------.
 # Copyright (c) 2021-2023 DISDRODB developers
 #
@@ -29,17 +30,47 @@ def reader(
     """Reader."""
     ##------------------------------------------------------------------------.
     #### Define column names
-    column_names = ["time", "TO_BE_SPLITTED"]
+    column_names = [
+        "date",
+        "time",
+        "sensor_status",
+        "sample_interval",
+        "n1",
+        "n2",
+        "n3",
+        "n4",
+        "n5",
+        "n6",
+        "n7",
+        "n8",
+        "n9",
+        "n10",
+        "n11",
+        "n12",
+        "n13",
+        "n14",
+        "n15",
+        "n16",
+        "n17",
+        "n18",
+        "n19",
+        "n20",
+        "RI",
+        "RA",
+        "RAT",
+    ]
 
     ##------------------------------------------------------------------------.
     #### Define reader options
     reader_kwargs = {}
     # - Define delimiter
-    reader_kwargs["delimiter"] = ";"
-    # - Skip first row as columns names
+    reader_kwargs["delimiter"] = "\\t"
+    # Skip header
     reader_kwargs["header"] = None
-    # - Skip file with encoding errors
-    reader_kwargs["encoding_errors"] = "ignore"
+    # Skip first row as columns names
+    reader_kwargs["skiprows"] = 1
+    # - Define encoding
+    reader_kwargs["encoding"] = "ISO-8859-1"
     # - Avoid first column to become df index !!!
     reader_kwargs["index_col"] = False
     # - Define behaviour when encountering bad lines
@@ -55,7 +86,7 @@ def reader(
     # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
     #                     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
     #                     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
-    reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
+    reader_kwargs["na_values"] = ["na", "", "error"]
 
     ##------------------------------------------------------------------------.
     #### Read the data
@@ -68,37 +99,22 @@ def reader(
 
     ##------------------------------------------------------------------------.
     #### Adapt the dataframe to adhere to DISDRODB L0 standards
-    # Convert time column to datetime
-    df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
-
-    # Split the 'TO_BE_SPLITTED' column
-    df = df["TO_BE_SPLITTED"].str.split(",", expand=True, n=9)
-
-    # Assign column names
-    columns_names = [
-        "station_name",
-        "sensor_status",
-        "sensor_temperature",
-        "number_particles",
-        "rainfall_rate_32bit",
-        "reflectivity_16bit",
-        "mor_visibility",
-        "weather_code_synop_4680",
-        "weather_code_synop_4677",
-        "raw_drop_number",
-    ]
-    df.columns = columns_names
+    # Replace 'status' NaN with 0
+    df["sensor_status"] = df["sensor_status"].astype(float).fillna(value=0).astype(int)
 
-    # Add the time column
-    df["time"] = df_time
+    # Define 'time' datetime column
+    df["time"] = df["date"].astype(str) + " " + df["time"].astype(str)
+    df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
+    df = df.drop(columns=["date"])
 
-    # Drop columns not agreeing with DISDRODB L0 standards
-    df = df.drop(columns=["station_name"])
+    # Create raw_drop_number column
+    bin_columns = ["n" + str(i) for i in range(1, 21)]
+    df_arr = df[bin_columns]
+    df_raw_drop_number = df_arr.agg(";".join, axis=1)
+    df["raw_drop_number"] = df_raw_drop_number
 
-    # Drop rows with invalid values
-    # --> Ensure that weather_code_synop_4677 has length 2
-    # --> If a previous column is missing it will have 000
-    df = df[df["weather_code_synop_4677"].str.len() == 2]
+    # Remove bins columns
+    df = df.drop(columns=bin_columns)
 
     # Return the dataframe adhering to DISDRODB L0 standards
     return df
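Note: the following standalone sketch reproduces the new time/raw_drop_number assembly above on a hypothetical two-row frame (the column layout is taken from the reader; values are illustrative only):

    import pandas as pd

    # Hypothetical sample mimicking the new tab-delimited layout: date, time,
    # 20 bin-count columns (read as strings) plus derived rain variables.
    df = pd.DataFrame({
        "date": ["2024-01-01", "2024-01-01"],
        "time": ["00:01:00", "00:02:00"],
        **{f"n{i}": ["0", "3"] for i in range(1, 21)},
    })

    # Build the datetime column and drop the now-redundant 'date' column.
    df["time"] = pd.to_datetime(df["date"] + " " + df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
    df = df.drop(columns=["date"])

    # Join the 20 bin-count columns into the ';'-delimited raw_drop_number string.
    bin_columns = ["n" + str(i) for i in range(1, 21)]
    df["raw_drop_number"] = df[bin_columns].agg(";".join, axis=1)
    df = df.drop(columns=bin_columns)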
disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py CHANGED
@@ -16,7 +16,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
-"""DISDRODB reader for KMI Biral SW250 sensors."""
+"""DISDRODB reader for KMI Biral SWS250 sensors."""
 import pandas as pd
 
 from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
disdrodb/l1/beard_model.py CHANGED
@@ -385,6 +385,49 @@ def get_water_density(temperature, air_pressure, sea_level_air_pressure=101_325)
     return get_pure_water_density(temperature) * np.exp(-1 * water_compressibility * delta_pressure)
 
 
+####---------------------------------------------------------------------------.
+#### Wrappers
+def retrieve_air_pressure(ds_env):
+    """Retrieve air pressure."""
+    if "air_pressure" in ds_env:
+        return ds_env["air_pressure"]
+    air_pressure = get_air_pressure_at_height(
+        altitude=ds_env["altitude"],
+        latitude=ds_env["latitude"],
+        temperature=ds_env["temperature"],
+        sea_level_air_pressure=ds_env["sea_level_air_pressure"],
+        lapse_rate=ds_env["lapse_rate"],
+    )
+    return air_pressure
+
+
+def retrieve_air_dynamic_viscosity(ds_env):
+    """Retrieve air dynamic viscosity."""
+    air_viscosity = get_air_dynamic_viscosity(ds_env["temperature"])
+    return air_viscosity
+
+
+def retrieve_air_density(ds_env):
+    """Retrieve air density."""
+    temperature = ds_env["temperature"]
+    relative_humidity = ds_env["relative_humidity"]
+    air_pressure = retrieve_air_pressure(ds_env)
+    vapor_pressure = get_vapor_actual_pressure(
+        relative_humidity=relative_humidity,
+        temperature=temperature,
+    )
+    air_density = get_air_density(
+        temperature=temperature,
+        air_pressure=air_pressure,
+        vapor_pressure=vapor_pressure,
+    )
+    return air_density
+
+
+####---------------------------------------------------------------------------.
+#### Beard model
+
+
 
 def get_raindrop_reynolds_number(diameter, temperature, air_density, water_density, g):
     """Compute raindrop Reynolds number.
@@ -395,6 +438,7 @@ def get_raindrop_reynolds_number(diameter, temperature, air_density, water_densi
     Coefficients are taken from Table 1 of Beard 1976.
 
     Reference: Beard 1976; Pruppacher & Klett 1978
+    See also Table A1 in Rahman et al., 2020.
 
     Parameters
     ----------
@@ -422,7 +466,7 @@ def get_raindrop_reynolds_number(diameter, temperature, air_density, water_densi
     air_viscosity = get_air_dynamic_viscosity(temperature)  # kg/(m*s) (aka Pa*s).
     delta_density = water_density - air_density
 
-    # Compute Davis number for small droplets
+    # Compute Davies number for small droplets
     davis_number = 4 * air_density * delta_density * g * diameter**3 / (3 * air_viscosity**2)
 
     # Compute the slip correction (is approx 1 and can be discarded)
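Note: a possible use of the new wrappers (module path inferred from the file list; the required ds_env variables are deduced from the wrapper bodies above, and the values below are illustrative only, not documented defaults):

    import xarray as xr
    from disdrodb.l1.beard_model import retrieve_air_density

    # Environment dataset without 'air_pressure': retrieve_air_pressure() then
    # falls back to get_air_pressure_at_height() using the fields below.
    ds_env = xr.Dataset(
        {
            "temperature": 288.15,
            "relative_humidity": 0.7,
            "altitude": 500.0,
            "latitude": 46.0,
            "sea_level_air_pressure": 101_325.0,
            "lapse_rate": 0.0065,
        },
    )
    air_density = retrieve_air_density(ds_env)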
disdrodb/l1/fall_velocity.py CHANGED
@@ -45,11 +45,6 @@ def get_fall_velocity_atlas_1973(diameter):
     Reviews of Geophysics, 11(1), 1-35.
     https://doi.org/10.1029/RG011i001p00001
 
-    Atlas, D., & Ulbrich, C. W. (1977).
-    Path- and area-integrated rainfall measurement by microwave attenuation in the 1-3 cm band.
-    Journal of Applied Meteorology, 16(12), 1322-1331.
-    https://doi.org/10.1175/1520-0450(1977)016<1322:PAAIRM>2.0.CO;2
-
     Gunn, R., & Kinzer, G. D. (1949).
     The terminal velocity of fall for water droplets in stagnant air.
     Journal of Meteorology, 6(4), 243-248.
@@ -111,7 +106,7 @@ def get_fall_velocity_uplinger_1981(diameter):
 
     """
     # Valid between 0.1 and 7 mm
-    fall_velocity = 4.874 * diameter * np.exp(-0.195 * diameter)
+    fall_velocity = 4.874 * diameter * np.exp(-0.195 * diameter)  # 4.854?
     fall_velocity = fall_velocity.clip(min=0, max=None)
     return fall_velocity
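Note: the Uplinger (1981) relation kept above (the new "# 4.854?" comment flags doubt about the leading coefficient) evaluates, for a few diameters, to:

    import numpy as np

    diameter = np.array([0.5, 1.0, 2.0, 4.0])                     # mm
    fall_velocity = 4.874 * diameter * np.exp(-0.195 * diameter)  # m/s
    fall_velocity = fall_velocity.clip(min=0, max=None)
    # -> approx. [2.21, 4.01, 6.60, 8.94] m/s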
 
disdrodb/l1/filters.py CHANGED
@@ -157,6 +157,8 @@ def define_raindrop_spectrum_mask(
         A boolean mask array indicating valid bins according to the specified criteria.
 
     """
+    # TODO: use lower and upper fall_velocity !
+
     # Ensure it creates a 2D mask if the fall_velocity does not vary over time
     if "time" in drop_number.dims and "time" not in fall_velocity.dims:
         drop_number = drop_number.isel(time=0)
disdrodb/l1/processing.py CHANGED
@@ -121,9 +121,11 @@ def generate_l1(
     # Add sample interval as coordinate (in seconds)
     ds_l1 = add_sample_interval(ds_l1, sample_interval=sample_interval)
 
-    # Add L0C coordinates that might got lost
-    if "time_qc" in ds:
-        ds_l1 = ds_l1.assign_coords({"time_qc": ds["time_qc"]})
+    # Add optional variables to L1 dataset
+    optional_variables = ["time_qc", "qc_resampling"]
+    for var in optional_variables:
+        if var in ds:
+            ds_l1[var] = ds[var]
 
     # -------------------------------------------------------------------------------------------
     # Filter dataset by diameter and velocity bins
@@ -160,10 +162,9 @@ def generate_l1(
     # -------------------------------------------------------------------------------------------
     # Retrieve drop number and drop_counts arrays
     if has_velocity_dimension:
-        drop_number = ds_l1["raw_drop_number"].where(mask)  # 2D (diameter, velocity)
+        drop_number = ds_l1["raw_drop_number"].where(mask, 0)  # 2D (diameter, velocity)
         drop_counts = drop_number.sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
         drop_counts_raw = ds_l1["raw_drop_number"].sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
-
     else:
         drop_number = ds_l1["raw_drop_number"]  # 1D (diameter)
         drop_counts = ds_l1["raw_drop_number"]  # 1D (diameter)
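Note: the .where(mask) -> .where(mask, 0) change above only swaps the fill value. xarray's DataArray.where fills masked-out entries with NaN by default; passing 0 keeps the filtered 2D spectrum NaN-free. A minimal illustration:

    import numpy as np
    import xarray as xr

    dn = xr.DataArray(np.ones((2, 3)), dims=("diameter", "velocity"))
    mask = dn > 10  # everything masked out, for illustration

    dn.where(mask)     # filtered bins become NaN
    dn.where(mask, 0)  # filtered bins become 0; the spectrum stays NaN-free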
disdrodb/l1/resampling.py CHANGED
@@ -19,9 +19,12 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 
-from disdrodb.utils.time import ensure_sample_interval_in_seconds, regularize_dataset
-
-DEFAULT_ACCUMULATIONS = ["10s", "30s", "1min", "2min", "5min", "10min", "30min", "1hour"]
+from disdrodb.utils.time import (
+    ensure_sample_interval_in_seconds,
+    get_dataset_start_end_time,
+    get_sampling_information,
+    regularize_dataset,
+)
 
 
 def add_sample_interval(ds, sample_interval):
@@ -95,6 +98,27 @@ def define_window_size(sample_interval, accumulation_interval):
     return window_size
 
 
+def _finalize_qc_resampling(ds, sample_interval, accumulation_interval):
+    # Compute qc_resampling
+    # - 0 if not missing timesteps
+    # - 1 if all timesteps missing
+    n_timesteps = accumulation_interval / sample_interval
+    ds["qc_resampling"] = np.round(1 - ds["qc_resampling"] / n_timesteps, 1)
+    ds["qc_resampling"].attrs = {
+        "long_name": "Resampling Quality Control Flag",
+        "standard_name": "quality_flag",
+        "units": "",
+        "valid_min": 0.0,
+        "valid_max": 1.0,
+        "description": (
+            "Fraction of timesteps missing when resampling the data."
+            "0 = No timesteps missing; 1 = All timesteps missing;"
+            "Intermediate values indicate partial data coverage."
+        ),
+    }
+    return ds
+
+
 def _resample(ds, variables, accumulation, op):
     if not variables:
         return {}
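Note: a worked example of the qc_resampling arithmetic above. With 60 s data resampled to 600 s, each output step aggregates n_timesteps = 10 input steps; if only 8 of them carried a valid spectrum, the summed indicator is 8 and the flag becomes round(1 - 8/10, 1) = 0.2, i.e. 20% of the window missing:

    import numpy as np

    sample_interval = 60           # s, input resolution
    accumulation_interval = 600    # s, target resolution
    n_timesteps = accumulation_interval / sample_interval  # 10.0
    summed_indicator = 8.0         # valid-spectrum indicator summed over the window
    qc_resampling = np.round(1 - summed_indicator / n_timesteps, 1)  # 0.2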
@@ -113,23 +137,24 @@ def _rolling(ds, variables, window_size, op):
     return ds_subset
 
 
-def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
+def resample_dataset(ds, sample_interval, temporal_resolution):
     """
     Resample the dataset to a specified accumulation interval.
 
+    The output timesteps correspond to the starts of the periods over which
+    the resampling operation has been performed !
+
     Parameters
     ----------
     ds : xarray.Dataset
         The input dataset to be resampled.
     sample_interval : int
-        The sample interval of the input dataset.
-    accumulation_interval : int
-        The interval in seconds over which to accumulate the data.
-    rolling : bool, optional
-        If True, apply a rolling window before resampling. Default is True.
-        If True, forward rolling is performed.
-        The output timesteps correspond to the starts of the periods over which
-        the resampling operation has been performed !
+        The sample interval (in seconds) of the input dataset.
+    temporal_resolution : str
+        The desired temporal resolution for resampling.
+        It should be a string representing the accumulation interval,
+        e.g., "5MIN" for 5 minutes, "1H" for 1 hour, "30S" for 30 seconds, etc.
+        Prefixed with "ROLL" for rolling resampling, e.g., "ROLL5MIN".
 
     Returns
     -------
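Note: get_sampling_information itself is not shown in this diff; the hypothetical parser below only illustrates the temporal_resolution format documented above (the function name and unit table are assumptions, not the library's implementation):

    import re

    def parse_temporal_resolution(temporal_resolution):
        """Parse e.g. '30S', '5MIN', '1H' or 'ROLL5MIN' into (seconds, rolling)."""
        rolling = temporal_resolution.startswith("ROLL")
        spec = temporal_resolution.removeprefix("ROLL")
        value, unit = re.fullmatch(r"(\d+)([A-Z]+)", spec).groups()
        seconds_per_unit = {"S": 1, "MIN": 60, "H": 3600, "D": 86400}
        return int(value) * seconds_per_unit[unit], rolling

    parse_temporal_resolution("ROLL5MIN")  # -> (300, True)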
@@ -149,6 +174,9 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # Ensure sample interval in seconds
     sample_interval = int(ensure_sample_interval_in_seconds(sample_interval))
 
+    # Retrieve accumulation_interval and rolling option
+    accumulation_interval, rolling = get_sampling_information(temporal_resolution)
+
     # --------------------------------------------------------------------------.
     # Raise error if the accumulation_interval is less than the sample interval
     if accumulation_interval < sample_interval:
@@ -157,51 +185,78 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     if not accumulation_interval % sample_interval == 0:
         raise ValueError("The accumulation_interval is not a multiple of sample interval.")
 
+    # Retrieve input dataset start_time and end_time
+    start_time, end_time = get_dataset_start_end_time(ds, time_dim="time")
+
+    # Initialize qc_resampling
+    ds["qc_resampling"] = xr.ones_like(ds["time"], dtype="float")
+
+    # Retrieve dataset attributes
+    attrs = ds.attrs.copy()
+
+    # If no resampling, return as it is
+    if sample_interval == accumulation_interval:
+        attrs["disdrodb_aggregated_product"] = "False"
+        attrs["disdrodb_rolled_product"] = "False"
+        attrs["disdrodb_temporal_resolution"] = temporal_resolution
+
+        ds = _finalize_qc_resampling(ds, sample_interval=sample_interval, accumulation_interval=accumulation_interval)
+        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
+        ds.attrs = attrs
+        return ds
+
     # --------------------------------------------------------------------------.
     #### Preprocess the dataset
-    # Here we set NaN in the raw_drop_number to 0
-    # - We assume that NaN corresponds to 0
-    # - When we regularize, we infill with NaN
+    # - Set timesteps with NaN in drop_number to zero (and set qc_resampling to 0)
     # - When we aggregate with sum, we don't skip NaN
-    # --> Aggregation with original missing timesteps currently results in NaN !
+    # --> Resampling over missing timesteps will result in NaN drop_number and qc_resampling = 1
+    # --> Resampling over timesteps with NaN in drop_number will result in finite drop_number but qc_resampling > 0
+    # - qc_resampling will inform on the amount of timesteps missing
 
-    # Infill NaN values with zeros for drop_number and raw_drop_number
-    # - This might alter integrated statistics if NaN in spectrum does not actually correspond to 0 !
-    # - TODO: NaN should not be set as 0 !
-    for var in ["drop_number", "raw_drop_number"]:
+    for var in ["drop_number", "raw_drop_number", "drop_counts", "drop_number_concentration"]:
         if var in ds:
-            ds[var] = xr.where(np.isnan(ds[var]), 0, ds[var])
+            dims = set(ds[var].dims) - {"time"}
+            invalid_timesteps = np.isnan(ds[var]).any(dim=dims)
+            ds[var] = ds[var].where(~invalid_timesteps, 0)
+            ds["qc_resampling"] = ds["qc_resampling"].where(~invalid_timesteps, 0)
+
+            if np.all(invalid_timesteps).item():
+                raise ValueError("No timesteps with valid spectrum.")
 
     # Ensure regular dataset without missing timesteps
     # --> This adds NaN values for missing timesteps
-    ds = regularize_dataset(ds, freq=f"{sample_interval}s")
+    ds = regularize_dataset(ds, freq=f"{sample_interval}s", start_time=start_time, end_time=end_time)
+    ds["qc_resampling"] = ds["qc_resampling"].where(~np.isnan(ds["qc_resampling"]), 0)
 
     # --------------------------------------------------------------------------.
     # Define dataset attributes
-    attrs = ds.attrs.copy()
     if rolling:
         attrs["disdrodb_rolled_product"] = "True"
     else:
         attrs["disdrodb_rolled_product"] = "False"
 
-    if sample_interval == accumulation_interval:
-        attrs["disdrodb_aggregated_product"] = "False"
-        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
-        ds.attrs = attrs
-        return ds
-
-    # --------------------------------------------------------------------------.
-    # Resample the dataset
     attrs["disdrodb_aggregated_product"] = "True"
+    attrs["disdrodb_temporal_resolution"] = temporal_resolution
 
+    # --------------------------------------------------------------------------.
     # Initialize resample dataset
     ds_resampled = xr.Dataset()
 
     # Retrieve variables to average/sum
+    # - ATTENTION: it will not resample non-dimensional time coordinates of the dataset !
     var_to_average = ["fall_velocity"]
-    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nraw", "Nremoved"]
+    var_to_cumulate = [
+        "raw_drop_number",
+        "drop_number",
+        "drop_counts",
+        "drop_number_concentration",
+        "N",
+        "Nraw",
+        "Nremoved",
+        "qc_resampling",
+    ]
     var_to_min = ["Dmin"]
-    var_to_max = ["Dmax"]
+    var_to_max = ["Dmax", "time_qc"]
 
     # Retrieve available variables
     var_to_average = [var for var in var_to_average if var in ds]
@@ -209,11 +264,6 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     var_to_min = [var for var in var_to_min if var in ds]
     var_to_max = [var for var in var_to_max if var in ds]
 
-    # TODO Define custom processing
-    # - quality_flag --> take worst
-    # - skipna if less than fraction (to not waste lot of data when aggregating over i.e. hours)
-    # - Add tolerance on fraction of missing timesteps for large accumulation_intervals
-
     # Resample the dataset
     # - Rolling currently does not allow direct rolling forward.
     # - We currently use center=False which means search for data backward (right-aligned) !
@@ -239,6 +289,19 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
             {"time": ds_resampled["time"].data[: -window_size + 1]},
         )
 
+    # Finalize qc_resampling
+    ds_resampled = _finalize_qc_resampling(
+        ds_resampled,
+        sample_interval=sample_interval,
+        accumulation_interval=accumulation_interval,
+    )
+    # Set to NaN timesteps where qc_resampling == 1
+    # --> This occurs for missing timesteps in input dataset or all NaN drop_number arrays
+    variables = list(set(ds_resampled.data_vars) - {"qc_resampling"})
+    mask_missing_timesteps = ds_resampled["qc_resampling"] != 1
+    for var in variables:
+        ds_resampled[var] = ds_resampled[var].where(mask_missing_timesteps)
+
     # Add attributes
     ds_resampled.attrs = attrs
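Note: usage under the new signature might look as follows (a sketch: the toy dataset only carries a drop_number spectrum with an assumed diameter_bin_center dimension; real L1 datasets include more variables and attributes):

    import numpy as np
    import pandas as pd
    import xarray as xr
    from disdrodb.l1.resampling import resample_dataset

    times = pd.date_range("2024-01-01", periods=30, freq="1min")
    counts = np.random.default_rng(0).poisson(2, size=(30, 5)).astype(float)
    ds = xr.Dataset(
        {"drop_number": (("time", "diameter_bin_center"), counts)},
        coords={"time": times},
    )
    ds_5min = resample_dataset(ds, sample_interval=60, temporal_resolution="5MIN")      # block aggregation
    ds_roll = resample_dataset(ds, sample_interval=60, temporal_resolution="ROLL5MIN")  # forward-rolling windows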
disdrodb/l2/empirical_dsd.py CHANGED
@@ -220,27 +220,31 @@ def get_effective_sampling_area(sensor_name, diameter):
     check_sensor_name(sensor_name)
     if sensor_name in ["PARSIVEL", "PARSIVEL2"]:
         # Calculate sampling area for each diameter bin (S_i)
+        # - Parsivel remove margin fallers !
+        # - The effective sampling area decreases with increasing drop diameter
+        # sampling_area = 0.0054  # m2
         L = 180 / 1000  # Length of the Parsivel beam in m (180 mm)
         B = 30 / 1000  # Width of the Parsivel beam in m (30mm)
-        sampling_area = L * (B - diameter / 2)
+        sampling_area = L * (B - diameter / 2)  # d_eq
         return sampling_area
-    if sensor_name == "LPM":
+    if sensor_name in ["LPM", "LPM_V0"]:
         # Calculate sampling area for each diameter bin (S_i)
-        L = 228 / 1000  # Length of the Parsivel beam in m (228 mm)
-        B = 20 / 1000  # Width of the Parsivel beam in m (20 mm)
-        sampling_area = L * (B - diameter / 2)
+        # L = 228 / 1000  # Length of the beam in m (228 mm)
+        # B = 20 / 1000  # Width of the beam in m (20 mm)
+        # sampling_area = L * (B - diameter / 2)
+        sampling_area = 0.0045  # m2
         return sampling_area
     if sensor_name == "PWS100":
-        sampling_area = 0.004  # m2  # TODO: L * (B - diameter / 2) ?
+        sampling_area = 0.004  # m2
         return sampling_area
     if sensor_name == "RD80":
         sampling_area = 0.005  # m2
         return sampling_area
-    if sensor_name == "SWS250":  # TODO: L * (B - diameter / 2) ?
+    if sensor_name == "SWS250":
         # Table 29 of the manual that the sample volume is 400cm3, path length?
         # Distance between the end of the hood heaters is 291 mm.
         # Adding a factor of 1.5 for better representation of the Tx-Rx distance: L= 436 mm.
-        sampling_area = 0.0091  # m2
+        sampling_area = 0.0091  # m2  # 0.006504 m2 maybe?
         return sampling_area
     raise NotImplementedError(f"Effective sampling area for {sensor_name} must yet to be specified in the software.")
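Note: the Parsivel branch keeps the diameter-dependent formula A(D) = L * (B - D / 2), which shrinks as margin fallers are excluded for larger drops. For example (diameter assumed in metres, consistent with L and B above):

    import numpy as np

    L = 180 / 1000                               # beam length (m)
    B = 30 / 1000                                # beam width (m)
    diameter = np.array([0.5, 2.0, 5.0]) / 1000  # drop diameter (m)
    sampling_area = L * (B - diameter / 2)       # -> [0.005355, 0.00522, 0.00495] m2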
disdrodb/l2/processing.py CHANGED
@@ -27,7 +27,6 @@ from disdrodb.l2.empirical_dsd import (
     add_bins_metrics,
     compute_integral_parameters,
     compute_spectrum_parameters,
-    get_drop_average_velocity,
     get_drop_number_concentration,
     get_effective_sampling_area,
     get_kinetic_energy_variables_from_drop_number,
@@ -273,6 +272,8 @@ def generate_l2e(
         "Dmin",
         "Dmax",
         "fall_velocity",
+        "qc_resampling",
+        "time_qc",
     ]
 
     variables = [var for var in variables if var in ds]
@@ -282,8 +283,8 @@ def generate_l2e(
     # -------------------------------------------------------------------------------------------
     # Compute and add drop average velocity if an optical disdrometer (i.e OTT Parsivel or ThiesLPM)
     # - We recompute it because if the input dataset is aggregated, it must be updated !
-    if has_velocity_dimension:
-        ds["drop_average_velocity"] = get_drop_average_velocity(ds["drop_number"])
+    # if has_velocity_dimension:
+    #     ds["drop_average_velocity"] = get_drop_average_velocity(ds["drop_number"])
 
     # -------------------------------------------------------------------------------------------
     # Define velocity array with dimension 'velocity_method'
disdrodb/metadata/search.py CHANGED
@@ -102,10 +102,9 @@ def get_list_metadata(
         Path to the root of the DISDRODB Metadata Archive. Format: ``<...>/DISDRODB``
         If None, the``metadata_archive_dir`` path specified in the DISDRODB active configuratio. The default is None.
     **product_kwargs : dict, optional
-        Additional arguments required for some products.
-        For example, for the "L2E" product, you need to specify ``rolling`` and
-        ``sample_interval``. For the "L2M" product, you need to specify also
-        the ``model_name``.
+        Additional arguments required for DISDRODB products L1, L2E and L2M.
+        For the L1, L2E and L2M products, ``temporal_resolution`` is required.
+        FOr the L2M product, ``model_name`` is required.
 
     Returns
     -------
disdrodb/routines/l0.py CHANGED
@@ -50,7 +50,7 @@ from disdrodb.l0.l0b_nc_processing import sanitize_ds
 from disdrodb.l0.l0b_processing import generate_l0b
 from disdrodb.l0.l0c_processing import TOLERANCE_SECONDS, create_l0c_datasets
 from disdrodb.metadata import read_station_metadata
-from disdrodb.utils.archiving import get_files_per_time_block
+from disdrodb.utils.archiving import group_files_by_time_block
 from disdrodb.utils.dask import execute_tasks_safely
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
 
@@ -696,7 +696,7 @@ def run_l0b_station(
     # -----------------------------------------------------------------.
     # Start L0B processing
     t_i = time.time()
-    msg = f"{product} processing of station_name {station_name} has started."
+    msg = f"{product} processing of station {station_name} has started."
     log_info(logger=logger, msg=msg, verbose=verbose)
 
     # -----------------------------------------------------------------.
@@ -774,7 +774,7 @@ def run_l0b_station(
     # -----------------------------------------------------------------.
     # End L0B processing
     timedelta_str = str(datetime.timedelta(seconds=round(time.time() - t_i)))
-    msg = f"{product} processing of station_name {station_name} completed in {timedelta_str}"
+    msg = f"{product} processing of station {station_name} completed in {timedelta_str}"
     log_info(logger=logger, msg=msg, verbose=verbose)
 
     # -----------------------------------------------------------------.
@@ -928,7 +928,7 @@ def run_l0c_station(
     # -------------------------------------------------------------------------.
     # Retrieve dictionary with the required files per time block
     # TODO: allow customizing this in config file, but risk of out of memory !
-    list_event_info = get_files_per_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
+    list_event_info = group_files_by_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
 
     # -----------------------------------------------------------------.
     # Generate L0C files