disdrodb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (129)
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +145 -14
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
  37. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  38. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  39. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  40. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  41. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
  42. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
  43. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +44 -3
  44. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +41 -1
  45. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  46. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  47. disdrodb/l0/l0a_processing.py +30 -30
  48. disdrodb/l0/l0b_nc_processing.py +108 -2
  49. disdrodb/l0/l0b_processing.py +4 -4
  50. disdrodb/l0/l0c_processing.py +5 -13
  51. disdrodb/l0/manuals/SWS250.pdf +0 -0
  52. disdrodb/l0/manuals/VPF730.pdf +0 -0
  53. disdrodb/l0/manuals/VPF750.pdf +0 -0
  54. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  55. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  56. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  57. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +105 -0
  58. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +128 -0
  59. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  62. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  63. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  64. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  65. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  66. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  68. disdrodb/l0/readers/{PARSIVEL → PARSIVEL2}/KIT/BURKINA_FASO.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  70. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  71. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → NCAR/FARM_PARSIVEL2.py} +43 -70
  72. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  73. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +29 -12
  77. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +69 -0
  78. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  79. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  80. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  81. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  82. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  83. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  84. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +31 -14
  85. disdrodb/l0/routines.py +105 -14
  86. disdrodb/l1/__init__.py +5 -0
  87. disdrodb/l1/filters.py +34 -20
  88. disdrodb/l1/processing.py +45 -44
  89. disdrodb/l1/resampling.py +77 -66
  90. disdrodb/l1/routines.py +35 -42
  91. disdrodb/l1_env/routines.py +18 -3
  92. disdrodb/l2/__init__.py +7 -0
  93. disdrodb/l2/empirical_dsd.py +58 -10
  94. disdrodb/l2/event.py +27 -120
  95. disdrodb/l2/processing.py +267 -116
  96. disdrodb/l2/routines.py +618 -254
  97. disdrodb/metadata/standards.py +3 -1
  98. disdrodb/psd/fitting.py +463 -144
  99. disdrodb/psd/models.py +8 -5
  100. disdrodb/routines.py +3 -3
  101. disdrodb/scattering/__init__.py +16 -4
  102. disdrodb/scattering/axis_ratio.py +56 -36
  103. disdrodb/scattering/permittivity.py +486 -0
  104. disdrodb/scattering/routines.py +701 -159
  105. disdrodb/summary/__init__.py +17 -0
  106. disdrodb/summary/routines.py +4120 -0
  107. disdrodb/utils/attrs.py +68 -125
  108. disdrodb/utils/compression.py +30 -1
  109. disdrodb/utils/dask.py +59 -8
  110. disdrodb/utils/dataframe.py +63 -9
  111. disdrodb/utils/directories.py +49 -17
  112. disdrodb/utils/encoding.py +33 -19
  113. disdrodb/utils/logger.py +13 -6
  114. disdrodb/utils/manipulations.py +71 -0
  115. disdrodb/utils/subsetting.py +214 -0
  116. disdrodb/utils/time.py +165 -19
  117. disdrodb/utils/writer.py +20 -7
  118. disdrodb/utils/xarray.py +85 -4
  119. disdrodb/viz/__init__.py +13 -0
  120. disdrodb/viz/plots.py +327 -0
  121. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  122. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/RECORD +127 -87
  123. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  124. disdrodb/l1/encoding_attrs.py +0 -635
  125. disdrodb/l2/processing_options.py +0 -213
  126. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  127. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  128. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  129. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/l1/processing.py CHANGED
@@ -16,22 +16,19 @@
 # -----------------------------------------------------------------------------.
 """Core functions for DISDRODB L1 production."""
 
-
 import xarray as xr
 
-from disdrodb import DIAMETER_DIMENSION, VELOCITY_DIMENSION
-from disdrodb.l1.encoding_attrs import get_attrs_dict, get_encoding_dict
+from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
 from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity
 from disdrodb.l1.filters import define_spectrum_mask, filter_diameter_bins, filter_velocity_bins
 from disdrodb.l1.resampling import add_sample_interval
 from disdrodb.l1_env.routines import load_env_dataset
 from disdrodb.l2.empirical_dsd import (  # TODO: maybe move out of L2
-    compute_qc_bins_metrics,
+    add_bins_metrics,
     get_min_max_diameter,
 )
-from disdrodb.utils.attrs import set_attrs
-from disdrodb.utils.encoding import set_encodings
 from disdrodb.utils.time import ensure_sample_interval_in_seconds, infer_sample_interval
+from disdrodb.utils.writer import finalize_product
 
 
 def generate_l1(
@@ -51,7 +48,7 @@ def generate_l1(
     small_velocity_threshold=2.5,  # 3
     maintain_smallest_drops=True,
 ):
-    """Generate the DISDRODB L1 dataset from the DISDRODB L0C dataset.
+    """Generate DISDRODB L1 Dataset from DISDRODB L0C Dataset.
 
     Parameters
     ----------
@@ -88,17 +85,17 @@ def generate_l1(
     xarray.Dataset
         DISRODB L1 dataset.
     """
-    # Take as input an L0 !
-
     # Retrieve source attributes
     attrs = ds.attrs.copy()
 
     # Determine if the velocity dimension is available
    has_velocity_dimension = VELOCITY_DIMENSION in ds.dims
 
-    # Initialize L2 dataset
-    ds_l1 = xr.Dataset()
+    # Retrieve sensor_name
+    # - If not present, don't drop Parsivels first two bins
+    sensor_name = attrs.get("sensor_name", "")
 
+    # ---------------------------------------------------------------------------
     # Retrieve sample interval
     # --> sample_interval is a coordinate of L0C products
     if "sample_interval" in ds:
@@ -107,39 +104,52 @@ def generate_l1(
         # This line is not called in the DISDRODB processing chain !
         sample_interval = infer_sample_interval(ds, verbose=False)
 
-    # Re-add sample interval as coordinate (in seconds)
-    ds = add_sample_interval(ds, sample_interval=sample_interval)
-
     # ---------------------------------------------------------------------------
     # Retrieve ENV dataset or take defaults
     # --> Used only for Beard fall velocity currently !
     ds_env = load_env_dataset(ds)
 
+    # ---------------------------------------------------------------------------
+    # Initialize L1 dataset
+    ds_l1 = xr.Dataset()
+
+    # Add raw_drop_number variable to L1 dataset
+    ds_l1["raw_drop_number"] = ds["raw_drop_number"]
+
+    # Add sample interval as coordinate (in seconds)
+    ds_l1 = add_sample_interval(ds_l1, sample_interval=sample_interval)
+
+    # Add L0C coordinates that might got lost
+    if "time_qc" in ds_l1:
+        ds_l1 = ds_l1.assign_coords({"time_qc": ds["time_qc"]})
+
     # -------------------------------------------------------------------------------------------
     # Filter dataset by diameter and velocity bins
+    if sensor_name in ["PARSIVEL", "PARSIVEL2"]:
+        # - Remove first two bins because never reports data !
+        # - If not removed, can alter e.g. L2M model fitting
+        ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=0.312)  # it includes the 0.2495-0.3745 bin
+
     # - Filter diameter bins
-    ds = filter_diameter_bins(ds=ds, minimum_diameter=minimum_diameter, maximum_diameter=maximum_diameter)
+    ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=minimum_diameter, maximum_diameter=maximum_diameter)
     # - Filter velocity bins
     if has_velocity_dimension:
-        ds = filter_velocity_bins(ds=ds, minimum_velocity=minimum_velocity, maximum_velocity=maximum_velocity)
+        ds_l1 = filter_velocity_bins(ds=ds_l1, minimum_velocity=minimum_velocity, maximum_velocity=maximum_velocity)
 
     # -------------------------------------------------------------------------------------------
     # Compute fall velocity
-    fall_velocity = get_raindrop_fall_velocity(
-        diameter=ds["diameter_bin_center"],
+    ds_l1["fall_velocity"] = get_raindrop_fall_velocity(
+        diameter=ds_l1["diameter_bin_center"],
         method=fall_velocity_method,
         ds_env=ds_env,  # mm
     )
 
-    # Add fall velocity
-    ds_l1["fall_velocity"] = fall_velocity
-
     # -------------------------------------------------------------------------------------------
     # Define filtering mask according to fall velocity
     if has_velocity_dimension:
         mask = define_spectrum_mask(
-            drop_number=ds["raw_drop_number"],
-            fall_velocity=fall_velocity,
+            drop_number=ds_l1["raw_drop_number"],
+            fall_velocity=ds_l1["fall_velocity"],
             above_velocity_fraction=above_velocity_fraction,
             above_velocity_tolerance=above_velocity_tolerance,
             below_velocity_fraction=below_velocity_fraction,
@@ -152,14 +162,14 @@ def generate_l1(
     # -------------------------------------------------------------------------------------------
     # Retrieve drop number and drop_counts arrays
     if has_velocity_dimension:
-        drop_number = ds["raw_drop_number"].where(mask)  # 2D (diameter, velocity)
+        drop_number = ds_l1["raw_drop_number"].where(mask)  # 2D (diameter, velocity)
         drop_counts = drop_number.sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
-        drop_counts_raw = ds["raw_drop_number"].sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
+        drop_counts_raw = ds_l1["raw_drop_number"].sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
 
     else:
-        drop_number = ds["raw_drop_number"]  # 1D (diameter)
-        drop_counts = ds["raw_drop_number"]  # 1D (diameter)
-        drop_counts_raw = ds["raw_drop_number"]
+        drop_number = ds_l1["raw_drop_number"]  # 1D (diameter)
+        drop_counts = ds_l1["raw_drop_number"]  # 1D (diameter)
+        drop_counts_raw = ds_l1["raw_drop_number"]
 
     # Add drop number and drop_counts
     ds_l1["drop_number"] = drop_number
@@ -173,30 +183,21 @@ def generate_l1(
     ds_l1["Dmin"] = min_drop_diameter
     ds_l1["Dmax"] = max_drop_diameter
     ds_l1["N"] = drop_counts.sum(dim=DIAMETER_DIMENSION)
-    ds_l1["Nremoved"] = drop_counts_raw.sum(dim=DIAMETER_DIMENSION) - ds_l1["N"]
+    ds_l1["Nraw"] = drop_counts_raw.sum(dim=DIAMETER_DIMENSION)
+    ds_l1["Nremoved"] = ds_l1["Nraw"] - ds_l1["N"]
 
     # Add bins statistics
-    ds_l1.update(compute_qc_bins_metrics(ds_l1))
+    ds_l1 = add_bins_metrics(ds_l1)
 
     # -------------------------------------------------------------------------------------------
     # Add quality flags
     # TODO: snow_flags, insects_flag, ...
 
-    # -------------------------------------------------------------------------------------------
-    #### Add L0C coordinates that might got lost
-    if "time_qc" in ds:
-        ds_l1 = ds_l1.assign_coords({"time_qc": ds["time_qc"]})
-
     #### ----------------------------------------------------------------------------.
-    #### Add encodings and attributes
-    # Add variables attributes
-    attrs_dict = get_attrs_dict()
-    ds_l1 = set_attrs(ds_l1, attrs_dict=attrs_dict)
-
-    # Add variables encoding
-    encoding_dict = get_encoding_dict()
-    ds_l1 = set_encodings(ds_l1, encoding_dict=encoding_dict)
-
+    #### Finalize dataset
     # Add global attributes
     ds_l1.attrs = attrs
+
+    # Add variables attributes and encodings
+    ds_l1 = finalize_product(ds_l1, product="L1")
     return ds_l1
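
Note: the behavioral change worth flagging above is that PARSIVEL and PARSIVEL2 spectra now unconditionally lose their first two diameter bins (which never report data) before the user-configurable filtering runs. Below is a minimal, self-contained sketch of what that bin-dropping step amounts to; it is not the disdrodb implementation, and the bin centers are hypothetical stand-ins for the sensor configs:

import numpy as np
import xarray as xr

# Toy spectrum: first two bins always empty, as on a PARSIVEL.
ds = xr.Dataset(
    {"raw_drop_number": ("diameter_bin_center", np.array([0.0, 0.0, 12.0, 7.0, 3.0]))},
    coords={"diameter_bin_center": np.array([0.062, 0.187, 0.312, 0.437, 0.562])},
)

# Rough equivalent of filter_diameter_bins(ds=ds, minimum_diameter=0.312):
# drop every bin whose center falls below the threshold.
ds_filtered = ds.where(ds["diameter_bin_center"] >= 0.312, drop=True)
print(ds_filtered["raw_drop_number"].values)  # [12.  7.  3.]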
disdrodb/l1/resampling.py CHANGED
@@ -15,12 +15,11 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Utilities for temporal resampling."""
-
-
+import numpy as np
 import pandas as pd
 import xarray as xr
 
-from disdrodb.utils.time import regularize_dataset
+from disdrodb.utils.time import ensure_sample_interval_in_seconds, regularize_dataset
 
 DEFAULT_ACCUMULATIONS = ["10s", "30s", "1min", "2min", "5min", "10min", "30min", "1hour"]
 
@@ -96,6 +95,24 @@ def define_window_size(sample_interval, accumulation_interval):
     return window_size
 
 
+def _resample(ds, variables, accumulation, op):
+    if not variables:
+        return {}
+    ds_subset = ds[variables]
+    if "time" in ds_subset.dims:
+        return getattr(ds_subset.resample({"time": accumulation}), op)(skipna=False)
+    return ds_subset
+
+
+def _rolling(ds, variables, window_size, op):
+    if not variables:
+        return {}
+    ds_subset = ds[variables]
+    if "time" in ds_subset.dims:
+        return getattr(ds_subset.rolling(time=window_size, center=False), op)(skipna=False)
+    return ds_subset
+
+
 def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     """
     Resample the dataset to a specified accumulation interval.
@@ -128,20 +145,61 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     - The function updates the dataset attributes and the sample_interval coordinate.
 
     """
-    # Retrieve attributes
-    attrs = ds.attrs.copy()
-
-    # TODO: here infill NaN with zero if necessary before regularizing !
+    # --------------------------------------------------------------------------.
+    # Ensure sample interval in seconds
+    sample_interval = int(ensure_sample_interval_in_seconds(sample_interval))
+
+    # --------------------------------------------------------------------------.
+    # Raise error if the accumulation_interval is less than the sample interval
+    if accumulation_interval < sample_interval:
+        raise ValueError("Expecting an accumulation_interval > sample interval.")
+    # Raise error if accumulation_interval is not multiple of sample_interval
+    if not accumulation_interval % sample_interval == 0:
+        raise ValueError("The accumulation_interval is not a multiple of sample interval.")
+
+    # --------------------------------------------------------------------------.
+    #### Preprocess the dataset
+    # Here we set NaN in the raw_drop_number to 0
+    # - We assume that NaN corresponds to 0
+    # - When we regularize, we infill with NaN
+    # - When we aggregate with sum, we don't skip NaN
+    # --> Aggregation with original missing timesteps currently results in NaN !
+
+    # Infill NaN values with zeros for drop_number and raw_drop_number
+    # - This might alter integrated statistics if NaN in spectrum does not actually correspond to 0 !
+    # - TODO: NaN should not be set as 0 !
+    for var in ["drop_number", "raw_drop_number"]:
+        if var in ds:
+            ds[var] = xr.where(np.isnan(ds[var]), 0, ds[var])
 
     # Ensure regular dataset without missing timesteps
+    # --> This adds NaN values for missing timesteps
     ds = regularize_dataset(ds, freq=f"{sample_interval}s")
 
+    # --------------------------------------------------------------------------.
+    # Define dataset attributes
+    attrs = ds.attrs.copy()
+    if rolling:
+        attrs["disdrodb_rolled_product"] = "True"
+    else:
+        attrs["disdrodb_rolled_product"] = "False"
+
+    if sample_interval == accumulation_interval:
+        attrs["disdrodb_aggregated_product"] = "False"
+        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
+        ds.attrs = attrs
+        return ds
+
+    # --------------------------------------------------------------------------.
+    # Resample the dataset
+    attrs["disdrodb_aggregated_product"] = "True"
+
     # Initialize resample dataset
     ds_resampled = xr.Dataset()
 
     # Retrieve variables to average/sum
     var_to_average = ["fall_velocity"]
-    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nremoved"]
+    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nraw", "Nremoved"]
     var_to_min = ["Dmin"]
     var_to_max = ["Dmax"]
 
@@ -154,6 +212,7 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # TODO Define custom processing
     # - quality_flag --> take worst
     # - skipna if less than fraction (to not waste lot of data when aggregating over i.e. hours)
+    # - Add tolerance on fraction of missing timesteps for large accumulation_intervals
 
     # Resample the dataset
     # - Rolling currently does not allow direct rolling forward.
@@ -163,74 +222,26 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # - https://github.com/pydata/xarray/issues/8958
     if not rolling:
         # Resample
-        if len(var_to_average) > 0:
-            ds_resampled.update(
-                ds[var_to_average].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).mean(skipna=False),
-            )
-        if len(var_to_cumulate) > 0:
-            ds_resampled.update(
-                ds[var_to_cumulate].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).sum(skipna=False),
-            )
-        if len(var_to_min) > 0:
-            ds_resampled.update(
-                ds[var_to_min].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).min(skipna=False),
-            )
-        if len(var_to_max) > 0:
-            ds_resampled.update(
-                ds[var_to_max].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).max(skipna=False),
-            )
-
+        accumulation = pd.Timedelta(seconds=accumulation_interval)
+        ds_resampled.update(_resample(ds=ds, variables=var_to_average, accumulation=accumulation, op="mean"))
+        ds_resampled.update(_resample(ds=ds, variables=var_to_cumulate, accumulation=accumulation, op="sum"))
+        ds_resampled.update(_resample(ds=ds, variables=var_to_min, accumulation=accumulation, op="min"))
+        ds_resampled.update(_resample(ds=ds, variables=var_to_max, accumulation=accumulation, op="max"))
     else:
         # Roll and Resample
         window_size = define_window_size(sample_interval=sample_interval, accumulation_interval=accumulation_interval)
-        if len(var_to_average) > 0:
-            ds_resampled.update(ds[var_to_average].rolling({"time": window_size}, center=False).mean(skipna=False))
-        if len(var_to_cumulate) > 0:
-            ds_resampled.update(ds[var_to_cumulate].rolling({"time": window_size}, center=False).sum(skipna=False))
-
-        if len(var_to_min) > 0:
-            ds_resampled.update(ds[var_to_min].rolling({"time": window_size}, center=False).min(skipna=False))
-        if len(var_to_max) > 0:
-            ds_resampled.update(ds[var_to_max].rolling({"time": window_size}, center=False).max(skipna=False))
-        # Ensure time to correspond to the start time of the integration
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_average, window_size=window_size, op="mean"))
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_cumulate, window_size=window_size, op="sum"))
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_min, window_size=window_size, op="min"))
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_max, window_size=window_size, op="max"))
+        # Ensure time to correspond to the start time of the measurement period
        ds_resampled = ds_resampled.isel(time=slice(window_size - 1, None)).assign_coords(
             {"time": ds_resampled["time"].data[: -window_size + 1]},
         )
 
     # Add attributes
     ds_resampled.attrs = attrs
-    if rolling:
-        ds_resampled.attrs["rolled"] = "True"
-    else:
-        ds_resampled.attrs["rolled"] = "False"
 
     # Add accumulation_interval as new sample_interval coordinate
     ds_resampled = add_sample_interval(ds_resampled, sample_interval=accumulation_interval)
     return ds_resampled
-
-
-def get_possible_accumulations(sample_interval, accumulations=None):
-    """
-    Get a list of valid accumulation intervals based on the sampling time.
-
-    Parameters
-    ----------
-    - sample_interval (int): The inferred sampling time in seconds.
-    - accumulations (list of int or string): List of desired accumulation intervals.
-      If provide integers, specify accumulation in seconds.
-
-    Returns
-    -------
-    - list of int: Valid accumulation intervals in seconds.
-    """
-    # Select default accumulations
-    if accumulations is None:
-        accumulations = DEFAULT_ACCUMULATIONS
-
-    # Get accumulations in seconds
-    accumulations = [int(pd.Timedelta(acc).total_seconds()) if isinstance(acc, str) else acc for acc in accumulations]
-
-    # Filter candidate accumulations to include only those that are multiples of the sampling time
-    possible_accumulations = [acc for acc in accumulations if acc % sample_interval == 0]
-
-    return possible_accumulations
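
Note: resample_dataset now validates its inputs up front instead of assuming well-formed intervals. The standalone sketch below mirrors the new checks; the window arithmetic assumes define_window_size returns accumulation_interval // sample_interval timesteps, which this diff does not show:

def check_resampling_intervals(sample_interval: int, accumulation_interval: int) -> int:
    """Validate intervals (in seconds) and return the rolling window size (assumed)."""
    if accumulation_interval < sample_interval:
        raise ValueError("Expecting an accumulation_interval > sample interval.")
    if accumulation_interval % sample_interval != 0:
        raise ValueError("The accumulation_interval is not a multiple of sample interval.")
    # Number of native timesteps spanned by one accumulation window.
    return accumulation_interval // sample_interval

print(check_resampling_intervals(30, 300))  # 10 timesteps per 5 min window
try:
    check_resampling_intervals(60, 90)
except ValueError as err:
    print(err)  # The accumulation_interval is not a multiple of sample interval.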
disdrodb/l1/routines.py CHANGED
@@ -21,13 +21,14 @@
 import datetime
 import logging
 import os
+import shutil
 import time
 from typing import Optional
 
 import dask
 import xarray as xr
 
-# Directory
+from disdrodb.api.checks import check_station_inputs
 from disdrodb.api.create_directories import (
     create_logs_directory,
     create_product_directory,
@@ -38,7 +39,12 @@ from disdrodb.api.path import (
     define_l1_filename,
 )
 from disdrodb.api.search import get_required_product
-from disdrodb.configs import get_data_archive_dir, get_folder_partitioning, get_metadata_archive_dir
+from disdrodb.configs import (
+    get_data_archive_dir,
+    get_folder_partitioning,
+    get_metadata_archive_dir,
+    get_product_options,
+)
 from disdrodb.l1.processing import generate_l1
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
 
@@ -55,41 +61,6 @@ from disdrodb.utils.writer import write_product
 logger = logging.getLogger(__name__)
 
 
-def get_l1_options():
-    """Get L1 options."""
-    # - TODO: from YAML
-    # - TODO: as function of sensor name
-
-    # minimum_diameter
-    # --> PWS100: 0.05
-    # --> PARSIVEL: 0.2495
-    # --> RD80: 0.313
-    # --> LPM: 0.125 (we currently discard first bin with this setting)
-
-    # maximum_diameter
-    # LPM: 8 mm
-    # RD80: 5.6 mm
-    # OTT: 26 mm
-
-    l1_options = {
-        # Fall velocity option
-        "fall_velocity_method": "Beard1976",
-        # Diameter-Velocity Filtering Options
-        "minimum_diameter": 0.2495,  # OTT PARSIVEL first two bin no data !
-        "maximum_diameter": 10,
-        "minimum_velocity": 0,
-        "maximum_velocity": 12,
-        "above_velocity_fraction": 0.5,
-        "above_velocity_tolerance": None,
-        "below_velocity_fraction": 0.5,
-        "below_velocity_tolerance": None,
-        "small_diameter_threshold": 1,  # 2
-        "small_velocity_threshold": 2.5,  # 3
-        "maintain_smallest_drops": True,
-    }
-    return l1_options
-
-
 @delayed_if_parallel
 @single_threaded_if_parallel
 def _generate_l1(
@@ -152,16 +123,16 @@ def _generate_l1(
     # Log start processing
     msg = f"{product} processing of {filename} has started."
     log_info(logger=logger, msg=msg, verbose=verbose)
-
+    success_flag = False
     ##------------------------------------------------------------------------.
     # Retrieve L1 configurations
-    l1_options = get_l1_options()
+    l1_options = get_product_options("L1").get("product_options")
 
     ##------------------------------------------------------------------------.
     ### Core computation
     try:
         # Open the raw netCDF
-        with xr.open_dataset(filepath, chunks={}, decode_timedelta=False, cache=False) as ds:
+        with xr.open_dataset(filepath, chunks=-1, decode_timedelta=False, cache=False) as ds:
             ds = ds[["raw_drop_number"]].load()
 
         # Produce L1 dataset
@@ -174,7 +145,13 @@ def _generate_l1(
         folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
         filepath = os.path.join(folder_path, filename)
         # Write to disk
-        write_product(ds, product=product, filepath=filepath, force=force)
+        write_product(ds, filepath=filepath, force=force)
+
+        ##--------------------------------------------------------------------.
+        #### - Define logger file final directory
+        if folder_partitioning != "":
+            log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
+            os.makedirs(log_dst_dir, exist_ok=True)
 
         ##--------------------------------------------------------------------.
         # Clean environment
@@ -183,6 +160,7 @@ def _generate_l1(
         # Log end processing
         msg = f"{product} processing of {filename} has ended."
         log_info(logger=logger, msg=msg, verbose=verbose)
+        success_flag = True
 
     ##--------------------------------------------------------------------.
     # Otherwise log the error
@@ -194,6 +172,13 @@ def _generate_l1(
     # Close the file logger
     close_logger(logger)
 
+    # Move logger file to correct partitioning directory
+    if success_flag and folder_partitioning != "" and logger_filepath is not None:
+        # Move logger file to correct partitioning directory
+        dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
+        shutil.move(logger_filepath, dst_filepath)
+        logger_filepath = dst_filepath
+
     # Return the logger file path
     return logger_filepath
 
@@ -259,6 +244,14 @@ def run_l1_station(
     # Retrieve DISDRODB Metadata Archive directory
     metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)
 
+    # Check valid data_source, campaign_name, and station_name
+    check_station_inputs(
+        metadata_archive_dir=metadata_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+
     # Define logs directory
     logs_dir = create_logs_directory(
         product=product,
@@ -309,7 +302,7 @@ def run_l1_station(
     # If no data available, print error message and return None
     if flag_not_available_data:
         msg = (
-            f"{product} processing of {data_source} {campaign_name} {station_name}"
+            f"{product} processing of {data_source} {campaign_name} {station_name} "
            + f"has not been launched because of missing {required_product} data."
         )
         print(msg)
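
Note: the hardcoded get_l1_options() is gone; L1 options are now served by the configs module (backed by the new disdrodb/etc/products/L1/global.yaml listed above). A hedged sketch of how the options now flow into processing; the **l1_options call site is an assumption, not shown in this diff:

from disdrodb.configs import get_product_options
from disdrodb.l1.processing import generate_l1

# Same retrieval as the new _generate_l1 code above.
l1_options = get_product_options("L1").get("product_options")
# Presumably carries the keys the removed get_l1_options() returned,
# e.g. l1_options["fall_velocity_method"] == "Beard1976".
# ds_l1 = generate_l1(ds=ds_l0c, **l1_options)  # hypothetical call site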
disdrodb/l1_env/routines.py CHANGED
@@ -15,9 +15,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Core functions for DISDRODB ENV production."""
-
 import xarray as xr
 
+from disdrodb.constants import GEOLOCATION_COORDS
+
 
 def get_default_environment_dataset():
     """Define defaults values for the ENV dataset."""
@@ -30,9 +31,23 @@ def get_default_environment_dataset():
     return ds_env
 
 
+def _assign_geolocation(ds_src, dst_dst):
+
+    dict_coords = {coord: ds_src[coord] for coord in GEOLOCATION_COORDS if coord in ds_src}
+    dst_dst = dst_dst.assign_coords(dict_coords)
+    return dst_dst
+
+
 def load_env_dataset(ds):
     """Load the ENV dataset."""
-    # TODO - Retrieve relative_humidity and temperature from L1-ENV
+    # TODO: Retrieve relative_humidity and temperature from L1-ENV
     ds_env = get_default_environment_dataset()
-    ds_env = ds_env.assign_coords({"altitude": ds["altitude"], "latitude": ds["latitude"]})
+    # Compute water density
+    # get_water_density(
+    #     temperature=temperature,
+    #     air_pressure=air_pressure,
+    # )
+    # --> (T == 10 --> 999.7, T == 20 --> 998.2
+    ds_env["water_density"] = 1000  # kg / m3  # TODO as function of ENV (temperature, ...) ?
+    ds_env = _assign_geolocation(ds_src=ds, dst_dst=ds_env)
     return ds_env
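
Note: geolocation handling is now generic; instead of hardcoding altitude and latitude, _assign_geolocation copies whichever GEOLOCATION_COORDS exist on the source dataset. A self-contained sketch of the same pattern; the coordinate names assigned to GEOLOCATION_COORDS below are assumed, the real tuple lives in disdrodb.constants:

import xarray as xr

GEOLOCATION_COORDS = ("longitude", "latitude", "altitude")  # assumed contents

ds_src = xr.Dataset(coords={"latitude": 46.2, "longitude": 6.1})  # no altitude
ds_env = xr.Dataset({"water_density": ((), 1000.0)})

# Copy only the geolocation coordinates actually present on the source.
ds_env = ds_env.assign_coords(
    {coord: ds_src[coord] for coord in GEOLOCATION_COORDS if coord in ds_src},
)
print(sorted(ds_env.coords))  # ['latitude', 'longitude']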
disdrodb/l2/__init__.py CHANGED
@@ -15,3 +15,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Module for DISDRODB L2 production."""
+from disdrodb.l2.processing import generate_l2_radar, generate_l2e, generate_l2m
+
+__all__ = [
+    "generate_l2_radar",
+    "generate_l2e",
+    "generate_l2m",
+]
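
Note: with these re-exports, downstream code can import the L2 generators directly from the subpackage instead of the processing module:

from disdrodb.l2 import generate_l2_radar, generate_l2e, generate_l2m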