disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -3
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/io.py +14 -17
  7. disdrodb/api/path.py +42 -77
  8. disdrodb/api/search.py +89 -23
  9. disdrodb/cli/disdrodb_create_summary.py +11 -1
  10. disdrodb/cli/disdrodb_create_summary_station.py +10 -0
  11. disdrodb/cli/disdrodb_run_l0.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  15. disdrodb/cli/disdrodb_run_l1.py +1 -1
  16. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  17. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  18. disdrodb/configs.py +30 -83
  19. disdrodb/constants.py +4 -3
  20. disdrodb/data_transfer/download_data.py +4 -2
  21. disdrodb/docs.py +2 -2
  22. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  29. disdrodb/etc/products/L1/global.yaml +7 -1
  30. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  31. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +1 -1
  33. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
  35. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
  38. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  39. disdrodb/etc/products/L2M/global.yaml +11 -3
  40. disdrodb/l0/check_configs.py +49 -16
  41. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  42. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  43. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  44. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  47. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  48. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  49. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  50. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  51. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  52. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  53. disdrodb/l0/l0_reader.py +2 -2
  54. disdrodb/l0/l0b_processing.py +70 -15
  55. disdrodb/l0/l0c_processing.py +7 -3
  56. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
  57. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  58. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  59. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  60. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  61. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  62. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  63. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  64. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  65. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  66. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  67. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  68. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  69. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  71. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
  72. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  73. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  74. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  75. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  76. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  77. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  78. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  79. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  80. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  81. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  83. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  84. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  85. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  86. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  87. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
  88. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  89. disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
  90. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  91. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
  92. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  93. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  94. disdrodb/l1/beard_model.py +31 -129
  95. disdrodb/l1/fall_velocity.py +136 -83
  96. disdrodb/l1/filters.py +25 -28
  97. disdrodb/l1/processing.py +16 -17
  98. disdrodb/l1/resampling.py +101 -38
  99. disdrodb/l1_env/routines.py +46 -17
  100. disdrodb/l2/empirical_dsd.py +6 -0
  101. disdrodb/l2/processing.py +6 -5
  102. disdrodb/metadata/geolocation.py +0 -2
  103. disdrodb/metadata/search.py +3 -4
  104. disdrodb/psd/fitting.py +16 -13
  105. disdrodb/routines/l0.py +2 -2
  106. disdrodb/routines/l1.py +173 -60
  107. disdrodb/routines/l2.py +148 -284
  108. disdrodb/routines/options.py +345 -0
  109. disdrodb/routines/wrappers.py +14 -1
  110. disdrodb/scattering/axis_ratio.py +90 -84
  111. disdrodb/scattering/permittivity.py +6 -0
  112. disdrodb/summary/routines.py +735 -670
  113. disdrodb/utils/archiving.py +51 -44
  114. disdrodb/utils/attrs.py +3 -1
  115. disdrodb/utils/dask.py +4 -4
  116. disdrodb/utils/dict.py +33 -0
  117. disdrodb/utils/encoding.py +6 -1
  118. disdrodb/utils/routines.py +9 -8
  119. disdrodb/utils/time.py +11 -3
  120. disdrodb/viz/__init__.py +0 -13
  121. disdrodb/viz/plots.py +231 -1
  122. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
  123. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
  124. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  125. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  126. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  127. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
  128. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
  129. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
  130. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
  131. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  132. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
  133. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
  134. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
  135. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
disdrodb/l0/configs/SWS250/raw_data_format.yml ADDED
@@ -0,0 +1,148 @@
+precipitation_rate:
+  n_digits: 6
+  n_characters: 7
+  n_decimals: 3
+  n_naturals: 3
+  data_range:
+  - 0
+  - 9999.999
+  nan_flags: null
+precipitation_accumulated:
+  n_digits: 6
+  n_characters: 7
+  n_decimals: 2
+  n_naturals: 4
+  data_range:
+  - 0
+  - 9999.0
+  nan_flags: null
+weather_code_synop_4680:
+  n_digits: 2
+  n_characters: 2
+  n_decimals: 0
+  n_naturals: 2
+  data_range:
+  - 0
+  - 89
+  nan_flags: null
+weather_code_metar_4678:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+past_weather1:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+past_weather2:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+mor_visibility_5min:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+mor_visibility:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+number_particles:
+  n_digits: 4
+  n_characters: 4
+  n_decimals: 0
+  n_naturals: 4
+  data_range:
+  - 0
+  - 9999
+  nan_flags: null
+sensor_temperature:
+  n_digits: 4
+  n_characters: 6
+  n_decimals: 1
+  n_naturals: 3
+  data_range:
+  - -99
+  - 100
+  nan_flags: null
+obstruction_status:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+total_extinction_coefficient:
+  n_digits: 5
+  n_characters: 6
+  n_decimals: 2
+  n_naturals: 3
+  data_range:
+  - 0
+  - 999.99
+  nan_flags: null
+transmissometer_extinction_coefficient:
+  n_digits: 5
+  n_characters: 6
+  n_decimals: 2
+  n_naturals: 3
+  data_range:
+  - 0
+  - 999.99
+  nan_flags: null
+back_scatter_extinction_coefficient:
+  n_digits: 5
+  n_characters: 7
+  n_decimals: 2
+  n_naturals: 3
+  data_range:
+  - -999.99
+  - 999.99
+  nan_flags: null
+ambient_light_sensor_signal:
+  n_digits: 5
+  n_characters: 5
+  n_decimals: 0
+  n_naturals: 5
+  data_range:
+  - 0
+  - 99998
+  nan_flags: 99999
+sensor_status:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+ambient_light_sensor_signal_status:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+raw_drop_number:
+  n_digits: 0
+  n_characters: 4096
+  n_decimals: 0
+  n_naturals: 0
+  data_range: null
+  nan_flags: null
+  dimension_order:
+  - velocity_bin_center
+  - diameter_bin_center
+  n_values: 336
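
Each raw_data_format entry above describes how a raw field is laid out and which values are valid (string width, decimal layout, data_range, nan_flags). As a minimal standalone sketch (not disdrodb's internal API), applying the data_range and nan_flags of an entry such as sensor_temperature to parsed values could look like:

    import numpy as np

    # Spec taken from the 'sensor_temperature' entry above.
    spec = {"data_range": [-99, 100], "nan_flags": None}

    def sanitize(values, spec):
        """Replace nan_flags values and out-of-range values with NaN."""
        arr = np.asarray(values, dtype=float)
        if spec.get("nan_flags") is not None:
            arr[arr == float(spec["nan_flags"])] = np.nan
        if spec.get("data_range") is not None:
            vmin, vmax = spec["data_range"]
            arr[(arr < vmin) | (arr > vmax)] = np.nan
        return arr

    print(sanitize(["23.5", "-105.0", "99.9"], spec))  # [23.5   nan  99.9]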
disdrodb/l0/l0_reader.py CHANGED
@@ -35,9 +35,9 @@ logger = logging.getLogger(__name__)
 
 def define_readers_directory(sensor_name="") -> str:
     """Returns the path to the ``disdrodb.l0.readers`` directory within the disdrodb package."""
-    from disdrodb import __root_path__
+    from disdrodb import package_dir
 
-    reader_dir = os.path.join(__root_path__, "disdrodb", "l0", "readers", sensor_name)
+    reader_dir = os.path.join(package_dir, "l0", "readers", sensor_name)
     return reader_dir
 
 
disdrodb/l0/l0b_processing.py CHANGED
@@ -80,15 +80,16 @@ def infer_split_str(string: str) -> str:
     return split_str
 
 
-def _replace_empty_strings_with_zeros(values):
+def replace_empty_strings_with_zeros(values):
+    """Replace empty comma-separated strings with '0'."""
     values[np.char.str_len(values) == 0] = "0"
     return values
 
 
-def _format_string_array(string: str, n_values: int) -> np.array:
+def format_string_array(string: str, n_values: int) -> np.array:
     """Split a string with multiple numbers separated by a delimiter into a 1D array.
 
-    e.g. : _format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
+    e.g. : format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
 
     If empty string ("") --> Return an array of zeros
     If the list length is not n_values -> Return an array of np.nan
@@ -126,7 +127,7 @@ def _format_string_array(string: str, n_values: int) -> np.array:
     # Ensure string type
     values = values.astype("str")
     # Replace '' with 0
-    values = _replace_empty_strings_with_zeros(values)
+    values = replace_empty_strings_with_zeros(values)
     # Replace "-9.999" with 0
     values = np.char.replace(values, "-9.999", "0")
     # Cast values to float type
@@ -135,7 +136,7 @@ def _format_string_array(string: str, n_values: int) -> np.array:
     return values
 
 
-def _reshape_raw_spectrum(
+def reshape_raw_spectrum(
     arr: np.array,
     dims_order: list,
     dims_size_dict: dict,
@@ -243,17 +244,17 @@ def retrieve_l0b_arrays(
         # Ensure is a string, get a numpy array for each row and then stack
         # - Option 1: Clear but lot of copies
         # df_series = df[key].astype(str)
-        # list_arr = df_series.apply(_format_string_array, n_values=n_values)
+        # list_arr = df_series.apply(format_string_array, n_values=n_values)
         # arr = np.stack(list_arr, axis=0)
 
         # - Option 2: still copies
-        # arr = np.vstack(_format_string_array(s, n_values=n_values) for s in df_series.astype(str))
+        # arr = np.vstack(format_string_array(s, n_values=n_values) for s in df_series.astype(str))
 
         # - Option 3: more memory efficient
         n_timesteps = len(df[key])
         arr = np.empty((n_timesteps, n_values), dtype=float)  # preallocates
         for i, s in enumerate(df[key].astype(str)):
-            arr[i, :] = _format_string_array(s, n_values=n_values)
+            arr[i, :] = format_string_array(s, n_values=n_values)
 
         # Retrieve dimensions
         dims_order = dims_order_dict[key]
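
The Option 1/2/3 comments above record the memory trade-off behind the retained preallocation loop. A self-contained toy version of that pattern, with a simplified stand-in for format_string_array (illustrative only, not the package code):

    import numpy as np

    def format_string_array(string, n_values):
        # Simplified stand-in: split on ',', replace empty entries with "0",
        # and return NaNs when the number of values does not match n_values.
        values = np.array(string.split(","))
        if len(values) != n_values:
            return np.full(n_values, np.nan)
        values[np.char.str_len(values) == 0] = "0"
        return values.astype(float)

    series = ["2,44,22,33", "1,,3,4", "bad"]
    arr = np.empty((len(series), 4), dtype=float)  # preallocate once instead of stacking row copies
    for i, s in enumerate(series):
        arr[i, :] = format_string_array(s, n_values=4)
    print(arr)  # the "bad" row is all NaN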
@@ -263,7 +264,7 @@ def retrieve_l0b_arrays(
         # - This applies i.e for PARSIVEL*, LPM, PWS100
         # - This does not apply to RD80
         if key == "raw_drop_number" and len(dims_order) == 2:
-            arr, dims = _reshape_raw_spectrum(
+            arr, dims = reshape_raw_spectrum(
                 arr=arr,
                 dims_order=dims_order,
                 dims_size_dict=dims_size_dict,
@@ -288,7 +289,57 @@
 #### L0B Coords and attributes
 
 
-def _convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
+def ensure_valid_geolocation(ds: xr.Dataset, coord: str, errors: str = "ignore") -> xr.Dataset:
+    """Ensure valid geolocation coordinates.
+
+    'altitude' must be >= 0, 'latitude' must be within [-90, 90] and
+    'longitude' within [-180, 180].
+
+    It can deal with coordinates varying with time.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Dataset containing the coordinate.
+    coord : str
+        Name of the coordinate variable to validate.
+    errors : {"ignore", "raise", "coerce"}, default "ignore"
+        - "ignore": nothing is done.
+        - "raise" : raise ValueError if invalid values are found.
+        - "coerce": out-of-range values are replaced with NaN.
+
+    Returns
+    -------
+    xr.Dataset
+        Dataset with validated coordinate values.
+    """
+    # Define coordinate ranges
+    ranges = {
+        "altitude": (0, np.inf),
+        "latitude": (-90, 90),
+        "longitude": (-180, 180),  # used only for "raise"/"coerce"
+    }
+
+    # Check coordinate is available and correctly defined.
+    if coord not in ds:
+        raise ValueError(f"Coordinate '{coord}' not found in dataset.")
+    if coord not in list(ranges):
+        raise ValueError(f"Valid geolocation coordinates are: {list(ranges)}.")
+
+    # Validate coordinate
+    vmin, vmax = ranges[coord]
+    invalid = (ds[coord] < vmin) | (ds[coord] > vmax)
+    invalid = invalid.compute()
+
+    # Deal with invalid values
+    if errors == "raise" and invalid.any():
+        raise ValueError(f"{coord} out of range {vmin}-{vmax}.")
+    if errors == "coerce":
+        ds[coord] = ds[coord].where(~invalid)
+    return ds
+
+
+def convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
     """Convert variables with ``object`` dtype to ``string``.
 
     Parameters
@@ -307,7 +358,7 @@ def _convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
     return ds
 
 
-def _set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
+def set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
     """Set attributes to each ``xr.Dataset`` variable.
 
     Parameters
@@ -353,7 +404,7 @@ def add_dataset_crs_coords(ds):
 
 
 def _define_dataset_variables(df, sensor_name, logger=None, verbose=False):
-    """Define DISDRODB L0B netCDF variables."""
+    """Define DISDRODB L0B netCDF array variables."""
     # Preprocess raw_spectrum, diameter and velocity arrays if available
     raw_fields = ["raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"]
     if np.any(np.isin(raw_fields, df.columns)):
@@ -436,7 +487,7 @@ def set_geolocation_coordinates(ds, metadata):
         # If coordinate not present, add it from dictionary
         if coord not in ds:
            ds = ds.assign_coords({coord: metadata.pop(coord, np.nan)})
-        # Else if set coordinates the variable in the dataset (present in the raw data)
+        # Else ensure coord is a dataset coordinate
         else:
             ds = ds.set_coords(coord)
             _ = metadata.pop(coord, None)
@@ -445,6 +496,10 @@ def set_geolocation_coordinates(ds, metadata):
     for coord in coords:
         ds[coord] = xr.where(ds[coord] == -9999, np.nan, ds[coord])
 
+    # Ensure valid geolocation coordinates
+    for coord in coords:
+        ds = ensure_valid_geolocation(ds=ds, coord=coord, errors="coerce")
+
     # Set attributes without geolocation coordinates
     ds.attrs = metadata
     return ds
@@ -469,11 +524,11 @@ def finalize_dataset(ds, sensor_name, metadata):
     ds = ds.transpose("time", "diameter_bin_center", ...)
 
     # Ensure variables with dtype object are converted to string
-    ds = _convert_object_variables_to_string(ds)
+    ds = convert_object_variables_to_string(ds)
 
     # Add netCDF variable and coordinate attributes
     # - Add variable attributes: long_name, units, descriptions, valid_min, valid_max
-    ds = _set_variable_attributes(ds=ds, sensor_name=sensor_name)
+    ds = set_variable_attributes(ds=ds, sensor_name=sensor_name)
     # - Add netCDF coordinate attributes
     ds = set_coordinate_attributes(ds=ds)
     # - Set DISDRODB global attributes
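
A short usage sketch of the ensure_valid_geolocation function added above; the toy dataset and the import path are assumptions for illustration:

    import numpy as np
    import xarray as xr

    from disdrodb.l0.l0b_processing import ensure_valid_geolocation  # assumed module path

    # Time-varying latitude with one out-of-range value.
    ds = xr.Dataset(coords={"time": np.arange(3)})
    ds = ds.assign_coords(latitude=("time", [45.2, 95.0, 44.9]))

    ds = ensure_valid_geolocation(ds=ds, coord="latitude", errors="coerce")
    print(ds["latitude"].values)  # [45.2   nan  44.9]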
disdrodb/l0/l0c_processing.py CHANGED
@@ -117,7 +117,12 @@ def split_dataset_by_sampling_intervals(
 
     # If sample_interval is a dataset variable, use it to define dictionary of datasets
     if "sample_interval" in ds:
-        return {int(interval): ds.isel(time=ds["sample_interval"] == interval) for interval in measurement_intervals}
+        dict_ds = {}
+        for interval in measurement_intervals:
+            ds_subset = ds.isel(time=ds["sample_interval"] == interval)
+            if ds_subset.sizes["time"] > 2:
+                dict_ds[int(interval)] = ds_subset
+        return dict_ds
 
     # ---------------------------------------------------------------------------------------.
     # Otherwise exploit difference between timesteps to identify change point
@@ -460,9 +465,8 @@ def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=True):
     # if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
     #     qc_flag[-1] = 0
 
-    # Assign time quality flag coordinate
+    # Add time quality flag variable
     ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
-    ds = ds.set_coords("time_qc")
 
     # Add CF attributes for time_qc
     ds["time_qc"].attrs = {
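
The split_dataset_by_sampling_intervals change above discards subsets spanning two timesteps or fewer instead of returning them. A standalone illustration of the new selection logic on toy data (not calling disdrodb):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"sample_interval": ("time", [60, 60, 60, 30, 30])},
        coords={"time": np.arange(5)},
    )
    measurement_intervals = [30, 60]
    dict_ds = {}
    for interval in measurement_intervals:
        ds_subset = ds.isel(time=ds["sample_interval"] == interval)
        if ds_subset.sizes["time"] > 2:  # subsets with <= 2 timesteps are dropped
            dict_ds[int(interval)] = ds_subset
    print(list(dict_ds))  # [60]: the two-timestep 30 s subset is discarded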
disdrodb/l0/readers/LPM/SLOVENIA/UL.py CHANGED
@@ -69,7 +69,7 @@ def reader(
         "quality_measurement": "quality_index",
         "max_diameter_hail": "max_hail_diameter",
         "laser_status": "laser_status",
-        "static_signal": "static_signal",
+        "static_signal_status": "static_signal_status",
         "interior_temperature": "temperature_interior",
         "laser_temperature": "laser_temperature",
         "laser_temperature_analog_status": "laser_temperature_analog_status",
@@ -137,7 +137,7 @@ def reader(
         "quality_index",
         "max_hail_diameter",
         "laser_status",
-        "static_signal",
+        "static_signal_status",
         "laser_temperature_analog_status",
         "laser_temperature_digital_status",
         "laser_current_analog_status",
@@ -151,7 +151,7 @@ def reader(
         "current_heating_heads_status",
         "current_heating_carriers_status",
         "control_output_laser_power_status",
-        "reserve_status",
+        "reserved_status",
         "temperature_interior",
         "laser_temperature",
         "laser_current_average",
disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py ADDED
@@ -0,0 +1,256 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""DISDRODB reader for ULIEGE LPM stations."""
+
+import numpy as np
+import pandas as pd
+
+from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+from disdrodb.l0.l0a_processing import read_raw_text_file
+from disdrodb.utils.logger import log_error, log_warning
+
+
+def read_txt_file(file, filename, logger):
+    """Parse ULIEGE LPM hourly file."""
+    #### - Define raw data headers
+    column_names = ["TO_PARSE"]
+
+    ##------------------------------------------------------------------------.
+    #### Define reader options
+    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
+    reader_kwargs = {}
+
+    # - Define delimiter
+    reader_kwargs["delimiter"] = "\\n"
+
+    # - Avoid first column to become df index !!!
+    reader_kwargs["index_col"] = False
+
+    # Since column names are expected to be passed explicitly, header is set to None
+    reader_kwargs["header"] = None
+
+    # - Number of rows to be skipped at the beginning of the file
+    reader_kwargs["skiprows"] = None
+
+    # - Define behaviour when encountering bad lines
+    reader_kwargs["on_bad_lines"] = "skip"
+
+    # - Define reader engine
+    #   - C engine is faster
+    #   - Python engine is more feature-complete
+    reader_kwargs["engine"] = "python"
+
+    # - Define on-the-fly decompression of on-disk data
+    #   - Available: gzip, bz2, zip
+    reader_kwargs["compression"] = "infer"
+
+    # - Strings to recognize as NA/NaN and replace with standard NA flags
+    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+    #     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+    #     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+    reader_kwargs["na_values"] = ["na", "", "error"]
+
+    ##------------------------------------------------------------------------.
+    #### Read the data
+    df = read_raw_text_file(
+        filepath=file,
+        column_names=column_names,
+        reader_kwargs=reader_kwargs,
+        logger=logger,
+    )
+
+    ##------------------------------------------------------------------------.
+    #### Adapt the dataframe to adhere to DISDRODB L0 standards
+    # Count number of delimiters to identify valid rows
+    df = df[df["TO_PARSE"].str.count(";") == 442]
+
+    # Check there are still valid rows
+    if len(df) == 0:
+        raise ValueError(f"No valid rows in {filename}.")
+
+    # Split by ; delimiter (before raw drop number)
+    df = df["TO_PARSE"].str.split(";", expand=True, n=43)
+
+    # Assign column names
+    column_names = [
+        "id",
+        "sample_interval",
+        "weather_code_synop_4677_5min",  # or "weather_code_synop_4680_5min",
+        "weather_code_metar_4678_5min",
+        "precipitation_rate_5min",
+        "weather_code_synop_4677",  # or "weather_code_synop_4680",
+        "weather_code_metar_4678",
+        "precipitation_rate",
+        "precipitation_accumulated",
+        "sensor_time",
+        # "mor_visibility",
+        # "reflectivity",
+        # "quality_index",
+        # "max_hail_diameter",
+        # "laser_status",
+        "dummy1",
+        "dummy2",
+        # "laser_temperature",
+        "laser_current_average",
+        "control_voltage",
+        "optical_control_voltage_output",
+        # "current_heating_house",
+        # "current_heating_heads",
+        # "current_heating_carriers",
+        "number_particles",
+        "number_particles_internal_data",
+        "number_particles_min_speed",
+        "number_particles_min_speed_internal_data",
+        "number_particles_max_speed",
+        "number_particles_max_speed_internal_data",
+        "number_particles_min_diameter",
+        "number_particles_min_diameter_internal_data",
+        "number_particles_no_hydrometeor",
+        "number_particles_no_hydrometeor_internal_data",
+        # "number_particles_unknown_classification",  # ????
+        # "number_particles_unknown_classification_internal_data",
+        "number_particles_class_1",
+        "number_particles_class_1_internal_data",
+        "number_particles_class_2",
+        "number_particles_class_2_internal_data",
+        "number_particles_class_3",
+        "number_particles_class_3_internal_data",
+        "number_particles_class_4",
+        "number_particles_class_4_internal_data",
+        "number_particles_class_5",
+        "number_particles_class_5_internal_data",
+        "number_particles_class_6",
+        "number_particles_class_6_internal_data",
+        "number_particles_class_7",
+        "number_particles_class_7_internal_data",
+        "number_particles_class_8",
+        "number_particles_class_8_internal_data",
+        "number_particles_class_9",
+        "number_particles_class_9_internal_data",
+        "raw_drop_number",
+    ]
+    df.columns = column_names
+
+    # Deal with the case where there are 61 timesteps
+    # - Occurs sometimes when the previous hourly file misses timesteps
+    if len(df) == 61:
+        log_warning(logger=logger, msg=f"{filename} contains 61 timesteps. Dropping the first.")
+        df = df.iloc[1:]
+
+    # Raise error if more than 60 timesteps/rows
+    n_rows = len(df)
+    if n_rows > 60:
+        raise ValueError(f"The hourly file contains {n_rows} timesteps.")
+
+    # Infer and define "time" column
+    start_time_str = filename.split(".")[0]  # '2024020200.txt'
+    start_time = pd.to_datetime(start_time_str, format="%Y%m%d%H")
+
+    # - Define timedelta based on sensor_time
+    dt = pd.to_timedelta(df["sensor_time"] + ":00").to_numpy().astype("m8[s]")
+    dt = dt - dt[0]
+
+    # - Define approximate time
+    df["time"] = start_time + dt
+
+    # - Keep rows where time increment is between 00 and 59 minutes
+    valid_rows = dt <= np.timedelta64(3540, "s")
+    df = df[valid_rows]
+
+    # Drop rows where sample interval is not 60 seconds
+    df = df[df["sample_interval"] == "000060"]
+
+    # Drop rows with invalid raw_drop_number
+    # --> 440 values  # 22x20
+    # --> 400 here    # 20x20
+    df = df[df["raw_drop_number"].astype(str).str.len() == 1599]
+
+    # Deal with old LPM version 20x20 spectrum
+    # - Add 000 in first two velocity bins
+    df["raw_drop_number"] = df["raw_drop_number"] + ";" + ";".join(["000"] * 40)
+
+    # Drop columns not agreeing with DISDRODB L0 standards
+    columns_to_drop = [
+        "sample_interval",
+        "sensor_time",
+        "dummy1",
+        "dummy2",
+        "id",
+    ]
+    df = df.drop(columns=columns_to_drop)
+    return df
+
+
+@is_documented_by(reader_generic_docstring)
+def reader(
+    filepath,
+    logger=None,
+):
+    """Reader."""
+    import zipfile
+
+    ##------------------------------------------------------------------------.
+    # filename = os.path.basename(filepath)
+    # return read_txt_file(file=filepath, filename=filename, logger=logger)
+
+    # ---------------------------------------------------------------------.
+    #### Iterate over all files (aka timesteps) in the daily zip archive
+    # - Each file contains a single timestep !
+    # list_df = []
+    # with tempfile.TemporaryDirectory() as temp_dir:
+    #     # Extract all files
+    #     unzip_file_on_terminal(filepath, temp_dir)
+
+    #     # Walk through extracted files
+    #     for root, _, files in os.walk(temp_dir):
+    #         for filename in sorted(files):
+    #             if filename.endswith(".txt"):
+    #                 full_path = os.path.join(root, filename)
+    #                 try:
+    #                     df = read_txt_file(file=full_path, filename=filename, logger=logger)
+    #                     if df is not None:
+    #                         list_df.append(df)
+    #                 except Exception as e:
+    #                     msg = f"An error occurred while reading {filename}: {e}"
+    #                     log_error(logger=logger, msg=msg, verbose=True)
+
+    list_df = []
+    with zipfile.ZipFile(filepath, "r") as zip_ref:
+        filenames = sorted(zip_ref.namelist())
+        for filename in filenames:
+            if filename.endswith(".txt"):
+                # Open file
+                with zip_ref.open(filename) as file:
+                    try:
+                        df = read_txt_file(file=file, filename=filename, logger=logger)
+                        if df is not None:
+                            list_df.append(df)
+                    except Exception as e:
+                        msg = f"An error occurred while reading {filename}. The error is: {e}"
+                        log_error(logger=logger, msg=msg, verbose=True)
+
+    # Check the zip file contains at least some non-empty files
+    if len(list_df) == 0:
+        raise ValueError(f"{filepath} contains only empty files!")
+
+    # Concatenate all dataframes into a single one
+    df = pd.concat(list_df)
+
+    # ---------------------------------------------------------------------.
+    return df
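
A hypothetical invocation of the new ULIEGE reader; the archive path is illustrative, and within DISDRODB the reader is normally dispatched by the L0 processing chain rather than called directly:

    from disdrodb.l0.readers.LPM.BELGIUM.ULIEGE import reader  # assumed module path

    # Daily zip archive of hourly LPM text files (e.g. 2024020200.txt, 2024020201.txt, ...)
    df = reader("/data/ULIEGE/20240202.zip")
    print(len(df), df["time"].min(), df["time"].max())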
@@ -96,7 +96,7 @@ def reader(
         "quality_index",
         "max_hail_diameter",
         "laser_status",
-        "static_signal",
+        "static_signal_status",
         "laser_temperature_analog_status",
         "laser_temperature_digital_status",
         "laser_current_analog_status",
@@ -110,7 +110,7 @@ def reader(
         "current_heating_heads_status",
         "current_heating_carriers_status",
         "control_output_laser_power_status",
-        "reserve_status",
+        "reserved_status",
         "temperature_interior",
         "laser_temperature",
         "laser_current_average",
@@ -96,7 +96,7 @@ def reader(
         "quality_index",
         "max_hail_diameter",
         "laser_status",
-        "static_signal",
+        "static_signal_status",
         "laser_temperature_analog_status",
         "laser_temperature_digital_status",
         "laser_current_analog_status",
@@ -110,7 +110,7 @@ def reader(
         "current_heating_heads_status",
         "current_heating_carriers_status",
         "control_output_laser_power_status",
-        "reserve_status",
+        "reserved_status",
         "temperature_interior",
         "laser_temperature",
         "laser_current_average",