disdrodb 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/info.py +3 -3
  7. disdrodb/api/io.py +48 -8
  8. disdrodb/api/path.py +116 -133
  9. disdrodb/api/search.py +12 -3
  10. disdrodb/cli/disdrodb_create_summary.py +113 -0
  11. disdrodb/cli/disdrodb_create_summary_station.py +11 -1
  12. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  17. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  18. disdrodb/constants.py +1 -1
  19. disdrodb/data_transfer/download_data.py +123 -7
  20. disdrodb/etc/products/L1/global.yaml +1 -1
  21. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  22. disdrodb/etc/products/L2E/global.yaml +1 -1
  23. disdrodb/etc/products/L2M/GAMMA_GS_ND_MAE.yaml +6 -0
  24. disdrodb/etc/products/L2M/GAMMA_ML.yaml +1 -1
  25. disdrodb/etc/products/L2M/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  26. disdrodb/etc/products/L2M/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  27. disdrodb/etc/products/L2M/LOGNORMAL_ML.yaml +8 -0
  28. disdrodb/etc/products/L2M/global.yaml +11 -3
  29. disdrodb/issue/writer.py +2 -0
  30. disdrodb/l0/check_configs.py +49 -16
  31. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  32. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  33. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  34. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  35. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  36. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  37. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  38. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  39. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  40. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  41. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  42. disdrodb/l0/l0a_processing.py +10 -5
  43. disdrodb/l0/l0b_nc_processing.py +10 -6
  44. disdrodb/l0/l0b_processing.py +92 -72
  45. disdrodb/l0/l0c_processing.py +369 -251
  46. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +8 -1
  47. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  48. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  49. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  50. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  51. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  52. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  54. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  55. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  56. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  58. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  59. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +5 -14
  60. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  61. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  62. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  63. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  66. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  67. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  70. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  73. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  79. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  80. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PANGASA.py +232 -0
  81. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  82. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +120 -0
  83. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +7 -25
  84. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  85. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  86. disdrodb/l1/beard_model.py +31 -129
  87. disdrodb/l1/fall_velocity.py +156 -57
  88. disdrodb/l1/filters.py +25 -28
  89. disdrodb/l1/processing.py +12 -14
  90. disdrodb/l1_env/routines.py +46 -17
  91. disdrodb/l2/empirical_dsd.py +6 -0
  92. disdrodb/l2/processing.py +3 -3
  93. disdrodb/metadata/checks.py +132 -125
  94. disdrodb/metadata/geolocation.py +0 -2
  95. disdrodb/psd/fitting.py +180 -210
  96. disdrodb/psd/models.py +1 -1
  97. disdrodb/routines/__init__.py +54 -0
  98. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  99. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  100. disdrodb/{l2/routines.py → routines/l2.py} +284 -485
  101. disdrodb/{routines.py → routines/wrappers.py} +100 -7
  102. disdrodb/scattering/axis_ratio.py +95 -85
  103. disdrodb/scattering/permittivity.py +24 -0
  104. disdrodb/scattering/routines.py +56 -36
  105. disdrodb/summary/routines.py +147 -45
  106. disdrodb/utils/archiving.py +434 -0
  107. disdrodb/utils/attrs.py +2 -0
  108. disdrodb/utils/cli.py +5 -5
  109. disdrodb/utils/dask.py +62 -1
  110. disdrodb/utils/decorators.py +31 -0
  111. disdrodb/utils/encoding.py +10 -1
  112. disdrodb/{l2 → utils}/event.py +1 -66
  113. disdrodb/utils/logger.py +1 -1
  114. disdrodb/utils/manipulations.py +22 -12
  115. disdrodb/utils/routines.py +166 -0
  116. disdrodb/utils/time.py +5 -293
  117. disdrodb/utils/xarray.py +3 -0
  118. disdrodb/viz/plots.py +109 -15
  119. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/METADATA +3 -2
  120. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/RECORD +124 -96
  121. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/entry_points.txt +1 -0
  122. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/licenses/LICENSE +0 -0
  124. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,382 @@
1
+ # -----------------------------------------------------------------------------.
2
+ # Copyright (c) 2021-2023 DISDRODB developers
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ # -----------------------------------------------------------------------------.
17
+ """Reader for the EROSION campaign in Denmark."""
18
+ import numpy as np
19
+ import pandas as pd
20
+
21
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
22
+ from disdrodb.l0.l0a_processing import read_raw_text_file
23
+
24
# Variables every dataframe returned by this module's ``reader`` must expose.
# Any of these that a given raw file format does not provide is added by
# ``reader`` as a column filled with the string "NaN".
COLUMNS = [
    # Precipitation and weather codes
    "rainfall_rate_32bit",
    "rainfall_accumulated_32bit",
    "weather_code_synop_4680",
    "weather_code_synop_4677",
    "weather_code_metar_4678",
    "weather_code_nws",
    "reflectivity_32bit",
    "mor_visibility",
    # Sampling and sensor diagnostics
    "sample_interval",
    "laser_amplitude",
    "number_particles",
    "sensor_temperature",
    "sensor_heating_current",
    "sensor_battery_voltage",
    "sensor_status",
    # Derived precipitation quantities
    "rain_kinetic_energy",
    "snowfall_rate",
    # Raw spectra
    "raw_drop_concentration",
    "raw_drop_average_velocity",
    "raw_drop_number",
]
46
+
47
+
48
def read_par_format(filepath, logger):
    """Read a ``.par`` file into a dataframe adhering to DISDRODB L0 standards.

    Every line of the file is first read as a single string and then split on
    commas. Two layouts are supported, identified by the most frequent number
    of commas per line: 1113 (no ``weather_code_nws`` field) and 1114 (with
    ``weather_code_nws``). Lines with any other comma count are discarded as
    corrupted.

    Parameters
    ----------
    filepath : str
        Path of the raw file, forwarded to ``read_raw_text_file``.
    logger : logging.Logger or None
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with a datetime ``time`` column, the scalar variables and
        the three raw-array columns ``raw_drop_concentration``,
        ``raw_drop_average_velocity`` and ``raw_drop_number`` (each a
        comma-joined string).

    Raises
    ------
    ValueError
        If the file contains no data rows.
    NotImplementedError
        If the dominant number of separators matches no known layout.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"

    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False

    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"

    # - Skip the first row (header)
    reader_kwargs["skiprows"] = 1

    # - Define encoding
    reader_kwargs["encoding"] = "latin"

    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"

    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"

    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    # - Do not infer column names from the file (they are passed explicitly)
    reader_kwargs["header"] = None

    ##------------------------------------------------------------------------.
    #### Read the data
    df_raw = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Fail with a clear message instead of an obscure argmax error below
    if len(df_raw) == 0:
        raise ValueError("No rows available in the file.")

    # Identify the layout from the most frequent number of separators per line
    separator_counts = df_raw["TO_PARSE"].str.count(",")
    n_separators, counts = np.unique(separator_counts, return_counts=True)
    n_separators = n_separators[counts.argmax()]

    # Assign names
    # - The two layouts differ only by the presence of "weather_code_nws"
    leading_names = [
        "id",
        "y",
        "m",
        "d",
        "hh",
        "mm",
        "ss",
        "rainfall_accumulated_32bit",
        "rainfall_rate_32bit",
        "snowfall_rate",
        "reflectivity_32bit",
        "rain_kinetic_energy",
        "mor_visibility",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "weather_code_metar_4678",
    ]
    trailing_names = [
        "firmware_iop",
        "firmware_dsp",
        "sensor_status",
        "htst",
        "sensor_temperature",
        "sensor_battery_voltage",
        "laser_amplitude",
        "number_particles",
        "nPART",
        "TO_SPLIT",
    ]
    if n_separators == 1113:
        names = [*leading_names, *trailing_names]
    elif n_separators == 1114:
        names = [*leading_names, "weather_code_nws", *trailing_names]
    else:
        raise NotImplementedError("Unrecognized number of columns")

    # Everything after the last scalar field stays in "TO_SPLIT"
    nsplit = len(names) - 1

    # Remove corrupted rows
    df_raw = df_raw[separator_counts == n_separators]

    # Split the scalar fields from the trailing raw arrays
    df = df_raw["TO_PARSE"].str.split(",", expand=True, n=nsplit)

    # Assign names
    df.columns = names

    # Define datetime "time" column
    df["time"] = pd.to_datetime(
        {"year": df["y"], "month": df["m"], "day": df["d"], "hour": df["hh"], "minute": df["mm"], "second": df["ss"]},
    )

    # Retrieve raw arrays
    # - Fields [0, 32) hold the concentration, [32, 64) the average velocity
    #   and [64, end) the drop number.
    # - The velocity slice must stop at 64, otherwise it would also include
    #   all the drop number fields.
    df_split = df["TO_SPLIT"].str.split(",", expand=True)
    df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
    df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
    df["raw_drop_number"] = df_split.iloc[:, 64:].agg(",".join, axis=1)
    # NOTE(review): plain substring replacement; presumably "-9" flags a
    # missing count - confirm that no valid field contains "-9".
    df["raw_drop_number"] = df["raw_drop_number"].str.replace("-9", "0")
    del df_split

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "nPART",
        "htst",
        "id",
        "y",
        "m",
        "d",
        "hh",
        "mm",
        "ss",
        "firmware_iop",
        "firmware_dsp",
        "TO_SPLIT",
    ]
    df = df.drop(columns=columns_to_drop)

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
211
+
212
+
213
def read_asdo_format(filepath, logger):
    """Read an ASDO file into a dataframe adhering to DISDRODB L0 standards.

    Each line of the file is expected to be an ``ID:Value`` pair, where ``ID``
    is a two-digit field number ("00" to "97"). Consecutive lines are grouped
    into one timestep until the ID stops increasing, and every timestep
    becomes one row with one column per ID.

    Parameters
    ----------
    filepath : str
        Path of the raw file, forwarded to ``read_raw_text_file``.
    logger : logging.Logger or None
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with a datetime ``time`` column plus the variables listed in
        the ID-to-name mapping. Fields absent from the file are filled with
        the string "NaN".

    Raises
    ------
    ValueError
        If no valid ``ID:Value`` row is left after discarding corrupted rows.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    # NOTE(review): None defers to the reader's default separator - confirm
    # that value lines containing commas are not truncated.
    reader_kwargs["delimiter"] = None

    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False

    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"

    # - Do not skip any row (the file has no header)
    reader_kwargs["skiprows"] = 0

    # - Define encoding
    reader_kwargs["encoding"] = "latin"

    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "c"

    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"

    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    # - Do not infer column names from the file (they are passed explicitly)
    reader_kwargs["header"] = None

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Create ID and Value columns
    df = df["TO_PARSE"].str.split(":", expand=True, n=1)
    df.columns = ["ID", "Value"]

    # Select only rows with values
    df = df[df["Value"].astype(bool)]
    df = df[df["Value"].apply(lambda x: x is not None)]

    # Drop rows with invalid IDs
    # - Corrupted rows
    valid_id_str = np.char.rjust(np.arange(0, 98).astype(str), width=2, fillchar="0")
    # - Copy so that the column assignment below acts on an independent frame
    df = df[df["ID"].astype(str).isin(valid_id_str)].copy()

    # Raise error if no more rows after removed corrupted ones
    if len(df) == 0:
        raise ValueError("No rows left after removing corrupted ones.")

    # Create the dataframe with each row corresponding to a timestep
    # group -> row, ID -> column
    # - A new group starts whenever the ID does not increase
    df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
    df = df.pivot(index="_group", columns="ID")  # noqa
    df.columns = df.columns.get_level_values("ID")
    df = df.reset_index(drop=True)

    # Define column names
    column_dict = {
        "01": "rainfall_rate_32bit",
        "02": "rainfall_accumulated_32bit",
        "03": "weather_code_synop_4680",
        "04": "weather_code_synop_4677",
        "05": "weather_code_metar_4678",
        "06": "weather_code_nws",
        "07": "reflectivity_32bit",
        "08": "mor_visibility",
        "09": "sample_interval",
        "10": "laser_amplitude",
        "11": "number_particles",
        "12": "sensor_temperature",
        # "13": "sensor_serial_number",
        # "14": "firmware_iop",
        # "15": "firmware_dsp",
        "16": "sensor_heating_current",
        "17": "sensor_battery_voltage",
        "18": "sensor_status",
        "19": "start_time",
        "20": "sensor_time",
        "21": "sensor_date",
        # "22": "station_name",
        # "23": "station_number",
        # "24": "rainfall_amount_absolute_32bit",
        # "25": "error_code",
        # "26": "sensor_temperature_pcb",
        # "27": "sensor_temperature_receiver",
        # "28": "sensor_temperature_trasmitter",
        # "30": "rainfall_rate_16_bit_30",
        # "31": "rainfall_rate_16_bit_1200",
        # "32": "rainfall_accumulated_16bit",
        "34": "rain_kinetic_energy",
        "35": "snowfall_rate",
        "90": "raw_drop_concentration",
        "91": "raw_drop_average_velocity",
        "93": "raw_drop_number",
    }

    # Identify missing columns and add NaN
    # - At this point the columns are still the field IDs, so the check must
    #   use the IDs of the mapping (not the final variable names).
    # - A numpy array is required for the boolean-mask selection.
    expected_ids = np.array(list(column_dict))
    missing_ids = expected_ids[np.isin(expected_ids, df.columns, invert=True)].tolist()
    for column in missing_ids:
        df[column] = "NaN"

    # Rename columns
    df = df.rename(column_dict, axis=1)

    # Keep only columns defined in the dictionary
    df = df[list(column_dict.values())]

    # Define datetime "time" column
    df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
    df["time"] = pd.to_datetime(df["time"], format="%d.%m.%Y-%H:%M:%S", errors="coerce")

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "sensor_date",
        "sensor_time",
        # "firmware_iop",
        # "firmware_dsp",
        # "sensor_serial_number",
        # "station_name",
        # "station_number",
    ]
    df = df.drop(columns=columns_to_drop)

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
360
+
361
+
362
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    # Dispatch on the file extension:
    # - ".par" files (e.g. in Thyboron) use the comma-separated layout
    # - everything else (e.g. atm4) is read as ASDO format
    parse_file = read_par_format if filepath.endswith(".par") else read_asdo_format
    df = parse_file(filepath, logger)

    # Add every expected variable the file format did not provide,
    # filled with the string "NaN" (in the order defined by COLUMNS)
    for column in np.array(COLUMNS).tolist():
        if column not in df.columns:
            df[column] = "NaN"

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -31,6 +31,10 @@ def reader(
31
31
  #### Open the netCDF
32
32
  ds = open_raw_netcdf_file(filepath=filepath, logger=logger)
33
33
 
34
+ # Correct for inverted raw_spectrum axis
35
+ ds["data_raw"] = ds["data_raw"].transpose("time", "diameter", "velocity")
36
+ ds["data_raw"].data = ds["data_raw"].data.swapaxes(1, 2)
37
+
34
38
  ##------------------------------------------------------------------------.
35
39
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
36
40
  # Define dictionary mapping dataset variables to select and rename
@@ -483,7 +483,7 @@ def reader_spectrum(filepath, logger):
483
483
  df = df.drop(columns=["TO_PARSE", "RECORD"])
484
484
 
485
485
  # Infill with NaN at invalid timesteps
486
- add_nan_at_invalid_timesteps(df, invalid_timesteps)
486
+ df = add_nan_at_invalid_timesteps(df, invalid_timesteps)
487
487
  return df
488
488
 
489
489
 
@@ -0,0 +1,127 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import pandas as pd
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0a_processing import read_raw_text_file
22
+
23
+
24
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### Define reader options
    # - Every line is read as a single string and split afterwards
    # - Compression is left to the default of the reading routine
    reader_kwargs = {
        # Define delimiter
        "delimiter": "\\n",
        # Do not infer column names from the file (they are passed explicitly)
        "header": None,
        # Do not skip any row
        "skiprows": 0,
        # Define encoding
        "encoding": "ISO-8859-1",
        # Avoid first column to become df index !!!
        "index_col": False,
        # Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # Define reader engine (the python engine is more feature-complete)
        "engine": "python",
        # Strings to recognize as NA/NaN, in addition to the pandas defaults
        "na_values": ["na", "error", "-.-", " NA"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df_lines = read_raw_text_file(
        filepath=filepath,
        column_names=["TO_SPLIT"],
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Keep only rows with one of the two accepted numbers of ";" separators
    n_separators = df_lines["TO_SPLIT"].str.count(";")
    df_lines = df_lines[n_separators.isin([11, 1035])]

    # Split into the 11 leading fields plus the remaining raw spectrum string
    df = df_lines["TO_SPLIT"].str.split(";", expand=True, n=11)
    df.columns = [
        "date",
        "time",
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "reflectivity_32bit",
        "mor_visibility",
        "laser_amplitude",
        "number_particles",
        "sensor_temperature",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "raw_drop_number",
    ]

    # Build the datetime "time" column from the date and time strings
    timestamps = df["date"] + "-" + df["time"]
    df["time"] = pd.to_datetime(timestamps, format="%d.%m.%Y-%H:%M:%S", errors="coerce")
    df = df.drop(columns=["date"])

    # Preprocess the raw spectrum
    spectrum = df["raw_drop_number"]

    # - The '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops detected
    # --> "" generates an array of zeros in L0B processing
    spectrum = spectrum.str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")

    # - Strip the <SPECTRUM> prefix and the </SPECTRUM> suffix
    spectrum = spectrum.str.replace("<SPECTRUM>", "")
    spectrum = spectrum.str.replace("</SPECTRUM>", "")

    # - Add a 0 before every ';' that is not preceded by a digit
    #   Example: ';;1;;' --> '0;0;1;0;'
    spectrum = spectrum.str.replace(r"(?<!\d);", "0;", regex=True)

    # - Replace a trailing '999;' with '0'
    spectrum = spectrum.str.replace(r"999;$", "0", regex=True)

    df["raw_drop_number"] = spectrum

    # Return the dataframe adhering to DISDRODB L0 standards
    return df