disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -3
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/io.py +14 -17
  7. disdrodb/api/path.py +42 -77
  8. disdrodb/api/search.py +89 -23
  9. disdrodb/cli/disdrodb_create_summary.py +11 -1
  10. disdrodb/cli/disdrodb_create_summary_station.py +10 -0
  11. disdrodb/cli/disdrodb_run_l0.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  15. disdrodb/cli/disdrodb_run_l1.py +1 -1
  16. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  17. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  18. disdrodb/configs.py +30 -83
  19. disdrodb/constants.py +4 -3
  20. disdrodb/data_transfer/download_data.py +4 -2
  21. disdrodb/docs.py +2 -2
  22. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  29. disdrodb/etc/products/L1/global.yaml +7 -1
  30. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  31. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +1 -1
  33. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
  35. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
  38. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  39. disdrodb/etc/products/L2M/global.yaml +11 -3
  40. disdrodb/l0/check_configs.py +49 -16
  41. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  42. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  43. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  44. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  47. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  48. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  49. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  50. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  51. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  52. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  53. disdrodb/l0/l0_reader.py +2 -2
  54. disdrodb/l0/l0b_processing.py +70 -15
  55. disdrodb/l0/l0c_processing.py +7 -3
  56. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
  57. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  58. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  59. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  60. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  61. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  62. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  63. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  64. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  65. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  66. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  67. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  68. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  69. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  71. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
  72. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  73. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  74. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  75. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  76. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  77. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  78. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  79. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  80. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  81. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  83. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  84. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  85. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  86. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  87. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
  88. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  89. disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
  90. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  91. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
  92. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  93. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  94. disdrodb/l1/beard_model.py +31 -129
  95. disdrodb/l1/fall_velocity.py +136 -83
  96. disdrodb/l1/filters.py +25 -28
  97. disdrodb/l1/processing.py +16 -17
  98. disdrodb/l1/resampling.py +101 -38
  99. disdrodb/l1_env/routines.py +46 -17
  100. disdrodb/l2/empirical_dsd.py +6 -0
  101. disdrodb/l2/processing.py +6 -5
  102. disdrodb/metadata/geolocation.py +0 -2
  103. disdrodb/metadata/search.py +3 -4
  104. disdrodb/psd/fitting.py +16 -13
  105. disdrodb/routines/l0.py +2 -2
  106. disdrodb/routines/l1.py +173 -60
  107. disdrodb/routines/l2.py +148 -284
  108. disdrodb/routines/options.py +345 -0
  109. disdrodb/routines/wrappers.py +14 -1
  110. disdrodb/scattering/axis_ratio.py +90 -84
  111. disdrodb/scattering/permittivity.py +6 -0
  112. disdrodb/summary/routines.py +735 -670
  113. disdrodb/utils/archiving.py +51 -44
  114. disdrodb/utils/attrs.py +3 -1
  115. disdrodb/utils/dask.py +4 -4
  116. disdrodb/utils/dict.py +33 -0
  117. disdrodb/utils/encoding.py +6 -1
  118. disdrodb/utils/routines.py +9 -8
  119. disdrodb/utils/time.py +11 -3
  120. disdrodb/viz/__init__.py +0 -13
  121. disdrodb/viz/plots.py +231 -1
  122. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
  123. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
  124. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  125. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  126. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  127. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
  128. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
  129. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
  130. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
  131. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  132. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
  133. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
  134. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
  135. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,321 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for TU Wien PWS100 raw text data."""
20
+ import os
21
+
22
+ import pandas as pd
23
+
24
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
25
+ from disdrodb.l0.l0a_processing import read_raw_text_file
26
+
27
+
28
def reader_spectrum(
    filepath,
    logger=None,
):
    """Reader spectrum file."""
    ##------------------------------------------------------------------------.
    #### Define reader options
    # Read each file line as a single raw string; the splitting happens below.
    reader_kwargs = {
        # - Read whole lines into one column
        "delimiter": "\\n",
        # - No column names in the data rows
        "header": None,
        # - Skip the 4-line file header
        "skiprows": 4,
        # - Define encoding
        "encoding": "ISO-8859-1",
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Skip badly formed lines
        "on_bad_lines": "skip",
        # - Python engine is more feature-complete than the (faster) C engine
        "engine": "python",
        # - Extra strings to recognize as NA/NaN (on top of pandas defaults)
        "na_values": ["na", "error", "-.-", " NA", "NAN"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=["TO_SPLIT"],
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Keep only rows with the expected number of fields
    # (this drops the header and corrupted rows)
    valid_rows = df["TO_SPLIT"].str.count(",") == 1157

    # Split into time, record and the remaining spectrum values
    # (n=2 keeps the full spectrum as one comma-separated string)
    df = df.loc[valid_rows, "TO_SPLIT"].str.split(",", expand=True, n=2)
    df.columns = ["time", "record", "raw_drop_number"]

    # Parse the timestamp (strip quoting first; invalid timestamps become NaT)
    df["time"] = df["time"].str.replace('"', "")
    df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Normalize '"NAN"' entries inside the spectrum string
    df["raw_drop_number"] = df["raw_drop_number"].str.replace('"NAN"', "NaN")

    # The record counter is not part of the DISDRODB L0 standards
    df = df.drop(columns=["record"])
    return df
109
+
110
+
111
def reader_met_file(filepath, logger=None):
    """Read a PWS100 MET raw text file into a DISDRODB L0A-compliant dataframe.

    Parameters
    ----------
    filepath : str
        Path to the MET raw text file.
    logger : logging.Logger, optional
        Logger forwarded to ``read_raw_text_file``. The default is ``None``
        (consistent with ``reader_spectrum`` and ``reader``).

    Returns
    -------
    pandas.DataFrame
        Dataframe with a datetime ``time`` column, the integral variables and
        an aggregated ``type_distribution`` string column.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    # Each file row is read as a single string and split afterwards
    column_names = ["TO_SPLIT"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}

    # - Read whole lines into one column
    reader_kwargs["delimiter"] = "\\n"

    # - No column names in the data rows
    reader_kwargs["header"] = None

    # - Skip the 4-line file header
    reader_kwargs["skiprows"] = 4

    # - Define encoding
    reader_kwargs["encoding"] = "ISO-8859-1"

    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False

    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"

    # - Define reader engine
    #   - C engine is faster, Python engine is more feature-complete
    reader_kwargs["engine"] = "python"

    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "error", "-.-", " NA", "NAN"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Remove corrupted rows (and header): 41 comma-separated fields expected
    df = df[df["TO_SPLIT"].str.count(",") == 40]

    # Split into columns
    df = df["TO_SPLIT"].str.split(",", expand=True)

    # Assign columns names
    names = [
        "time",
        "RECORD",
        "PWS100_Year",
        "PWS100_Month",
        "PWS100_Day",
        "PWS100_Hours",
        "PWS100_Minutes",
        "PWS100_Seconds",
        "mor_visibility",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "weather_code_nws",
        "PWS100_PWCode_NWS_String",
        "air_temperature",
        "relative_humidity",
        "air_temperature_min",
        "air_temperature_max",
        "rainfall_rate",
        "rainfall_accumulated",
        "average_drop_velocity",
        "average_drop_size",
        "PWS100_PartType_Drizzle",
        "PWS100_PartType_FreezingDrizzle",
        "PWS100_PartType_Rain",
        "PWS100_PartType_FreezingRain",
        "PWS100_PartType_SnowGrains",
        "PWS100_PartType_SnowFlakes",
        "PWS100_PartType_IcePellets",
        "PWS100_PartType_Hail",
        "PWS100_PartType_Graupel",
        "PWS100_PartType_Error",
        "PWS100_PartType_Unknown",
        "PWS100_VISAlarm1",
        "PWS100_VISAlarm2",
        "PWS100_VISAlarm3",
        "PWS100_CleanLaserWindow",
        "PWS100_CleanUpperWindow",
        "PWS100_CleanLowerWindow",
        "sensor_status",
        "PWS100_FaultStatus_EN",
        "PWS100_PowerStatus",
    ]
    df.columns = names

    # Remove rows with only NaN (logger writes '"NAN"' in the year field)
    df = df[df["PWS100_Year"] != '"NAN"']

    # Define type distribution variable
    # (aggregate the per-hydrometeor-type counts into one comma-separated string)
    type_distribution_columns = [
        "PWS100_PartType_Drizzle",
        "PWS100_PartType_FreezingDrizzle",
        "PWS100_PartType_Rain",
        "PWS100_PartType_FreezingRain",
        "PWS100_PartType_SnowGrains",
        "PWS100_PartType_SnowFlakes",
        "PWS100_PartType_IcePellets",
        "PWS100_PartType_Hail",
        "PWS100_PartType_Graupel",
        "PWS100_PartType_Error",
        "PWS100_PartType_Unknown",
    ]
    df["type_distribution"] = df[type_distribution_columns].agg(",".join, axis=1)

    # NOTE(review): the alarm/status columns (PWS100_VISAlarm*, PWS100_Clean*,
    # PWS100_FaultStatus_EN, PWS100_PowerStatus) are currently dropped below;
    # they could be aggregated into an 'alarms' string (16 values expected).

    # Define datetime "time" column from the first field of each record
    df["time"] = df["time"].str.replace('"', "")
    # NOTE(review): unlike reader_spectrum, no errors="coerce" here, so a
    # malformed timestamp raises instead of producing NaT -- confirm intended.
    df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S")

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "RECORD",
        "PWS100_Year",
        "PWS100_Month",
        "PWS100_Day",
        "PWS100_Hours",
        "PWS100_Minutes",
        "PWS100_Seconds",
        "PWS100_PartType_Drizzle",
        "PWS100_PartType_FreezingDrizzle",
        "PWS100_PartType_Rain",
        "PWS100_PartType_FreezingRain",
        "PWS100_PartType_SnowGrains",
        "PWS100_PartType_SnowFlakes",
        "PWS100_PartType_IcePellets",
        "PWS100_PartType_Hail",
        "PWS100_PartType_Graupel",
        "PWS100_PartType_Error",
        "PWS100_PartType_Unknown",
        "PWS100_VISAlarm1",
        "PWS100_VISAlarm2",
        "PWS100_VISAlarm3",
        "PWS100_CleanLaserWindow",
        "PWS100_CleanUpperWindow",
        "PWS100_CleanLowerWindow",
        "PWS100_FaultStatus_EN",
        "PWS100_PowerStatus",
        "PWS100_PWCode_NWS_String",
    ]
    df = df.drop(columns=columns_to_drop)
    return df
287
+
288
+
289
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    # Read the integral (MET) variables and keep one row per timestep
    df = reader_met_file(filepath, logger=logger)
    df = df.drop_duplicates(subset="time", keep="first")

    # Initialize raw_drop_number as empty string
    # --> a 0-values array is produced in L0B when no spectrum is available
    df["raw_drop_number"] = ""

    # Derive the companion spectrum filepath from the MET filepath
    spectrum_filepath = filepath.replace("WS_MET_PWS100_Data", "WS_MET_Size_Vel_distr")

    # Fill in the raw spectrum for matching timesteps, if the file exists
    if os.path.exists(spectrum_filepath):
        df_spectrum = reader_spectrum(spectrum_filepath, logger=logger)
        df_spectrum = df_spectrum.drop_duplicates(subset="time", keep="first")
        # Align on time and overwrite the placeholder spectrum values
        df = df.set_index("time")
        df.update(df_spectrum.set_index("time"))
        # Restore time as a regular column
        df = df.reset_index()

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -0,0 +1,239 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for KMI Biral SW250 sensors."""
20
+ import pandas as pd
21
+
22
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
23
+ from disdrodb.l0.l0a_processing import read_raw_text_file
24
+
25
+
26
def parse_spectrum_line_to_string(line, n_cols=21):
    """Parse one ':00M' spectrum line into a zero-padded comma-separated string.

    Parameters
    ----------
    line : str
        One spectrum matrix line, e.g. ``":00M001,002,...,020XY"``.
        The first value carries the ``:00M`` line marker and the last value
        may carry trailing (non-digit) characters.
    n_cols : int, optional
        Number of velocity bin values expected per line. The default is 21.

    Returns
    -------
    str
        Comma-separated string with exactly ``n_cols`` integer values,
        right-padded with zeros when the line holds fewer values.

    Raises
    ------
    ValueError
        If the line holds more than ``n_cols`` values.
    """
    # Split spectrum line
    parts = line.split(",")

    # Check line validity
    n_values = len(parts)
    if n_values > n_cols:
        raise ValueError(f"Unexpected number of velocity bins: {n_values}.")

    # Strip the ':00M' line marker from the first value
    parts[0] = parts[0].replace(":00M", "")

    # Keep only the first 3 characters of the last value
    # (drops the trailing non-digit characters appended by the sensor)
    parts[-1] = parts[-1][:3]

    # Convert to integers and right-pad with zeros up to n_cols values
    # (the guard above guarantees len(values) <= n_cols)
    values = [int(x) for x in parts]
    values.extend([0] * (n_cols - len(values)))

    # Return the comma-separated string
    return ",".join(str(v) for v in values)
54
+
55
+
56
def parse_spectrum_block(lines, n_rows=16):
    """Parse an M-block into a flattened fixed-size (n_rows x 21) matrix string.

    Parameters
    ----------
    lines : list of str
        The consecutive ':00M' spectrum lines of one block
        (one line per diameter bin).
    n_rows : int, optional
        Number of diameter bins (lines) expected per block. The default is 16.

    Returns
    -------
    str
        Comma-separated string concatenating the parsed lines.

    Raises
    ------
    ValueError
        If the block does not hold exactly ``n_rows`` lines.
    """
    n_values = len(lines)
    if n_values != n_rows:
        raise ValueError(f"Unexpected number of diameter bins: {n_values}.")
    raw_drop_number_string = ",".join([parse_spectrum_line_to_string(line) for line in lines])
    return raw_drop_number_string
63
+
64
+
65
def build_spectrum_block(group):
    """Create SWS250 raw spectrum string."""
    try:
        raw_drop_number = parse_spectrum_block(group["spectrum_line"].tolist())
        return pd.Series({"raw_drop_number": raw_drop_number})
    except Exception:
        # Malformed block: flag the spectrum as missing instead of
        # failing the whole file
        return pd.Series({"raw_drop_number": "NaN"})
75
+
76
+
77
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### Define raw data headers
    # Each file row is read as a single string and parsed afterwards
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {}

    # - Read whole lines into one column
    reader_kwargs["delimiter"] = "\\n"

    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False

    # - Since column names are passed explicitly, header is set to None
    reader_kwargs["header"] = None

    # - Number of rows to be skipped at the beginning of the file
    reader_kwargs["skiprows"] = None

    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"

    # - Define reader engine
    #   - C engine is faster, Python engine is more feature-complete
    reader_kwargs["engine"] = "python"

    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"

    # - Skip rows with badly encoded data
    reader_kwargs["encoding_errors"] = "replace"

    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Identify rows with 1-min data records (24 comma-separated fields)
    df_params = df[df["TO_PARSE"].str.count(",") == 23]

    # Identify rows with spectrum matrix lines (marked by the ':00M' prefix)
    df_spectrum = df[df["TO_PARSE"].str.startswith(":00M")]
    if len(df_spectrum) == 0:
        raise ValueError("No spectrum available.")

    # Separate the spectrum line from the two trailing date/time fields
    df_spectrum = df_spectrum["TO_PARSE"].str.rsplit(",", expand=True, n=2)
    df_spectrum.columns = ["spectrum_line", "date", "time"]
    df_spectrum["datetime"] = pd.to_datetime(
        df_spectrum["date"] + " " + df_spectrum["time"],
        format="%d/%m/%Y %H:%M:%S",
    )

    # Define groups of consecutive spectrum lines
    # - Mark new group when time gap > 10 s
    is_new_group = (df_spectrum["datetime"].diff().dt.total_seconds() > 10).fillna(True)
    group_id = is_new_group.cumsum()
    # - Assign the first datetime of each group
    df_spectrum["group_time"] = df_spectrum.groupby(group_id)["datetime"].transform("first")

    # Group spectrum by timesteps
    # NOTE: include_groups=False requires pandas >= 2.2
    df_raw_drop_number = (
        df_spectrum.groupby("group_time", as_index=False)
        .apply(build_spectrum_block, include_groups=False)
        .reset_index(drop=True)
    )

    # Retrieve 1-min data
    # - Split by ',' delimiter
    df_data = df_params["TO_PARSE"].str.split(",", expand=True)

    # - Assign column names
    names = [
        "date",
        "time",
        "sws250",
        "sensor_id",
        "sample_interval",
        "mor_visibility_5min",  # remove unit and ensure in meters !
        "weather_code_synop_4680",
        "past_weather1",
        "past_weather2",
        "obstruction_status",
        "weather_code_metar_4678",
        "precipitation_rate",
        "mor_visibility",  # remove unit and ensure in meters !
        "total_extinction_coefficient",  # [km-1]
        "transmissometer_extinction_coefficient",  # [km-1]
        "back_scatter_extinction_coefficient",  # [km-1]
        "sensor_temperature",  # [degrees] or air_temperature ?
        "ambient_light_sensor_signal",  # [cd/m2] # ALS
        "sensor_status",
        "number_particles",
        "precipitation_accumulated",  # [mm] over sample_interval
        "ambient_light_sensor_signal_status",
        "date1",
        "time1",
    ]
    df_data.columns = names

    # Clean out variables
    # - regex=False makes the replacements literal: with the pre-2.0 pandas
    #   regex=True default, '+99999' would be an invalid regex pattern
    df_data["mor_visibility_5min"] = df_data["mor_visibility_5min"].str.replace("M", "", regex=False)
    df_data["mor_visibility"] = df_data["mor_visibility"].str.replace("M", "", regex=False)
    df_data["sensor_temperature"] = df_data["sensor_temperature"].str.replace("C", "", regex=False)
    df_data["ambient_light_sensor_signal"] = df_data["ambient_light_sensor_signal"].str.replace(
        "+99999",
        "NaN",
        regex=False,
    )

    # Define datetime
    df_data["datetime"] = pd.to_datetime(df_data["date1"] + " " + df_data["time1"], format="%d/%m/%Y %H:%M:%S")

    # Merge df_data on df_raw_drop_number
    # NOTE(review): merge_asof requires both keys to be sorted -- assumed to
    # hold because records appear chronologically in the raw file; verify.
    # TODO list
    # - should we aggregate variables to 5 min temporal resolution
    #   to match raw_drop_number
    # - should we infill df_raw_drop_number with 0 when no time every 5 min?
    df = pd.merge_asof(
        df_raw_drop_number,
        df_data,
        left_on="group_time",
        right_on="datetime",
        direction="nearest",
        tolerance=pd.Timedelta("10s"),  # max difference allowed
    )

    # Define final time
    # TODO list
    # - which time should we take as final time?
    # - raw_drop_number time is end of measurement interval right?
    df["time"] = df["group_time"]

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "group_time",
        "date",
        "sws250",
        "sample_interval",
        "sensor_id",
        "date1",
        "time1",
        "datetime",
    ]
    df = df.drop(columns=columns_to_drop)
    return df