disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. disdrodb/__init__.py +68 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +177 -24
  7. disdrodb/api/configs.py +3 -3
  8. disdrodb/api/info.py +13 -13
  9. disdrodb/api/io.py +281 -22
  10. disdrodb/api/path.py +184 -195
  11. disdrodb/api/search.py +18 -9
  12. disdrodb/cli/disdrodb_create_summary.py +103 -0
  13. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  14. disdrodb/cli/disdrodb_run_l0.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
  19. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  20. disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
  21. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  22. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  23. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  24. disdrodb/configs.py +149 -4
  25. disdrodb/constants.py +61 -0
  26. disdrodb/data_transfer/download_data.py +127 -11
  27. disdrodb/etc/configs/attributes.yaml +339 -0
  28. disdrodb/etc/configs/encodings.yaml +473 -0
  29. disdrodb/etc/products/L1/global.yaml +13 -0
  30. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  31. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +22 -0
  33. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  34. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  35. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  38. disdrodb/etc/products/L2M/global.yaml +26 -0
  39. disdrodb/issue/writer.py +2 -0
  40. disdrodb/l0/__init__.py +13 -0
  41. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  42. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  43. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  44. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  45. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  46. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  48. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  49. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  50. disdrodb/l0/l0a_processing.py +37 -32
  51. disdrodb/l0/l0b_nc_processing.py +118 -8
  52. disdrodb/l0/l0b_processing.py +30 -65
  53. disdrodb/l0/l0c_processing.py +369 -259
  54. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  55. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  56. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  58. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  59. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  63. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  66. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  67. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  69. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  71. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  72. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  73. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
  74. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  75. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  76. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  79. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  80. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  81. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  83. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
  84. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  85. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  86. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  87. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  88. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  89. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  90. disdrodb/l1/__init__.py +5 -0
  91. disdrodb/l1/fall_velocity.py +46 -0
  92. disdrodb/l1/filters.py +34 -20
  93. disdrodb/l1/processing.py +46 -45
  94. disdrodb/l1/resampling.py +77 -66
  95. disdrodb/l1_env/routines.py +18 -3
  96. disdrodb/l2/__init__.py +7 -0
  97. disdrodb/l2/empirical_dsd.py +58 -10
  98. disdrodb/l2/processing.py +268 -117
  99. disdrodb/metadata/checks.py +132 -125
  100. disdrodb/metadata/standards.py +3 -1
  101. disdrodb/psd/fitting.py +631 -345
  102. disdrodb/psd/models.py +9 -6
  103. disdrodb/routines/__init__.py +54 -0
  104. disdrodb/{l0/routines.py → routines/l0.py} +316 -355
  105. disdrodb/{l1/routines.py → routines/l1.py} +76 -116
  106. disdrodb/routines/l2.py +1019 -0
  107. disdrodb/{routines.py → routines/wrappers.py} +98 -10
  108. disdrodb/scattering/__init__.py +16 -4
  109. disdrodb/scattering/axis_ratio.py +61 -37
  110. disdrodb/scattering/permittivity.py +504 -0
  111. disdrodb/scattering/routines.py +746 -184
  112. disdrodb/summary/__init__.py +17 -0
  113. disdrodb/summary/routines.py +4196 -0
  114. disdrodb/utils/archiving.py +434 -0
  115. disdrodb/utils/attrs.py +68 -125
  116. disdrodb/utils/cli.py +5 -5
  117. disdrodb/utils/compression.py +30 -1
  118. disdrodb/utils/dask.py +121 -9
  119. disdrodb/utils/dataframe.py +61 -7
  120. disdrodb/utils/decorators.py +31 -0
  121. disdrodb/utils/directories.py +35 -15
  122. disdrodb/utils/encoding.py +37 -19
  123. disdrodb/{l2 → utils}/event.py +15 -173
  124. disdrodb/utils/logger.py +14 -7
  125. disdrodb/utils/manipulations.py +81 -0
  126. disdrodb/utils/routines.py +166 -0
  127. disdrodb/utils/subsetting.py +214 -0
  128. disdrodb/utils/time.py +35 -177
  129. disdrodb/utils/writer.py +20 -7
  130. disdrodb/utils/xarray.py +5 -4
  131. disdrodb/viz/__init__.py +13 -0
  132. disdrodb/viz/plots.py +398 -0
  133. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
  134. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
  135. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
  136. disdrodb/l1/encoding_attrs.py +0 -642
  137. disdrodb/l2/processing_options.py +0 -213
  138. disdrodb/l2/routines.py +0 -868
  139. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  140. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  141. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  142. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,525 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import os
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+
23
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
24
+ from disdrodb.l0.l0a_processing import read_raw_text_file
25
+ from disdrodb.utils.logger import log_error
26
+
27
+
28
def reader_parsivel(filepath, logger):
    """Reader for Parsivel CR1000 Data Logger file.

    The raw file layout varies across stations and campaigns. The header row
    (containing "TIMESTAMP") is searched within the first 3 rows, and the
    number of header columns selects the appropriate column-name mapping.

    Parameters
    ----------
    filepath : str
        Path to the raw CR1000 'parsivel' text file.
    logger
        Logger passed through to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with the Parsivel integral variables and a datetime
        'time' column, adhering to DISDRODB L0 standards.

    Raises
    ------
    ValueError
        If 'TIMESTAMP' is not found in the first 3 rows of the file, or if
        the number of columns does not match any known logger table layout.
    """
    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"
    # - Skip first row as columns names
    reader_kwargs["header"] = None
    # - Do not skip any row here: the header row is detected and removed below
    reader_kwargs["skiprows"] = 0
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    # - Each raw text line is read into a single 'TO_PARSE' column
    df_raw = read_raw_text_file(
        filepath=filepath,
        column_names=["TO_PARSE"],
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    # Retrieve header, number of columns and starting rows
    # - Search in the first 3 rows where "TIMESTAMP" occurs
    # - Once identified the row, strip away everything before TIMESTAMP
    # - Then identify start_row_idx as the header row index + 3
    #   (skipping the rows that follow the header before the actual data)
    for i in range(3):
        line = df_raw.iloc[i]["TO_PARSE"]
        if "TIMESTAMP" in line:
            # Remove double and single quotes
            line = line.replace('""', '"').replace('"', "")
            # Define header
            timestamp_idx = line.find("TIMESTAMP")
            header_str = line[timestamp_idx:]
            header = header_str.split(",")
            # Define number of columns
            n_columns = len(header)
            # Define start row with data
            start_row_idx = i + 3
            break
    else:
        raise ValueError("Could not find 'TIMESTAMP' in the first 3 rows of the file.")

    # Retrieve rows with actual data
    df = df_raw.iloc[start_row_idx:]

    # Expand dataframe
    # - Limit to n_columns - 1 splits so any extra trailing values remain in the last column
    df = df["TO_PARSE"].str.split(",", expand=True, n=n_columns - 1)

    #### Define column names
    # - The number of header columns identifies the logger program / station layout
    # NOTE(review): the 'trasmitter' / 'serial_numer' spellings are kept as-is;
    # they are used consistently in this reader — confirm against DISDRODB standards.
    if n_columns == 15:
        # 05_VILLENEUVE_DE_BERG_1 (2011)
        # 90_GALABRE (2020)
        column_names = [
            "time",
            "RECORD",
            "rainfall_rate_32bit",
            "rainfall_accumulated_32bit",
            "weather_code_synop_4680",
            "weather_code_synop_4677",
            "reflectivity_32bit",
            "mor_visibility",
            "laser_amplitude",
            "number_particles",
            "sensor_temperature",
            "sensor_heating_current",
            "sensor_battery_voltage",
            "sensor_status",
            "rain_kinetic_energy",
        ]
    elif n_columns == 16:
        # 33_PRADEL_VIGNES (2011-2015)
        # - The two 16-column layouts are disambiguated via the 'Panel_Temp' header field
        if "Panel_Temp" in header:
            column_names = [
                "time",
                "RECORD",
                "V_Batt_Min",
                "Panel_Temp",
                "rainfall_rate_32bit",
                "rainfall_accumulated_32bit",
                "weather_code_synop_4680",
                "weather_code_synop_4677",
                "reflectivity_32bit",
                "mor_visibility",
                "laser_amplitude",
                "number_particles",
                "sensor_temperature",
                "sensor_heating_current",
                "sensor_battery_voltage",
                "sensor_status",
            ]
        else:
            # 33_PRADEL_VIGNES (2020)
            column_names = [
                "time",
                "RECORD",
                "rainfall_rate_32bit",
                "rainfall_accumulated_32bit",
                "weather_code_synop_4680",
                "weather_code_synop_4677",
                "reflectivity_32bit",
                "mor_visibility",
                "laser_amplitude",
                "number_particles",
                "sensor_temperature",
                "sensor_heating_current",
                "sensor_battery_voltage",
                "sensor_status",
                "rain_kinetic_energy",
                "V_Batt_Min",
            ]
    elif n_columns == 19:
        column_names = [
            "time",
            "RECORD",
            "rainfall_rate_32bit",
            "rainfall_accumulated_32bit",
            "weather_code_synop_4680",
            "weather_code_synop_4677",
            "reflectivity_32bit",
            "mor_visibility",
            "laser_amplitude",
            "number_particles",
            "sensor_temperature",
            "sensor_heating_current",
            "sensor_battery_voltage",
            "sample_interval",
            "sensor_status",
            "rain_kinetic_energy",
            "sensor_temperature_receiver",
            "sensor_temperature_trasmitter",
            "V_Batt_Min",
        ]
    elif n_columns == 20:
        column_names = [
            "time",
            "RECORD",
            "rainfall_rate_32bit",
            "rainfall_accumulated_32bit",
            "weather_code_synop_4680",
            "weather_code_synop_4677",
            "reflectivity_32bit",
            "mor_visibility",
            "laser_amplitude",
            "number_particles",
            "sensor_temperature",
            "sensor_heating_current",
            "sensor_battery_voltage",
            "sensor_status",
            "sensor_temperature_receiver",
            "sensor_temperature_trasmitter",
            "rain_kinetic_energy",
            "V_Batt_Min",
            "sample_interval",
            "Temps_present",
        ]
    elif n_columns == 24:
        # ALES (2021)
        column_names = [
            "time",  # 0
            "RECORD",  # 1
            "rainfall_rate_32bit",  # 2
            "rainfall_accumulated_32bit",  # 3
            "weather_code_synop_4680",  # 4
            "weather_code_synop_4677",  # 5
            "reflectivity_32bit",  # 6
            "mor_visibility",  # 7
            "laser_amplitude",  # 8
            "number_particles",  # 9
            "sensor_temperature",  # 10
            "sensor_heating_current",  # 11
            "sensor_battery_voltage",  # 12
            "sensor_status",  # 13
            "rain_kinetic_energy",  # 14
            "AccuH_parsivel",  # 15
            "AccuD_parsivel",  # 16
            "AccuM_parsivel",  # 17
            "AccuY_parsivel",  # 18
            "air_temperature",  # 19
            "relative_humidity",  # 20
            "wind_speed",  # 21
            "wind_direction",  # 22
            "V_Batt_Min",  # 23
        ]
    elif n_columns == 25:
        # AINAC (2024)
        # - Same as the 24-column layout plus one trailing unidentified field
        column_names = [
            "time",  # 0
            "RECORD",  # 1
            "rainfall_rate_32bit",  # 2
            "rainfall_accumulated_32bit",  # 3
            "weather_code_synop_4680",  # 4
            "weather_code_synop_4677",  # 5
            "reflectivity_32bit",  # 6
            "mor_visibility",  # 7
            "laser_amplitude",  # 8
            "number_particles",  # 9
            "sensor_temperature",  # 10
            "sensor_heating_current",  # 11
            "sensor_battery_voltage",  # 12
            "sensor_status",  # 13
            "rain_kinetic_energy",  # 14
            "AccuH_parsivel",  # 15
            "AccuD_parsivel",  # 16
            "AccuM_parsivel",  # 17
            "AccuY_parsivel",  # 18
            "air_temperature",  # 19
            "relative_humidity",  # 20
            "wind_speed",  # 21
            "wind_direction",  # 22
            "V_Batt_Min",  # 23
            "unknown",
        ]
    elif n_columns == 41:
        # Keep only the first 15 integral variables; the remaining fields are discarded
        df = df.iloc[:, :15]
        column_names = [
            "time",  # 0
            "RECORD",  # 1
            "rainfall_rate_32bit",  # 2
            "rainfall_accumulated_32bit",  # 3
            "weather_code_synop_4680",  # 4
            "weather_code_synop_4677",  # 5
            "reflectivity_32bit",  # 6
            "mor_visibility",  # 7
            "laser_amplitude",  # 8
            "number_particles",  # 9
            "sensor_temperature",  # 10
            "sensor_heating_current",  # 11
            "sensor_battery_voltage",  # 12
            "sensor_status",  # 13
            "rain_kinetic_energy",  # 14
        ]
    elif n_columns == 76:
        # ALES (2009)
        # - Columns 14:46 hold the 32 drop-concentration classes ("-10" is a no-data flag)
        # - Columns 46: hold 30 velocity classes; the two smallest classes are
        #   prepended as zeros to obtain the standard 32 classes
        raw_drop_concentration = df.iloc[:, 14:46].agg(",".join, axis=1).str.replace("-10", "0")
        raw_drop_average_velocity = "0,0," + df.iloc[:, 46:].agg(",".join, axis=1)
        df = df.iloc[:, 0:14]
        df["raw_drop_concentration"] = raw_drop_concentration
        df["raw_drop_average_velocity"] = raw_drop_average_velocity

        column_names = [
            "time",
            "RECORD",
            "V_Batt_Min",
            "rainfall_rate_32bit",
            "rainfall_accumulated_32bit",
            "weather_code_synop_4680",
            "weather_code_synop_4677",
            "reflectivity_32bit",
            "mor_visibility",
            "laser_amplitude",
            "number_particles",
            "sensor_heating_current",
            "sensor_serial_numer",
            "error_code",
            "raw_drop_concentration",
            "raw_drop_average_velocity",
        ]
    else:
        raise ValueError(f"{filepath} has {n_columns} columns. Undefined reader.")

    ##------------------------------------------------------------------------.
    #### Assign column names
    df.columns = column_names

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Define time as datetime column (unparsable timestamps become NaT)
    df["time"] = pd.to_datetime(df["time"].str.strip('"'), format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Set missing columns as NaN so all layouts expose the same variables
    potential_missing_columns = [
        "sensor_temperature_receiver",
        "sensor_temperature_trasmitter",
        "rain_kinetic_energy",
    ]
    for column in potential_missing_columns:
        if column not in df.columns:
            df[column] = np.nan

    # Drop columns not agreeing with DISDRODB L0 standards
    # - errors="ignore" because most layouts contain only a subset of these
    columns_to_drop = [
        "RECORD",
        "V_Batt_Min",
        "Panel_Temp",
        "Temps_present",
        "sample_interval",
        "sensor_serial_numer",
        "AccuH_parsivel",
        "AccuD_parsivel",
        "AccuM_parsivel",
        "AccuY_parsivel",
        "unknown",
    ]
    df = df.drop(columns=columns_to_drop, errors="ignore")
    return df
345
+
346
+
347
def select_only_valid_rows(df, expected_n_values, logger, filepath):
    """Keep only rows whose comma-separated value count matches the chosen expected count.

    Among the expected counts actually observed in the file, the most frequent
    one is selected; rows with any other count (expected or not) are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with 'TO_PARSE' (raw comma-separated string) and 'time' columns.
    expected_n_values : int, float or list
        Accepted number(s) of comma-separated values per row.
    logger
        Logger used to report rows with unexpected value counts.
    filepath : str
        File path, used only in error/log messages.

    Returns
    -------
    tuple
        ``(df, n_values, invalid_timesteps)``: the filtered dataframe, the
        selected value count, and the 'time' values of the dropped rows.

    Raises
    ------
    ValueError
        If no row matches any of the expected value counts.
    """
    # Normalize a scalar input into a list of accepted counts
    if isinstance(expected_n_values, (int, float)):
        expected_n_values = [expected_n_values]

    # Count the comma-separated values of each row
    counts_per_row = df["TO_PARSE"].apply(lambda s: len(s.split(",")))

    # Build the frequency table of the observed counts
    observed, frequencies = np.unique(counts_per_row, return_counts=True)

    # Retain only the observed counts that are expected
    candidates = []
    for value, frequency in zip(observed, frequencies):
        if value in expected_n_values:
            candidates.append((value, frequency))
    if not candidates:
        raise ValueError(
            f"{filepath} has no rows with expected number of values: {expected_n_values}."
            f"Found rows with the following number of values: {observed}.",
        )

    # Select the most frequent expected count
    n_values = max(candidates, key=lambda item: item[1])[0]

    # Flag the rows deviating from the selected count
    is_invalid = counts_per_row != n_values
    invalid_timesteps = df["time"][is_invalid]
    invalid_timesteps_str = list(invalid_timesteps.astype(str))

    # Report when the file mixes several row formats
    if len(observed) != 1:
        msg = f"{filepath} has an unexpected number of values at following timesteps: {invalid_timesteps_str}."
        log_error(msg=msg, logger=logger)

    # Discard the flagged rows
    df = df[~is_invalid]

    return df, n_values, invalid_timesteps
383
+
384
+
385
def add_nan_at_invalid_timesteps(df, invalid_timesteps):
    """Reinsert rows at invalid timesteps, with all data columns set to the string "NaN".

    Returns a new dataframe sorted by time; the input dataframe is not
    modified in place (callers must use the return value). The "NaN" string
    placeholder is parsed as missing data downstream.
    """
    # Nothing to infill: hand back the dataframe untouched
    n_invalid = len(invalid_timesteps)
    if n_invalid == 0:
        return df

    # Build placeholder rows ("NaN" in every column except 'time')
    filler = {}
    for column in df.columns:
        if column != "time":
            filler[column] = ["NaN"] * n_invalid
    nan_rows = pd.DataFrame(filler)
    nan_rows["time"] = invalid_timesteps.to_numpy()

    # Append the placeholder rows and restore chronological order
    combined = pd.concat([df, nan_rows], ignore_index=True)
    combined = combined.sort_values("time")
    return combined.reset_index(drop=True)
398
+
399
+
400
def reader_spectrum(filepath, logger):
    """Reader for Spectrum CR1000 Data Logger file.

    Parameters
    ----------
    filepath : str
        Path to the raw CR1000 'spectre' text file.
    logger
        Logger passed through to ``read_raw_text_file`` and error logging.

    Returns
    -------
    pandas.DataFrame
        Dataframe with 'time' and the raw drop array columns
        ('raw_drop_number' and, depending on the file layout,
        'raw_drop_concentration' and 'raw_drop_average_velocity').

    Raises
    ------
    ValueError
        If no row matches the expected number of spectrum values.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"
    # - Skip first row as columns names
    reader_kwargs["header"] = None
    # - Skip the 4 header rows preceding the data
    reader_kwargs["skiprows"] = 4
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Split off the leading time and record-counter fields
    df = df["TO_PARSE"].str.split(",", n=2, expand=True)
    df.columns = ["time", "RECORD", "TO_PARSE"]

    # Define time in datetime format (unparsable timestamps become NaT)
    df["time"] = pd.to_datetime(df["time"].str.strip('"'), format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Keep only rows with valid number of values
    # - 1024 = 32x32 raw spectrum only
    # - 1054 = 30 velocity classes + 32x32 raw spectrum
    # - 1086 = 32 concentration + 30 velocity classes + 32x32 raw spectrum
    df, n_values, invalid_timesteps = select_only_valid_rows(
        df=df,
        expected_n_values=[1024, 1054, 1086],
        logger=logger,
        filepath=filepath,
    )

    # Derive raw drop arrays
    if n_values == 1024:
        df["raw_drop_number"] = df["TO_PARSE"]
    elif n_values == 1054:
        # VALESCURE (2014 03-09)
        # - Two zero classes are prepended to the 30 velocity classes to obtain 32
        df_split = df["TO_PARSE"].str.split(",", expand=True)
        raw_drop_average_velocity = "0,0," + df_split.iloc[:, :30].agg(",".join, axis=1)
        raw_drop_number = df_split.iloc[:, 30:].agg(",".join, axis=1)
        df["raw_drop_average_velocity"] = raw_drop_average_velocity
        df["raw_drop_number"] = raw_drop_number
        df["raw_drop_concentration"] = "NaN"
    elif n_values == 1086:
        # - "-10" is a no-data flag in the concentration classes
        df_split = df["TO_PARSE"].str.split(",", expand=True)
        raw_drop_concentration = df_split.iloc[:, :32].agg(",".join, axis=1).str.replace("-10", "0")
        raw_drop_average_velocity = "0,0," + df_split.iloc[:, 32:62].agg(",".join, axis=1)
        raw_drop_number = df_split.iloc[:, 62:].agg(",".join, axis=1)
        df["raw_drop_concentration"] = raw_drop_concentration
        df["raw_drop_average_velocity"] = raw_drop_average_velocity
        df["raw_drop_number"] = raw_drop_number
    else:
        # Defensive: unreachable because select_only_valid_rows restricts n_values
        raise ValueError(f"{filepath} has {n_values} spectrum values. Undefined reader.")

    # Drop columns not agreeing with DISDRODB L0 standards
    df = df.drop(columns=["TO_PARSE", "RECORD"])

    # Infill with NaN at invalid timesteps
    # - BUGFIX: the helper returns a new dataframe (it does not mutate in place),
    #   so its result must be assigned back; previously the call was discarded
    #   and the NaN infill never took effect.
    df = add_nan_at_invalid_timesteps(df, invalid_timesteps)
    return df
488
+
489
+
490
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    # The raw spectrum is stored in a companion "spectre" file next to the "parsivel" file
    spectrum_filepath = filepath.replace("parsivel", "spectre")

    # Read the integral variables
    df = reader_parsivel(filepath, logger=logger)

    # Drop duplicated timesteps
    df = df.drop_duplicates(subset="time", keep="first")

    # Ensure the raw array columns exist
    # --> An empty string produces a zero-values array in L0B processing
    for column in ("raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"):
        if column not in df:
            df[column] = ""

    # Merge the raw spectrum when the companion file is available
    if os.path.exists(spectrum_filepath):
        # Read the raw spectrum and drop duplicated timesteps
        df_spectrum = reader_spectrum(spectrum_filepath, logger=logger)
        df_spectrum = df_spectrum.drop_duplicates(subset="time", keep="first")
        # Align on time and overwrite the placeholder array columns
        df = df.set_index("time")
        df_spectrum = df_spectrum.set_index("time")
        df.update(df_spectrum)
        # Set back time as column
        df = df.reset_index()

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -107,7 +107,7 @@ def reader(
107
107
  # - The '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops detected
108
108
  # --> "" generates an array of zeros in L0B processing
109
109
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
110
- # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
110
+ # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
111
111
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
112
112
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
113
113
 
@@ -15,6 +15,7 @@
15
15
  # You should have received a copy of the GNU General Public License
16
16
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
  # -----------------------------------------------------------------------------.
18
+ """This reader allows to read raw data from NASA GCPEX, OLYMPEX and IPHEX campaigns."""
18
19
  import pandas as pd
19
20
 
20
21
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -34,12 +35,13 @@ def reader(
34
35
  ##------------------------------------------------------------------------.
35
36
  #### Define reader options
36
37
  reader_kwargs = {}
37
- # Skip first row as columns names
38
- reader_kwargs["header"] = None
39
- # Skip file with encoding errors
40
- reader_kwargs["encoding_errors"] = "ignore"
41
38
  # - Define delimiter
42
39
  reader_kwargs["delimiter"] = ";"
40
+ # - Skip first row as columns names
41
+ reader_kwargs["header"] = None
42
+ reader_kwargs["skiprows"] = 0
43
+ # - Skip file with encoding errors
44
+ reader_kwargs["encoding_errors"] = "ignore"
43
45
  # - Avoid first column to become df index !!!
44
46
  reader_kwargs["index_col"] = False
45
47
  # - Define behaviour when encountering bad lines
@@ -68,14 +70,14 @@ def reader(
68
70
 
69
71
  ##------------------------------------------------------------------------.
70
72
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
71
- # Define 'time' datetime
73
+ # Convert time column to datetime
72
74
  df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
73
75
 
74
76
  # Split the 'TO_BE_SPLITTED' column
75
77
  df = df["TO_BE_SPLITTED"].str.split(",", n=9, expand=True)
76
78
 
77
79
  # Assign column names
78
- column_names = [
80
+ names = [
79
81
  "station_name",
80
82
  "sensor_status",
81
83
  "sensor_temperature",
@@ -87,7 +89,7 @@ def reader(
87
89
  "weather_code_synop_4677",
88
90
  "raw_drop_number",
89
91
  ]
90
- df.columns = column_names
92
+ df.columns = names
91
93
 
92
94
  # Add the time column
93
95
  df["time"] = df_time
@@ -115,7 +115,7 @@ def reader(
115
115
  # --> "" generates an array of zeros in L0B processing
116
116
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
117
117
 
118
- # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
118
+ # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
119
119
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
120
120
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
121
121