disdrodb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. disdrodb/__init__.py +1 -1
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/io.py +12 -2
  4. disdrodb/data_transfer/download_data.py +145 -14
  5. disdrodb/l0/check_standards.py +15 -10
  6. disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
  7. disdrodb/l0/configs/LPM/l0a_encodings.yml +4 -4
  8. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +22 -6
  9. disdrodb/l0/configs/LPM/l0b_encodings.yml +41 -0
  10. disdrodb/l0/configs/LPM/raw_data_format.yml +40 -0
  11. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  12. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  13. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
  14. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
  15. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +41 -0
  16. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +50 -10
  17. disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
  18. disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
  19. disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
  20. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
  21. disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
  22. disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
  23. disdrodb/l0/configs/RD80/raw_data_format.yml +2 -6
  24. disdrodb/l0/l0b_nc_processing.py +1 -1
  25. disdrodb/l0/l0b_processing.py +12 -10
  26. disdrodb/l0/manuals/SWS250.pdf +0 -0
  27. disdrodb/l0/manuals/VPF730.pdf +0 -0
  28. disdrodb/l0/manuals/VPF750.pdf +0 -0
  29. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +23 -13
  30. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +3 -3
  31. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +5 -3
  32. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +36 -20
  33. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
  34. disdrodb/l0/readers/LPM/KIT/CHWALA.py +225 -0
  35. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
  36. disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
  37. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +107 -0
  38. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +125 -0
  39. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  40. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  41. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
  42. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
  43. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +133 -0
  44. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +138 -0
  45. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  46. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  47. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +9 -0
  48. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +67 -0
  49. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
  50. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +291 -0
  51. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -1
  52. disdrodb/l0/standards.py +7 -4
  53. disdrodb/l0/template_tools.py +2 -2
  54. disdrodb/l1/encoding_attrs.py +30 -8
  55. disdrodb/l1/processing.py +6 -4
  56. disdrodb/l1/resampling.py +1 -1
  57. disdrodb/l1/routines.py +9 -7
  58. disdrodb/l2/empirical_dsd.py +100 -2
  59. disdrodb/l2/event.py +3 -3
  60. disdrodb/l2/processing.py +21 -12
  61. disdrodb/l2/processing_options.py +7 -7
  62. disdrodb/l2/routines.py +3 -3
  63. disdrodb/metadata/checks.py +15 -6
  64. disdrodb/metadata/manipulation.py +2 -2
  65. disdrodb/metadata/standards.py +83 -79
  66. disdrodb/metadata/writer.py +2 -2
  67. disdrodb/routines.py +246 -10
  68. disdrodb/scattering/routines.py +1 -1
  69. disdrodb/utils/dataframe.py +342 -0
  70. disdrodb/utils/directories.py +14 -2
  71. disdrodb/utils/xarray.py +83 -0
  72. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/METADATA +34 -61
  73. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/RECORD +77 -54
  74. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/WHEEL +1 -1
  75. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/entry_points.txt +3 -3
  76. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/licenses/LICENSE +0 -0
  77. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,182 @@
1
+ mor_visibility:
2
+ n_digits: 5
3
+ n_characters: 6
4
+ n_decimals: 1
5
+ n_naturals: 4
6
+ data_range:
7
+ - 0
8
+ - 9999.9
9
+ nan_flags: null
10
+ field_number: "20"
11
+ weather_code_synop_4680:
12
+ n_digits: 2
13
+ n_characters: 2
14
+ n_decimals: 0
15
+ n_naturals: 2
16
+ data_range:
17
+ - 0
18
+ - 89
19
+ nan_flags: null
20
+ field_number: "21"
21
+ weather_code_metar_4678:
22
+ n_digits: null
23
+ n_characters: null
24
+ n_decimals: null
25
+ n_naturals: null
26
+ data_range: null
27
+ nan_flags: null
28
+ field_number: "22"
29
+ weather_code_nws:
30
+ n_digits: null
31
+ n_characters: null
32
+ n_decimals: null
33
+ n_naturals: null
34
+ data_range: null
35
+ nan_flags: null
36
+ field_number: "23"
37
+ alarms:
38
+ n_digits: 0
39
+ n_characters: 31
40
+ n_decimals: 0
41
+ n_naturals: 0
42
+ data_range: null
43
+ nan_flags: null
44
+ field_number: "24"
45
+ sensor_status:
46
+ n_digits: 1
47
+ n_characters: 1
48
+ n_decimals: 0
49
+ n_naturals: 1
50
+ data_range:
51
+ - 0
52
+ - 4
53
+ nan_flags: null
54
+ valid_values:
55
+ - 0
56
+ - 1
57
+ - 2
58
+ - 3
59
+ - 4
60
+ field_number: "25"
61
+ air_temperature:
62
+ n_digits: 3
63
+ n_characters: 4
64
+ n_decimals: 2
65
+ n_naturals: 1
66
+ data_range:
67
+ - -99.9
68
+ - 99.9
69
+ nan_flags: null
70
+ field_number: "30"
71
+ relative_humidity:
72
+ n_digits: 3
73
+ n_characters: 4
74
+ n_decimals: 2
75
+ n_naturals: 2
76
+ data_range:
77
+ - 0
78
+ - 100
79
+ nan_flags: null
80
+ field_number: "30"
81
+ wetbulb_temperature:
82
+ n_digits: 3
83
+ n_characters: 4
84
+ n_decimals: 2
85
+ n_naturals: 1
86
+ data_range:
87
+ - -99.9
88
+ - 99.9
89
+ nan_flags: null
90
+ field_number: "30"
91
+ air_temperature_max:
92
+ n_digits: 3
93
+ n_characters: 4
94
+ n_decimals: 2
95
+ n_naturals: 1
96
+ data_range:
97
+ - -99.9
98
+ - 99.9
99
+ nan_flags: null
100
+ field_number: "31"
101
+ air_temperature_min:
102
+ n_digits: 3
103
+ n_characters: 4
104
+ n_decimals: 2
105
+ n_naturals: 1
106
+ data_range:
107
+ - -99.9
108
+ - 99.9
109
+ nan_flags: null
110
+ field_number: "31"
111
+ rainfall_rate:
112
+ n_digits: 5
113
+ n_characters: 6
114
+ n_decimals: 4
115
+ n_naturals: 1
116
+ data_range:
117
+ - 0
118
+ - 99999
119
+ nan_flags: null
120
+ field_number: "40"
121
+ rainfall_accumulated:
122
+ n_digits: 5
123
+ n_characters: 6
124
+ n_decimals: 4
125
+ n_naturals: 1
126
+ data_range:
127
+ - 0
128
+ - 99999
129
+ nan_flags: null
130
+ field_number: "41"
131
+ drop_size_distribution:
132
+ n_digits: 0
133
+ n_characters: 300
134
+ n_decimals: 0
135
+ n_naturals: 0
136
+ data_range: null
137
+ nan_flags: null
138
+ field_number: "42"
139
+ average_drop_velocity:
140
+ n_digits: 4
141
+ n_characters: 5
142
+ n_decimals: 3
143
+ n_naturals: 1
144
+ data_range: null
145
+ nan_flags: null
146
+ field_number: "43"
147
+ average_drop_size:
148
+ n_digits: 4
149
+ n_characters: 5
150
+ n_decimals: 3
151
+ n_naturals: 1
152
+ data_range: null
153
+ nan_flags: null
154
+ field_number: "43"
155
+ type_distribution:
156
+ n_digits: 0
157
+ n_characters: 11
158
+ n_decimals: 0
159
+ n_naturals: 0
160
+ data_range: null
161
+ nan_flags: null
162
+ field_number: "44"
163
+ raw_drop_number:
164
+ n_digits: 0
165
+ n_characters: 4624
166
+ n_decimals: 0
167
+ n_naturals: 0
168
+ data_range: null
169
+ nan_flags: null
170
+ dimension_order:
171
+ - diameter_bin_center
172
+ - velocity_bin_center
173
+ n_values: 1156
174
+ field_number: "47"
175
+ peak_to_pedestal_hist:
176
+ n_digits: 0
177
+ n_characters: 50
178
+ n_decimals: 0
179
+ n_naturals: 0
180
+ data_range: null
181
+ nan_flags: null
182
+ field_number: "48"
@@ -130,9 +130,7 @@ N0:
130
130
  n_characters: 9
131
131
  n_decimals: 4
132
132
  n_naturals: 4
133
- data_range:
134
- - 0
135
- - 9999.9999
133
+ data_range: null
136
134
  nan_flags: null
137
135
  field_number: "34"
138
136
  slope:
@@ -140,9 +138,7 @@ slope:
140
138
  n_characters: 9
141
139
  n_decimals: 4
142
140
  n_naturals: 1
143
- data_range:
144
- - 0
145
- - 9.9999
141
+ data_range: null
146
142
  nan_flags: null
147
143
  field_number: "35"
148
144
  Dmax:
@@ -480,7 +480,7 @@ def sanitize_ds(
480
480
  ds = set_nan_invalid_values(ds, sensor_name=sensor_name, logger=logger, verbose=verbose)
481
481
 
482
482
  # Finalize dataset
483
- ds = finalize_dataset(ds, sensor_name=sensor_name, attrs=metadata)
483
+ ds = finalize_dataset(ds, sensor_name=sensor_name, metadata=metadata)
484
484
 
485
485
  # Return dataset
486
486
  return ds
@@ -170,6 +170,8 @@ def _reshape_raw_spectrum(
170
170
  {"diameter_bin_center": 32, "velocity_bin_center": 32}
171
171
  For LPM
172
172
  {"diameter_bin_center": 22, "velocity_bin_center": 20}
173
+ For PWS100
174
+ {"diameter_bin_center": 34, "velocity_bin_center": 34}
173
175
  n_timesteps : int
174
176
  Number of timesteps.
175
177
 
@@ -256,7 +258,7 @@ def retrieve_l0b_arrays(
256
258
 
257
259
  # For key='raw_drop_number', if 2D spectrum, reshape to 2D matrix
258
260
  # Example:
259
- # - This applies i.e for PARSIVEL* and LPM
261
+ # - This applies i.e for PARSIVEL*, LPM, PWS100
260
262
  # - This does not apply to RD80
261
263
  if key == "raw_drop_number" and len(dims_order) == 2:
262
264
  arr, dims = _reshape_raw_spectrum(
@@ -416,15 +418,15 @@ def create_l0b_from_l0a(
416
418
  Error if the DISDRODB L0B xarray dataset can not be created.
417
419
  """
418
420
  # Retrieve sensor name
419
- attrs = metadata.copy()
420
- sensor_name = attrs["sensor_name"]
421
+ metadata = metadata.copy()
422
+ sensor_name = metadata["sensor_name"]
421
423
 
422
424
  # Define Dataset variables and coordinates
423
425
  data_vars = _define_dataset_variables(df, sensor_name=sensor_name, logger=logger, verbose=verbose)
424
426
 
425
427
  # Create xarray Dataset
426
428
  ds = xr.Dataset(data_vars=data_vars)
427
- ds = finalize_dataset(ds, sensor_name=sensor_name, attrs=attrs)
429
+ ds = finalize_dataset(ds, sensor_name=sensor_name, metadata=metadata)
428
430
  return ds
429
431
 
430
432
 
@@ -432,7 +434,7 @@ def create_l0b_from_l0a(
432
434
  #### L0B netCDF4 Writer
433
435
 
434
436
 
435
- def set_geolocation_coordinates(ds, attrs):
437
+ def set_geolocation_coordinates(ds, metadata):
436
438
  """Add geolocation coordinates to dataset."""
437
439
  # Assumption
438
440
  # - If coordinate is present in L0A, overrides the one specified in the attributes
@@ -443,22 +445,22 @@ def set_geolocation_coordinates(ds, attrs):
443
445
  for coord in coords:
444
446
  # If coordinate not present, add it from dictionary
445
447
  if coord not in ds:
446
- ds = ds.assign_coords({coord: attrs.pop(coord, np.nan)})
448
+ ds = ds.assign_coords({coord: metadata.pop(coord, np.nan)})
447
449
  # Else if set coordinates the variable in the dataset (present in the raw data)
448
450
  else:
449
451
  ds = ds.set_coords(coord)
450
- _ = attrs.pop(coord, None)
452
+ _ = metadata.pop(coord, None)
451
453
 
452
454
  # Set -9999 flag value to np.nan
453
455
  for coord in coords:
454
456
  ds[coord] = xr.where(ds[coord] == -9999, np.nan, ds[coord])
455
457
 
456
458
  # Set attributes without geolocation coordinates
457
- ds.attrs = attrs
459
+ ds.attrs = metadata
458
460
  return ds
459
461
 
460
462
 
461
- def finalize_dataset(ds, sensor_name, attrs):
463
+ def finalize_dataset(ds, sensor_name, metadata):
462
464
  """Finalize DISDRODB L0B Dataset."""
463
465
  # Ensure sorted by time
464
466
  ds = ensure_sorted_by_time(ds)
@@ -467,7 +469,7 @@ def finalize_dataset(ds, sensor_name, attrs):
467
469
  ds = ds.assign_coords(get_bin_coords_dict(sensor_name=sensor_name))
468
470
 
469
471
  # Set geolocation coordinates and attributes
470
- ds = set_geolocation_coordinates(ds, attrs=attrs)
472
+ ds = set_geolocation_coordinates(ds, metadata=metadata)
471
473
 
472
474
  # Add dataset CRS coordinate
473
475
  ds = add_dataset_crs_coords(ds)
Binary file
Binary file
Binary file
@@ -89,9 +89,11 @@ def reader(
89
89
  if len(df) == 0 or len(df) == 1:
90
90
  raise ValueError("No data to process.")
91
91
 
92
- # Retrieve time
92
+ # Retrieve time column and format in datetime64
93
93
  df_time = df[::2]
94
94
  df_time = df_time.reset_index(drop=True)
95
+ df_time = df_time["TO_BE_PARSED"].str.replace("-", "", n=1)
96
+ df_time = pd.to_datetime(df_time, format="%Y-%m-%d %H:%M:%S", errors="coerce")
95
97
 
96
98
  # Retrieve data
97
99
  df_data = df[1::2]
@@ -100,12 +102,6 @@ def reader(
100
102
  if len(df_time) != len(df_data):
101
103
  raise ValueError("Likely corrupted data. Not same number of timesteps and data.")
102
104
 
103
- # Remove starting - from timestep
104
- df_time = df_time["TO_BE_PARSED"].str.replace("-", "", n=1)
105
-
106
- # Format time in datetime64
107
- df_time = pd.to_datetime(df_time, format="%Y-%m-%d %H:%M:%S", errors="coerce")
108
-
109
105
  # Create dataframe
110
106
  df_data["time"] = df_time.to_numpy()
111
107
 
@@ -199,19 +195,31 @@ def reader(
199
195
  "number_particles_class_8_internal_data",
200
196
  "number_particles_class_9",
201
197
  "number_particles_class_9_internal_data",
202
- "raw_drop_number",
198
+ "TO_BE_FURTHER_PROCESSED",
203
199
  ]
204
200
  df.columns = column_names
205
201
 
202
+ # Extract the last variables remained in raw_drop_number
203
+ df_parsed = df["TO_BE_FURTHER_PROCESSED"].str.rsplit(";", n=6, expand=True)
204
+ df_parsed.columns = [
205
+ "raw_drop_number",
206
+ "air_temperature",
207
+ "relative_humidity",
208
+ "wind_speed",
209
+ "wind_direction",
210
+ "checksum",
211
+ "dummy",
212
+ ]
213
+
214
+ # Assign columns to the original dataframe
215
+ df[df_parsed.columns] = df_parsed
216
+
206
217
  # Drop row if start_identifier different than 00
207
218
  df["time"] = df_data["time"]
208
219
  df = df[df["start_identifier"].astype(str) == "00"]
209
220
 
210
- # Clean raw_drop_number (ignore last 5 column)
211
- df["raw_drop_number"] = df["raw_drop_number"].str[:1760]
212
-
213
221
  # Drop rows with invalid raw_drop_number
214
- df = df[df["raw_drop_number"].astype(str).str.len() == 1760]
222
+ df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
215
223
 
216
224
  # Drop columns not agreeing with DISDRODB L0 standards
217
225
  columns_to_drop = [
@@ -220,7 +228,9 @@ def reader(
220
228
  "sensor_serial_number",
221
229
  "sensor_date",
222
230
  "sensor_time",
231
+ "TO_BE_FURTHER_PROCESSED",
232
+ "checksum",
233
+ "dummy",
223
234
  ]
224
235
  df = df.drop(columns=columns_to_drop)
225
-
226
236
  return df
@@ -167,11 +167,11 @@ def reader(
167
167
  # Drop row if start_identifier different than 00
168
168
  df = df[df["start_identifier"].astype(str) == "00"]
169
169
 
170
- # Clean raw_drop_number (ignore last "AC" character)
171
- df["raw_drop_number"] = df["raw_drop_number"].str[:1760]
170
+ # Remove checksum from raw_drop_number
171
+ df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]
172
172
 
173
173
  # Drop rows with invalid raw_drop_number
174
- df = df[df["raw_drop_number"].astype(str).str.len() == 1760]
174
+ df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
175
175
 
176
176
  # Drop columns not agreeing with DISDRODB L0 standards
177
177
  columns_to_drop = [
@@ -162,14 +162,16 @@ def reader(
162
162
  df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
163
163
  df["time"] = pd.to_datetime(df["time"], format="%d.%m.%y-%H:%M:%S", errors="coerce")
164
164
 
165
+ # TODO: correct time is unavailable yet !
166
+
165
167
  # Drop row if start_identifier different than 00
166
168
  df = df[df["start_identifier"].astype(str) == "00"]
167
169
 
168
- # Clean raw_drop_number (ignore last "AC" character)
169
- df["raw_drop_number"] = df["raw_drop_number"].str[:1760]
170
+ # Remove checksum from raw_drop_number
171
+ df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]
170
172
 
171
173
  # Drop rows with invalid raw_drop_number
172
- df = df[df["raw_drop_number"].astype(str).str.len() == 1760]
174
+ df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
173
175
 
174
176
  # Drop columns not agreeing with DISDRODB L0 standards
175
177
  columns_to_drop = [
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env python3
2
+
2
3
  # -----------------------------------------------------------------------------.
3
4
  # Copyright (c) 2021-2023 DISDRODB developers
4
5
  #
@@ -15,7 +16,7 @@
15
16
  # You should have received a copy of the GNU General Public License
16
17
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18
  # -----------------------------------------------------------------------------.
18
- """Reader for the GID LPM network."""
19
+ """DISDRODB reader for GID LPM sensors not measuring wind."""
19
20
  import pandas as pd
20
21
 
21
22
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -29,32 +30,43 @@ def reader(
29
30
  ):
30
31
  """Reader."""
31
32
  ##------------------------------------------------------------------------.
32
- #### Define column names
33
+ #### - Define raw data headers
33
34
  column_names = ["TO_BE_SPLITTED"]
34
35
 
35
36
  ##------------------------------------------------------------------------.
36
37
  #### Define reader options
38
+ # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
37
39
  reader_kwargs = {}
40
+
38
41
  # - Define delimiter
39
- reader_kwargs["delimiter"] = "\n"
40
- # Skip first row as columns names
41
- reader_kwargs["header"] = None
42
- # - Avoid first column to become df index
42
+ reader_kwargs["delimiter"] = "\\n"
43
+
44
+ # - Avoid first column to become df index !!!
43
45
  reader_kwargs["index_col"] = False
46
+
47
+ # Since column names are expected to be passed explicitly, header is set to None
48
+ reader_kwargs["header"] = None
49
+
50
+ # - Number of rows to be skipped at the beginning of the file
51
+ reader_kwargs["skiprows"] = None
52
+
44
53
  # - Define behaviour when encountering bad lines
45
54
  reader_kwargs["on_bad_lines"] = "skip"
55
+
46
56
  # - Define reader engine
47
57
  # - C engine is faster
48
58
  # - Python engine is more feature-complete
49
59
  reader_kwargs["engine"] = "python"
60
+
50
61
  # - Define on-the-fly decompression of on-disk data
51
62
  # - Available: gzip, bz2, zip
52
63
  reader_kwargs["compression"] = "infer"
64
+
53
65
  # - Strings to recognize as NA/NaN and replace with standard NA flags
54
66
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
55
67
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
56
68
  # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
57
- reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
69
+ reader_kwargs["na_values"] = ["na", "", "error"]
58
70
 
59
71
  ##------------------------------------------------------------------------.
60
72
  #### Read the data
@@ -67,14 +79,17 @@ def reader(
67
79
 
68
80
  ##------------------------------------------------------------------------.
69
81
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
70
- # Split columns
71
- df = df["TO_BE_SPLITTED"].str.split(";", n=79, expand=True)
82
+ # Count number of delimiters to identify valid rows
83
+ df = df[df["TO_BE_SPLITTED"].str.count(";") == 519]
84
+
85
+ # Split by ; delimiter (before raw drop number)
86
+ df = df["TO_BE_SPLITTED"].str.split(";", expand=True, n=79)
72
87
 
73
88
  # Assign column names
74
89
  column_names = [
75
90
  "start_identifier",
91
+ "device_address",
76
92
  "sensor_serial_number",
77
- "software_version",
78
93
  "sensor_date",
79
94
  "sensor_time",
80
95
  "weather_code_synop_4677_5min",
@@ -155,25 +170,26 @@ def reader(
155
170
  ]
156
171
  df.columns = column_names
157
172
 
158
- # Remove checksum at end of raw_drop_number
159
- df["raw_drop_number"] = df["raw_drop_number"].str.slice(stop=1760)
173
+ # Remove checksum from raw_drop_number
174
+ df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=1, expand=True)[0]
175
+
176
+ # Define datetime "time" column
177
+ df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
178
+ df["time"] = pd.to_datetime(df["time"], format="%d.%m.%y-%H:%M:%S", errors="coerce")
160
179
 
161
- # Define 'time column
162
- df["time"] = df["sensor_date"].astype(str) + " " + df["sensor_time"].astype(str)
180
+ # Drop row if start_identifier different than 00
181
+ df = df[df["start_identifier"].astype(str) == "00"]
163
182
 
164
- # Convert time column to datetime
165
- df["time"] = pd.to_datetime(df["time"], format="%d.%m.%y %H:%M:%S", errors="coerce")
183
+ # Drop rows with invalid raw_drop_number
184
+ df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
166
185
 
167
186
  # Drop columns not agreeing with DISDRODB L0 standards
168
187
  columns_to_drop = [
169
188
  "start_identifier",
170
- "software_version",
189
+ "device_address",
171
190
  "sensor_serial_number",
172
191
  "sensor_date",
173
192
  "sensor_time",
174
193
  ]
175
194
  df = df.drop(columns=columns_to_drop)
176
- df = df.drop(columns=["sensor_date", "sensor_time"])
177
-
178
- # Return the dataframe adhering to DISDRODB L0 standards
179
195
  return df