disdrodb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +1 -1
- disdrodb/_version.py +2 -2
- disdrodb/api/io.py +12 -2
- disdrodb/data_transfer/download_data.py +145 -14
- disdrodb/l0/check_standards.py +15 -10
- disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
- disdrodb/l0/configs/LPM/l0a_encodings.yml +4 -4
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +22 -6
- disdrodb/l0/configs/LPM/l0b_encodings.yml +41 -0
- disdrodb/l0/configs/LPM/raw_data_format.yml +40 -0
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +41 -0
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +50 -10
- disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
- disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
- disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
- disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
- disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
- disdrodb/l0/configs/RD80/raw_data_format.yml +2 -6
- disdrodb/l0/l0b_nc_processing.py +1 -1
- disdrodb/l0/l0b_processing.py +12 -10
- disdrodb/l0/manuals/SWS250.pdf +0 -0
- disdrodb/l0/manuals/VPF730.pdf +0 -0
- disdrodb/l0/manuals/VPF750.pdf +0 -0
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +23 -13
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +3 -3
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +5 -3
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +36 -20
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
- disdrodb/l0/readers/LPM/KIT/CHWALA.py +225 -0
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
- disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +107 -0
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +125 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
- disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +133 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +138 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +9 -0
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +67 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
- disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +291 -0
- disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -1
- disdrodb/l0/standards.py +7 -4
- disdrodb/l0/template_tools.py +2 -2
- disdrodb/l1/encoding_attrs.py +30 -8
- disdrodb/l1/processing.py +6 -4
- disdrodb/l1/resampling.py +1 -1
- disdrodb/l1/routines.py +9 -7
- disdrodb/l2/empirical_dsd.py +100 -2
- disdrodb/l2/event.py +3 -3
- disdrodb/l2/processing.py +21 -12
- disdrodb/l2/processing_options.py +7 -7
- disdrodb/l2/routines.py +3 -3
- disdrodb/metadata/checks.py +15 -6
- disdrodb/metadata/manipulation.py +2 -2
- disdrodb/metadata/standards.py +83 -79
- disdrodb/metadata/writer.py +2 -2
- disdrodb/routines.py +246 -10
- disdrodb/scattering/routines.py +1 -1
- disdrodb/utils/dataframe.py +342 -0
- disdrodb/utils/directories.py +14 -2
- disdrodb/utils/xarray.py +83 -0
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/METADATA +34 -61
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/RECORD +77 -54
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/WHEEL +1 -1
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/entry_points.txt +3 -3
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
mor_visibility:
|
|
2
|
+
n_digits: 5
|
|
3
|
+
n_characters: 6
|
|
4
|
+
n_decimals: 1
|
|
5
|
+
n_naturals: 4
|
|
6
|
+
data_range:
|
|
7
|
+
- 0
|
|
8
|
+
- 9999.9
|
|
9
|
+
nan_flags: null
|
|
10
|
+
field_number: "20"
|
|
11
|
+
weather_code_synop_4680:
|
|
12
|
+
n_digits: 2
|
|
13
|
+
n_characters: 2
|
|
14
|
+
n_decimals: 0
|
|
15
|
+
n_naturals: 2
|
|
16
|
+
data_range:
|
|
17
|
+
- 0
|
|
18
|
+
- 89
|
|
19
|
+
nan_flags: null
|
|
20
|
+
field_number: "21"
|
|
21
|
+
weather_code_metar_4678:
|
|
22
|
+
n_digits: null
|
|
23
|
+
n_characters: null
|
|
24
|
+
n_decimals: null
|
|
25
|
+
n_naturals: null
|
|
26
|
+
data_range: null
|
|
27
|
+
nan_flags: null
|
|
28
|
+
field_number: "22"
|
|
29
|
+
weather_code_nws:
|
|
30
|
+
n_digits: null
|
|
31
|
+
n_characters: null
|
|
32
|
+
n_decimals: null
|
|
33
|
+
n_naturals: null
|
|
34
|
+
data_range: null
|
|
35
|
+
nan_flags: null
|
|
36
|
+
field_number: "23"
|
|
37
|
+
alarms:
|
|
38
|
+
n_digits: 0
|
|
39
|
+
n_characters: 31
|
|
40
|
+
n_decimals: 0
|
|
41
|
+
n_naturals: 0
|
|
42
|
+
data_range: null
|
|
43
|
+
nan_flags: null
|
|
44
|
+
field_number: "24"
|
|
45
|
+
sensor_status:
|
|
46
|
+
n_digits: 1
|
|
47
|
+
n_characters: 1
|
|
48
|
+
n_decimals: 0
|
|
49
|
+
n_naturals: 1
|
|
50
|
+
data_range:
|
|
51
|
+
- 0
|
|
52
|
+
- 4
|
|
53
|
+
nan_flags: null
|
|
54
|
+
valid_values:
|
|
55
|
+
- 0
|
|
56
|
+
- 1
|
|
57
|
+
- 2
|
|
58
|
+
- 3
|
|
59
|
+
- 4
|
|
60
|
+
field_number: "25"
|
|
61
|
+
air_temperature:
|
|
62
|
+
n_digits: 3
|
|
63
|
+
n_characters: 4
|
|
64
|
+
n_decimals: 2
|
|
65
|
+
n_naturals: 1
|
|
66
|
+
data_range:
|
|
67
|
+
- -99.9
|
|
68
|
+
- 99.9
|
|
69
|
+
nan_flags: null
|
|
70
|
+
field_number: "30"
|
|
71
|
+
relative_humidity:
|
|
72
|
+
n_digits: 3
|
|
73
|
+
n_characters: 4
|
|
74
|
+
n_decimals: 2
|
|
75
|
+
n_naturals: 2
|
|
76
|
+
data_range:
|
|
77
|
+
- 0
|
|
78
|
+
- 100
|
|
79
|
+
nan_flags: null
|
|
80
|
+
field_number: "30"
|
|
81
|
+
wetbulb_temperature:
|
|
82
|
+
n_digits: 3
|
|
83
|
+
n_characters: 4
|
|
84
|
+
n_decimals: 2
|
|
85
|
+
n_naturals: 1
|
|
86
|
+
data_range:
|
|
87
|
+
- -99.9
|
|
88
|
+
- 99.9
|
|
89
|
+
nan_flags: null
|
|
90
|
+
field_number: "30"
|
|
91
|
+
air_temperature_max:
|
|
92
|
+
n_digits: 3
|
|
93
|
+
n_characters: 4
|
|
94
|
+
n_decimals: 2
|
|
95
|
+
n_naturals: 1
|
|
96
|
+
data_range:
|
|
97
|
+
- -99.9
|
|
98
|
+
- 99.9
|
|
99
|
+
nan_flags: null
|
|
100
|
+
field_number: "31"
|
|
101
|
+
air_temperature_min:
|
|
102
|
+
n_digits: 3
|
|
103
|
+
n_characters: 4
|
|
104
|
+
n_decimals: 2
|
|
105
|
+
n_naturals: 1
|
|
106
|
+
data_range:
|
|
107
|
+
- -99.9
|
|
108
|
+
- 99.9
|
|
109
|
+
nan_flags: null
|
|
110
|
+
field_number: "31"
|
|
111
|
+
rainfall_rate:
|
|
112
|
+
n_digits: 5
|
|
113
|
+
n_characters: 6
|
|
114
|
+
n_decimals: 4
|
|
115
|
+
n_naturals: 1
|
|
116
|
+
data_range:
|
|
117
|
+
- 0
|
|
118
|
+
- 99999
|
|
119
|
+
nan_flags: null
|
|
120
|
+
field_number: "40"
|
|
121
|
+
rainfall_accumulated:
|
|
122
|
+
n_digits: 5
|
|
123
|
+
n_characters: 6
|
|
124
|
+
n_decimals: 4
|
|
125
|
+
n_naturals: 1
|
|
126
|
+
data_range:
|
|
127
|
+
- 0
|
|
128
|
+
- 99999
|
|
129
|
+
nan_flags: null
|
|
130
|
+
field_number: "41"
|
|
131
|
+
drop_size_distribution:
|
|
132
|
+
n_digits: 0
|
|
133
|
+
n_characters: 300
|
|
134
|
+
n_decimals: 0
|
|
135
|
+
n_naturals: 0
|
|
136
|
+
data_range: null
|
|
137
|
+
nan_flags: null
|
|
138
|
+
field_number: "42"
|
|
139
|
+
average_drop_velocity:
|
|
140
|
+
n_digits: 4
|
|
141
|
+
n_characters: 5
|
|
142
|
+
n_decimals: 3
|
|
143
|
+
n_naturals: 1
|
|
144
|
+
data_range: null
|
|
145
|
+
nan_flags: null
|
|
146
|
+
field_number: "43"
|
|
147
|
+
average_drop_size:
|
|
148
|
+
n_digits: 4
|
|
149
|
+
n_characters: 5
|
|
150
|
+
n_decimals: 3
|
|
151
|
+
n_naturals: 1
|
|
152
|
+
data_range: null
|
|
153
|
+
nan_flags: null
|
|
154
|
+
field_number: "43"
|
|
155
|
+
type_distribution:
|
|
156
|
+
n_digits: 0
|
|
157
|
+
n_characters: 11
|
|
158
|
+
n_decimals: 0
|
|
159
|
+
n_naturals: 0
|
|
160
|
+
data_range: null
|
|
161
|
+
nan_flags: null
|
|
162
|
+
field_number: "44"
|
|
163
|
+
raw_drop_number:
|
|
164
|
+
n_digits: 0
|
|
165
|
+
n_characters: 4624
|
|
166
|
+
n_decimals: 0
|
|
167
|
+
n_naturals: 0
|
|
168
|
+
data_range: null
|
|
169
|
+
nan_flags: null
|
|
170
|
+
dimension_order:
|
|
171
|
+
- diameter_bin_center
|
|
172
|
+
- velocity_bin_center
|
|
173
|
+
n_values: 1156
|
|
174
|
+
field_number: "47"
|
|
175
|
+
peak_to_pedestal_hist:
|
|
176
|
+
n_digits: 0
|
|
177
|
+
n_characters: 50
|
|
178
|
+
n_decimals: 0
|
|
179
|
+
n_naturals: 0
|
|
180
|
+
data_range: null
|
|
181
|
+
nan_flags: null
|
|
182
|
+
field_number: "48"
|
|
@@ -130,9 +130,7 @@ N0:
|
|
|
130
130
|
n_characters: 9
|
|
131
131
|
n_decimals: 4
|
|
132
132
|
n_naturals: 4
|
|
133
|
-
data_range:
|
|
134
|
-
- 0
|
|
135
|
-
- 9999.9999
|
|
133
|
+
data_range: null
|
|
136
134
|
nan_flags: null
|
|
137
135
|
field_number: "34"
|
|
138
136
|
slope:
|
|
@@ -140,9 +138,7 @@ slope:
|
|
|
140
138
|
n_characters: 9
|
|
141
139
|
n_decimals: 4
|
|
142
140
|
n_naturals: 1
|
|
143
|
-
data_range:
|
|
144
|
-
- 0
|
|
145
|
-
- 9.9999
|
|
141
|
+
data_range: null
|
|
146
142
|
nan_flags: null
|
|
147
143
|
field_number: "35"
|
|
148
144
|
Dmax:
|
disdrodb/l0/l0b_nc_processing.py
CHANGED
|
@@ -480,7 +480,7 @@ def sanitize_ds(
|
|
|
480
480
|
ds = set_nan_invalid_values(ds, sensor_name=sensor_name, logger=logger, verbose=verbose)
|
|
481
481
|
|
|
482
482
|
# Finalize dataset
|
|
483
|
-
ds = finalize_dataset(ds, sensor_name=sensor_name,
|
|
483
|
+
ds = finalize_dataset(ds, sensor_name=sensor_name, metadata=metadata)
|
|
484
484
|
|
|
485
485
|
# Return dataset
|
|
486
486
|
return ds
|
disdrodb/l0/l0b_processing.py
CHANGED
|
@@ -170,6 +170,8 @@ def _reshape_raw_spectrum(
|
|
|
170
170
|
{"diameter_bin_center": 32, "velocity_bin_center": 32}
|
|
171
171
|
For LPM
|
|
172
172
|
{"diameter_bin_center": 22, "velocity_bin_center": 20}
|
|
173
|
+
For PWS100
|
|
174
|
+
{"diameter_bin_center": 34, "velocity_bin_center": 34}
|
|
173
175
|
n_timesteps : int
|
|
174
176
|
Number of timesteps.
|
|
175
177
|
|
|
@@ -256,7 +258,7 @@ def retrieve_l0b_arrays(
|
|
|
256
258
|
|
|
257
259
|
# For key='raw_drop_number', if 2D spectrum, reshape to 2D matrix
|
|
258
260
|
# Example:
|
|
259
|
-
# - This applies i.e for PARSIVEL
|
|
261
|
+
# - This applies i.e for PARSIVEL*, LPM, PWS100
|
|
260
262
|
# - This does not apply to RD80
|
|
261
263
|
if key == "raw_drop_number" and len(dims_order) == 2:
|
|
262
264
|
arr, dims = _reshape_raw_spectrum(
|
|
@@ -416,15 +418,15 @@ def create_l0b_from_l0a(
|
|
|
416
418
|
Error if the DISDRODB L0B xarray dataset can not be created.
|
|
417
419
|
"""
|
|
418
420
|
# Retrieve sensor name
|
|
419
|
-
|
|
420
|
-
sensor_name =
|
|
421
|
+
metadata = metadata.copy()
|
|
422
|
+
sensor_name = metadata["sensor_name"]
|
|
421
423
|
|
|
422
424
|
# Define Dataset variables and coordinates
|
|
423
425
|
data_vars = _define_dataset_variables(df, sensor_name=sensor_name, logger=logger, verbose=verbose)
|
|
424
426
|
|
|
425
427
|
# Create xarray Dataset
|
|
426
428
|
ds = xr.Dataset(data_vars=data_vars)
|
|
427
|
-
ds = finalize_dataset(ds, sensor_name=sensor_name,
|
|
429
|
+
ds = finalize_dataset(ds, sensor_name=sensor_name, metadata=metadata)
|
|
428
430
|
return ds
|
|
429
431
|
|
|
430
432
|
|
|
@@ -432,7 +434,7 @@ def create_l0b_from_l0a(
|
|
|
432
434
|
#### L0B netCDF4 Writer
|
|
433
435
|
|
|
434
436
|
|
|
435
|
-
def set_geolocation_coordinates(ds,
|
|
437
|
+
def set_geolocation_coordinates(ds, metadata):
|
|
436
438
|
"""Add geolocation coordinates to dataset."""
|
|
437
439
|
# Assumption
|
|
438
440
|
# - If coordinate is present in L0A, overrides the one specified in the attributes
|
|
@@ -443,22 +445,22 @@ def set_geolocation_coordinates(ds, attrs):
|
|
|
443
445
|
for coord in coords:
|
|
444
446
|
# If coordinate not present, add it from dictionary
|
|
445
447
|
if coord not in ds:
|
|
446
|
-
ds = ds.assign_coords({coord:
|
|
448
|
+
ds = ds.assign_coords({coord: metadata.pop(coord, np.nan)})
|
|
447
449
|
# Else if set coordinates the variable in the dataset (present in the raw data)
|
|
448
450
|
else:
|
|
449
451
|
ds = ds.set_coords(coord)
|
|
450
|
-
_ =
|
|
452
|
+
_ = metadata.pop(coord, None)
|
|
451
453
|
|
|
452
454
|
# Set -9999 flag value to np.nan
|
|
453
455
|
for coord in coords:
|
|
454
456
|
ds[coord] = xr.where(ds[coord] == -9999, np.nan, ds[coord])
|
|
455
457
|
|
|
456
458
|
# Set attributes without geolocation coordinates
|
|
457
|
-
ds.attrs =
|
|
459
|
+
ds.attrs = metadata
|
|
458
460
|
return ds
|
|
459
461
|
|
|
460
462
|
|
|
461
|
-
def finalize_dataset(ds, sensor_name,
|
|
463
|
+
def finalize_dataset(ds, sensor_name, metadata):
|
|
462
464
|
"""Finalize DISDRODB L0B Dataset."""
|
|
463
465
|
# Ensure sorted by time
|
|
464
466
|
ds = ensure_sorted_by_time(ds)
|
|
@@ -467,7 +469,7 @@ def finalize_dataset(ds, sensor_name, attrs):
|
|
|
467
469
|
ds = ds.assign_coords(get_bin_coords_dict(sensor_name=sensor_name))
|
|
468
470
|
|
|
469
471
|
# Set geolocation coordinates and attributes
|
|
470
|
-
ds = set_geolocation_coordinates(ds,
|
|
472
|
+
ds = set_geolocation_coordinates(ds, metadata=metadata)
|
|
471
473
|
|
|
472
474
|
# Add dataset CRS coordinate
|
|
473
475
|
ds = add_dataset_crs_coords(ds)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -89,9 +89,11 @@ def reader(
|
|
|
89
89
|
if len(df) == 0 or len(df) == 1:
|
|
90
90
|
raise ValueError("No data to process.")
|
|
91
91
|
|
|
92
|
-
# Retrieve time
|
|
92
|
+
# Retrieve time column and format in datetime64
|
|
93
93
|
df_time = df[::2]
|
|
94
94
|
df_time = df_time.reset_index(drop=True)
|
|
95
|
+
df_time = df_time["TO_BE_PARSED"].str.replace("-", "", n=1)
|
|
96
|
+
df_time = pd.to_datetime(df_time, format="%Y-%m-%d %H:%M:%S", errors="coerce")
|
|
95
97
|
|
|
96
98
|
# Retrieve data
|
|
97
99
|
df_data = df[1::2]
|
|
@@ -100,12 +102,6 @@ def reader(
|
|
|
100
102
|
if len(df_time) != len(df_data):
|
|
101
103
|
raise ValueError("Likely corrupted data. Not same number of timesteps and data.")
|
|
102
104
|
|
|
103
|
-
# Remove starting - from timestep
|
|
104
|
-
df_time = df_time["TO_BE_PARSED"].str.replace("-", "", n=1)
|
|
105
|
-
|
|
106
|
-
# Format time in datetime64
|
|
107
|
-
df_time = pd.to_datetime(df_time, format="%Y-%m-%d %H:%M:%S", errors="coerce")
|
|
108
|
-
|
|
109
105
|
# Create dataframe
|
|
110
106
|
df_data["time"] = df_time.to_numpy()
|
|
111
107
|
|
|
@@ -199,19 +195,31 @@ def reader(
|
|
|
199
195
|
"number_particles_class_8_internal_data",
|
|
200
196
|
"number_particles_class_9",
|
|
201
197
|
"number_particles_class_9_internal_data",
|
|
202
|
-
"
|
|
198
|
+
"TO_BE_FURTHER_PROCESSED",
|
|
203
199
|
]
|
|
204
200
|
df.columns = column_names
|
|
205
201
|
|
|
202
|
+
# Extract the last variables remained in raw_drop_number
|
|
203
|
+
df_parsed = df["TO_BE_FURTHER_PROCESSED"].str.rsplit(";", n=6, expand=True)
|
|
204
|
+
df_parsed.columns = [
|
|
205
|
+
"raw_drop_number",
|
|
206
|
+
"air_temperature",
|
|
207
|
+
"relative_humidity",
|
|
208
|
+
"wind_speed",
|
|
209
|
+
"wind_direction",
|
|
210
|
+
"checksum",
|
|
211
|
+
"dummy",
|
|
212
|
+
]
|
|
213
|
+
|
|
214
|
+
# Assign columns to the original dataframe
|
|
215
|
+
df[df_parsed.columns] = df_parsed
|
|
216
|
+
|
|
206
217
|
# Drop row if start_identifier different than 00
|
|
207
218
|
df["time"] = df_data["time"]
|
|
208
219
|
df = df[df["start_identifier"].astype(str) == "00"]
|
|
209
220
|
|
|
210
|
-
# Clean raw_drop_number (ignore last 5 column)
|
|
211
|
-
df["raw_drop_number"] = df["raw_drop_number"].str[:1760]
|
|
212
|
-
|
|
213
221
|
# Drop rows with invalid raw_drop_number
|
|
214
|
-
df = df[df["raw_drop_number"].astype(str).str.len() ==
|
|
222
|
+
df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
|
|
215
223
|
|
|
216
224
|
# Drop columns not agreeing with DISDRODB L0 standards
|
|
217
225
|
columns_to_drop = [
|
|
@@ -220,7 +228,9 @@ def reader(
|
|
|
220
228
|
"sensor_serial_number",
|
|
221
229
|
"sensor_date",
|
|
222
230
|
"sensor_time",
|
|
231
|
+
"TO_BE_FURTHER_PROCESSED",
|
|
232
|
+
"checksum",
|
|
233
|
+
"dummy",
|
|
223
234
|
]
|
|
224
235
|
df = df.drop(columns=columns_to_drop)
|
|
225
|
-
|
|
226
236
|
return df
|
|
@@ -167,11 +167,11 @@ def reader(
|
|
|
167
167
|
# Drop row if start_identifier different than 00
|
|
168
168
|
df = df[df["start_identifier"].astype(str) == "00"]
|
|
169
169
|
|
|
170
|
-
#
|
|
171
|
-
df["raw_drop_number"] = df["raw_drop_number"].str[
|
|
170
|
+
# Remove checksum from raw_drop_number
|
|
171
|
+
df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]
|
|
172
172
|
|
|
173
173
|
# Drop rows with invalid raw_drop_number
|
|
174
|
-
df = df[df["raw_drop_number"].astype(str).str.len() ==
|
|
174
|
+
df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
|
|
175
175
|
|
|
176
176
|
# Drop columns not agreeing with DISDRODB L0 standards
|
|
177
177
|
columns_to_drop = [
|
|
@@ -162,14 +162,16 @@ def reader(
|
|
|
162
162
|
df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
|
|
163
163
|
df["time"] = pd.to_datetime(df["time"], format="%d.%m.%y-%H:%M:%S", errors="coerce")
|
|
164
164
|
|
|
165
|
+
# TODO: correct time is unavailable yet !
|
|
166
|
+
|
|
165
167
|
# Drop row if start_identifier different than 00
|
|
166
168
|
df = df[df["start_identifier"].astype(str) == "00"]
|
|
167
169
|
|
|
168
|
-
#
|
|
169
|
-
df["raw_drop_number"] = df["raw_drop_number"].str[
|
|
170
|
+
# Remove checksum from raw_drop_number
|
|
171
|
+
df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]
|
|
170
172
|
|
|
171
173
|
# Drop rows with invalid raw_drop_number
|
|
172
|
-
df = df[df["raw_drop_number"].astype(str).str.len() ==
|
|
174
|
+
df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
|
|
173
175
|
|
|
174
176
|
# Drop columns not agreeing with DISDRODB L0 standards
|
|
175
177
|
columns_to_drop = [
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
+
|
|
2
3
|
# -----------------------------------------------------------------------------.
|
|
3
4
|
# Copyright (c) 2021-2023 DISDRODB developers
|
|
4
5
|
#
|
|
@@ -15,7 +16,7 @@
|
|
|
15
16
|
# You should have received a copy of the GNU General Public License
|
|
16
17
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
17
18
|
# -----------------------------------------------------------------------------.
|
|
18
|
-
"""
|
|
19
|
+
"""DISDRODB reader for GID LPM sensors not measuring wind."""
|
|
19
20
|
import pandas as pd
|
|
20
21
|
|
|
21
22
|
from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
|
|
@@ -29,32 +30,43 @@ def reader(
|
|
|
29
30
|
):
|
|
30
31
|
"""Reader."""
|
|
31
32
|
##------------------------------------------------------------------------.
|
|
32
|
-
#### Define
|
|
33
|
+
#### - Define raw data headers
|
|
33
34
|
column_names = ["TO_BE_SPLITTED"]
|
|
34
35
|
|
|
35
36
|
##------------------------------------------------------------------------.
|
|
36
37
|
#### Define reader options
|
|
38
|
+
# - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
|
|
37
39
|
reader_kwargs = {}
|
|
40
|
+
|
|
38
41
|
# - Define delimiter
|
|
39
|
-
reader_kwargs["delimiter"] = "
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
# - Avoid first column to become df index
|
|
42
|
+
reader_kwargs["delimiter"] = "\\n"
|
|
43
|
+
|
|
44
|
+
# - Avoid first column to become df index !!!
|
|
43
45
|
reader_kwargs["index_col"] = False
|
|
46
|
+
|
|
47
|
+
# Since column names are expected to be passed explicitly, header is set to None
|
|
48
|
+
reader_kwargs["header"] = None
|
|
49
|
+
|
|
50
|
+
# - Number of rows to be skipped at the beginning of the file
|
|
51
|
+
reader_kwargs["skiprows"] = None
|
|
52
|
+
|
|
44
53
|
# - Define behaviour when encountering bad lines
|
|
45
54
|
reader_kwargs["on_bad_lines"] = "skip"
|
|
55
|
+
|
|
46
56
|
# - Define reader engine
|
|
47
57
|
# - C engine is faster
|
|
48
58
|
# - Python engine is more feature-complete
|
|
49
59
|
reader_kwargs["engine"] = "python"
|
|
60
|
+
|
|
50
61
|
# - Define on-the-fly decompression of on-disk data
|
|
51
62
|
# - Available: gzip, bz2, zip
|
|
52
63
|
reader_kwargs["compression"] = "infer"
|
|
64
|
+
|
|
53
65
|
# - Strings to recognize as NA/NaN and replace with standard NA flags
|
|
54
66
|
# - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
|
|
55
67
|
# '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
|
|
56
68
|
# 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
|
|
57
|
-
reader_kwargs["na_values"] = ["na", "", "error"
|
|
69
|
+
reader_kwargs["na_values"] = ["na", "", "error"]
|
|
58
70
|
|
|
59
71
|
##------------------------------------------------------------------------.
|
|
60
72
|
#### Read the data
|
|
@@ -67,14 +79,17 @@ def reader(
|
|
|
67
79
|
|
|
68
80
|
##------------------------------------------------------------------------.
|
|
69
81
|
#### Adapt the dataframe to adhere to DISDRODB L0 standards
|
|
70
|
-
#
|
|
71
|
-
df = df["TO_BE_SPLITTED"].str.
|
|
82
|
+
# Count number of delimiters to identify valid rows
|
|
83
|
+
df = df[df["TO_BE_SPLITTED"].str.count(";") == 519]
|
|
84
|
+
|
|
85
|
+
# Split by ; delimiter (before raw drop number)
|
|
86
|
+
df = df["TO_BE_SPLITTED"].str.split(";", expand=True, n=79)
|
|
72
87
|
|
|
73
88
|
# Assign column names
|
|
74
89
|
column_names = [
|
|
75
90
|
"start_identifier",
|
|
91
|
+
"device_address",
|
|
76
92
|
"sensor_serial_number",
|
|
77
|
-
"software_version",
|
|
78
93
|
"sensor_date",
|
|
79
94
|
"sensor_time",
|
|
80
95
|
"weather_code_synop_4677_5min",
|
|
@@ -155,25 +170,26 @@ def reader(
|
|
|
155
170
|
]
|
|
156
171
|
df.columns = column_names
|
|
157
172
|
|
|
158
|
-
# Remove checksum
|
|
159
|
-
df["raw_drop_number"] = df["raw_drop_number"].str.
|
|
173
|
+
# Remove checksum from raw_drop_number
|
|
174
|
+
df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=1, expand=True)[0]
|
|
175
|
+
|
|
176
|
+
# Define datetime "time" column
|
|
177
|
+
df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
|
|
178
|
+
df["time"] = pd.to_datetime(df["time"], format="%d.%m.%y-%H:%M:%S", errors="coerce")
|
|
160
179
|
|
|
161
|
-
#
|
|
162
|
-
df
|
|
180
|
+
# Drop row if start_identifier different than 00
|
|
181
|
+
df = df[df["start_identifier"].astype(str) == "00"]
|
|
163
182
|
|
|
164
|
-
#
|
|
165
|
-
df
|
|
183
|
+
# Drop rows with invalid raw_drop_number
|
|
184
|
+
df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
|
|
166
185
|
|
|
167
186
|
# Drop columns not agreeing with DISDRODB L0 standards
|
|
168
187
|
columns_to_drop = [
|
|
169
188
|
"start_identifier",
|
|
170
|
-
"
|
|
189
|
+
"device_address",
|
|
171
190
|
"sensor_serial_number",
|
|
172
191
|
"sensor_date",
|
|
173
192
|
"sensor_time",
|
|
174
193
|
]
|
|
175
194
|
df = df.drop(columns=columns_to_drop)
|
|
176
|
-
df = df.drop(columns=["sensor_date", "sensor_time"])
|
|
177
|
-
|
|
178
|
-
# Return the dataframe adhering to DISDRODB L0 standards
|
|
179
195
|
return df
|