disdrodb 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +22 -4
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/io.py +20 -18
  6. disdrodb/api/path.py +42 -77
  7. disdrodb/api/search.py +89 -23
  8. disdrodb/cli/disdrodb_create_summary.py +1 -1
  9. disdrodb/cli/disdrodb_run_l0.py +1 -1
  10. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  13. disdrodb/cli/disdrodb_run_l1.py +1 -1
  14. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  15. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  16. disdrodb/configs.py +30 -83
  17. disdrodb/constants.py +4 -3
  18. disdrodb/data_transfer/download_data.py +4 -2
  19. disdrodb/docs.py +2 -2
  20. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  21. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  22. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/global.yaml +6 -0
  29. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  30. disdrodb/etc/products/L2E/global.yaml +1 -1
  31. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/global.yaml +1 -1
  33. disdrodb/issue/checks.py +2 -2
  34. disdrodb/l0/check_configs.py +1 -1
  35. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  37. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  38. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  39. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  40. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  41. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  42. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  43. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  44. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  48. disdrodb/l0/l0_reader.py +2 -2
  49. disdrodb/l0/l0a_processing.py +6 -2
  50. disdrodb/l0/l0b_processing.py +26 -19
  51. disdrodb/l0/l0c_processing.py +17 -3
  52. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  54. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  55. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  56. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  57. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  58. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  59. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  60. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  61. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  62. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  63. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  64. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  65. disdrodb/l0/readers/{PARSIVEL2 → PARSIVEL}/NASA/LPVEX.py +16 -28
  66. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +3 -3
  68. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  69. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  73. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  75. disdrodb/l0/readers/PARSIVEL2/{GPM/GCPEX.py → NORWAY/UIB.py} +54 -29
  76. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/{PANGASA.py → PAGASA.py} +6 -3
  77. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +1 -1
  78. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  79. disdrodb/l0/readers/{PARSIVEL/GPM/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  80. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +51 -24
  81. disdrodb/l0/readers/{PARSIVEL/GPM/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  82. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  83. disdrodb/l1/beard_model.py +45 -1
  84. disdrodb/l1/fall_velocity.py +1 -6
  85. disdrodb/l1/filters.py +2 -0
  86. disdrodb/l1/processing.py +6 -5
  87. disdrodb/l1/resampling.py +101 -38
  88. disdrodb/l2/empirical_dsd.py +12 -8
  89. disdrodb/l2/processing.py +4 -3
  90. disdrodb/metadata/search.py +3 -4
  91. disdrodb/routines/l0.py +4 -4
  92. disdrodb/routines/l1.py +173 -60
  93. disdrodb/routines/l2.py +121 -269
  94. disdrodb/routines/options.py +347 -0
  95. disdrodb/routines/wrappers.py +9 -1
  96. disdrodb/scattering/axis_ratio.py +3 -0
  97. disdrodb/scattering/routines.py +1 -1
  98. disdrodb/summary/routines.py +765 -724
  99. disdrodb/utils/archiving.py +51 -44
  100. disdrodb/utils/attrs.py +1 -1
  101. disdrodb/utils/compression.py +4 -2
  102. disdrodb/utils/dask.py +35 -15
  103. disdrodb/utils/dict.py +33 -0
  104. disdrodb/utils/encoding.py +1 -1
  105. disdrodb/utils/manipulations.py +7 -1
  106. disdrodb/utils/routines.py +9 -8
  107. disdrodb/utils/time.py +9 -1
  108. disdrodb/viz/__init__.py +0 -13
  109. disdrodb/viz/plots.py +209 -0
  110. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  111. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/RECORD +124 -95
  112. disdrodb/l0/readers/PARSIVEL/GPM/LPVEX.py +0 -85
  113. /disdrodb/etc/products/L2M/{GAMMA_GS_ND_MAE.yaml → MODELS/GAMMA_GS_ND_MAE.yaml} +0 -0
  114. /disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +0 -0
  115. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_LOG_ND_MAE.yaml → MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml} +0 -0
  116. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_ND_MAE.yaml → MODELS/LOGNORMAL_GS_ND_MAE.yaml} +0 -0
  117. /disdrodb/etc/products/L2M/{LOGNORMAL_ML.yaml → MODELS/LOGNORMAL_ML.yaml} +0 -0
  118. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  119. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  120. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  121. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  122. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  124. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  125. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,208 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for NMBU BIOKLIM LPM sensor."""
20
+ import os
21
+
22
+ import pandas as pd
23
+
24
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
25
+ from disdrodb.l0.l0a_processing import read_raw_text_file
26
+
27
+
28
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### - Define raw data headers
    # Keep each telegram intact in a single column; fields are parsed below.
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {
        # Each line is one record: split on newlines only
        "delimiter": "\\n",
        # Avoid first column to become df index !!!
        "index_col": False,
        # Column names are given explicitly, so no header row is expected
        "header": None,
        # Do not skip any rows at the beginning of the file
        "skiprows": None,
        # Skip malformed lines instead of raising
        "on_bad_lines": "skip",
        # Use the python engine (more feature-complete than the C engine)
        "engine": "python",
        # On-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # Extra strings recognized as NA/NaN (on top of pandas defaults)
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Raise error if empty file
    if df.empty:
        raise ValueError(f"{filepath} is empty.")

    # Keep only telegrams with the expected number of delimiters
    has_expected_fields = df["TO_PARSE"].str.count(";").eq(525)
    df = df.loc[has_expected_fields]

    # Raise error if no valid telegram is left
    if df.empty:
        raise ValueError(f"No valid data in {filepath}.")

    # Split into columns at the first 80 ';' (the remainder is the raw spectrum)
    df = df["TO_PARSE"].str.split(";", n=80, expand=True)

    # Assign column names
    names = [
        "time",
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "weather_code_synop_4677_5min",
        "weather_code_synop_4680_5min",
        "weather_code_metar_4678_5min",
        "precipitation_rate_5min",
        "weather_code_synop_4677",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "precipitation_rate",
        "rainfall_rate",
        "snowfall_rate",
        "precipitation_accumulated",
        "mor_visibility",
        "reflectivity",
        "quality_index",
        "max_hail_diameter",
        "laser_status",
        "static_signal_status",
        "laser_temperature_analog_status",
        "laser_temperature_digital_status",
        "laser_current_analog_status",
        "laser_current_digital_status",
        "sensor_voltage_supply_status",
        "current_heating_pane_transmitter_head_status",
        "current_heating_pane_receiver_head_status",
        "temperature_sensor_status",
        "current_heating_voltage_supply_status",
        "current_heating_house_status",
        "current_heating_heads_status",
        "current_heating_carriers_status",
        "control_output_laser_power_status",
        "reserved_status",
        "temperature_interior",
        "laser_temperature",
        "laser_current_average",
        "control_voltage",
        "optical_control_voltage_output",
        "sensor_voltage_supply",
        "current_heating_pane_transmitter_head",
        "current_heating_pane_receiver_head",
        "temperature_ambient",
        "current_heating_voltage_supply",
        "current_heating_house",
        "current_heating_heads",
        "current_heating_carriers",
        "number_particles",
        "number_particles_internal_data",
        "number_particles_min_speed",
        "number_particles_min_speed_internal_data",
        "number_particles_max_speed",
        "number_particles_max_speed_internal_data",
        "number_particles_min_diameter",
        "number_particles_min_diameter_internal_data",
        "number_particles_no_hydrometeor",
        "number_particles_no_hydrometeor_internal_data",
        "number_particles_unknown_classification",
        "number_particles_unknown_classification_internal_data",
        "number_particles_class_1",
        "number_particles_class_1_internal_data",
        "number_particles_class_2",
        "number_particles_class_2_internal_data",
        "number_particles_class_3",
        "number_particles_class_3_internal_data",
        "number_particles_class_4",
        "number_particles_class_4_internal_data",
        "number_particles_class_5",
        "number_particles_class_5_internal_data",
        "number_particles_class_6",
        "number_particles_class_6_internal_data",
        "number_particles_class_7",
        "number_particles_class_7_internal_data",
        "number_particles_class_8",
        "number_particles_class_8_internal_data",
        "number_particles_class_9",
        "number_particles_class_9_internal_data",
        "raw_drop_number",
    ]
    df.columns = names

    # Strip the trailing checksum fields from raw_drop_number
    df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=6, expand=True)[0]

    # Build the datetime "time" column from the filename date and the logger time
    day = os.path.basename(filepath).split(".")[0]
    df["time"] = pd.to_datetime(day + "T" + df["time"], format="%Y-%m-%dT%H:%M:%S", errors="coerce")

    # Drop rows with invalid raw_drop_number
    df = df[df["raw_drop_number"].astype(str).str.len() == 1759]

    # Drop columns not agreeing with DISDRODB L0 standards
    df = df.drop(
        columns=[
            "start_identifier",
            "device_address",
            "sensor_serial_number",
            "sensor_date",
            "sensor_time",
        ],
    )
    return df
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for the MANCHESTER Withworth Meteorological Observatory LPM sensor."""
20
+ import pandas as pd
21
+
22
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
23
+ from disdrodb.l0.l0a_processing import read_raw_text_file
24
+
25
+
26
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### - Define raw data headers
    # Keep each telegram intact in a single column; fields are parsed below.
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {
        # Each line is one record: split on newlines only
        "delimiter": "\\n",
        # Avoid first column to become df index !!!
        "index_col": False,
        # File encoding
        "encoding": "latin",
        # Column names are given explicitly, so no header row is expected
        "header": None,
        # Do not skip any rows at the beginning of the file
        "skiprows": None,
        # Skip malformed lines instead of raising
        "on_bad_lines": "skip",
        # Use the python engine (more feature-complete than the C engine)
        "engine": "python",
        # On-the-fly decompression of on-disk data (gzip, bz2, zip)
        "compression": "infer",
        # Extra strings recognized as NA/NaN (on top of pandas defaults)
        "na_values": ["na", "", "error"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Raise error if empty file
    if df.empty:
        raise ValueError(f"{filepath} is empty.")

    # Keep only the two expected telegram variants:
    # short (45 delimiters) and full with spectrum (522 delimiters)
    df = df.loc[df["TO_PARSE"].str.count(";").isin([45, 522])]

    # Raise error if no valid telegram is left
    if df.empty:
        raise ValueError(f"No valid data in {filepath}.")

    # Split by ; delimiter (before raw drop number)
    # - Append a filler row with 82 delimiters so str.split always expands
    #   to the full column set even when only short telegrams are present
    filler = ";".join(["DUMMY"] * 82)
    df = pd.concat([df, pd.DataFrame({"TO_PARSE": [filler]})], ignore_index=True)
    df = df["TO_PARSE"].str.split(";", n=81, expand=True)
    # - Remove the filler row (last one)
    df = df.iloc[:-1, :]
    # - Short telegrams leave the trailing columns empty: fill them
    df.loc[:, 46:50] = df.loc[:, 46:50].fillna("NaN")
    df.loc[:, 51:] = df.loc[:, 51:].fillna("0")

    # Assign column names
    names = [
        "date",
        "time",
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "weather_code_synop_4677_5min",
        "weather_code_synop_4680_5min",
        "weather_code_metar_4678_5min",
        "precipitation_rate_5min",
        "weather_code_synop_4677",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "precipitation_rate",
        "rainfall_rate",
        "snowfall_rate",
        "precipitation_accumulated",
        "mor_visibility",
        "reflectivity",
        "quality_index",
        "max_hail_diameter",
        "laser_status",
        "static_signal_status",
        "laser_temperature_analog_status",
        "laser_temperature_digital_status",
        "laser_current_analog_status",
        "laser_current_digital_status",
        "sensor_voltage_supply_status",
        "current_heating_pane_transmitter_head_status",
        "current_heating_pane_receiver_head_status",
        "temperature_sensor_status",
        "current_heating_voltage_supply_status",
        "current_heating_house_status",
        "current_heating_heads_status",
        "current_heating_carriers_status",
        "control_output_laser_power_status",
        "reserved_status",
        "temperature_interior",
        "laser_temperature",
        "laser_current_average",
        "control_voltage",
        "optical_control_voltage_output",
        "sensor_voltage_supply",
        "current_heating_pane_transmitter_head",
        "current_heating_pane_receiver_head",
        "temperature_ambient",
        "current_heating_voltage_supply",
        "current_heating_house",
        "current_heating_heads",
        "current_heating_carriers",
        "number_particles",
        "number_particles_internal_data",
        "number_particles_min_speed",
        "number_particles_min_speed_internal_data",
        "number_particles_max_speed",
        "number_particles_max_speed_internal_data",
        "number_particles_min_diameter",
        "number_particles_min_diameter_internal_data",
        "number_particles_no_hydrometeor",
        "number_particles_no_hydrometeor_internal_data",
        "number_particles_unknown_classification",
        "number_particles_unknown_classification_internal_data",
        "number_particles_class_1",
        "number_particles_class_1_internal_data",
        "number_particles_class_2",
        "number_particles_class_2_internal_data",
        "number_particles_class_3",
        "number_particles_class_3_internal_data",
        "number_particles_class_4",
        "number_particles_class_4_internal_data",
        "number_particles_class_5",
        "number_particles_class_5_internal_data",
        "number_particles_class_6",
        "number_particles_class_6_internal_data",
        "number_particles_class_7",
        "number_particles_class_7_internal_data",
        "number_particles_class_8",
        "number_particles_class_8_internal_data",
        "number_particles_class_9",
        "number_particles_class_9_internal_data",
        "raw_drop_number",
    ]
    df.columns = names

    # Build the datetime "time" column from the logger date and time fields
    df["time"] = pd.to_datetime(df["date"] + " " + df["time"], format="%d/%m/%Y %H:%M:%S", errors="coerce")

    # Strip the trailing checksum fields from raw_drop_number
    df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]

    # Keep rows whose raw_drop_number is either the short-telegram filler
    # (length 1) or a full spectrum (length 1759)
    df = df[df["raw_drop_number"].astype(str).str.len().isin([1, 1759])]

    # Drop columns not agreeing with DISDRODB L0 standards
    df = df.drop(
        columns=[
            "start_identifier",
            "device_address",
            "sensor_serial_number",
            "sensor_date",
            "sensor_time",
            "date",
        ],
    )
    return df
@@ -0,0 +1,229 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # -----------------------------------------------------------------------------.
4
+ # Copyright (c) 2021-2023 DISDRODB developers
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ # -----------------------------------------------------------------------------.
19
+ """DISDRODB reader for CHARLESTON experiment LPM sensors."""
20
+ import os
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
26
+ from disdrodb.l0.l0a_processing import read_raw_text_file
27
+
28
+
29
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### - Define raw data headers
    # Keep each telegram intact in a single column; fields are parsed below.
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
    reader_kwargs = {}

    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"

    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False

    # - Define encoding
    reader_kwargs["encoding"] = "ISO-8859-1"

    # - Since column names are expected to be passed explicitly, header is set to None
    reader_kwargs["header"] = None

    # - Number of rows to be skipped at the beginning of the file
    reader_kwargs["skiprows"] = None

    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"

    # - Define reader engine
    # - C engine is faster
    # - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"

    # - Define on-the-fly decompression of on-disk data
    # - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"

    # - Strings to recognize as NA/NaN and replace with standard NA flags
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Raise error if empty file
    if len(df) == 0:
        raise ValueError(f"{filepath} is empty.")

    # Select only rows with expected number of delimiters
    df = df[df["TO_PARSE"].str.count(";") == 520]

    # Raise error if no data left
    if len(df) == 0:
        raise ValueError(f"No valid data in {filepath}.")

    # Split by ; delimiter (before raw drop number)
    df = df["TO_PARSE"].str.split(";", expand=True, n=79)

    # Assign column names
    names = [
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
        "weather_code_synop_4677_5min",
        "weather_code_synop_4680_5min",
        "weather_code_metar_4678_5min",
        "precipitation_rate_5min",
        "weather_code_synop_4677",
        "weather_code_synop_4680",
        "weather_code_metar_4678",
        "precipitation_rate",
        "rainfall_rate",
        "snowfall_rate",
        "precipitation_accumulated",
        "mor_visibility",
        "reflectivity",
        "quality_index",
        "max_hail_diameter",
        "laser_status",
        "static_signal_status",
        "laser_temperature_analog_status",
        "laser_temperature_digital_status",
        "laser_current_analog_status",
        "laser_current_digital_status",
        "sensor_voltage_supply_status",
        "current_heating_pane_transmitter_head_status",
        "current_heating_pane_receiver_head_status",
        "temperature_sensor_status",
        "current_heating_voltage_supply_status",
        "current_heating_house_status",
        "current_heating_heads_status",
        "current_heating_carriers_status",
        "control_output_laser_power_status",
        "reserved_status",
        "temperature_interior",
        "laser_temperature",
        "laser_current_average",
        "control_voltage",
        "optical_control_voltage_output",
        "sensor_voltage_supply",
        "current_heating_pane_transmitter_head",
        "current_heating_pane_receiver_head",
        "temperature_ambient",
        "current_heating_voltage_supply",
        "current_heating_house",
        "current_heating_heads",
        "current_heating_carriers",
        "number_particles",
        "number_particles_internal_data",
        "number_particles_min_speed",
        "number_particles_min_speed_internal_data",
        "number_particles_max_speed",
        "number_particles_max_speed_internal_data",
        "number_particles_min_diameter",
        "number_particles_min_diameter_internal_data",
        "number_particles_no_hydrometeor",
        "number_particles_no_hydrometeor_internal_data",
        "number_particles_unknown_classification",
        "number_particles_unknown_classification_internal_data",
        "number_particles_class_1",
        "number_particles_class_1_internal_data",
        "number_particles_class_2",
        "number_particles_class_2_internal_data",
        "number_particles_class_3",
        "number_particles_class_3_internal_data",
        "number_particles_class_4",
        "number_particles_class_4_internal_data",
        "number_particles_class_5",
        "number_particles_class_5_internal_data",
        "number_particles_class_6",
        "number_particles_class_6_internal_data",
        "number_particles_class_7",
        "number_particles_class_7_internal_data",
        "number_particles_class_8",
        "number_particles_class_8_internal_data",
        "number_particles_class_9",
        "number_particles_class_9_internal_data",
        "raw_drop_number",
    ]
    df.columns = names

    # Remove checksum from raw_drop_number
    df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]

    # Remove corrupted characters
    df = df.replace("°", "", regex=True)  # station N

    # Keep only rows where sensor_time matches HH:MM:SS format
    df = df[df["sensor_time"].astype(str).str.match(r"^\d{2}:\d{2}:\d{2}$")]

    # Keep only rows with valid spectrum
    df = df[df["raw_drop_number"].astype(str).str.match(r"^(?:\d{3};)*\d{3};?$")]
    if len(df) == 0:
        raise ValueError("Spectra is corrupted")

    # Define datetime "time" column
    # - Define start time from the filename: <prefix>_<seconds offset>_<doy>_<yy>
    filename = os.path.basename(filepath)
    _, delta_dt, doy, year = filename.split(".")[0].split("_")
    start_time = pd.to_datetime(f"{year}_{doy}", format="%y_%j") + pd.to_timedelta(int(delta_dt), unit="s")
    # - Define timedelta based on sensor_time
    # - sensor_time wraps at midnight: add +24h per rollover observed so far.
    #   Vectorized with a cumulative count of negative time steps (replaces the
    #   previous O(rollovers x rows) loop over rollover indices).
    dt = pd.to_timedelta(df["sensor_time"]).to_numpy().astype("m8[s]")
    rollovers = np.concatenate(([0], np.cumsum(np.diff(dt) < np.timedelta64(0, "s"))))
    dt = dt + rollovers * np.timedelta64(24, "h")
    # - Make offsets relative to the first record (which matches start_time)
    dt = dt - dt[0]
    # - Define measurement datetime
    df["time"] = start_time + dt

    # Drop rows with invalid raw_drop_number
    df = df[df["raw_drop_number"].astype(str).str.len() == 1759]

    # Drop columns not agreeing with DISDRODB L0 standards
    variables_to_drop = [
        "start_identifier",
        "device_address",
        "sensor_serial_number",
        "sensor_date",
        "sensor_time",
    ]
    df = df.drop(columns=variables_to_drop)
    return df