disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +5 -5
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  37. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  38. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  39. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  40. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  41. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  42. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  43. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  44. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  45. disdrodb/l0/l0a_processing.py +30 -30
  46. disdrodb/l0/l0b_nc_processing.py +108 -2
  47. disdrodb/l0/l0b_processing.py +4 -4
  48. disdrodb/l0/l0c_processing.py +5 -13
  49. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  50. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  51. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  52. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  53. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  54. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  55. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  56. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  57. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  58. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  59. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  60. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  61. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  63. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  64. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  65. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  66. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  67. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  68. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  69. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  70. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  71. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  72. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
  73. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  74. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  75. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  76. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
  77. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  78. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  79. disdrodb/l0/routines.py +105 -14
  80. disdrodb/l1/__init__.py +5 -0
  81. disdrodb/l1/filters.py +34 -20
  82. disdrodb/l1/processing.py +45 -44
  83. disdrodb/l1/resampling.py +77 -66
  84. disdrodb/l1/routines.py +35 -43
  85. disdrodb/l1_env/routines.py +18 -3
  86. disdrodb/l2/__init__.py +7 -0
  87. disdrodb/l2/empirical_dsd.py +58 -10
  88. disdrodb/l2/event.py +27 -120
  89. disdrodb/l2/processing.py +267 -116
  90. disdrodb/l2/routines.py +618 -254
  91. disdrodb/metadata/standards.py +3 -1
  92. disdrodb/psd/fitting.py +463 -144
  93. disdrodb/psd/models.py +8 -5
  94. disdrodb/routines.py +3 -3
  95. disdrodb/scattering/__init__.py +16 -4
  96. disdrodb/scattering/axis_ratio.py +56 -36
  97. disdrodb/scattering/permittivity.py +486 -0
  98. disdrodb/scattering/routines.py +701 -159
  99. disdrodb/summary/__init__.py +17 -0
  100. disdrodb/summary/routines.py +4120 -0
  101. disdrodb/utils/attrs.py +68 -125
  102. disdrodb/utils/compression.py +30 -1
  103. disdrodb/utils/dask.py +59 -8
  104. disdrodb/utils/dataframe.py +61 -7
  105. disdrodb/utils/directories.py +35 -15
  106. disdrodb/utils/encoding.py +33 -19
  107. disdrodb/utils/logger.py +13 -6
  108. disdrodb/utils/manipulations.py +71 -0
  109. disdrodb/utils/subsetting.py +214 -0
  110. disdrodb/utils/time.py +165 -19
  111. disdrodb/utils/writer.py +20 -7
  112. disdrodb/utils/xarray.py +2 -4
  113. disdrodb/viz/__init__.py +13 -0
  114. disdrodb/viz/plots.py +327 -0
  115. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  116. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
  117. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  118. disdrodb/l1/encoding_attrs.py +0 -642
  119. disdrodb/l2/processing_options.py +0 -213
  120. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  121. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  122. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  123. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,144 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import pandas as pd
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0a_processing import read_raw_text_file
22
+
23
+
24
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw text file and return a dataframe of integral variables and raw arrays.

    Every line of the file is read as a single string and split into
    ``date``, ``time`` and a remaining payload. Payloads of 94 characters are
    parsed as the integral variables; payloads of 4545 characters are parsed
    as the raw drop arrays and merged onto the integral variables by time.

    Parameters
    ----------
    filepath : str
        Path of the raw file to read.
    logger : optional
        Logger forwarded to ``read_raw_text_file``. The default is ``None``.

    Returns
    -------
    pandas.DataFrame
        One row per integral-variables record, with a ``time`` column and the
        ``raw_drop_concentration``, ``raw_drop_average_velocity`` and
        ``raw_drop_number`` columns (empty strings where no raw spectrum with
        the same timestamp is available).
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter (each line is read as a single field)
    reader_kwargs["delimiter"] = "\\n"
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Split each record into date, time and the remaining payload
    df = df["TO_PARSE"].str.split(",", n=2, expand=True)
    df.columns = ["date", "time", "TO_PARSE"]

    # Define time (unparsable timestamps become NaT)
    datetime_str = df["date"] + " " + df["time"]
    df["time"] = pd.to_datetime(datetime_str, format="%d.%m.%Y %H:%M:%S", errors="coerce")

    # The payload length discriminates the two record types
    payload_length = df["TO_PARSE"].str.len()

    # Identify rows with integral variables
    df_vars = df[payload_length == 94]

    # Split and assign column names
    df_data = df_vars["TO_PARSE"].str.split(",", expand=True)
    var_names = [
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "reflectivity_32bit",
        "mor_visibility",
        "laser_amplitude",
        "number_particles",
        "sensor_temperature",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "sensor_status",
        "sensor_serial_number",
        "sensor_temperature_receiver",
        "sensor_temperature_trasmitter",
        "snowfall_rate",
        "rain_kinetic_energy",
    ]
    df_data.columns = var_names
    df_data["time"] = df_vars["time"]
    df_data = df_data.drop(columns="sensor_serial_number")

    # Initialize empty arrays
    # --> 0 values array produced in L0B
    df_data["raw_drop_concentration"] = ""
    df_data["raw_drop_average_velocity"] = ""
    df_data["raw_drop_number"] = ""

    # Identify raw spectrum
    df_raw_spectrum = df[payload_length == 4545]

    # Derive raw drop arrays
    def split_string(s):
        """Return the (concentration, velocity, drop number) strings of a spectrum payload."""
        vals = [v.strip() for v in s.split(",")]
        c1 = ",".join(vals[:32])
        c2 = ",".join(vals[32:64])
        # The drop number values are "/"-separated and may contain "r" characters
        c3 = ",".join(vals[64].replace("r", "").split("/"))
        return c1, c2, c3

    # Build the spectrum dataframe from scratch instead of assigning columns
    # onto the filtered slice: this avoids the SettingWithCopyWarning and stays
    # well-defined (empty dataframe with the expected columns) when the file
    # contains no raw spectrum record.
    df_raw_spectrum = pd.DataFrame(
        [split_string(s) for s in df_raw_spectrum["TO_PARSE"]],
        columns=["raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"],
        index=pd.Index(df_raw_spectrum["time"], name="time"),
    )

    # Add raw array (matched on time)
    df = df_data.set_index("time")
    df.update(df_raw_spectrum)

    # Set back time as column
    df = df.reset_index()

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -0,0 +1,201 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import pandas as pd
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0a_processing import read_raw_text_file
22
+
23
+
24
def reader_parsivel(filepath, logger):
    """Read the comma-separated integral-variables file of the CR1000 data logger.

    Parameters
    ----------
    filepath : str
        Path of the file to read.
    logger
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with a datetime ``time`` column and the sensor variables.
        The ``RECORD`` and ``V_Batt_Min`` columns are removed.
    """
    # Columns, in the order they are assigned to the fields of each line
    column_names = [
        "time",
        "RECORD",
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "reflectivity_32bit",
        "mor_visibility",
        "laser_amplitude",
        "number_particles",
        "sensor_temperature",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "sensor_status",
        "rain_kinetic_energy",
        "V_Batt_Min",
    ]

    # Options forwarded to the text reader
    reader_kwargs = {
        # Fields are comma-separated
        "delimiter": ",",
        # No line of the file is used as column names
        "header": None,
        # The first 4 lines of the file are skipped
        "skiprows": 4,
        # "ISO-8859-1"
        "encoding": "latin",
        # Avoid first column to become df index
        "index_col": False,
        # Malformed lines are skipped
        "on_bad_lines": "skip",
        # The python engine is slower than the C one but more feature-complete
        "engine": "python",
        # On-the-fly decompression (gzip, bz2, zip) inferred from the file
        "compression": "infer",
        # Additional strings recognized as NA/NaN
        "na_values": ["na", "", "error"],
    }

    # Read the data
    df_logger = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    # Define time (unparsable timestamps become NaT)
    df_logger["time"] = pd.to_datetime(df_logger["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Drop columns not agreeing with DISDRODB L0 standards
    return df_logger.drop(columns=["RECORD", "V_Batt_Min"])
92
+
93
+
94
def reader_spectrum(filepath, logger):
    """Read the raw-spectrum file of the CR1000 data logger.

    Every line is read as a single string and split into ``time``, ``RECORD``
    and the remaining comma-separated values, from which the raw drop arrays
    are derived.

    Parameters
    ----------
    filepath : str
        Path of the file to read.
    logger
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with the columns ``time``, ``raw_drop_concentration``,
        ``raw_drop_average_velocity`` and ``raw_drop_number``.
    """
    # Options forwarded to the text reader
    reader_kwargs = {
        # Each line is read as a single field
        "delimiter": "\\n",
        # No line of the file is used as column names
        "header": None,
        # The first 4 lines of the file are skipped
        "skiprows": 4,
        # "ISO-8859-1"
        "encoding": "latin",
        # Avoid first column to become df index
        "index_col": False,
        # Malformed lines are skipped
        "on_bad_lines": "skip",
        # The python engine is slower than the C one but more feature-complete
        "engine": "python",
        # On-the-fly decompression (gzip, bz2, zip) inferred from the file
        "compression": "infer",
        # Additional strings recognized as NA/NaN
        "na_values": ["na", "", "error"],
    }

    # Read the data
    df_lines = read_raw_text_file(
        filepath=filepath,
        column_names=["TO_PARSE"],
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    # Separate the timestamp and the record number from the values
    df = df_lines["TO_PARSE"].str.split(",", n=2, expand=True)
    df.columns = ["time", "RECORD", "TO_PARSE"]

    # Define time: the timestamp is wrapped in double quotes in the raw line
    unquoted_time = df["time"].str.strip('"')
    df["time"] = pd.to_datetime(unquoted_time, format="%Y-%m-%d %H:%M:%S", errors="coerce")

    array_names = ("raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number")

    def split_string(s):
        """Split the values of a line into the three raw drop array strings."""
        fields = [field.strip() for field in s.split(",")]
        # First 32 values, with every "-10" occurrence replaced by "0"
        concentration = ",".join(fields[:32]).replace("-10", "0")
        # Next 30 values, preceded by two "0" values
        velocity = "0,0," + ",".join(fields[32:62])
        # All the remaining values
        drop_number = ",".join(fields[62:])
        return pd.Series(dict(zip(array_names, (concentration, velocity, drop_number))))

    # Derive raw drop arrays
    df_arrays = df["TO_PARSE"].apply(split_string)
    for array_name in array_names:
        df[array_name] = df_arrays[array_name]

    # Drop columns not agreeing with DISDRODB L0 standards
    return df.drop(columns=["TO_PARSE", "RECORD"])
169
+
170
+
171
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a CR1000 data logger file pair and return a single dataframe.

    The integral variables are read from ``filepath``. The raw drop arrays are
    read from the file whose path is obtained by replacing ``"Parsivel"`` with
    ``"Spectre"`` in ``filepath``, and are merged onto the integral variables
    by time.

    Parameters
    ----------
    filepath : str
        Path of the integral-variables file.
    logger : optional
        Logger forwarded to the two file readers. The default is ``None``.

    Returns
    -------
    pandas.DataFrame
        Integral variables with the raw drop array columns (empty strings
        where no spectrum with the same timestamp is available).
    """
    # Path of the companion file holding the raw spectrum
    path_spectrum = filepath.replace("Parsivel", "Spectre")

    # Integral variables
    df_integral = reader_parsivel(filepath, logger=logger)

    # Raw arrays default to empty strings
    # --> 0 values array produced in L0B
    for array_name in ("raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"):
        df_integral[array_name] = ""

    # Raw spectrum of the corresponding timesteps
    df_spectrum = reader_spectrum(path_spectrum, logger=logger)

    # Fill the raw arrays where the timestamps match
    df_integral = df_integral.set_index("time")
    df_integral.update(df_spectrum.set_index("time"))

    # Return the dataframe adhering to DISDRODB L0 standards, with time as column
    return df_integral.reset_index()
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import pandas as pd
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0a_processing import read_raw_text_file
22
+
23
+
24
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Read a raw text file and return a dataframe of integral variables and raw arrays.

    Every line of the file is read as a single string and split into
    ``date``, ``time`` and a remaining payload. Payloads of 61 characters are
    parsed as the integral variables; payloads of 4545 characters are parsed
    as the raw drop arrays and merged onto the integral variables by time.

    Parameters
    ----------
    filepath : str
        Path of the raw file to read.
    logger : optional
        Logger forwarded to ``read_raw_text_file``. The default is ``None``.

    Returns
    -------
    pandas.DataFrame
        One row per integral-variables record, with a ``time`` column and the
        ``raw_drop_concentration``, ``raw_drop_average_velocity`` and
        ``raw_drop_number`` columns (empty strings where no raw spectrum with
        the same timestamp is available).
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter (each line is read as a single field)
    reader_kwargs["delimiter"] = "\\n"
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Split each record into date, time and the remaining payload
    df = df["TO_PARSE"].str.split(",", n=2, expand=True)
    df.columns = ["date", "time", "TO_PARSE"]

    # Define time (unparsable timestamps become NaT)
    datetime_str = df["date"] + " " + df["time"]
    df["time"] = pd.to_datetime(datetime_str, format="%d.%m.%Y %H:%M:%S", errors="coerce")

    # The payload length discriminates the two record types
    payload_length = df["TO_PARSE"].str.len()

    # Identify rows with integral variables
    df_vars = df[payload_length == 61]

    # Split and assign column names
    df_data = df_vars["TO_PARSE"].str.split(",", expand=True)
    var_names = [
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "reflectivity_32bit",
        "mor_visibility",
        "laser_amplitude",
        "number_particles",
        "sensor_temperature",
        "sensor_heating_current",
        "sensor_battery_voltage",
    ]
    df_data.columns = var_names
    df_data["time"] = df_vars["time"]

    # Initialize empty arrays
    # --> 0 values array produced in L0B
    df_data["raw_drop_concentration"] = ""
    df_data["raw_drop_average_velocity"] = ""
    df_data["raw_drop_number"] = ""

    # Identify raw spectrum
    df_raw_spectrum = df[payload_length == 4545]

    # Derive raw drop arrays
    def split_string(s):
        """Return the (concentration, velocity, drop number) strings of a spectrum payload."""
        vals = [v.strip() for v in s.split(",")]
        c1 = ",".join(vals[:32])
        c2 = ",".join(vals[32:64])
        # The drop number values are "/"-separated and may contain "r" characters
        c3 = ",".join(vals[64].replace("r", "").split("/"))
        return c1, c2, c3

    # Build the spectrum dataframe from scratch instead of assigning columns
    # onto the filtered slice: this avoids the SettingWithCopyWarning and stays
    # well-defined (empty dataframe with the expected columns) when the file
    # contains no raw spectrum record.
    df_raw_spectrum = pd.DataFrame(
        [split_string(s) for s in df_raw_spectrum["TO_PARSE"]],
        columns=["raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"],
        index=pd.Index(df_raw_spectrum["time"], name="time"),
    )

    # Add raw array (matched on time)
    df = df_data.set_index("time")
    df.update(df_raw_spectrum)

    # Set back time as column
    df = df.reset_index()

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -29,33 +29,34 @@ def reader(
29
29
  """Reader."""
30
30
  ##------------------------------------------------------------------------.
31
31
  #### Define column names
32
- column_names = ["time", "epoch_time", "TO_BE_PARSED"]
32
+ column_names = ["TO_PARSE"]
33
33
 
34
34
  ##------------------------------------------------------------------------.
35
35
  #### Define reader options
36
36
  reader_kwargs = {}
37
- # - Define delimiter
38
- reader_kwargs["delimiter"] = ";"
39
- # - Skip first row as columns names
37
+ # Skip first row as columns names
40
38
  reader_kwargs["header"] = None
41
- # - Avoid first column to become df index
39
+ # Skip file with encoding errors
40
+ reader_kwargs["encoding_errors"] = "ignore"
41
+ # - Define delimiter
42
+ reader_kwargs["delimiter"] = "\\n"
43
+ # - Avoid first column to become df index !!!
42
44
  reader_kwargs["index_col"] = False
43
45
  # - Define behaviour when encountering bad lines
44
46
  reader_kwargs["on_bad_lines"] = "skip"
45
- # - Define parser engine
47
+ # - Define reader engine
46
48
  # - C engine is faster
47
49
  # - Python engine is more feature-complete
48
50
  reader_kwargs["engine"] = "python"
49
51
  # - Define on-the-fly decompression of on-disk data
50
52
  # - Available: gzip, bz2, zip
51
53
  reader_kwargs["compression"] = "infer"
52
- # reader_kwargs['zipped'] = False
53
- # reader_kwargs['zipped'] = True
54
54
  # - Strings to recognize as NA/NaN and replace with standard NA flags
55
55
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
56
56
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
57
57
  # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
58
- reader_kwargs["na_values"] = ["na", "", "error", "-.-", " NA"]
58
+ reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
59
+
59
60
  ##------------------------------------------------------------------------.
60
61
  #### Read the data
61
62
  df = read_raw_text_file(
@@ -67,98 +68,77 @@ def reader(
67
68
 
68
69
  ##------------------------------------------------------------------------.
69
70
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
70
- # NOTE: Station 8 has all raw_drop_number corrupted, so it can't be used
71
-
72
- # Remove rows that have a corrupted "TO_BE_PARSED" column
73
- # - PAR001, PAR002 have length 3726 (no station_name)
74
- # - PAR007 have length 3736 ()
75
- df = df.loc[df["TO_BE_PARSED"].astype(str).str.len() >= 3726]
76
- df = df.loc[df["TO_BE_PARSED"].astype(str).str.len() <= 3736]
71
+ # Define 'time' datetime
77
72
 
78
- # Convert 'time' column to datetime
79
- df_time = pd.to_datetime(df["time"], format="%Y%m%d-%H%M%S", errors="coerce")
73
+ # Split the columns
74
+ df["TO_PARSE"].iloc[0:5].str.split(";", n=16, expand=True).iloc[0]
80
75
 
81
- # Strip values from start and end of the string
82
- df["TO_BE_PARSED"] = df["TO_BE_PARSED"].str.lstrip("b'").str.rstrip("'").str.rstrip("\\r\\n'") # noqa: B005
83
-
84
- # Split the column 'TO_BE_PARSED'
85
- df_to_parse = df["TO_BE_PARSED"].str.split(";", expand=True, n=99)
86
-
87
- # Retrieve DISDRODB compliant columns
88
- df = df_to_parse.iloc[:, 0:35]
76
+ df = df["TO_PARSE"].str.split(";", n=16, expand=True)
89
77
 
90
78
  # Assign column names
91
- column_names = [
92
- "rainfall_rate_32bit",
93
- "rainfall_accumulated_32bit",
94
- "weather_code_synop_4680",
95
- "weather_code_synop_4677",
96
- "weather_code_metar_4678",
97
- "weather_code_nws",
98
- "reflectivity_32bit",
99
- "mor_visibility",
100
- "sample_interval",
101
- "laser_amplitude",
102
- "number_particles",
103
- "sensor_temperature",
79
+ names = [
104
80
  "sensor_serial_number",
105
- "firmware_iop",
106
- "firmware_dsp",
81
+ "sensor_status",
82
+ "laser_amplitude",
107
83
  "sensor_heating_current",
108
84
  "sensor_battery_voltage",
109
- "sensor_status",
110
- "sensor_time_measurement_start",
85
+ "dummy_date",
111
86
  "sensor_time",
112
87
  "sensor_date",
113
- "station_name",
114
- "station_number",
115
- "rainfall_amount_absolute_32bit",
116
- "error_code",
117
- "sensor_temperature_pcb",
118
- "sensor_temperature_receiver",
119
- "sensor_temperature_trasmitter",
120
- "rainfall_rate_16_bit_30",
121
- "rainfall_rate_16_bit_1200",
88
+ "sensor_temperature",
89
+ "number_particles",
90
+ "rainfall_rate_32bit",
91
+ "reflectivity_32bit",
122
92
  "rainfall_accumulated_16bit",
123
- "reflectivity_16bit",
124
- "rain_kinetic_energy",
125
- "snowfall_rate",
126
- "number_particles_all",
127
- # "number_particles_all_detected",
93
+ "mor_visibility",
94
+ "weather_code_synop_4680",
95
+ "weather_code_synop_4677",
96
+ "TO_SPLIT",
128
97
  ]
129
- df.columns = column_names
130
-
131
- # Add time column
132
- df["time"] = df_time
133
-
134
- # Retrieve raw_drop_concentration
135
- df["raw_drop_concentration"] = df_to_parse.iloc[:, 35:67].apply(
136
- lambda x: ",".join(x.dropna().astype(str)),
137
- axis=1,
138
- )
139
- # Retrieve raw_drop_average_velocity
140
- df["raw_drop_average_velocity"] = df_to_parse.iloc[:, 67:99].apply(
141
- lambda x: ",".join(x.dropna().astype(str)),
142
- axis=1,
143
- )
144
-
145
- # Retrieve raw_drop_number
146
- df_raw_drop_number = df_to_parse.iloc[:, 99].squeeze()
147
- df_raw_drop_number = df_raw_drop_number.str.replace(r"(\w{3})", r"\1,", regex=True)
148
- df["raw_drop_number"] = df_raw_drop_number
98
+ df.columns = names
99
+
100
+ # Derive raw drop arrays
101
+ def split_string(s):
102
+ vals = [v.strip() for v in s.split(";")]
103
+ c1 = ";".join(vals[:32])
104
+ c2 = ";".join(vals[32:64])
105
+ c3 = ";".join(vals[64:1088])
106
+ c4 = vals[1088]
107
+ c5 = vals[1089]
108
+ series = pd.Series(
109
+ {
110
+ "raw_drop_concentration": c1,
111
+ "raw_drop_average_velocity": c2,
112
+ "raw_drop_number": c3,
113
+ "rain_kinetic_energy": c4,
114
+ "CHECK_EMPTY": c5,
115
+ },
116
+ )
117
+ return series
118
+
119
+ splitted_string = df["TO_SPLIT"].apply(split_string)
120
+ df["raw_drop_concentration"] = splitted_string["raw_drop_concentration"]
121
+ df["raw_drop_average_velocity"] = splitted_string["raw_drop_average_velocity"]
122
+ df["raw_drop_number"] = splitted_string["raw_drop_number"]
123
+ df["rain_kinetic_energy"] = splitted_string["rain_kinetic_energy"]
124
+ df["CHECK_EMPTY"] = splitted_string["CHECK_EMPTY"]
125
+
126
+ # Ensure valid observation
127
+ df = df[df["CHECK_EMPTY"] == ""]
128
+
129
+ # Add the time column
130
+ time_str = df["sensor_date"] + "-" + df["sensor_time"]
131
+ df["time"] = pd.to_datetime(time_str, format="%d.%m.%Y-%H:%M:%S", errors="coerce")
149
132
 
150
133
  # Drop columns not agreeing with DISDRODB L0 standards
151
134
  columns_to_drop = [
152
- "firmware_iop",
153
- "firmware_dsp",
154
- "sensor_time_measurement_start",
155
- "sensor_time",
135
+ "dummy_date",
156
136
  "sensor_date",
157
- "station_name",
158
- "station_number",
159
- "sensor_serial_number",
137
+ "sensor_time",
160
138
  "sensor_serial_number",
161
- # "number_particles_all_detected",
139
+ "rainfall_accumulated_16bit", # unexpected format
140
+ "CHECK_EMPTY",
141
+ "TO_SPLIT",
162
142
  ]
163
143
  df = df.drop(columns=columns_to_drop)
164
144