disdrodb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +145 -14
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
  37. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  38. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  39. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  40. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  41. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
  42. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
  43. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +44 -3
  44. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +41 -1
  45. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  46. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  47. disdrodb/l0/l0a_processing.py +30 -30
  48. disdrodb/l0/l0b_nc_processing.py +108 -2
  49. disdrodb/l0/l0b_processing.py +4 -4
  50. disdrodb/l0/l0c_processing.py +5 -13
  51. disdrodb/l0/manuals/SWS250.pdf +0 -0
  52. disdrodb/l0/manuals/VPF730.pdf +0 -0
  53. disdrodb/l0/manuals/VPF750.pdf +0 -0
  54. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  55. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  56. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  57. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +105 -0
  58. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +128 -0
  59. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  62. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  63. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  64. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  65. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  66. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  68. disdrodb/l0/readers/{PARSIVEL → PARSIVEL2}/KIT/BURKINA_FASO.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  70. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  71. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → NCAR/FARM_PARSIVEL2.py} +43 -70
  72. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  73. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +29 -12
  77. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +69 -0
  78. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  79. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  80. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  81. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  82. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  83. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  84. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +31 -14
  85. disdrodb/l0/routines.py +105 -14
  86. disdrodb/l1/__init__.py +5 -0
  87. disdrodb/l1/filters.py +34 -20
  88. disdrodb/l1/processing.py +45 -44
  89. disdrodb/l1/resampling.py +77 -66
  90. disdrodb/l1/routines.py +35 -42
  91. disdrodb/l1_env/routines.py +18 -3
  92. disdrodb/l2/__init__.py +7 -0
  93. disdrodb/l2/empirical_dsd.py +58 -10
  94. disdrodb/l2/event.py +27 -120
  95. disdrodb/l2/processing.py +267 -116
  96. disdrodb/l2/routines.py +618 -254
  97. disdrodb/metadata/standards.py +3 -1
  98. disdrodb/psd/fitting.py +463 -144
  99. disdrodb/psd/models.py +8 -5
  100. disdrodb/routines.py +3 -3
  101. disdrodb/scattering/__init__.py +16 -4
  102. disdrodb/scattering/axis_ratio.py +56 -36
  103. disdrodb/scattering/permittivity.py +486 -0
  104. disdrodb/scattering/routines.py +701 -159
  105. disdrodb/summary/__init__.py +17 -0
  106. disdrodb/summary/routines.py +4120 -0
  107. disdrodb/utils/attrs.py +68 -125
  108. disdrodb/utils/compression.py +30 -1
  109. disdrodb/utils/dask.py +59 -8
  110. disdrodb/utils/dataframe.py +63 -9
  111. disdrodb/utils/directories.py +49 -17
  112. disdrodb/utils/encoding.py +33 -19
  113. disdrodb/utils/logger.py +13 -6
  114. disdrodb/utils/manipulations.py +71 -0
  115. disdrodb/utils/subsetting.py +214 -0
  116. disdrodb/utils/time.py +165 -19
  117. disdrodb/utils/writer.py +20 -7
  118. disdrodb/utils/xarray.py +85 -4
  119. disdrodb/viz/__init__.py +13 -0
  120. disdrodb/viz/plots.py +327 -0
  121. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  122. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/RECORD +127 -87
  123. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  124. disdrodb/l1/encoding_attrs.py +0 -635
  125. disdrodb/l2/processing_options.py +0 -213
  126. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  127. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  128. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  129. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,195 @@
+ #!/usr/bin/env python3
+
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """DISDRODB reader for GID LPM sensors not measuring wind."""
+ import pandas as pd
+
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+ from disdrodb.l0.l0a_processing import read_raw_text_file
+
+
+ @is_documented_by(reader_generic_docstring)
+ def reader(
+     filepath,
+     logger=None,
+ ):
+     """Reader."""
+     ##------------------------------------------------------------------------.
+     #### - Define raw data headers
+     column_names = ["TO_BE_SPLITTED"]
+
+     ##------------------------------------------------------------------------.
+     #### Define reader options
+     # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
+     reader_kwargs = {}
+
+     # - Define delimiter
+     reader_kwargs["delimiter"] = "\\n"
+
+     # - Avoid first column to become df index !!!
+     reader_kwargs["index_col"] = False
+
+     # Since column names are expected to be passed explicitly, header is set to None
+     reader_kwargs["header"] = None
+
+     # - Number of rows to be skipped at the beginning of the file
+     reader_kwargs["skiprows"] = None
+
+     # - Define behaviour when encountering bad lines
+     reader_kwargs["on_bad_lines"] = "skip"
+
+     # - Define reader engine
+     # - C engine is faster
+     # - Python engine is more feature-complete
+     reader_kwargs["engine"] = "python"
+
+     # - Define on-the-fly decompression of on-disk data
+     # - Available: gzip, bz2, zip
+     reader_kwargs["compression"] = "infer"
+
+     # - Strings to recognize as NA/NaN and replace with standard NA flags
+     # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+     # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+     # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+     reader_kwargs["na_values"] = ["na", "", "error"]
+
+     ##------------------------------------------------------------------------.
+     #### Read the data
+     df = read_raw_text_file(
+         filepath=filepath,
+         column_names=column_names,
+         reader_kwargs=reader_kwargs,
+         logger=logger,
+     )
+
+     ##------------------------------------------------------------------------.
+     #### Adapt the dataframe to adhere to DISDRODB L0 standards
+     # Count number of delimiters to identify valid rows
+     df = df[df["TO_BE_SPLITTED"].str.count(";") == 520]
+
+     # Split by ; delimiter (before raw drop number)
+     df = df["TO_BE_SPLITTED"].str.split(";", expand=True, n=79)
+
+     # Assign column names
+     column_names = [
+         "start_identifier",
+         "device_address",
+         "sensor_serial_number",
+         "sensor_date",
+         "sensor_time",
+         "weather_code_synop_4677_5min",
+         "weather_code_synop_4680_5min",
+         "weather_code_metar_4678_5min",
+         "precipitation_rate_5min",
+         "weather_code_synop_4677",
+         "weather_code_synop_4680",
+         "weather_code_metar_4678",
+         "precipitation_rate",
+         "rainfall_rate",
+         "snowfall_rate",
+         "precipitation_accumulated",
+         "mor_visibility",
+         "reflectivity",
+         "quality_index",
+         "max_hail_diameter",
+         "laser_status",
+         "static_signal",
+         "laser_temperature_analog_status",
+         "laser_temperature_digital_status",
+         "laser_current_analog_status",
+         "laser_current_digital_status",
+         "sensor_voltage_supply_status",
+         "current_heating_pane_transmitter_head_status",
+         "current_heating_pane_receiver_head_status",
+         "temperature_sensor_status",
+         "current_heating_voltage_supply_status",
+         "current_heating_house_status",
+         "current_heating_heads_status",
+         "current_heating_carriers_status",
+         "control_output_laser_power_status",
+         "reserve_status",
+         "temperature_interior",
+         "laser_temperature",
+         "laser_current_average",
+         "control_voltage",
+         "optical_control_voltage_output",
+         "sensor_voltage_supply",
+         "current_heating_pane_transmitter_head",
+         "current_heating_pane_receiver_head",
+         "temperature_ambient",
+         "current_heating_voltage_supply",
+         "current_heating_house",
+         "current_heating_heads",
+         "current_heating_carriers",
+         "number_particles",
+         "number_particles_internal_data",
+         "number_particles_min_speed",
+         "number_particles_min_speed_internal_data",
+         "number_particles_max_speed",
+         "number_particles_max_speed_internal_data",
+         "number_particles_min_diameter",
+         "number_particles_min_diameter_internal_data",
+         "number_particles_no_hydrometeor",
+         "number_particles_no_hydrometeor_internal_data",
+         "number_particles_unknown_classification",
+         "number_particles_unknown_classification_internal_data",
+         "number_particles_class_1",
+         "number_particles_class_1_internal_data",
+         "number_particles_class_2",
+         "number_particles_class_2_internal_data",
+         "number_particles_class_3",
+         "number_particles_class_3_internal_data",
+         "number_particles_class_4",
+         "number_particles_class_4_internal_data",
+         "number_particles_class_5",
+         "number_particles_class_5_internal_data",
+         "number_particles_class_6",
+         "number_particles_class_6_internal_data",
+         "number_particles_class_7",
+         "number_particles_class_7_internal_data",
+         "number_particles_class_8",
+         "number_particles_class_8_internal_data",
+         "number_particles_class_9",
+         "number_particles_class_9_internal_data",
+         "raw_drop_number",
+     ]
+     df.columns = column_names
+
+     # Remove checksum from raw_drop_number
+     df["raw_drop_number"] = df["raw_drop_number"].str.rsplit(";", n=2, expand=True)[0]
+
+     # Define datetime "time" column
+     df["time"] = df["sensor_date"] + "-" + df["sensor_time"]
+     df["time"] = pd.to_datetime(df["time"], format="%d.%m.%y-%H:%M:%S", errors="coerce")
+
+     # Drop row if start_identifier different than 00
+     df = df[df["start_identifier"].astype(str) == "00"]
+
+     # Drop rows with invalid raw_drop_number
+     df = df[df["raw_drop_number"].astype(str).str.len() == 1759]
+
+     # Drop columns not agreeing with DISDRODB L0 standards
+     columns_to_drop = [
+         "start_identifier",
+         "device_address",
+         "sensor_serial_number",
+         "sensor_date",
+         "sensor_time",
+     ]
+     df = df.drop(columns=columns_to_drop)
+     return df
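The LPM telegrams in the reader above are validated purely on string structure: a row is kept only if it contains exactly 520 ';' delimiters, the trailing checksum fields are cut off the raw spectrum with str.rsplit, and rows whose spectrum is not exactly 1759 characters long are discarded. The following standalone sketch shows the same pandas calls on an invented, much shorter telegram (the 10-delimiter layout and all values are made up for brevity; nothing here is part of the package):

import pandas as pd

# Invented toy telegram: 10 ';' delimiters instead of the 520 of a real LPM telegram
df = pd.DataFrame({"TO_BE_SPLITTED": [
    "00;01;1234;16.04.24;12:00:00;7;8;9;10;AB;99",  # well-formed line
    "00;01;truncated",                              # malformed line
]})

# Keep only rows with the expected number of delimiters
df = df[df["TO_BE_SPLITTED"].str.count(";") == 10]

# Drop the last two ';'-separated fields (the checksum block), as the reader does
payload = df["TO_BE_SPLITTED"].str.rsplit(";", n=2, expand=True)[0]
print(payload.iloc[0])  # -> 00;01;1234;16.04.24;12:00:00;7;8;9;10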
@@ -0,0 +1,105 @@
+ #!/usr/bin/env python3
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ import pandas as pd
+
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+ from disdrodb.l0.l0a_processing import read_raw_text_file
+
+
+ @is_documented_by(reader_generic_docstring)
+ def reader(
+     filepath,
+     logger=None,
+ ):
+     """Reader."""
+     ##------------------------------------------------------------------------.
+     #### Define column names
+     column_names = ["time", "TO_BE_SPLITTED"]
+
+     ##------------------------------------------------------------------------.
+     #### Define reader options
+     reader_kwargs = {}
+     # - Define delimiter
+     reader_kwargs["delimiter"] = ";"
+     # - Skip first row as columns names
+     reader_kwargs["header"] = None
+     reader_kwargs["skiprows"] = 0
+     # - Skip file with encoding errors
+     reader_kwargs["encoding_errors"] = "ignore"
+     # - Avoid first column to become df index !!!
+     reader_kwargs["index_col"] = False
+     # - Define behaviour when encountering bad lines
+     reader_kwargs["on_bad_lines"] = "skip"
+     # - Define reader engine
+     # - C engine is faster
+     # - Python engine is more feature-complete
+     reader_kwargs["engine"] = "python"
+     # - Define on-the-fly decompression of on-disk data
+     # - Available: gzip, bz2, zip
+     reader_kwargs["compression"] = "infer"
+     # - Strings to recognize as NA/NaN and replace with standard NA flags
+     # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+     # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+     # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+     reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
+
+     ##------------------------------------------------------------------------.
+     #### Read the data
+     df = read_raw_text_file(
+         filepath=filepath,
+         column_names=column_names,
+         reader_kwargs=reader_kwargs,
+         logger=logger,
+     )
+
+     ##------------------------------------------------------------------------.
+     #### Adapt the dataframe to adhere to DISDRODB L0 standards
+     # Convert time column to datetime
+     df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
+
+     # Split the 'TO_BE_SPLITTED' column
+     df = df["TO_BE_SPLITTED"].str.split(",", expand=True, n=9)
+
+     # Assign column names
+     columns_names = [
+         "station_name",
+         "sensor_status",
+         "sensor_temperature",
+         "number_particles",
+         "rainfall_rate_32bit",
+         "reflectivity_16bit",
+         "mor_visibility",
+         "weather_code_synop_4680",
+         "weather_code_synop_4677",
+         "raw_drop_number",
+     ]
+     df.columns = columns_names
+
+     # Add the time column
+     df["time"] = df_time
+
+     # Drop columns not agreeing with DISDRODB L0 standards
+     df = df.drop(columns=["station_name"])
+
+     # Drop rows with invalid values
+     # --> Ensure that weather_code_synop_4677 has length 2
+     # --> If a previous column is missing it will have 000
+     df = df[df["weather_code_synop_4677"].str.len() == 2]
+
+     # Return the dataframe adhering to DISDRODB L0 standards
+     return df
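In the reader above, each record arrives as a fixed 'time' column plus one comma-separated payload that is split into exactly ten named fields, after which incomplete records are detected through the length of weather_code_synop_4677. A standalone sketch with a single invented record (the station name and all values are placeholders, not real data):

import pandas as pd

raw = pd.Series(["STATION_A,0,25.3,124,1.25,22.4,9999,61,61,000;000;001"])  # fabricated record

df = raw.str.split(",", expand=True, n=9)  # n=9 -> 10 columns
df.columns = [
    "station_name", "sensor_status", "sensor_temperature", "number_particles",
    "rainfall_rate_32bit", "reflectivity_16bit", "mor_visibility",
    "weather_code_synop_4680", "weather_code_synop_4677", "raw_drop_number",
]
df = df.drop(columns=["station_name"])

# A record is kept only if the SYNOP 4677 code is a two-character field
df = df[df["weather_code_synop_4677"].str.len() == 2]
print(len(df))  # -> 1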
@@ -0,0 +1,128 @@
+ #!/usr/bin/env python3
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ import pandas as pd
+
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+ from disdrodb.l0.l0a_processing import read_raw_text_file
+
+
+ @is_documented_by(reader_generic_docstring)
+ def reader(
+     filepath,
+     logger=None,
+ ):
+     """Reader."""
+     ##------------------------------------------------------------------------.
+     #### Define column names
+     column_names = ["TO_SPLIT"]
+
+     ##------------------------------------------------------------------------.
+     #### Define reader options
+     reader_kwargs = {}
+
+     # - Define delimiter
+     reader_kwargs["delimiter"] = "\\n"
+
+     # - Skip first row as columns names
+     reader_kwargs["header"] = None
+
+     # - Skip header
+     reader_kwargs["skiprows"] = 0
+
+     # - Define encoding
+     reader_kwargs["encoding"] = "ISO-8859-1"
+
+     # - Avoid first column to become df index !!!
+     reader_kwargs["index_col"] = False
+
+     # - Define behaviour when encountering bad lines
+     reader_kwargs["on_bad_lines"] = "skip"
+
+     # - Define reader engine
+     # - C engine is faster
+     # - Python engine is more feature-complete
+     reader_kwargs["engine"] = "python"
+
+     # - Define on-the-fly decompression of on-disk data
+     # - Available: gzip, bz2, zip
+     # reader_kwargs['compression'] = 'xz'
+
+     # - Strings to recognize as NA/NaN and replace with standard NA flags
+     # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+     # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+     # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+     reader_kwargs["na_values"] = ["na", "error", "-.-", " NA"]
+
+     ##------------------------------------------------------------------------.
+     #### Read the data
+     df = read_raw_text_file(
+         filepath=filepath,
+         column_names=column_names,
+         reader_kwargs=reader_kwargs,
+         logger=logger,
+     )
+
+     ##------------------------------------------------------------------------.
+     #### Adapt the dataframe to adhere to DISDRODB L0 standards
+     # Remove rows with less than 97 characters (empty spectrum --> 97 characters)
+     df = df[df["TO_SPLIT"].str.len() >= 97]
+
+     # Split into columns and assign name
+     df = df["TO_SPLIT"].str.split(";", expand=True, n=14)
+     columns = [
+         "date",
+         "time",
+         "rainfall_rate_32bit",
+         "rainfall_accumulated_32bit",
+         "weather_code_synop_4680",
+         "weather_code_metar_4678",
+         "weather_code_nws",
+         "reflectivity_32bit",
+         "mor_visibility",
+         "laser_amplitude",
+         "number_particles",
+         "sensor_temperature",
+         "sensor_heating_current",
+         "sensor_battery_voltage",
+         "raw_drop_number",
+     ]
+     df.columns = columns
+
+     # Add datetime time column
+     df["time"] = df["date"] + "-" + df["time"]
+     df["time"] = pd.to_datetime(df["time"], format="%Y/%m/%d-%H:%M:%S", errors="coerce")
+     df = df.drop(columns=["date"])
+
+     # Convert timezone from JST to UTC
+     df = df.set_index("time").tz_localize("Asia/Tokyo").tz_convert(None).reset_index()
+
+     # Preprocess the raw spectrum
+     # - The '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops detected
+     # --> "" generates an array of zeros in L0B processing
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
+
+     # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
+
+     # Add 0 before every ; if the ; is not preceded by a digit
+     # Example: ';;1;;' --> '0;0;1;0;'
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"(?<!\d);", "0;", regex=True)
+
+     # Return the dataframe adhering to DISDRODB L0 standards
+     return df
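The spectrum handling in this reader hinges on three string operations: the '<SPECTRUM>ZERO</SPECTRUM>' placeholder becomes an empty string (later expanded to an all-zero array in L0B processing), the XML-like tags are stripped, and a '0' is inserted before every ';' that is not preceded by a digit so that empty bins read as zeros; the JST timestamps are then shifted to timezone-naive UTC. A self-contained sketch on invented values (not package code):

import pandas as pd

spectrum = pd.Series(["<SPECTRUM>;;1;;2;</SPECTRUM>", "<SPECTRUM>ZERO</SPECTRUM>"])
spectrum = spectrum.str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")   # empty-spectrum marker
spectrum = spectrum.str.replace("<SPECTRUM>", "").str.replace("</SPECTRUM>", "")
spectrum = spectrum.str.replace(r"(?<!\d);", "0;", regex=True)     # pad empty bins with zeros
print(spectrum.tolist())  # -> ['0;0;1;0;2;', '']

# Timestamps recorded in JST become timezone-naive UTC
t = pd.Series(pd.to_datetime(["2024/07/01-09:00:00"], format="%Y/%m/%d-%H:%M:%S"))
t_utc = t.dt.tz_localize("Asia/Tokyo").dt.tz_convert(None)         # 09:00 JST -> 00:00 UTC
print(t_utc.iloc[0])  # -> 2024-07-01 00:00:00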
@@ -113,7 +113,7 @@ def reader(
      # --> "" generates an array of zeros in L0B processing
      df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")

-     # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
+     # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
      df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
      df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")

@@ -102,7 +102,7 @@ def reader(
      # --> "" generates an array of zeros in L0B processing
      df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")

-     # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
+     # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
      df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
      df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")

@@ -0,0 +1,168 @@
+ #!/usr/bin/env python3
+
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """DISDRODB reader for UGENT ILVO Parsivel2 raw text data."""
+ import tarfile
+
+ import numpy as np
+ import pandas as pd
+
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+ from disdrodb.l0.l0a_processing import read_raw_text_file
+ from disdrodb.utils.logger import log_error
+
+
+ @is_documented_by(reader_generic_docstring)
+ def reader(
+     filepath,
+     logger=None,
+ ):
+     """Reader."""
+
+     ##------------------------------------------------------------------------.
+     #### Define function to read each txt file inside each daily zip file
+     def read_txt_file(file, filename):
+         """Parse a single txt file within the daily zip file."""
+         ##------------------------------------------------------------------------.
+         #### Define column names
+         column_names = ["TO_PARSE"]
+
+         ##------------------------------------------------------------------------.
+         #### Define reader options
+         reader_kwargs = {}
+         # - Define delimiter
+         reader_kwargs["delimiter"] = "\\n"
+         # - Skip first row as columns names
+         # - Define encoding
+         reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
+         # - Avoid first column to become df index !!!
+         reader_kwargs["index_col"] = False
+         # - Define behaviour when encountering bad lines
+         reader_kwargs["on_bad_lines"] = "skip"
+         # - Define reader engine
+         # - C engine is faster
+         # - Python engine is more feature-complete
+         reader_kwargs["engine"] = "python"
+         # - Define on-the-fly decompression of on-disk data
+         # - Available: gzip, bz2, zip
+         reader_kwargs["compression"] = "infer"
+         # - Strings to recognize as NA/NaN and replace with standard NA flags
+         # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+         # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+         # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+         reader_kwargs["na_values"] = ["na", "", "error"]
+
+         ##------------------------------------------------------------------------.
+         #### Read the data
+         df = read_raw_text_file(
+             filepath=file,
+             column_names=column_names,
+             reader_kwargs=reader_kwargs,
+             logger=logger,
+         )
+
+         ##------------------------------------------------------------------------.
+         #### Adapt the dataframe to adhere to DISDRODB L0 standards
+         # Create ID and Value columns
+         df = df["TO_PARSE"].str.split(":", expand=True, n=1)
+         df.columns = ["ID", "Value"]
+
+         # Select only rows with values
+         df = df[df["Value"].apply(lambda x: x is not None)]
+
+         # Drop rows with invalid IDs
+         valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
+         df = df[df["ID"].astype(str).isin(valid_id_str)]
+
+         # Reshape dataframe
+         df = df.set_index("ID").T
+
+         # Assign column names
+         column_dict = {
+             "01": "rainfall_rate_32bit",
+             "02": "rainfall_accumulated_32bit",
+             "03": "weather_code_synop_4680",
+             "04": "weather_code_synop_4677",
+             "05": "weather_code_metar_4678",
+             "06": "weather_code_nws",
+             "07": "reflectivity_32bit",
+             "08": "mor_visibility",
+             "09": "sample_interval",
+             "10": "laser_amplitude",
+             "11": "number_particles",
+             "12": "sensor_temperature",
+             # "13": "sensor_serial_number",
+             # "14": "firmware_iop",
+             # "15": "firmware_dsp",
+             "16": "sensor_heating_current",
+             "17": "sensor_battery_voltage",
+             "18": "sensor_status",
+             # "19": "start_time",
+             # "20": "sensor_time",
+             # "21": "sensor_date",
+             # "22": "station_name",
+             # "23": "station_number",
+             "24": "rainfall_amount_absolute_32bit",
+             "25": "error_code",
+             "26": "sensor_temperature_pcb",
+             "27": "sensor_temperature_receiver",
+             "28": "sensor_temperature_trasmitter",
+             "30": "rainfall_rate_16_bit_30",
+             "31": "rainfall_rate_16_bit_1200",
+             "32": "rainfall_accumulated_16bit",
+             "34": "rain_kinetic_energy",
+             "35": "snowfall_rate",
+             "90": "raw_drop_concentration",
+             "91": "raw_drop_average_velocity",
+             "93": "raw_drop_number",
+         }
+
+         df = df.rename(column_dict, axis=1)
+
+         # Keep only columns defined in the dictionary
+         df = df[list(column_dict.values())]
+
+         # Define datetime "time" column from filename
+         datetime_str = filename.replace(".txt", "").split("_")[1]
+         df["time"] = pd.to_datetime(datetime_str, format="%Y%m%d%H%M%S")
+
+         return df
+
+     # ---------------------------------------------------------------------.
+     #### Iterate over all files (aka timesteps) in the daily zip archive
+     # - Each file contains a single timestep!
+     list_df = []
+     with tarfile.open(filepath, "r:gz") as tar:
+         members = sorted(tar.getmembers(), key=lambda m: m.name)
+         for member in members:
+             filename = member.name
+             if member.isfile() and filename.endswith(".txt"):
+                 # Open file
+                 with tar.extractfile(member) as file:
+                     try:
+                         df = read_txt_file(file=file, filename=filename)
+                         list_df.append(df)
+                     except Exception as e:
+                         msg = f"An error occurred while reading {filename}. The error is: {e}."
+                         log_error(logger=logger, msg=msg, verbose=True)
+
+     # Concatenate all dataframes into a single one
+     df = pd.concat(list_df)
+
+     # ---------------------------------------------------------------------.
+     return df
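This reader works on daily tar.gz archives in which every member .txt file holds one Parsivel2 telegram written as 'ID:Value' lines; each telegram is pivoted into a one-row dataframe and the per-timestep rows are finally concatenated. A standalone sketch of the ID:Value pivot on an invented three-line telegram (only a subset of the ID-to-name mapping is shown, and the values are placeholders):

import pandas as pd

lines = pd.DataFrame({"TO_PARSE": ["01:0.000", "07:-9.999", "18:0", "junk line without separator"]})

# Split each line into an ID and a Value; lines without ':' get a missing Value
df = lines["TO_PARSE"].str.split(":", expand=True, n=1)
df.columns = ["ID", "Value"]
df = df[df["Value"].notna()]  # equivalent of the reader's 'is not None' filter

# Pivot: each telegram ID becomes a column of a single-row dataframe
df = df.set_index("ID").T
df = df.rename({"01": "rainfall_rate_32bit", "07": "reflectivity_32bit", "18": "sensor_status"}, axis=1)

# Timestamp taken from the member file name, as in the reader above
df["time"] = pd.to_datetime("20240701120000", format="%Y%m%d%H%M%S")
print(df)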