disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -3
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/io.py +14 -17
  7. disdrodb/api/path.py +42 -77
  8. disdrodb/api/search.py +89 -23
  9. disdrodb/cli/disdrodb_create_summary.py +11 -1
  10. disdrodb/cli/disdrodb_create_summary_station.py +10 -0
  11. disdrodb/cli/disdrodb_run_l0.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  15. disdrodb/cli/disdrodb_run_l1.py +1 -1
  16. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  17. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  18. disdrodb/configs.py +30 -83
  19. disdrodb/constants.py +4 -3
  20. disdrodb/data_transfer/download_data.py +4 -2
  21. disdrodb/docs.py +2 -2
  22. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  29. disdrodb/etc/products/L1/global.yaml +7 -1
  30. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  31. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +1 -1
  33. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
  35. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
  38. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  39. disdrodb/etc/products/L2M/global.yaml +11 -3
  40. disdrodb/l0/check_configs.py +49 -16
  41. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  42. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  43. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  44. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  47. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  48. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  49. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  50. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  51. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  52. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  53. disdrodb/l0/l0_reader.py +2 -2
  54. disdrodb/l0/l0b_processing.py +70 -15
  55. disdrodb/l0/l0c_processing.py +7 -3
  56. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
  57. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  58. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  59. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  60. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  61. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  62. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  63. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  64. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  65. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  66. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  67. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  68. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  69. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  71. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
  72. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  73. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  74. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  75. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  76. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  77. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  78. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  79. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  80. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  81. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  83. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  84. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  85. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  86. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  87. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
  88. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  89. disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
  90. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  91. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
  92. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  93. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  94. disdrodb/l1/beard_model.py +31 -129
  95. disdrodb/l1/fall_velocity.py +136 -83
  96. disdrodb/l1/filters.py +25 -28
  97. disdrodb/l1/processing.py +16 -17
  98. disdrodb/l1/resampling.py +101 -38
  99. disdrodb/l1_env/routines.py +46 -17
  100. disdrodb/l2/empirical_dsd.py +6 -0
  101. disdrodb/l2/processing.py +6 -5
  102. disdrodb/metadata/geolocation.py +0 -2
  103. disdrodb/metadata/search.py +3 -4
  104. disdrodb/psd/fitting.py +16 -13
  105. disdrodb/routines/l0.py +2 -2
  106. disdrodb/routines/l1.py +173 -60
  107. disdrodb/routines/l2.py +148 -284
  108. disdrodb/routines/options.py +345 -0
  109. disdrodb/routines/wrappers.py +14 -1
  110. disdrodb/scattering/axis_ratio.py +90 -84
  111. disdrodb/scattering/permittivity.py +6 -0
  112. disdrodb/summary/routines.py +735 -670
  113. disdrodb/utils/archiving.py +51 -44
  114. disdrodb/utils/attrs.py +3 -1
  115. disdrodb/utils/dask.py +4 -4
  116. disdrodb/utils/dict.py +33 -0
  117. disdrodb/utils/encoding.py +6 -1
  118. disdrodb/utils/routines.py +9 -8
  119. disdrodb/utils/time.py +11 -3
  120. disdrodb/viz/__init__.py +0 -13
  121. disdrodb/viz/plots.py +231 -1
  122. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
  123. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
  124. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  125. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  126. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  127. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
  128. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
  129. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
  130. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
  131. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  132. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
  133. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
  134. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
  135. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,239 @@
+ #!/usr/bin/env python3
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """Reader for the HYDROX PARSIVEL2 disdrometer located at Trafoi (Italy)."""
+ import os
+
+ import pandas as pd
+
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+ from disdrodb.l0.l0a_processing import read_raw_text_file
+
+
+ def read_old_format(filepath, logger):
+     """Read old format."""
+     ##------------------------------------------------------------------------.
+     #### Define column names
+     column_names = ["TO_SPLIT"]
+
+     ##------------------------------------------------------------------------.
+     #### Define reader options
+     reader_kwargs = {}
+
+     # - Define delimiter
+     reader_kwargs["delimiter"] = "\\n"
+
+     # - Skip first row as column names
+     reader_kwargs["header"] = None
+
+     # - Skip header
+     reader_kwargs["skiprows"] = 0
+
+     # - Define encoding
+     reader_kwargs["encoding"] = "ISO-8859-1"
+
+     # - Avoid the first column becoming the df index
+     reader_kwargs["index_col"] = False
+
+     # - Define behaviour when encountering bad lines
+     reader_kwargs["on_bad_lines"] = "skip"
+
+     # - Define reader engine
+     #   - C engine is faster
+     #   - Python engine is more feature-complete
+     reader_kwargs["engine"] = "python"
+
+     # - Define on-the-fly decompression of on-disk data
+     #   - Available: gzip, bz2, zip
+     # reader_kwargs['compression'] = 'xz'
+
+     # - Strings to recognize as NA/NaN and replace with standard NA flags
+     #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+     #     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+     #     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+     reader_kwargs["na_values"] = ["na", "error", "-.-", " NA"]
+
+     ##------------------------------------------------------------------------.
+     #### Read the data
+     df = read_raw_text_file(
+         filepath=filepath,
+         column_names=column_names,
+         reader_kwargs=reader_kwargs,
+         logger=logger,
+     )
+
+     ##------------------------------------------------------------------------.
+     #### Adapt the dataframe to adhere to DISDRODB L0 standards
+     # Remove corrupted rows
+     df = df[df["TO_SPLIT"].str.count(";").isin([7, 1031])]
+
+     # Split into columns
+     df = df["TO_SPLIT"].str.split(";", expand=True, n=7)
+
+     # Assign column names
+     names = [
+         "date",
+         "time",
+         "rainfall_rate_32bit",
+         "rainfall_accumulated_32bit",
+         "laser_amplitude",
+         "number_particles",
+         "sensor_temperature",
+         "raw_drop_number",
+     ]
+     df.columns = names
+
+     # Add datetime "time" column
+     df["time"] = df["date"] + "-" + df["time"]
+     df["time"] = pd.to_datetime(df["time"], format="%d.%m.%Y-%H:%M:%S", errors="coerce")
+     df = df.drop(columns=["date"])
+
+     # Convert to UTC (timestamps are recorded in UTC+1)
+     df["time"] = df["time"] - pd.Timedelta(hours=1)
+
+     # Preprocess the raw spectrum
+     # - '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops were detected
+     # --> "" generates an array of zeros in L0B processing
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
+
+     # Remove the <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
+
+     # Add a 0 before every ';' not preceded by a digit (i.e. fill empty bins)
+     # Example: ';;1;;' --> '0;0;1;0;'
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"(?<!\d);", "0;", regex=True)
+
+     # Replace a trailing '999;' with '0'
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"999;$", "0", regex=True)
+
+     # Return the dataframe adhering to DISDRODB L0 standards
+     return df
+
+
+ def read_new_format(filepath, logger):
+     """Read new format."""
+     ##------------------------------------------------------------------------.
+     #### Define column names
+     column_names = ["TO_SPLIT"]
+
+     ##------------------------------------------------------------------------.
+     #### Define reader options
+     reader_kwargs = {}
+
+     # - Define delimiter
+     reader_kwargs["delimiter"] = "\\n"
+
+     # - Skip first row as column names
+     reader_kwargs["header"] = None
+
+     # - Skip header
+     reader_kwargs["skiprows"] = 0
+
+     # - Define encoding
+     reader_kwargs["encoding"] = "ISO-8859-1"
+
+     # - Avoid the first column becoming the df index
+     reader_kwargs["index_col"] = False
+
+     # - Define behaviour when encountering bad lines
+     reader_kwargs["on_bad_lines"] = "skip"
+
+     # - Define reader engine
+     #   - C engine is faster
+     #   - Python engine is more feature-complete
+     reader_kwargs["engine"] = "python"
+
+     # - Define on-the-fly decompression of on-disk data
+     #   - Available: gzip, bz2, zip
+     # reader_kwargs['compression'] = 'xz'
+
+     # - Strings to recognize as NA/NaN and replace with standard NA flags
+     #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+     #     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+     #     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+     reader_kwargs["na_values"] = ["na", "error", "-.-", " NA"]
+
+     ##------------------------------------------------------------------------.
+     #### Read the data
+     df = read_raw_text_file(
+         filepath=filepath,
+         column_names=column_names,
+         reader_kwargs=reader_kwargs,
+         logger=logger,
+     )
+
+     ##------------------------------------------------------------------------.
+     #### Adapt the dataframe to adhere to DISDRODB L0 standards
+     # Remove corrupted rows
+     df = df[df["TO_SPLIT"].str.count(";").isin([11, 1035])]
+
+     # Split into columns
+     df = df["TO_SPLIT"].str.split(";", expand=True, n=11)
+
+     # Assign column names
+     names = [
+         "date",
+         "time",
+         "rainfall_rate_32bit",
+         "rainfall_accumulated_32bit",
+         "reflectivity_32bit",
+         "mor_visibility",
+         "laser_amplitude",
+         "number_particles",
+         "sensor_temperature",
+         "sensor_heating_current",
+         "sensor_battery_voltage",
+         "raw_drop_number",
+     ]
+     df.columns = names
+
+     # Add datetime "time" column
+     df["time"] = df["date"] + "-" + df["time"]
+     df["time"] = pd.to_datetime(df["time"], format="%d.%m.%Y-%H:%M:%S", errors="coerce")
+     df = df.drop(columns=["date"])
+
+     # Preprocess the raw spectrum
+     # - '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops were detected
+     # --> "" generates an array of zeros in L0B processing
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
+
+     # Remove the <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
+
+     # Add a 0 before every ';' not preceded by a digit (i.e. fill empty bins)
+     # Example: ';;1;;' --> '0;0;1;0;'
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"(?<!\d);", "0;", regex=True)
+
+     # Replace a trailing '999;' with '0'
+     df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"999;$", "0", regex=True)
+
+     # Return the dataframe adhering to DISDRODB L0 standards
+     return df
+
+
+ @is_documented_by(reader_generic_docstring)
+ def reader(
+     filepath,
+     logger=None,
+ ):
+     """Reader."""
+     date = int(os.path.basename(filepath)[-12:-4])
+     if date > 20140000:
+         return read_new_format(filepath, logger)
+     return read_old_format(filepath, logger)
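Note: the zero-infill regex in the hunk above is easy to misread. A minimal, self-contained sketch of what it does, run on a hypothetical sample spectrum string (the value is illustrative, not taken from the package):

import re

spectrum = ";;1;;3;999;"
# Insert a 0 before every ';' that is not preceded by a digit (empty bins).
spectrum = re.sub(r"(?<!\d);", "0;", spectrum)  # -> "0;0;1;0;3;999;"
# Replace the trailing '999;' field with '0'.
spectrum = re.sub(r"999;$", "0", spectrum)      # -> "0;0;1;0;3;0"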
@@ -109,17 +109,11 @@ def reader(
      df.columns = names

      # Derive raw drop arrays
-     def split_string(s):
-         vals = [v.strip() for v in s.split(",")]
-         c1 = ", ".join(vals[:32])
-         c2 = ", ".join(vals[32:64])
-         c3 = ", ".join(vals[64:])
-         return pd.Series({"raw_drop_concentration": c1, "raw_drop_average_velocity": c2, "raw_drop_number": c3})
-
-     splitted_string = df["TO_SPLIT"].apply(split_string)
-     df["raw_drop_concentration"] = splitted_string["raw_drop_concentration"]
-     df["raw_drop_average_velocity"] = splitted_string["raw_drop_average_velocity"]
-     df["raw_drop_number"] = splitted_string["raw_drop_number"]
+     df_split = df["TO_SPLIT"].str.split(",", expand=True)
+     df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
+     df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
+     df["raw_drop_number"] = df_split.iloc[:, 64:].agg(",".join, axis=1)
+     del df_split

      # Define datetime "time" column
      df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
@@ -89,23 +89,10 @@ def reader(
      df.columns = names

      # Derive raw drop arrays
-     def split_string(s):
-         vals = [v.strip() for v in s.split(",")]
-         c1 = ", ".join(vals[:32])
-         c2 = ", ".join(vals[32:64])
-         # c3 = ", ".join(vals[64:])
-         series = pd.Series(
-             {
-                 "raw_drop_concentration": c1,
-                 "raw_drop_average_velocity": c2,
-                 # "raw_drop_number": c3,
-             },
-         )
-         return series
-
-     splitted_string = df["TO_SPLIT"].apply(split_string)
-     df["raw_drop_concentration"] = splitted_string["raw_drop_concentration"]
-     df["raw_drop_average_velocity"] = splitted_string["raw_drop_average_velocity"]
+     # - raw_drop_number is missing !
+     df_split = df["TO_SPLIT"].str.split(",", expand=True)
+     df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
+     df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)

      # Define datetime time column
      df["year"] = df["year"].str.replace(".0", "")
@@ -90,20 +90,11 @@ def reader(
      valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
      df = df[df["ID"].astype(str).isin(valid_id_str)]

-     # Create the dataframe with each row corresponding to a timestep
-     # - Group rows based on when ID values restart
-     groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
-
-     # Reshape the dataframe
-     group_dfs = []
-     for _, group in groups:
-         group_df = group.set_index("ID").T
-         group_dfs.append(group_df)
-
-     # Merge each timestep dataframe
-     # --> Missing columns are infilled by NaN
-     df = pd.concat(group_dfs, axis=0)
-     df.columns = df.columns.astype(str).str.pad(width=2, side="left", fillchar="0")
+     # Create the dataframe where each row corresponds to a timestep
+     df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
+     df = df.pivot(index="_group", columns="ID")  # noqa
+     df.columns = df.columns.get_level_values("ID")
+     df = df.reset_index(drop=True)

      # Assign column names
      column_dict = {
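Note: the grouping trick above (repeated in several readers in this release) turns a stream of "ID: value" rows into one row per timestep: a new group starts whenever the integer ID stops increasing. A minimal sketch with hypothetical data:

import pandas as pd

df = pd.DataFrame({
    "ID": ["01", "02", "90", "01", "90"],
    "Value": ["0.5", "12.3", "0;0;1", "0.0", "0;0;0"],
})
# New group whenever the ID sequence restarts (diff <= 0).
df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
wide = df.pivot(index="_group", columns="ID", values="Value").reset_index(drop=True)
# One row per timestep, one column per ID; IDs absent from a
# timestep (here "02" in the second one) come out as NaN.

Passing values="Value" yields flat columns; the reader above omits it, so it flattens the resulting MultiIndex with get_level_values("ID").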
@@ -81,19 +81,11 @@ def reader(
      valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
      df = df[df["ID"].astype(str).isin(valid_id_str)]

-     # Create the dataframe with each row corresponding to a timestep
-     # - Group rows based on when ID values restart
-     groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
-
-     # Reshape the dataframe
-     group_dfs = []
-     for _, group in groups:
-         group_df = group.set_index("ID").T
-         group_dfs.append(group_df)
-
-     # Merge each timestep dataframe
-     # --> Missing columns are infilled by NaN
-     df = pd.concat(group_dfs, axis=0)
+     # Create the dataframe where each row corresponds to a timestep
+     df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
+     df = df.pivot(index="_group", columns="ID")  # noqa
+     df.columns = df.columns.get_level_values("ID")
+     df = df.reset_index(drop=True)

      # Assign column names
      column_dict = {
@@ -122,9 +114,14 @@ def reader(
          # "23": "station_number",
          "24": "rainfall_amount_absolute_32bit",
          "25": "error_code",
+         # "26": "sensor_temperature_pcb",
+         # "27": "sensor_temperature_receiver",
+         # "28": "sensor_temperature_trasmitter",
          "30": "rainfall_rate_16_bit_30",
          "31": "rainfall_rate_16_bit_1200",
          "32": "rainfall_accumulated_16bit",
+         # "34": "rain_kinetic_energy",
+         # "35": "snowfall_rate",
          "90": "raw_drop_concentration",
          "91": "raw_drop_average_velocity",
          "93": "raw_drop_number",
@@ -81,19 +81,11 @@ def reader(
      valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
      df = df[df["ID"].astype(str).isin(valid_id_str)]

-     # Create the dataframe with each row corresponding to a timestep
-     # - Group rows based on when ID values restart
-     groups = df.groupby((df["ID"].astype(int).diff() <= 0).cumsum())
-
-     # Reshape the dataframe
-     group_dfs = []
-     for _, group in groups:
-         group_df = group.set_index("ID").T
-         group_dfs.append(group_df)
-
-     # Merge each timestep dataframe
-     # --> Missing columns are infilled by NaN
-     df = pd.concat(group_dfs, axis=0)
+     # Create the dataframe where each row corresponds to a timestep
+     df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
+     df = df.pivot(index="_group", columns="ID")  # noqa
+     df.columns = df.columns.get_level_values("ID")
+     df = df.reset_index(drop=True)

      # Assign column names
      column_dict = {
@@ -122,9 +114,14 @@ def reader(
          # "23": "station_number",
          "24": "rainfall_amount_absolute_32bit",
          "25": "error_code",
+         # "26": "sensor_temperature_pcb",
+         # "27": "sensor_temperature_receiver",
+         # "28": "sensor_temperature_trasmitter",
          "30": "rainfall_rate_16_bit_30",
          "31": "rainfall_rate_16_bit_1200",
          "32": "rainfall_accumulated_16bit",
+         # "34": "rain_kinetic_energy",
+         # "35": "snowfall_rate",
          "90": "raw_drop_concentration",
          "91": "raw_drop_average_velocity",
          "93": "raw_drop_number",
@@ -0,0 +1,232 @@
+ #!/usr/bin/env python3
+
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """DISDRODB reader for PAGASA PARSIVEL2 raw text data."""
+ # import os
+ # import tempfile
+ # from disdrodb.utils.compression import unzip_file_on_terminal
+
+ import numpy as np
+ import pandas as pd
+
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+ from disdrodb.l0.l0a_processing import read_raw_text_file
+ from disdrodb.utils.logger import log_error
+
+
+ def read_txt_file(file, filename, logger):
+     """Parse a single txt file within the daily zip file."""
+     ##------------------------------------------------------------------------.
+     #### Define column names
+     column_names = ["TO_PARSE"]
+
+     ##------------------------------------------------------------------------.
+     #### Define reader options
+     reader_kwargs = {}
+     # - Define delimiter
+     reader_kwargs["delimiter"] = "\\n"
+     # - Skip first row as column names
+     # - Define encoding
+     reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
+     # - Avoid the first column becoming the df index
+     reader_kwargs["index_col"] = False
+     # - Define behaviour when encountering bad lines
+     reader_kwargs["on_bad_lines"] = "skip"
+     # - Define reader engine
+     #   - C engine is faster
+     #   - Python engine is more feature-complete
+     reader_kwargs["engine"] = "python"
+     # - Define on-the-fly decompression of on-disk data
+     #   - Available: gzip, bz2, zip
+     reader_kwargs["compression"] = "infer"
+     # - Strings to recognize as NA/NaN and replace with standard NA flags
+     #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+     #     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+     #     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+     reader_kwargs["na_values"] = ["na", "", "error"]
+
+     ##------------------------------------------------------------------------.
+     #### Read the data
+     df = read_raw_text_file(
+         filepath=file,
+         column_names=column_names,
+         reader_kwargs=reader_kwargs,
+         logger=logger,
+     )
+     ##------------------------------------------------------------------------.
+     #### Adapt the dataframe to adhere to DISDRODB L0 standards
+     # Raise an error if the file is empty
+     if len(df) == 0:
+         raise ValueError(f"{filename} is empty.")
+
+     # Create ID and Value columns
+     df = df["TO_PARSE"].str.split(":", expand=True, n=1)
+     df.columns = ["ID", "Value"]
+
+     # Select only rows with values
+     df = df[df["Value"].astype(bool)]
+     df = df[df["Value"].apply(lambda x: x is not None)]
+
+     # Drop rows with invalid IDs
+     # - Corrupted rows
+     valid_id_str = np.char.rjust(np.arange(0, 94).astype(str), width=2, fillchar="0")
+     df = df[df["ID"].astype(str).isin(valid_id_str)]
+
+     # Create the dataframe where each row corresponds to a timestep
+     df["_group"] = (df["ID"].astype(int).diff() <= 0).cumsum()
+     df = df.pivot(index="_group", columns="ID")  # noqa
+     df.columns = df.columns.get_level_values("ID")
+     df = df.reset_index(drop=True)
+
+     # Assign column names
+     column_dict = {
+         "01": "rainfall_rate_32bit",
+         "02": "rainfall_accumulated_32bit",
+         "03": "weather_code_synop_4680",
+         "04": "weather_code_synop_4677",
+         "05": "weather_code_metar_4678",
+         "06": "weather_code_nws",
+         "07": "reflectivity_32bit",
+         "08": "mor_visibility",
+         "09": "sample_interval",
+         "10": "laser_amplitude",
+         "11": "number_particles",
+         "12": "sensor_temperature",
+         # "13": "sensor_serial_number",
+         # "14": "firmware_iop",
+         # "15": "firmware_dsp",
+         "16": "sensor_heating_current",
+         "17": "sensor_battery_voltage",
+         "18": "sensor_status",
+         # "19": "start_time",
+         # "20": "sensor_time",
+         # "21": "sensor_date",
+         # "22": "station_name",
+         # "23": "station_number",
+         "24": "rainfall_amount_absolute_32bit",
+         "25": "error_code",
+         "26": "sensor_temperature_pcb",
+         "27": "sensor_temperature_receiver",
+         "28": "sensor_temperature_trasmitter",
+         "30": "rainfall_rate_16_bit_30",
+         "31": "rainfall_rate_16_bit_1200",
+         "32": "rainfall_accumulated_16bit",
+         # "33": "reflectivity_16bit",
+         "34": "rain_kinetic_energy",
+         "35": "snowfall_rate",
+         # 60: "number_particles_all",
+         # 61: "list_particles",
+         "90": "raw_drop_concentration",
+         "91": "raw_drop_average_velocity",
+         "93": "raw_drop_number",
+     }
+
+     # Identify missing columns and add NaN
+     expected_columns = np.array(list(column_dict.keys()))
+     missing_columns = expected_columns[np.isin(expected_columns, df.columns, invert=True)].tolist()
+     if len(missing_columns) > 0:
+         for column in missing_columns:
+             df[column] = "NaN"
+
+     # Rename columns
+     df = df.rename(column_dict, axis=1)
+
+     # Keep only columns defined in the dictionary
+     df = df[list(column_dict.values())]
+
+     # Define datetime "time" column
+     time_str = filename.split("_")[-1].replace(".txt", "")
+     df["time"] = pd.to_datetime(time_str, format="%Y%m%d%H%M%S", errors="coerce")
+
+     # Keep only rows with a valid raw_drop_number
+     df = df[df["raw_drop_number"].str.count(";") == 1024]
+     if len(df) == 0:
+         raise ValueError("Invalid raw drop number field.")
+
+     # Drop columns not agreeing with DISDRODB L0 standards
+     # columns_to_drop = [
+     #     # "sensor_date",
+     #     # "sensor_time",
+     #     # "firmware_iop",
+     #     # "firmware_dsp",
+     #     # "sensor_serial_number",
+     #     # "station_name",
+     #     # "station_number",
+     # ]
+     # df = df.drop(columns=columns_to_drop)
+     return df
+
+
+ @is_documented_by(reader_generic_docstring)
+ def reader(
+     filepath,
+     logger=None,
+ ):
+     """Reader."""
+     import zipfile
+
+     # return read_txt_file(file=filepath,
+     #                      filename=os.path.basename(filepath),
+     #                      logger=logger,
+     #                      )
+
+     # ---------------------------------------------------------------------.
+     #### Iterate over all files (aka timesteps) in the daily zip archive
+     # - Each file contains a single timestep!
+     # list_df = []
+     # with tempfile.TemporaryDirectory() as temp_dir:
+     #     # Extract all files
+     #     unzip_file_on_terminal(filepath, temp_dir)
+
+     #     # Walk through extracted files
+     #     for root, _, files in os.walk(temp_dir):
+     #         for filename in sorted(files):
+     #             if filename.endswith(".txt"):
+     #                 full_path = os.path.join(root, filename)
+     #                 try:
+     #                     df = read_txt_file(file=full_path, filename=filename, logger=logger)
+     #                     if df is not None:
+     #                         list_df.append(df)
+     #                 except Exception as e:
+     #                     msg = f"An error occurred while reading {filename}: {e}"
+     #                     log_error(logger=logger, msg=msg, verbose=True)
+
+     list_df = []
+     with zipfile.ZipFile(filepath, "r") as zip_ref:
+         filenames = sorted(zip_ref.namelist())
+         for filename in filenames:
+             if filename.endswith(".txt"):
+                 # Open file
+                 with zip_ref.open(filename) as file:
+                     try:
+                         df = read_txt_file(file=file, filename=filename, logger=logger)
+                         if df is not None:
+                             list_df.append(df)
+                     except Exception as e:
+                         msg = f"An error occurred while reading {filename}. The error is: {e}."
+                         log_error(logger=logger, msg=msg, verbose=True)
+
+     # Check that the zip file contains at least one non-empty file
+     if len(list_df) == 0:
+         raise ValueError(f"{filepath} contains only empty files!")
+
+     # Concatenate all dataframes into a single one
+     df = pd.concat(list_df)
+
+     # ---------------------------------------------------------------------.
+     return df
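Note: a minimal, self-contained sketch of the zip-archive iteration pattern used by reader() above, built on a hypothetical in-memory archive (the member name and content are illustrative). ZipFile.open returns a file-like object, which is what the reader hands to read_raw_text_file:

import io
import zipfile

# Build a hypothetical daily archive with one txt file per timestep.
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
    zf.writestr("PAGASA_20240101000000.txt", "01:0.5\n02:12.3\n")

with zipfile.ZipFile(buf, "r") as zip_ref:
    for filename in sorted(zip_ref.namelist()):
        if filename.endswith(".txt"):
            with zip_ref.open(filename) as file:
                print(filename, file.read())  # bytes of one timestep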
@@ -111,31 +111,19 @@ def reader(
      df_raw_spectrum = df[df["TO_PARSE"].str.len() == 4545]

      # Derive raw drop arrays
-     def split_string(s):
-         vals = [v.strip() for v in s.split(",")]
-         c1 = ",".join(vals[:32])
-         c2 = ",".join(vals[32:64])
-         c3 = ",".join(vals[64].replace("r", "").split("/"))
-         series = pd.Series(
-             {
-                 "raw_drop_concentration": c1,
-                 "raw_drop_average_velocity": c2,
-                 "raw_drop_number": c3,
-             },
-         )
-         return series
-
-     splitted_string = df_raw_spectrum["TO_PARSE"].apply(split_string)
-     df_raw_spectrum["raw_drop_concentration"] = splitted_string["raw_drop_concentration"]
-     df_raw_spectrum["raw_drop_average_velocity"] = splitted_string["raw_drop_average_velocity"]
-     df_raw_spectrum["raw_drop_number"] = splitted_string["raw_drop_number"]
+     df_split = df["TO_PARSE"].str.split(",", expand=True)
+     df_raw_spectrum["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
+     df_raw_spectrum["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
+     df_raw_spectrum["raw_drop_number"] = df_split.iloc[:, 64:].agg(",".join, axis=1)
      df_raw_spectrum = df_raw_spectrum.drop(columns=["date", "TO_PARSE"])
+     del df_split

      # Add raw array
      df = df_data.set_index("time")
      df_raw_spectrum = df_raw_spectrum.set_index("time")

      df.update(df_raw_spectrum)
+     del df_raw_spectrum

      # Set back time as column
      df = df.reset_index()
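Note: the df.update(df_raw_spectrum) call above overwrites values in place, aligning on the shared "time" index, and touches only the rows and columns present in df_raw_spectrum. A minimal sketch with hypothetical values:

import pandas as pd

df = pd.DataFrame(
    {"raw_drop_number": ["", ""]},
    index=pd.to_datetime(["2024-01-01 00:00", "2024-01-01 00:01"]),
)
parsed = pd.DataFrame(
    {"raw_drop_number": ["0,0,1"]},
    index=pd.to_datetime(["2024-01-01 00:01"]),
)
df.update(parsed)  # only the matching timestamp is overwritten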