disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -3
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/io.py +14 -17
  7. disdrodb/api/path.py +42 -77
  8. disdrodb/api/search.py +89 -23
  9. disdrodb/cli/disdrodb_create_summary.py +11 -1
  10. disdrodb/cli/disdrodb_create_summary_station.py +10 -0
  11. disdrodb/cli/disdrodb_run_l0.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  15. disdrodb/cli/disdrodb_run_l1.py +1 -1
  16. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  17. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  18. disdrodb/configs.py +30 -83
  19. disdrodb/constants.py +4 -3
  20. disdrodb/data_transfer/download_data.py +4 -2
  21. disdrodb/docs.py +2 -2
  22. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  29. disdrodb/etc/products/L1/global.yaml +7 -1
  30. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  31. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +1 -1
  33. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
  35. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
  38. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  39. disdrodb/etc/products/L2M/global.yaml +11 -3
  40. disdrodb/l0/check_configs.py +49 -16
  41. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  42. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  43. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  44. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  47. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  48. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  49. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  50. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  51. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  52. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  53. disdrodb/l0/l0_reader.py +2 -2
  54. disdrodb/l0/l0b_processing.py +70 -15
  55. disdrodb/l0/l0c_processing.py +7 -3
  56. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
  57. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  58. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  59. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  60. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  61. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  62. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  63. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  64. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  65. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  66. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  67. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  68. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  69. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  71. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
  72. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  73. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  74. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  75. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  76. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  77. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  78. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  79. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  80. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  81. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  83. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  84. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  85. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  86. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  87. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
  88. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  89. disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
  90. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  91. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
  92. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  93. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  94. disdrodb/l1/beard_model.py +31 -129
  95. disdrodb/l1/fall_velocity.py +136 -83
  96. disdrodb/l1/filters.py +25 -28
  97. disdrodb/l1/processing.py +16 -17
  98. disdrodb/l1/resampling.py +101 -38
  99. disdrodb/l1_env/routines.py +46 -17
  100. disdrodb/l2/empirical_dsd.py +6 -0
  101. disdrodb/l2/processing.py +6 -5
  102. disdrodb/metadata/geolocation.py +0 -2
  103. disdrodb/metadata/search.py +3 -4
  104. disdrodb/psd/fitting.py +16 -13
  105. disdrodb/routines/l0.py +2 -2
  106. disdrodb/routines/l1.py +173 -60
  107. disdrodb/routines/l2.py +148 -284
  108. disdrodb/routines/options.py +345 -0
  109. disdrodb/routines/wrappers.py +14 -1
  110. disdrodb/scattering/axis_ratio.py +90 -84
  111. disdrodb/scattering/permittivity.py +6 -0
  112. disdrodb/summary/routines.py +735 -670
  113. disdrodb/utils/archiving.py +51 -44
  114. disdrodb/utils/attrs.py +3 -1
  115. disdrodb/utils/dask.py +4 -4
  116. disdrodb/utils/dict.py +33 -0
  117. disdrodb/utils/encoding.py +6 -1
  118. disdrodb/utils/routines.py +9 -8
  119. disdrodb/utils/time.py +11 -3
  120. disdrodb/viz/__init__.py +0 -13
  121. disdrodb/viz/plots.py +231 -1
  122. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
  123. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
  124. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  125. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  126. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  127. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
  128. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
  129. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
  130. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
  131. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  132. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
  133. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
  134. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
  135. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env python3
2
+
2
3
  # -----------------------------------------------------------------------------.
3
4
  # Copyright (c) 2021-2023 DISDRODB developers
4
5
  #
@@ -15,21 +16,7 @@
15
16
  # You should have received a copy of the GNU General Public License
16
17
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18
  # -----------------------------------------------------------------------------.
18
- """This reader allows to read raw data from NASA APU stations.
19
-
20
- The reader allows to read raw APU data from the following NASA campaigns:
21
-
22
- - HYMEX
23
- - IFLOODS
24
- - IPHEX
25
- - OLYMPEX
26
- - ICEPOP
27
- - IMPACTS
28
- - GCPEX
29
- - WFF
30
-
31
- """
32
-
19
+ """DISDRODB reader for Granada Parsivel2 raw text data."""
33
20
  import pandas as pd
34
21
 
35
22
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -44,22 +31,22 @@ def reader(
44
31
  """Reader."""
45
32
  ##------------------------------------------------------------------------.
46
33
  #### Define column names
47
- column_names = ["time", "TO_BE_SPLITTED"]
34
+ column_names = ["TO_PARSE"]
48
35
 
49
36
  ##------------------------------------------------------------------------.
50
37
  #### Define reader options
51
38
  reader_kwargs = {}
52
39
  # - Define delimiter
53
- reader_kwargs["delimiter"] = ";"
40
+ reader_kwargs["delimiter"] = "\\n"
54
41
  # - Skip first row as columns names
55
- reader_kwargs["header"] = None
56
- reader_kwargs["skiprows"] = 0
57
- # - Skip file with encoding errors
58
- reader_kwargs["encoding_errors"] = "ignore"
42
+ # - Define encoding
43
+ reader_kwargs["encoding"] = "latin" # "ISO-8859-1"
59
44
  # - Avoid first column to become df index !!!
60
45
  reader_kwargs["index_col"] = False
61
46
  # - Define behaviour when encountering bad lines
62
47
  reader_kwargs["on_bad_lines"] = "skip"
48
+ reader_kwargs["skiprows"] = 4
49
+
63
50
  # - Define reader engine
64
51
  # - C engine is faster
65
52
  # - Python engine is more feature-complete
@@ -71,11 +58,11 @@ def reader(
71
58
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
72
59
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
73
60
  # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
74
- reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
61
+ reader_kwargs["na_values"] = ["na", "", "error"]
75
62
 
76
63
  ##------------------------------------------------------------------------.
77
64
  #### Read the data
78
- df = read_raw_text_file(
65
+ df_raw = read_raw_text_file(
79
66
  filepath=filepath,
80
67
  column_names=column_names,
81
68
  reader_kwargs=reader_kwargs,
@@ -84,26 +71,50 @@ def reader(
84
71
 
85
72
  ##------------------------------------------------------------------------.
86
73
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
87
- # Convert time column to datetime
88
- df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
74
+ # Remove corrupted rows
75
+ df_raw = df_raw[df_raw["TO_PARSE"].str.count(",") == 1106]
89
76
 
90
- # Split the 'TO_BE_SPLITTED' column
91
- df = df["TO_BE_SPLITTED"].str.split(",", n=3, expand=True)
77
+ # Create ID and Value columns
78
+ df = df_raw["TO_PARSE"].str.split(",", expand=True, n=19)
92
79
 
93
- # Assign column names
80
+ # Assign names
94
81
  names = [
95
- "station_name",
96
- "unknown",
97
- "unknown2",
98
- "raw_drop_number",
82
+ "time",
83
+ "id",
84
+ "rainfall_rate_32bit",
85
+ "snowfall_rate",
86
+ "rainfall_accumulated_32bit",
87
+ "weather_code_synop_4680",
88
+ "reflectivity_32bit",
89
+ "mor_visibility",
90
+ "rain_kinetic_energy",
91
+ "laser_amplitude",
92
+ "sensor_temperature",
93
+ "sensor_temperature_pcb",
94
+ "sensor_temperature_receiver",
95
+ "sensor_temperature_trasmitter",
96
+ "sensor_heating_current",
97
+ "sensor_battery_voltage",
98
+ "sensor_status",
99
+ "error_code",
100
+ "number_particles",
101
+ "TO_SPLIT",
99
102
  ]
100
103
  df.columns = names
101
104
 
102
- # Add the time column
103
- df["time"] = df_time
105
+ # Define datetime "time" column
106
+ df["time"] = df["time"].str.replace('"', "")
107
+ df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S")
108
+
109
+ # Retrieve raw array
110
+ df_split = df["TO_SPLIT"].str.split(",", expand=True)
111
+ df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
112
+ df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
113
+ df["raw_drop_number"] = df_split.iloc[:, 64:].agg(",".join, axis=1)
114
+ del df_split
104
115
 
105
116
  # Drop columns not agreeing with DISDRODB L0 standards
106
- df = df.drop(columns=["station_name", "unknown", "unknown2"])
117
+ df = df.drop(columns=["TO_SPLIT", "id"])
107
118
 
108
119
  # Return the dataframe adhering to DISDRODB L0 standards
109
120
  return df
@@ -0,0 +1,189 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ """Reader for SMHI OTT Parsivel2."""
19
+ import pandas as pd
20
+
21
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
22
+ from disdrodb.l0.l0a_processing import read_raw_text_file
23
+
24
+
25
# Names assigned to the first 18 ';'-separated fields of a telegram; the
# remainder of the line is kept in "TO_SPLIT" and split into the raw arrays.
_TELEGRAM_COLUMN_NAMES = [
    "time",
    "rainfall_rate_32bit",
    "rainfall_accumulated_32bit",
    "weather_code_synop_4680",  # wawa
    "reflectivity_32bit",
    "mor_visibility",
    "sample_interval",
    "laser_amplitude",
    "number_particles",
    "sensor_temperature",
    "sensor_serial_number",
    "firmware_iop",
    "sensor_heating_current",
    "sensor_battery_voltage",
    "sensor_status",
    "station_id",
    "rainfall_amount_absolute_32bit",
    "error_code",
    "TO_SPLIT",
]

# Columns not agreeing with DISDRODB L0 standards
_TELEGRAM_COLUMNS_TO_DROP = [
    "sensor_serial_number",
    "firmware_iop",
    "station_id",
    "TO_SPLIT",
]

# Number of ';' separators expected in a valid row
_TELEGRAM_N_SEPARATORS = 1106


def _split_telegram(df):
    """Split the 'TO_PARSE' column into named fields and raw arrays.

    This is the logic shared by ``parse_old_format`` and ``parse_new_format``.
    The returned ``time`` column is still a string: the caller is responsible
    for converting it to datetime.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with a single string column named ``TO_PARSE``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with one column per telegram field plus the
        ``raw_drop_concentration``, ``raw_drop_average_velocity`` and
        ``raw_drop_number`` columns (comma-separated strings).

    Raises
    ------
    ValueError
        If no row has the expected number of ';' separators.
    """
    # Remove rows with invalid number of separators
    has_valid_length = df["TO_PARSE"].str.count(";") == _TELEGRAM_N_SEPARATORS
    df = df[has_valid_length]

    # Fail with an explicit message instead of the opaque "Length mismatch"
    # error raised when assigning column names to a zero-column dataframe
    if len(df) == 0:
        raise ValueError(
            f"No valid rows with {_TELEGRAM_N_SEPARATORS} ';' separators have been found.",
        )

    # Split the columns (the last column keeps the unsplit remainder)
    df = df["TO_PARSE"].str.split(";", n=len(_TELEGRAM_COLUMN_NAMES) - 1, expand=True)
    df.columns = _TELEGRAM_COLUMN_NAMES

    # Derive raw arrays
    # - First 32 values, next 32 values and following 1024 values of the remainder
    df_split = df["TO_SPLIT"].str.split(";", expand=True)
    df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
    df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
    df["raw_drop_number"] = df_split.iloc[:, 64:1088].agg(",".join, axis=1)
    del df_split

    # Drop columns not agreeing with DISDRODB L0 standards
    return df.drop(columns=_TELEGRAM_COLUMNS_TO_DROP)


def parse_old_format(df):
    """Reformat old format.

    The time field is expected as ``%Y%m%d%H%M%S``; only its first 12
    characters are kept and the seconds are set to "00".
    Timestamps that cannot be parsed become ``NaT``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with a single string column named ``TO_PARSE``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with the parsed fields and a datetime ``time`` column.

    Raises
    ------
    ValueError
        If no row has the expected number of ';' separators.
    """
    df = _split_telegram(df)

    # Ensure the time column is datetime dtype
    df["time"] = df["time"].str[0:12] + "00"
    df["time"] = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")

    # Return the dataframe adhering to DISDRODB L0 standards
    return df


def parse_new_format(df):
    """Reformat new format.

    The time field is expected as ``%Y-%m-%d %H:%M:%S``.
    Timestamps that cannot be parsed become ``NaT``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with a single string column named ``TO_PARSE``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with the parsed fields and a datetime ``time`` column.

    Raises
    ------
    ValueError
        If no row has the expected number of ';' separators.
    """
    df = _split_telegram(df)

    # Ensure the time column is datetime dtype
    df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
134
+
135
+
136
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### Define column names
    # - Each line is read as a single string and parsed afterwards
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {
        # - Do not use any row of the file as header
        "header": None,
        # - Ignore characters that cannot be decoded
        "encoding_errors": "ignore",
        # - Define delimiter
        "delimiter": "\\n",
        # - Avoid first column to become df index !!!
        "index_col": False,
        # - Define behaviour when encountering bad lines
        "on_bad_lines": "skip",
        # - Define reader engine
        #   - C engine is faster
        #   - Python engine is more feature-complete
        "engine": "python",
        # - Define on-the-fly decompression of on-disk data
        #   - Available: gzip, bz2, zip
        "compression": "infer",
        # - Strings to recognize as NA/NaN and replace with standard NA flags
        #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
        #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
        #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
        "na_values": ["na", "", "error", "NA", "-.-"],
    }

    ##------------------------------------------------------------------------.
    #### Read the data
    df_raw = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # The new format (2025 onward) is recognized by its header line
    first_line = df_raw["TO_PARSE"].iloc[0]
    if not first_line.startswith("datetime_utc"):
        return parse_old_format(df_raw)

    # Remove the header row before parsing the new format
    return parse_new_format(df_raw.iloc[1:])
@@ -15,6 +15,7 @@
15
15
  # You should have received a copy of the GNU General Public License
16
16
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
  # -----------------------------------------------------------------------------.
18
+ """Reader for the OTT Parsivel2 sensors of the CW3E network."""
18
19
  import pandas as pd
19
20
 
20
21
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -68,11 +69,10 @@ def reader(
68
69
 
69
70
  ##------------------------------------------------------------------------.
70
71
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
71
- # Define 'time' datetime
72
+ # Remove rows with invalid number of separators
73
+ df = df[df["TO_PARSE"].str.count(";") == 1105]
72
74
 
73
75
  # Split the columns
74
- df["TO_PARSE"].iloc[0:5].str.split(";", n=16, expand=True).iloc[0]
75
-
76
76
  df = df["TO_PARSE"].str.split(";", n=16, expand=True)
77
77
 
78
78
  # Assign column names
@@ -97,31 +97,13 @@ def reader(
97
97
  ]
98
98
  df.columns = names
99
99
 
100
- # Derive raw drop arrays
101
- def split_string(s):
102
- vals = [v.strip() for v in s.split(";")]
103
- c1 = ";".join(vals[:32])
104
- c2 = ";".join(vals[32:64])
105
- c3 = ";".join(vals[64:1088])
106
- c4 = vals[1088]
107
- c5 = vals[1089]
108
- series = pd.Series(
109
- {
110
- "raw_drop_concentration": c1,
111
- "raw_drop_average_velocity": c2,
112
- "raw_drop_number": c3,
113
- "rain_kinetic_energy": c4,
114
- "CHECK_EMPTY": c5,
115
- },
116
- )
117
- return series
118
-
119
- splitted_string = df["TO_SPLIT"].apply(split_string)
120
- df["raw_drop_concentration"] = splitted_string["raw_drop_concentration"]
121
- df["raw_drop_average_velocity"] = splitted_string["raw_drop_average_velocity"]
122
- df["raw_drop_number"] = splitted_string["raw_drop_number"]
123
- df["rain_kinetic_energy"] = splitted_string["rain_kinetic_energy"]
124
- df["CHECK_EMPTY"] = splitted_string["CHECK_EMPTY"]
100
+ # Derive raw arrays
101
+ df_split = df["TO_SPLIT"].str.split(";", expand=True)
102
+ df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(",".join, axis=1)
103
+ df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(",".join, axis=1)
104
+ df["raw_drop_number"] = df_split.iloc[:, 64:1088].agg(",".join, axis=1)
105
+ df["rain_kinetic_energy"] = df_split.iloc[:, 1088]
106
+ df["CHECK_EMPTY"] = df_split.iloc[:, 1089]
125
107
 
126
108
  # Ensure valid observation
127
109
  df = df[df["CHECK_EMPTY"] == ""]