disdrodb 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. disdrodb/_version.py +2 -2
  2. disdrodb/accessor/methods.py +10 -3
  3. disdrodb/api/checks.py +1 -1
  4. disdrodb/api/io.py +6 -1
  5. disdrodb/constants.py +1 -1
  6. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  7. disdrodb/etc/products/L1/global.yaml +1 -1
  8. disdrodb/etc/products/L2E/global.yaml +1 -1
  9. disdrodb/etc/products/L2M/global.yaml +1 -1
  10. disdrodb/issue/checks.py +2 -2
  11. disdrodb/l0/check_configs.py +1 -1
  12. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  13. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  14. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  15. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  16. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  17. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  18. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  19. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  20. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  21. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  22. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  23. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  24. disdrodb/l0/l0a_processing.py +6 -2
  25. disdrodb/l0/l0b_processing.py +26 -19
  26. disdrodb/l0/l0c_processing.py +10 -0
  27. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  28. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  29. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  30. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  31. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  32. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  33. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  34. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  35. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  36. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  37. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  38. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  39. disdrodb/l0/readers/PARSIVEL/NASA/LPVEX.py +25 -13
  40. disdrodb/l0/readers/PARSIVEL/NASA/MC3E.py +1 -1
  41. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +1 -1
  42. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  43. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  44. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  45. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  46. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  47. disdrodb/l0/readers/PARSIVEL2/{NASA/GCPEX.py → NORWAY/UIB.py} +54 -29
  48. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +6 -3
  49. disdrodb/l0/readers/{PARSIVEL/NASA/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  50. disdrodb/l0/readers/PARSIVEL2/USA/CW3E.py +48 -21
  51. disdrodb/l0/readers/{PARSIVEL/NASA/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  52. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  53. disdrodb/l1/beard_model.py +45 -1
  54. disdrodb/l1/fall_velocity.py +1 -6
  55. disdrodb/l1/filters.py +2 -0
  56. disdrodb/l2/empirical_dsd.py +12 -8
  57. disdrodb/routines/l0.py +2 -2
  58. disdrodb/routines/options.py +2 -0
  59. disdrodb/scattering/axis_ratio.py +3 -0
  60. disdrodb/scattering/routines.py +1 -1
  61. disdrodb/summary/routines.py +63 -61
  62. disdrodb/utils/compression.py +4 -2
  63. disdrodb/utils/dask.py +31 -11
  64. disdrodb/utils/manipulations.py +7 -1
  65. disdrodb/viz/plots.py +5 -3
  66. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  67. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/RECORD +71 -54
  68. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  69. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  70. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  71. {disdrodb-0.2.0.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env python3
2
1
  # -----------------------------------------------------------------------------.
3
2
  # Copyright (c) 2021-2023 DISDRODB developers
4
3
  #
@@ -15,7 +14,8 @@
15
14
  # You should have received a copy of the GNU General Public License
16
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
16
  # -----------------------------------------------------------------------------.
18
- """This reader allows to read raw data from NASA GCPEX, OLYMPEX and IPHEX campaigns."""
17
+ """DISDRODB reader for University of Bergen OTT Parsivel 2 raw data."""
18
+
19
19
  import pandas as pd
20
20
 
21
21
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -30,34 +30,44 @@ def reader(
30
30
  """Reader."""
31
31
  ##------------------------------------------------------------------------.
32
32
  #### Define column names
33
- column_names = ["time", "TO_BE_SPLITTED"]
33
+ column_names = ["TO_PARSE"]
34
34
 
35
35
  ##------------------------------------------------------------------------.
36
36
  #### Define reader options
37
37
  reader_kwargs = {}
38
+
38
39
  # - Define delimiter
39
- reader_kwargs["delimiter"] = ";"
40
+ reader_kwargs["delimiter"] = "\\n"
41
+
40
42
  # - Skip first row as columns names
41
43
  reader_kwargs["header"] = None
44
+
45
+ # - Skip header
42
46
  reader_kwargs["skiprows"] = 0
43
- # - Skip file with encoding errors
44
- reader_kwargs["encoding_errors"] = "ignore"
47
+
48
+ # - Define encoding
49
+ reader_kwargs["encoding"] = "ISO-8859-1"
50
+
45
51
  # - Avoid first column to become df index !!!
46
52
  reader_kwargs["index_col"] = False
53
+
47
54
  # - Define behaviour when encountering bad lines
48
55
  reader_kwargs["on_bad_lines"] = "skip"
56
+
49
57
  # - Define reader engine
50
58
  # - C engine is faster
51
59
  # - Python engine is more feature-complete
52
60
  reader_kwargs["engine"] = "python"
61
+
53
62
  # - Define on-the-fly decompression of on-disk data
54
63
  # - Available: gzip, bz2, zip
55
- reader_kwargs["compression"] = "infer"
64
+ # reader_kwargs['compression'] = 'xz'
65
+
56
66
  # - Strings to recognize as NA/NaN and replace with standard NA flags
57
67
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
58
68
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
59
69
  # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
60
- reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
70
+ reader_kwargs["na_values"] = ["na", "error", "-.-", " NA"]
61
71
 
62
72
  ##------------------------------------------------------------------------.
63
73
  #### Read the data
@@ -70,37 +80,52 @@ def reader(
70
80
 
71
81
  ##------------------------------------------------------------------------.
72
82
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
73
- # Convert time column to datetime
74
- df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
83
+ # Remove corrupted rows
84
+ df = df[df["TO_PARSE"].str.count(";") == 1101]
75
85
 
76
- # Split the 'TO_BE_SPLITTED' column
77
- df = df["TO_BE_SPLITTED"].str.split(",", n=9, expand=True)
86
+ # Split into columns
87
+ df = df["TO_PARSE"].str.split(";", expand=True, n=13)
78
88
 
79
- # Assign column names
89
+ # Assign columns names
80
90
  names = [
81
- "station_name",
82
- "sensor_status",
83
- "sensor_temperature",
84
- "number_particles",
91
+ "date",
92
+ "time",
85
93
  "rainfall_rate_32bit",
86
- "reflectivity_16bit",
87
- "mor_visibility",
94
+ "rainfall_accumulated_32bit",
95
+ "snowfall_rate",
88
96
  "weather_code_synop_4680",
89
- "weather_code_synop_4677",
90
- "raw_drop_number",
97
+ "reflectivity_32bit",
98
+ "mor_visibility",
99
+ "rain_kinetic_energy",
100
+ "sensor_temperature",
101
+ "laser_amplitude",
102
+ "number_particles",
103
+ "sensor_battery_voltage",
104
+ "TO_SPLIT",
91
105
  ]
92
106
  df.columns = names
93
107
 
94
- # Add the time column
95
- df["time"] = df_time
108
+ # Sanitize date
109
+ date = pd.to_datetime(df["date"], format="%d.%m.%Y", errors="coerce")
110
+ date = date.ffill().bfill()
96
111
 
97
- # Drop columns not agreeing with DISDRODB L0 standards
98
- df = df.drop(columns=["station_name"])
112
+ # Add datetime time column
113
+ time_str = date.astype(str) + "T" + df["time"]
114
+ df["time"] = pd.to_datetime(time_str, format="%Y-%m-%dT%H:%M:%S", errors="coerce")
115
+ df = df.drop(columns=["date"])
116
+
117
+ # Derive raw drop arrays
118
+ df_split = df["TO_SPLIT"].str.split(";", expand=True)
119
+ df["raw_drop_concentration"] = df_split.iloc[:, :32].agg(";".join, axis=1)
120
+ df["raw_drop_average_velocity"] = df_split.iloc[:, 32:64].agg(";".join, axis=1)
121
+ df["raw_drop_number"] = df_split.iloc[:, 64:].agg(";".join, axis=1)
122
+ del df_split
99
123
 
100
- # Drop rows with invalid values
101
- # --> Ensure that weather_code_synop_4677 has length 2
102
- # --> If a previous column is missing it will have 000
103
- df = df[df["weather_code_synop_4677"].str.len() == 2]
124
+ # Drop columns not agreeing with DISDRODB L0 standards
125
+ columns_to_drop = [
126
+ "TO_SPLIT",
127
+ ]
128
+ df = df.drop(columns=columns_to_drop)
104
129
 
105
130
  # Return the dataframe adhering to DISDRODB L0 standards
106
131
  return df
@@ -154,9 +154,12 @@ def read_txt_file(file, filename, logger):
154
154
  df["time"] = pd.to_datetime(time_str, format="%Y%m%d%H%M%S", errors="coerce")
155
155
 
156
156
  # Keep only rows with valid raw_drop_number
157
- df = df[df["raw_drop_number"].str.count(";") == 1024]
158
- if len(df) == 0:
159
- raise ValueError("Invalid raw drop number field.")
157
+ invalid_data = df["raw_drop_number"].str.count(";") != 1024
158
+ df.loc[invalid_data, "raw_drop_number"] = "NaN" # TODO: if number_particles = 0, could be set to "0".
159
+
160
+ # df = df[df["raw_drop_number"].str.count(";") == 1024]
161
+ # if len(df) == 0:
162
+ # raise ValueError("Invalid raw drop number field.")
160
163
 
161
164
  # Drop columns not agreeing with DISDRODB L0 standards
162
165
  # columns_to_drop = [
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env python3
2
1
  # -----------------------------------------------------------------------------.
3
2
  # Copyright (c) 2021-2023 DISDRODB developers
4
3
  #
@@ -15,6 +14,8 @@
15
14
  # You should have received a copy of the GNU General Public License
16
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
16
  # -----------------------------------------------------------------------------.
17
+ """DISDRODB reader for Colorado State University OTT Parsivel 2 raw data."""
18
+
18
19
  import pandas as pd
19
20
 
20
21
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -29,34 +30,44 @@ def reader(
29
30
  """Reader."""
30
31
  ##------------------------------------------------------------------------.
31
32
  #### Define column names
32
- column_names = ["time", "TO_BE_SPLITTED"]
33
+ column_names = ["TO_PARSE"]
33
34
 
34
35
  ##------------------------------------------------------------------------.
35
36
  #### Define reader options
36
37
  reader_kwargs = {}
38
+
37
39
  # - Define delimiter
38
- reader_kwargs["delimiter"] = ";"
40
+ reader_kwargs["delimiter"] = "\\n"
41
+
39
42
  # - Skip first row as columns names
40
43
  reader_kwargs["header"] = None
44
+
45
+ # - Skip header
41
46
  reader_kwargs["skiprows"] = 0
42
- # - Skip file with encoding errors
43
- reader_kwargs["encoding_errors"] = "ignore"
47
+
48
+ # - Define encoding
49
+ reader_kwargs["encoding"] = "ISO-8859-1"
50
+
44
51
  # - Avoid first column to become df index !!!
45
52
  reader_kwargs["index_col"] = False
53
+
46
54
  # - Define behaviour when encountering bad lines
47
55
  reader_kwargs["on_bad_lines"] = "skip"
56
+
48
57
  # - Define reader engine
49
58
  # - C engine is faster
50
59
  # - Python engine is more feature-complete
51
60
  reader_kwargs["engine"] = "python"
61
+
52
62
  # - Define on-the-fly decompression of on-disk data
53
63
  # - Available: gzip, bz2, zip
54
- reader_kwargs["compression"] = "infer"
64
+ # reader_kwargs['compression'] = 'xz'
65
+
55
66
  # - Strings to recognize as NA/NaN and replace with standard NA flags
56
67
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
57
68
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
58
69
  # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
59
- reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
70
+ reader_kwargs["na_values"] = ["na", "error", "-.-", " NA"]
60
71
 
61
72
  ##------------------------------------------------------------------------.
62
73
  #### Read the data
@@ -69,37 +80,59 @@ def reader(
69
80
 
70
81
  ##------------------------------------------------------------------------.
71
82
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
72
- # Convert time column to datetime
73
- df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
83
+ # Remove corrupted rows
84
+ df = df[df["TO_PARSE"].str.count(",").isin([15, 1040])]
74
85
 
75
- # Split the 'TO_BE_SPLITTED' column
76
- df = df["TO_BE_SPLITTED"].str.split(",", expand=True, n=9)
86
+ # Split into columns
87
+ df["TO_PARSE"] = df["TO_PARSE"] + ",0"
88
+ df = df["TO_PARSE"].str.split(",", expand=True, n=16)
77
89
 
78
- # Assign column names
79
- columns_names = [
80
- "station_name",
81
- "sensor_status",
82
- "sensor_temperature",
83
- "number_particles",
90
+ # Assign columns names
91
+ names = [
92
+ "date",
93
+ "time",
84
94
  "rainfall_rate_32bit",
85
- "reflectivity_16bit",
86
- "mor_visibility",
95
+ "rainfall_accumulated_32bit",
87
96
  "weather_code_synop_4680",
88
- "weather_code_synop_4677",
97
+ "weather_code_metar_4678",
98
+ "weather_code_nws",
99
+ "reflectivity_32bit",
100
+ "mor_visibility",
101
+ "laser_amplitude",
102
+ "number_particles",
103
+ "sensor_temperature",
104
+ "sensor_heating_current",
105
+ "sensor_battery_voltage",
106
+ "rain_kinetic_energy",
107
+ "snowfall_rate",
89
108
  "raw_drop_number",
90
109
  ]
91
- df.columns = columns_names
110
+ df.columns = names
92
111
 
93
- # Add the time column
94
- df["time"] = df_time
112
+ # Add datetime time column
113
+ time_str = df["date"] + "-" + df["time"]
114
+ df["time"] = pd.to_datetime(time_str, format="%d.%m.%Y-%H:%M:%S", errors="coerce")
95
115
 
96
- # Drop columns not agreeing with DISDRODB L0 standards
97
- df = df.drop(columns=["station_name"])
116
+ # Derive the raw spectrum
117
+ # - When no drops detected, None
118
+ # - After conversion to string, becomes NaN
119
+ df["raw_drop_number"] = df["raw_drop_number"].astype("string")
120
+ df["raw_drop_number"] = df["raw_drop_number"].str.strip()
121
+
122
+ # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
123
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
124
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>,0", "")
125
+
126
+ # Preprocess the raw spectrum (raw_drop_number)
127
+ # - Add 0 before every , if , not preceded by a digit
128
+ # - Example: ',,1,,' --> '0,0,1,0,'
129
+ df["raw_drop_number"] = df["raw_drop_number"].str.replace(r"(?<!\d),", "0,", regex=True)
98
130
 
99
- # Drop rows with invalid values
100
- # --> Ensure that weather_code_synop_4677 has length 2
101
- # --> If a previous column is missing it will have 000
102
- df = df[df["weather_code_synop_4677"].str.len() == 2]
131
+ # Drop columns not agreeing with DISDRODB L0 standards
132
+ columns_to_drop = [
133
+ "date",
134
+ ]
135
+ df = df.drop(columns=columns_to_drop)
103
136
 
104
137
  # Return the dataframe adhering to DISDRODB L0 standards
105
138
  return df
@@ -70,31 +70,58 @@ def reader(
70
70
  ##------------------------------------------------------------------------.
71
71
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
72
72
  # Remove rows with invalid number of separators
73
- df = df[df["TO_PARSE"].str.count(";") == 1105]
73
+ df = df[df["TO_PARSE"].str.count(";").isin([1104, 1105])]
74
+ if len(df) == 0:
75
+ raise ValueError(f"No valid data in {filepath}")
76
+
77
+ n_delimiters = int(df["TO_PARSE"].str.count(";").iloc[0])
78
+ if n_delimiters == 1104:
79
+ names = [
80
+ "sensor_serial_number",
81
+ "sensor_status",
82
+ "laser_amplitude",
83
+ "sensor_heating_current",
84
+ "sensor_battery_voltage",
85
+ "dummy_date",
86
+ "sensor_time",
87
+ "sensor_date",
88
+ # "sensor_temperature",
89
+ "number_particles",
90
+ "rainfall_rate_32bit",
91
+ "reflectivity_32bit",
92
+ "rainfall_accumulated_16bit",
93
+ "mor_visibility",
94
+ "weather_code_synop_4680",
95
+ "weather_code_synop_4677",
96
+ "TO_SPLIT",
97
+ ]
98
+ n = 15
99
+ else:
100
+ names = [
101
+ "sensor_serial_number",
102
+ "sensor_status",
103
+ "laser_amplitude",
104
+ "sensor_heating_current",
105
+ "sensor_battery_voltage",
106
+ "dummy_date",
107
+ "sensor_time",
108
+ "sensor_date",
109
+ "sensor_temperature",
110
+ "number_particles",
111
+ "rainfall_rate_32bit",
112
+ "reflectivity_32bit",
113
+ "rainfall_accumulated_16bit",
114
+ "mor_visibility",
115
+ "weather_code_synop_4680",
116
+ "weather_code_synop_4677",
117
+ "TO_SPLIT",
118
+ ]
119
+ n = 16
74
120
 
75
121
  # Split the columns
76
- df = df["TO_PARSE"].str.split(";", n=16, expand=True)
122
+ df = df["TO_PARSE"].str.split(";", n=n, expand=True)
77
123
 
78
124
  # Assign column names
79
- names = [
80
- "sensor_serial_number",
81
- "sensor_status",
82
- "laser_amplitude",
83
- "sensor_heating_current",
84
- "sensor_battery_voltage",
85
- "dummy_date",
86
- "sensor_time",
87
- "sensor_date",
88
- "sensor_temperature",
89
- "number_particles",
90
- "rainfall_rate_32bit",
91
- "reflectivity_32bit",
92
- "rainfall_accumulated_16bit",
93
- "mor_visibility",
94
- "weather_code_synop_4680",
95
- "weather_code_synop_4677",
96
- "TO_SPLIT",
97
- ]
98
125
  df.columns = names
99
126
 
100
127
  # Derive raw arrays
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env python3
2
+
2
3
  # -----------------------------------------------------------------------------.
3
4
  # Copyright (c) 2021-2023 DISDRODB developers
4
5
  #
@@ -29,17 +30,47 @@ def reader(
29
30
  """Reader."""
30
31
  ##------------------------------------------------------------------------.
31
32
  #### Define column names
32
- column_names = ["time", "TO_BE_SPLITTED"]
33
+ column_names = [
34
+ "date",
35
+ "time",
36
+ "sensor_status",
37
+ "sample_interval",
38
+ "n1",
39
+ "n2",
40
+ "n3",
41
+ "n4",
42
+ "n5",
43
+ "n6",
44
+ "n7",
45
+ "n8",
46
+ "n9",
47
+ "n10",
48
+ "n11",
49
+ "n12",
50
+ "n13",
51
+ "n14",
52
+ "n15",
53
+ "n16",
54
+ "n17",
55
+ "n18",
56
+ "n19",
57
+ "n20",
58
+ "RI",
59
+ "RA",
60
+ "RAT",
61
+ ]
33
62
 
34
63
  ##------------------------------------------------------------------------.
35
64
  #### Define reader options
36
65
  reader_kwargs = {}
37
66
  # - Define delimiter
38
- reader_kwargs["delimiter"] = ";"
39
- # - Skip first row as columns names
67
+ reader_kwargs["delimiter"] = "\\t"
68
+ # Skip header
40
69
  reader_kwargs["header"] = None
41
- # - Skip file with encoding errors
42
- reader_kwargs["encoding_errors"] = "ignore"
70
+ # Skip first row as columns names
71
+ reader_kwargs["skiprows"] = 1
72
+ # - Define encoding
73
+ reader_kwargs["encoding"] = "ISO-8859-1"
43
74
  # - Avoid first column to become df index !!!
44
75
  reader_kwargs["index_col"] = False
45
76
  # - Define behaviour when encountering bad lines
@@ -55,7 +86,7 @@ def reader(
55
86
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
56
87
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
57
88
  # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
58
- reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
89
+ reader_kwargs["na_values"] = ["na", "", "error"]
59
90
 
60
91
  ##------------------------------------------------------------------------.
61
92
  #### Read the data
@@ -68,37 +99,22 @@ def reader(
68
99
 
69
100
  ##------------------------------------------------------------------------.
70
101
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
71
- # Convert time column to datetime
72
- df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
73
-
74
- # Split the 'TO_BE_SPLITTED' column
75
- df = df["TO_BE_SPLITTED"].str.split(",", expand=True, n=9)
76
-
77
- # Assign column names
78
- columns_names = [
79
- "station_name",
80
- "sensor_status",
81
- "sensor_temperature",
82
- "number_particles",
83
- "rainfall_rate_32bit",
84
- "reflectivity_16bit",
85
- "mor_visibility",
86
- "weather_code_synop_4680",
87
- "weather_code_synop_4677",
88
- "raw_drop_number",
89
- ]
90
- df.columns = columns_names
102
+ # Replace 'status' NaN with 0
103
+ df["sensor_status"] = df["sensor_status"].astype(float).fillna(value=0).astype(int)
91
104
 
92
- # Add the time column
93
- df["time"] = df_time
105
+ # Define 'time' datetime column
106
+ df["time"] = df["date"].astype(str) + " " + df["time"].astype(str)
107
+ df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
108
+ df = df.drop(columns=["date"])
94
109
 
95
- # Drop columns not agreeing with DISDRODB L0 standards
96
- df = df.drop(columns=["station_name"])
110
+ # Create raw_drop_number column
111
+ bin_columns = ["n" + str(i) for i in range(1, 21)]
112
+ df_arr = df[bin_columns]
113
+ df_raw_drop_number = df_arr.agg(";".join, axis=1)
114
+ df["raw_drop_number"] = df_raw_drop_number
97
115
 
98
- # Drop rows with invalid values
99
- # --> Ensure that weather_code_synop_4677 has length 2
100
- # --> If a previous column is missing it will have 000
101
- df = df[df["weather_code_synop_4677"].str.len() == 2]
116
+ # Remove bins columns
117
+ df = df.drop(columns=bin_columns)
102
118
 
103
119
  # Return the dataframe adhering to DISDRODB L0 standards
104
120
  return df
@@ -16,7 +16,7 @@
16
16
  # You should have received a copy of the GNU General Public License
17
17
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
18
  # -----------------------------------------------------------------------------.
19
- """DISDRODB reader for KMI Biral SW250 sensors."""
19
+ """DISDRODB reader for KMI Biral SWS250 sensors."""
20
20
  import pandas as pd
21
21
 
22
22
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -385,6 +385,49 @@ def get_water_density(temperature, air_pressure, sea_level_air_pressure=101_325)
385
385
  return get_pure_water_density(temperature) * np.exp(-1 * water_compressibility * delta_pressure)
386
386
 
387
387
 
388
+ ####---------------------------------------------------------------------------.
389
+ #### Wrappers
390
+ def retrieve_air_pressure(ds_env):
391
+ """Retrieve air pressure."""
392
+ if "air_pressure" in ds_env:
393
+ return ds_env["air_pressure"]
394
+ air_pressure = get_air_pressure_at_height(
395
+ altitude=ds_env["altitude"],
396
+ latitude=ds_env["latitude"],
397
+ temperature=ds_env["temperature"],
398
+ sea_level_air_pressure=ds_env["sea_level_air_pressure"],
399
+ lapse_rate=ds_env["lapse_rate"],
400
+ )
401
+ return air_pressure
402
+
403
+
404
+ def retrieve_air_dynamic_viscosity(ds_env):
405
+ """Retrieve air dynamic viscosity."""
406
+ air_viscosity = get_air_dynamic_viscosity(ds_env["temperature"])
407
+ return air_viscosity
408
+
409
+
410
+ def retrieve_air_density(ds_env):
411
+ """Retrieve air density."""
412
+ temperature = ds_env["temperature"]
413
+ relative_humidity = ds_env["relative_humidity"]
414
+ air_pressure = retrieve_air_pressure(ds_env)
415
+ vapor_pressure = get_vapor_actual_pressure(
416
+ relative_humidity=relative_humidity,
417
+ temperature=temperature,
418
+ )
419
+ air_density = get_air_density(
420
+ temperature=temperature,
421
+ air_pressure=air_pressure,
422
+ vapor_pressure=vapor_pressure,
423
+ )
424
+ return air_density
425
+
426
+
427
+ ####---------------------------------------------------------------------------.
428
+ #### Beard model
429
+
430
+
388
431
  def get_raindrop_reynolds_number(diameter, temperature, air_density, water_density, g):
389
432
  """Compute raindrop Reynolds number.
390
433
 
@@ -395,6 +438,7 @@ def get_raindrop_reynolds_number(diameter, temperature, air_density, water_densi
395
438
  Coefficients are taken from Table 1 of Beard 1976.
396
439
 
397
440
  Reference: Beard 1976; Pruppacher & Klett 1978
441
+ See also Table A1 in Rahman et al., 2020.
398
442
 
399
443
  Parameters
400
444
  ----------
@@ -422,7 +466,7 @@ def get_raindrop_reynolds_number(diameter, temperature, air_density, water_densi
422
466
  air_viscosity = get_air_dynamic_viscosity(temperature) # kg/(m*s) (aka Pa*s).
423
467
  delta_density = water_density - air_density
424
468
 
425
- # Compute Davis number for small droplets
469
+ # Compute Davies number for small droplets
426
470
  davis_number = 4 * air_density * delta_density * g * diameter**3 / (3 * air_viscosity**2)
427
471
 
428
472
  # Compute the slip correction (is approx 1 and can be discarded)
@@ -45,11 +45,6 @@ def get_fall_velocity_atlas_1973(diameter):
45
45
  Reviews of Geophysics, 11(1), 1-35.
46
46
  https://doi.org/10.1029/RG011i001p00001
47
47
 
48
- Atlas, D., & Ulbrich, C. W. (1977).
49
- Path- and area-integrated rainfall measurement by microwave attenuation in the 1-3 cm band.
50
- Journal of Applied Meteorology, 16(12), 1322-1331.
51
- https://doi.org/10.1175/1520-0450(1977)016<1322:PAAIRM>2.0.CO;2
52
-
53
48
  Gunn, R., & Kinzer, G. D. (1949).
54
49
  The terminal velocity of fall for water droplets in stagnant air.
55
50
  Journal of Meteorology, 6(4), 243-248.
@@ -111,7 +106,7 @@ def get_fall_velocity_uplinger_1981(diameter):
111
106
 
112
107
  """
113
108
  # Valid between 0.1 and 7 mm
114
- fall_velocity = 4.874 * diameter * np.exp(-0.195 * diameter)
109
+ fall_velocity = 4.874 * diameter * np.exp(-0.195 * diameter) # 4.854?
115
110
  fall_velocity = fall_velocity.clip(min=0, max=None)
116
111
  return fall_velocity
117
112
 
disdrodb/l1/filters.py CHANGED
@@ -157,6 +157,8 @@ def define_raindrop_spectrum_mask(
157
157
  A boolean mask array indicating valid bins according to the specified criteria.
158
158
 
159
159
  """
160
+ # TODO: use lower and upper fall_velocity !
161
+
160
162
  # Ensure it creates a 2D mask if the fall_velocity does not vary over time
161
163
  if "time" in drop_number.dims and "time" not in fall_velocity.dims:
162
164
  drop_number = drop_number.isel(time=0)
@@ -220,27 +220,31 @@ def get_effective_sampling_area(sensor_name, diameter):
220
220
  check_sensor_name(sensor_name)
221
221
  if sensor_name in ["PARSIVEL", "PARSIVEL2"]:
222
222
  # Calculate sampling area for each diameter bin (S_i)
223
+ # - Parsivel removes margin fallers !
224
+ # - The effective sampling area decreases with increasing drop diameter
225
+ # sampling_area = 0.0054 # m2
223
226
  L = 180 / 1000 # Length of the Parsivel beam in m (180 mm)
224
227
  B = 30 / 1000 # Width of the Parsivel beam in m (30mm)
225
- sampling_area = L * (B - diameter / 2)
228
+ sampling_area = L * (B - diameter / 2) # d_eq
226
229
  return sampling_area
227
- if sensor_name == "LPM":
230
+ if sensor_name in ["LPM", "LPM_V0"]:
228
231
  # Calculate sampling area for each diameter bin (S_i)
229
- L = 228 / 1000 # Length of the Parsivel beam in m (228 mm)
230
- B = 20 / 1000 # Width of the Parsivel beam in m (20 mm)
231
- sampling_area = L * (B - diameter / 2)
232
+ # L = 228 / 1000 # Length of the beam in m (228 mm)
233
+ # B = 20 / 1000 # Width of the beam in m (20 mm)
234
+ # sampling_area = L * (B - diameter / 2)
235
+ sampling_area = 0.0045 # m2
232
236
  return sampling_area
233
237
  if sensor_name == "PWS100":
234
- sampling_area = 0.004 # m2 # TODO: L * (B - diameter / 2) ?
238
+ sampling_area = 0.004 # m2
235
239
  return sampling_area
236
240
  if sensor_name == "RD80":
237
241
  sampling_area = 0.005 # m2
238
242
  return sampling_area
239
- if sensor_name == "SWS250": # TODO: L * (B - diameter / 2) ?
243
+ if sensor_name == "SWS250":
240
244
  # Table 29 of the manual states that the sample volume is 400 cm3; path length?
241
245
  # Distance between the end of the hood heaters is 291 mm.
242
246
  # Adding a factor of 1.5 for better representation of the Tx-Rx distance: L= 436 mm.
243
- sampling_area = 0.0091 # m2
247
+ sampling_area = 0.0091 # m2 # 0.006504 m2 maybe?
244
248
  return sampling_area
245
249
  raise NotImplementedError(f"Effective sampling area for {sensor_name} must yet to be specified in the software.")
246
250