disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. disdrodb/__init__.py +68 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +177 -24
  7. disdrodb/api/configs.py +3 -3
  8. disdrodb/api/info.py +13 -13
  9. disdrodb/api/io.py +281 -22
  10. disdrodb/api/path.py +184 -195
  11. disdrodb/api/search.py +18 -9
  12. disdrodb/cli/disdrodb_create_summary.py +103 -0
  13. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  14. disdrodb/cli/disdrodb_run_l0.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
  19. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  20. disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
  21. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  22. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  23. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  24. disdrodb/configs.py +149 -4
  25. disdrodb/constants.py +61 -0
  26. disdrodb/data_transfer/download_data.py +127 -11
  27. disdrodb/etc/configs/attributes.yaml +339 -0
  28. disdrodb/etc/configs/encodings.yaml +473 -0
  29. disdrodb/etc/products/L1/global.yaml +13 -0
  30. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  31. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +22 -0
  33. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  34. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  35. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  38. disdrodb/etc/products/L2M/global.yaml +26 -0
  39. disdrodb/issue/writer.py +2 -0
  40. disdrodb/l0/__init__.py +13 -0
  41. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  42. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  43. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  44. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  45. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  46. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  48. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  49. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  50. disdrodb/l0/l0a_processing.py +37 -32
  51. disdrodb/l0/l0b_nc_processing.py +118 -8
  52. disdrodb/l0/l0b_processing.py +30 -65
  53. disdrodb/l0/l0c_processing.py +369 -259
  54. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  55. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  56. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  58. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  59. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  63. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  66. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  67. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  69. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  71. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  72. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  73. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
  74. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  75. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  76. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  79. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  80. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  81. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  83. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
  84. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  85. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  86. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  87. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  88. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  89. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  90. disdrodb/l1/__init__.py +5 -0
  91. disdrodb/l1/fall_velocity.py +46 -0
  92. disdrodb/l1/filters.py +34 -20
  93. disdrodb/l1/processing.py +46 -45
  94. disdrodb/l1/resampling.py +77 -66
  95. disdrodb/l1_env/routines.py +18 -3
  96. disdrodb/l2/__init__.py +7 -0
  97. disdrodb/l2/empirical_dsd.py +58 -10
  98. disdrodb/l2/processing.py +268 -117
  99. disdrodb/metadata/checks.py +132 -125
  100. disdrodb/metadata/standards.py +3 -1
  101. disdrodb/psd/fitting.py +631 -345
  102. disdrodb/psd/models.py +9 -6
  103. disdrodb/routines/__init__.py +54 -0
  104. disdrodb/{l0/routines.py → routines/l0.py} +316 -355
  105. disdrodb/{l1/routines.py → routines/l1.py} +76 -116
  106. disdrodb/routines/l2.py +1019 -0
  107. disdrodb/{routines.py → routines/wrappers.py} +98 -10
  108. disdrodb/scattering/__init__.py +16 -4
  109. disdrodb/scattering/axis_ratio.py +61 -37
  110. disdrodb/scattering/permittivity.py +504 -0
  111. disdrodb/scattering/routines.py +746 -184
  112. disdrodb/summary/__init__.py +17 -0
  113. disdrodb/summary/routines.py +4196 -0
  114. disdrodb/utils/archiving.py +434 -0
  115. disdrodb/utils/attrs.py +68 -125
  116. disdrodb/utils/cli.py +5 -5
  117. disdrodb/utils/compression.py +30 -1
  118. disdrodb/utils/dask.py +121 -9
  119. disdrodb/utils/dataframe.py +61 -7
  120. disdrodb/utils/decorators.py +31 -0
  121. disdrodb/utils/directories.py +35 -15
  122. disdrodb/utils/encoding.py +37 -19
  123. disdrodb/{l2 → utils}/event.py +15 -173
  124. disdrodb/utils/logger.py +14 -7
  125. disdrodb/utils/manipulations.py +81 -0
  126. disdrodb/utils/routines.py +166 -0
  127. disdrodb/utils/subsetting.py +214 -0
  128. disdrodb/utils/time.py +35 -177
  129. disdrodb/utils/writer.py +20 -7
  130. disdrodb/utils/xarray.py +5 -4
  131. disdrodb/viz/__init__.py +13 -0
  132. disdrodb/viz/plots.py +398 -0
  133. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
  134. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
  135. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
  136. disdrodb/l1/encoding_attrs.py +0 -642
  137. disdrodb/l2/processing_options.py +0 -213
  138. disdrodb/l2/routines.py +0 -868
  139. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  140. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  141. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  142. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,123 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import pandas as pd
19
+
20
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
21
+ from disdrodb.l0.l0a_processing import read_raw_text_file
22
+
23
+
24
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### Define column names
    # Each raw text line is read as a single string and parsed below
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"
    # - Skip first row as columns names
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Identify groups of lines corresponding to same measurement
    # - Each measurement block starts with a line beginning with '#0#,'
    is_start = df["TO_PARSE"].str.startswith("#0#,")
    df["observation_id"] = is_start.cumsum()

    # Loop over groups and create a dataframe with a single row for each measurement
    list_obs = []
    for _, df_obs in df.groupby("observation_id", sort=False):
        # Skip malformed measurement blocks.
        # - A valid measurement spans 6 rows (or 7 with a trailing line).
        # - BUGFIX: the original used 'pass' here, which did not skip the group;
        #   groups of any other length then crashed the 6-name column
        #   assignment below with a ValueError.
        if len(df_obs) not in [6, 7]:
            continue

        # Remove #<id># prefix and last comma
        series = df_obs["TO_PARSE"].str.split(",", n=1, expand=True)[1].str.rstrip(",")
        if len(df_obs) == 7:
            series = series.iloc[0:6]

        # Create dataframe and name columns
        df_obs = series.to_frame().T
        df_obs.columns = [
            "time",
            "TO_SPLIT1",
            "TO_SPLIT2",
            "raw_drop_concentration",
            "raw_drop_average_velocity",
            "raw_drop_number",
        ]

        # Append to the list
        list_obs.append(df_obs)

    # Concat all timesteps into a single dataframe
    df = pd.concat(list_obs)

    # Split and rename remaining variables
    df_split1 = df["TO_SPLIT1"].str.split(",", expand=True)
    df_split1.columns = [
        "weather_code_synop_4680",
        "rainfall_accumulated_32bit",
        # NOTE(review): "rainfall_rate32bit" lacks the underscore used by
        # similar names elsewhere (e.g. "rainfall_rate_32bit") — confirm
        # against the sensor L0 standard variable names.
        "rainfall_rate32bit",
        "reflectivity_32bit",
    ]
    df_split2 = df["TO_SPLIT2"].str.split(",", expand=True)
    df_split2.columns = ["parsivel_id", "sensor_serial_number", "mor_visibility", "laser_amplitude", "sensor_status"]

    # Merge everything into a single dataframe
    df = pd.concat([df, df_split1, df_split2], axis=1)

    # Define time as datetime64
    df["time"] = pd.to_datetime(df["time"], format="%d.%m.%Y %H:%M:%S", errors="coerce")

    # Remove unused variables
    df = df.drop(columns=["TO_SPLIT1", "TO_SPLIT2", "parsivel_id", "sensor_serial_number"])

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -29,33 +29,42 @@ def reader(
29
29
  """Reader."""
30
30
  ##------------------------------------------------------------------------.
31
31
  #### Define column names
32
- column_names = ["time", "epoch_time", "TO_BE_PARSED"]
32
+ column_names = ["TO_BE_PARSED"]
33
33
 
34
34
  ##------------------------------------------------------------------------.
35
35
  #### Define reader options
36
36
  reader_kwargs = {}
37
+
37
38
  # - Define delimiter
38
- reader_kwargs["delimiter"] = ";"
39
- # - Skip first row as columns names
39
+ reader_kwargs["delimiter"] = "/\n"
40
+
41
+ # Skip first row as columns names
40
42
  reader_kwargs["header"] = None
41
- # - Avoid first column to become df index
43
+
44
+ # Skip first 2 rows
45
+ reader_kwargs["skiprows"] = 1
46
+
47
+ # - Avoid first column to become df index !!!
42
48
  reader_kwargs["index_col"] = False
49
+
43
50
  # - Define behaviour when encountering bad lines
44
51
  reader_kwargs["on_bad_lines"] = "skip"
45
- # - Define parser engine
52
+
53
+ # - Define reader engine
46
54
  # - C engine is faster
47
55
  # - Python engine is more feature-complete
48
56
  reader_kwargs["engine"] = "python"
57
+
49
58
  # - Define on-the-fly decompression of on-disk data
50
59
  # - Available: gzip, bz2, zip
51
60
  reader_kwargs["compression"] = "infer"
52
- # reader_kwargs['zipped'] = False
53
- # reader_kwargs['zipped'] = True
61
+
54
62
  # - Strings to recognize as NA/NaN and replace with standard NA flags
55
63
  # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
56
64
  # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
57
65
  # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
58
- reader_kwargs["na_values"] = ["na", "", "error", "-.-", " NA"]
66
+ reader_kwargs["na_values"] = ["na", "", "error", "NA"]
67
+
59
68
  ##------------------------------------------------------------------------.
60
69
  #### Read the data
61
70
  df = read_raw_text_file(
@@ -67,34 +76,24 @@ def reader(
67
76
 
68
77
  ##------------------------------------------------------------------------.
69
78
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
70
- # NOTE: Station 8 has all raw_drop_number corrupted, so it can't be used
79
+ # Remove rows with invalid length
80
+ # df = df[df["TO_BE_PARSED"].str.len().isin([4664])]
71
81
 
72
- # Remove rows that have a corrupted "TO_BE_PARSED" column
73
- # - PAR001, PAR002 have length 3726 (no station_name)
74
- # - PAR007 have length 3736 ()
75
- df = df.loc[df["TO_BE_PARSED"].astype(str).str.len() >= 3726]
76
- df = df.loc[df["TO_BE_PARSED"].astype(str).str.len() <= 3736]
82
+ # Count number of delimiters to select valid rows
83
+ df = df[df["TO_BE_PARSED"].str.count(";") == 1107]
77
84
 
78
- # Convert 'time' column to datetime
79
- df_time = pd.to_datetime(df["time"], format="%Y%m%d-%H%M%S", errors="coerce")
80
-
81
- # Strip values from start and end of the string
82
- df["TO_BE_PARSED"] = df["TO_BE_PARSED"].str.lstrip("b'").str.rstrip("'").str.rstrip("\\r\\n'") # noqa: B005
83
-
84
- # Split the column 'TO_BE_PARSED'
85
- df_to_parse = df["TO_BE_PARSED"].str.split(";", expand=True, n=99)
86
-
87
- # Retrieve DISDRODB compliant columns
88
- df = df_to_parse.iloc[:, 0:35]
85
+ # Split by ; delimiter
86
+ df = df["TO_BE_PARSED"].str.split(";", expand=True, n=19)
89
87
 
90
88
  # Assign column names
91
- column_names = [
89
+ names = [
90
+ "date",
91
+ "time",
92
92
  "rainfall_rate_32bit",
93
93
  "rainfall_accumulated_32bit",
94
94
  "weather_code_synop_4680",
95
- "weather_code_synop_4677",
96
- "weather_code_metar_4678",
97
- "weather_code_nws",
95
+ # "weather_code_synop_4677",
96
+ # "weather_code_metar_4678",
98
97
  "reflectivity_32bit",
99
98
  "mor_visibility",
100
99
  "sample_interval",
@@ -103,64 +102,35 @@ def reader(
103
102
  "sensor_temperature",
104
103
  "sensor_serial_number",
105
104
  "firmware_iop",
106
- "firmware_dsp",
107
105
  "sensor_heating_current",
108
106
  "sensor_battery_voltage",
109
107
  "sensor_status",
110
- "sensor_time_measurement_start",
111
- "sensor_time",
112
- "sensor_date",
113
108
  "station_name",
114
- "station_number",
115
109
  "rainfall_amount_absolute_32bit",
116
110
  "error_code",
117
- "sensor_temperature_pcb",
118
- "sensor_temperature_receiver",
119
- "sensor_temperature_trasmitter",
120
- "rainfall_rate_16_bit_30",
121
- "rainfall_rate_16_bit_1200",
122
- "rainfall_accumulated_16bit",
123
- "reflectivity_16bit",
124
- "rain_kinetic_energy",
125
- "snowfall_rate",
126
- "number_particles_all",
127
- # "number_particles_all_detected",
111
+ "ARRAY_TO_SPLIT",
128
112
  ]
129
- df.columns = column_names
130
113
 
131
- # Add time column
132
- df["time"] = df_time
114
+ df.columns = names
133
115
 
134
- # Retrieve raw_drop_concentration
135
- df["raw_drop_concentration"] = df_to_parse.iloc[:, 35:67].apply(
136
- lambda x: ",".join(x.dropna().astype(str)),
137
- axis=1,
138
- )
139
- # Retrieve raw_drop_average_velocity
140
- df["raw_drop_average_velocity"] = df_to_parse.iloc[:, 67:99].apply(
141
- lambda x: ",".join(x.dropna().astype(str)),
142
- axis=1,
143
- )
116
+ # Define time in datetime format
117
+ time_str = df["date"] + " " + df["time"]
118
+ df["time"] = pd.to_datetime(time_str, format="%d.%m.%Y %H:%M:%S", errors="coerce")
144
119
 
145
- # Retrieve raw_drop_number
146
- df_raw_drop_number = df_to_parse.iloc[:, 99].squeeze()
147
- df_raw_drop_number = df_raw_drop_number.str.replace(r"(\w{3})", r"\1,", regex=True)
148
- df["raw_drop_number"] = df_raw_drop_number
120
+ # Add raw array
121
+ df["raw_drop_concentration"] = df["ARRAY_TO_SPLIT"].str[:224]
122
+ df["raw_drop_average_velocity"] = df["ARRAY_TO_SPLIT"].str[224:448]
123
+ df["raw_drop_number"] = df["ARRAY_TO_SPLIT"].str[448:]
149
124
 
150
125
  # Drop columns not agreeing with DISDRODB L0 standards
151
126
  columns_to_drop = [
152
- "firmware_iop",
153
- "firmware_dsp",
154
- "sensor_time_measurement_start",
155
- "sensor_time",
156
- "sensor_date",
127
+ "date",
157
128
  "station_name",
158
- "station_number",
159
- "sensor_serial_number",
129
+ "firmware_iop",
130
+ "ARRAY_TO_SPLIT",
160
131
  "sensor_serial_number",
161
- # "number_particles_all_detected",
132
+ "sample_interval",
162
133
  ]
163
134
  df = df.drop(columns=columns_to_drop)
164
135
 
165
- # Return the dataframe adhering to DISDRODB L0 standards
166
136
  return df
@@ -0,0 +1,220 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import os
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+
23
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
24
+ from disdrodb.l0.l0a_processing import read_raw_text_file
25
+
26
# Time coverage (start, end) of each ship master track file.
TRACKS_DICT = {
    "M203": ("2024-08-16 02:53:21", "2024-09-24 07:59:57"),
    "M204": ("2024-09-27 08:00:00", "2024-10-20 07:59:57"),
    "M205": ("2024-10-23 08:00:01", "2024-11-28 13:00:01"),
    "M206": ("2024-12-01 08:00:02", "2024-12-30 07:59:57"),
    "M207": ("2025-01-04 08:00:01", "2025-02-11 10:25:15"),
}


def get_track_for_dataframe(df):
    """Retrieve ship track identifier."""
    # Time span covered by the dataframe
    obs_start = df["time"].min()
    obs_end = df["time"].max()
    # A track is selected when its coverage intersects the dataframe time span
    return [
        track_id
        for track_id, (track_start, track_end) in TRACKS_DICT.items()
        if obs_start <= pd.to_datetime(track_end) and obs_end >= pd.to_datetime(track_start)
    ]
46
+
47
+
48
def read_tracks_file(tracks_filepath):
    """Read GPS master track file."""
    # Column layout of the tab-separated master track file
    track_columns = ["time", "latitude", "longitude", "flag"]
    track_dtypes = {"time": str, "latitude": float, "longitude": float, "flag": str}
    df_tracks = pd.read_csv(
        tracks_filepath,
        names=track_columns,
        dtype=track_dtypes,
        sep="\t",  # tab-separated
        skiprows=1,  # skip the weird first line
        engine="c",  # speed up reading
    )
    # Parse timestamps into datetime64
    df_tracks["time"] = pd.to_datetime(df_tracks["time"])
    return df_tracks
60
+
61
+
62
def add_gps_coordinates(df, filepath):
    """Add GPS coordinates to dataframe.

    Latitude/longitude are taken from the ship master track files whose
    coverage overlaps the dataframe time range, and merged onto ``df`` by
    nearest timestamp within a 5 minute tolerance. Rows without a match (or
    with no overlapping track at all) get NaN coordinates.
    """
    # Retrieve useful tracks ids
    tracks_ids = get_track_for_dataframe(df)

    # No overlapping track: return NaN coordinates
    if len(tracks_ids) == 0:
        df["latitude"] = np.nan
        df["longitude"] = np.nan
        return df

    # Retrieve station base directory
    # - Assumes the master track files sit two directory levels above the
    #   raw data file (i.e. in the station base directory) — TODO confirm
    #   archive layout.
    station_base_dir = os.path.join(os.path.sep, *filepath.split(os.path.sep)[:-2])
    # Define GPS files to read
    tracks_filepaths = [os.path.join(station_base_dir, f"{tracks_id}_mastertrack.zip") for tracks_id in tracks_ids]
    # Read GPS files
    list_df_tracks = [read_tracks_file(fpath) for fpath in tracks_filepaths]
    df_tracks = pd.concat(list_df_tracks)
    df_tracks = df_tracks.dropna(subset=["time"])

    # Ensure dataframes are sorted by time (required by pd.merge_asof)
    df = df.sort_values("time")
    df_tracks = df_tracks.sort_values("time")

    # Remove bad flags
    # df_tracks = df_tracks[df_tracks["flag"] == "1"]

    # Remove flag column
    df_tracks = df_tracks.drop(columns="flag")

    # Add GPS coordinate to dataframe
    # - Nearest-neighbour join in time; rows farther than 5 minutes from any
    #   track fix receive NaN coordinates
    df = pd.merge_asof(
        df,
        df_tracks,
        on="time",
        direction="nearest",
        tolerance=pd.Timedelta("5min"),
    )
    return df
100
+
101
+
102
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    ##------------------------------------------------------------------------.
    #### Define column names
    # Each raw telegram is read as a single string and parsed below
    column_names = ["TO_BE_PARSED"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}

    # - Define delimiter
    reader_kwargs["delimiter"] = "/\n"

    # Do not use the first row as column names
    reader_kwargs["header"] = None

    # Skip the first row (file header line)
    reader_kwargs["skiprows"] = 1

    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False

    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"

    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"

    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"

    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error", "NA"]

    # - Define encoding
    reader_kwargs["encoding"] = "latin1"

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Remove rows with invalid length
    # df = df[df["TO_BE_PARSED"].str.len().isin([4664])]

    # Count number of delimiters to select valid rows
    # - A complete telegram contains exactly 1107 ';' separators
    df = df[df["TO_BE_PARSED"].str.count(";") == 1107]

    # Split by ; delimiter
    # - Only the first 19 splits are expanded; the remainder (the raw spectra)
    #   stays concatenated in the last column for fixed-offset slicing below
    df = df["TO_BE_PARSED"].str.split(";", expand=True, n=19)

    # Assign column names
    names = [
        "date",
        "time",
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        # "weather_code_synop_4677",
        # "weather_code_metar_4678",
        "reflectivity_32bit",
        "mor_visibility",
        "sample_interval",
        "laser_amplitude",
        "number_particles",
        "sensor_temperature",
        "sensor_serial_number",
        "firmware_iop",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "sensor_status",
        "station_name",
        "rainfall_amount_absolute_32bit",
        "error_code",
        "ARRAY_TO_SPLIT",
    ]

    df.columns = names

    # Define time in datetime format
    # - Invalid timestamps become NaT and the rows are dropped
    time_str = df["date"] + " " + df["time"]
    df["time"] = pd.to_datetime(time_str, format="%d.%m.%Y %H:%M:%S", errors="coerce")
    df = df.dropna(subset=["time"])

    # Add raw array
    # - Fixed character offsets (224 / 448) — assumes constant-width
    #   ';'-separated fields in the spectra tail; TODO confirm against the
    #   telegram format specification
    df["raw_drop_concentration"] = df["ARRAY_TO_SPLIT"].str[:224]
    df["raw_drop_average_velocity"] = df["ARRAY_TO_SPLIT"].str[224:448]
    df["raw_drop_number"] = df["ARRAY_TO_SPLIT"].str[448:]

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "date",
        "station_name",
        "firmware_iop",
        "ARRAY_TO_SPLIT",
        "sensor_serial_number",
        "sample_interval",
    ]
    df = df.drop(columns=columns_to_drop)

    # Add GPS coordinates (nearest ship track fix within 5 minutes)
    df = add_gps_coordinates(df, filepath=filepath)
    return df
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ """This reader allows to read raw data from NASA APU stations.
19
+
20
+ The reader supports raw APU data from the following NASA campaigns:
21
+
22
+ - HYMEX
23
+ - IFLOODS
24
+ - IPHEX
25
+ - OLYMPEX
26
+ - ICEPOP
27
+ - IMPACTS
28
+ - GCPEX
29
+ - WFF
30
+
31
+ """
32
+
33
+ import pandas as pd
34
+
35
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
36
+ from disdrodb.l0.l0a_processing import read_raw_text_file
37
+
38
+
39
+ @is_documented_by(reader_generic_docstring)
40
+ def reader(
41
+ filepath,
42
+ logger=None,
43
+ ):
44
+ """Reader."""
45
+ ##------------------------------------------------------------------------.
46
+ #### Define column names
47
+ column_names = ["time", "TO_BE_SPLITTED"]
48
+
49
+ ##------------------------------------------------------------------------.
50
+ #### Define reader options
51
+ reader_kwargs = {}
52
+ # - Define delimiter
53
+ reader_kwargs["delimiter"] = ";"
54
+ # - Skip first row as columns names
55
+ reader_kwargs["header"] = None
56
+ reader_kwargs["skiprows"] = 0
57
+ # - Skip file with encoding errors
58
+ reader_kwargs["encoding_errors"] = "ignore"
59
+ # - Avoid first column to become df index !!!
60
+ reader_kwargs["index_col"] = False
61
+ # - Define behaviour when encountering bad lines
62
+ reader_kwargs["on_bad_lines"] = "skip"
63
+ # - Define reader engine
64
+ # - C engine is faster
65
+ # - Python engine is more feature-complete
66
+ reader_kwargs["engine"] = "python"
67
+ # - Define on-the-fly decompression of on-disk data
68
+ # - Available: gzip, bz2, zip
69
+ reader_kwargs["compression"] = "infer"
70
+ # - Strings to recognize as NA/NaN and replace with standard NA flags
71
+ # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
72
+ # '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
73
+ # 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
74
+ reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
75
+
76
+ ##------------------------------------------------------------------------.
77
+ #### Read the data
78
+ df = read_raw_text_file(
79
+ filepath=filepath,
80
+ column_names=column_names,
81
+ reader_kwargs=reader_kwargs,
82
+ logger=logger,
83
+ )
84
+
85
+ ##------------------------------------------------------------------------.
86
+ #### Adapt the dataframe to adhere to DISDRODB L0 standards
87
+ # Convert time column to datetime
88
+ df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
89
+
90
+ # Split the 'TO_BE_SPLITTED' column
91
+ df = df["TO_BE_SPLITTED"].str.split(",", n=9, expand=True)
92
+
93
+ # Assign column names
94
+ names = [
95
+ "station_name",
96
+ "sensor_status",
97
+ "sensor_temperature",
98
+ "number_particles",
99
+ "rainfall_rate_32bit",
100
+ "reflectivity_16bit",
101
+ "mor_visibility",
102
+ "weather_code_synop_4680",
103
+ "weather_code_synop_4677",
104
+ "raw_drop_number",
105
+ ]
106
+ df.columns = names
107
+
108
+ # Add the time column
109
+ df["time"] = df_time
110
+
111
+ # Drop columns not agreeing with DISDRODB L0 standards
112
+ df = df.drop(columns=["station_name"])
113
+
114
+ # Drop rows with invalid values
115
+ # --> Ensure that weather_code_synop_4677 has length 2
116
+ # --> If a previous column is missing it will have 000
117
+ df = df[df["weather_code_synop_4677"].str.len() == 2]
118
+
119
+ # Return the dataframe adhering to DISDRODB L0 standards
120
+ return df