disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +64 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +139 -9
- disdrodb/api/configs.py +4 -2
- disdrodb/api/info.py +10 -10
- disdrodb/api/io.py +237 -18
- disdrodb/api/path.py +81 -75
- disdrodb/api/search.py +6 -6
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
- disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +5 -5
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +30 -30
- disdrodb/l0/l0b_nc_processing.py +108 -2
- disdrodb/l0/l0b_processing.py +4 -4
- disdrodb/l0/l0c_processing.py +5 -13
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l0/routines.py +105 -14
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +45 -44
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1/routines.py +35 -43
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/event.py +27 -120
- disdrodb/l2/processing.py +267 -116
- disdrodb/l2/routines.py +618 -254
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +463 -144
- disdrodb/psd/models.py +8 -5
- disdrodb/routines.py +3 -3
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +56 -36
- disdrodb/scattering/permittivity.py +486 -0
- disdrodb/scattering/routines.py +701 -159
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4120 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +59 -8
- disdrodb/utils/dataframe.py +61 -7
- disdrodb/utils/directories.py +35 -15
- disdrodb/utils/encoding.py +33 -19
- disdrodb/utils/logger.py +13 -6
- disdrodb/utils/manipulations.py +71 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +165 -19
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +2 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +327 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
- disdrodb/l1/encoding_attrs.py +0 -642
- disdrodb/l2/processing_options.py +0 -213
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
@@ -47,7 +47,7 @@ number_particles:
 sensor_temperature:
   description: Temperature in sensor housing
   long_name: Temperature of the sensor
-  units: "°C"
+  units: "degC"
 sensor_serial_number:
   description: Sensor serial number
   long_name: Serial number of the sensor
@@ -105,15 +105,15 @@ error_code:
 sensor_temperature_pcb:
   description: Temperature in printed circuit board
   long_name: Sensor PCB temperature
-  units: "°C"
+  units: "degC"
 sensor_temperature_receiver:
   description: Temperature in right sensor head
   long_name: Sensor receiver temperature
-  units: "°C"
+  units: "degC"
 sensor_temperature_trasmitter:
   description: Temperature in left sensor head
   long_name: Sensor trasmitter temperature
-  units: "°C"
+  units: "degC"
 rainfall_rate_16_bit_30:
   description: Rainfall rate
   long_name: Rainfall rate max 30 mm/h 16 bit
@@ -161,7 +161,7 @@ raw_drop_number:
 air_temperature:
   description: "Air temperature in degrees Celsius (C)"
   long_name: Air temperature
-  units: "°C"
+  units: "degC"
 relative_humidity:
   description: "Relative humidity in percent (%)"
   long_name: Relative humidity
@@ -102,7 +102,7 @@ sensor_temperature:
   chunksizes: 5000
   _FillValue: 127
 sensor_serial_number:
-  dtype:
+  dtype: str
   zlib: false
   complevel: 3
   shuffle: true
@@ -110,7 +110,7 @@ sensor_serial_number:
   contiguous: false
   chunksizes: 5000
 firmware_iop:
-  dtype:
+  dtype: str
   zlib: false
   complevel: 3
   shuffle: true
@@ -118,7 +118,7 @@ firmware_iop:
   contiguous: false
   chunksizes: 5000
 firmware_dsp:
-  dtype:
+  dtype: str
   zlib: false
   complevel: 3
   shuffle: true
@@ -25,7 +25,7 @@ sensor_status:
 air_temperature:
   description: "Air temperature in degrees Celsius"
   long_name: Air temperature
-  units: "°C"
+  units: "degC"
 relative_humidity:
   description: "Relative humidity in percent (%)"
   long_name: Relative humidity
@@ -33,15 +33,15 @@ relative_humidity:
 wetbulb_temperature:
   description: "Wet bulb temperature in degrees Celsius"
   long_name: Wet bulb temperature
-  units: "°C"
+  units: "degC"
 air_temperature_max:
   description: "Maximum air temperature in degrees Celsius"
   long_name: Maximum air temperature
-  units: "°C"
+  units: "degC"
 air_temperature_min:
   description: "Minimum air temperature in degrees Celsius"
   long_name: Minimum air temperature
-  units: "°C"
+  units: "degC"
 rainfall_rate:
   description: Rainfall rate
   long_name: Rainfall rate
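These configuration hunks make two kinds of changes: temperature `units` attributes switch from the degree-Celsius symbol to the plain-ASCII, udunits-style string "degC", and the string variables in the L0B encodings gain an explicit `dtype: str`. As a rough illustration of how such `l0b_cf_attrs` entries end up on the data, the sketch below attaches a hand-written attribute dictionary to a toy xarray variable; the dictionary and the dataset are illustrative, not the content of any DISDRODB config file or function.

    # Minimal sketch (not DISDRODB code): attaching l0b_cf_attrs-style attributes to a variable.
    import numpy as np
    import xarray as xr

    cf_attrs = {  # illustrative subset of a l0b_cf_attrs.yml mapping
        "sensor_temperature": {
            "description": "Temperature in sensor housing",
            "long_name": "Temperature of the sensor",
            "units": "degC",  # plain ASCII, parses cleanly with CF/udunits tooling
        },
    }

    ds = xr.Dataset({"sensor_temperature": ("time", np.array([21.5, 22.0]))})
    for var, attrs in cf_attrs.items():
        if var in ds:
            ds[var].attrs.update(attrs)
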
disdrodb/l0/l0a_processing.py
CHANGED
@@ -18,13 +18,13 @@
 # -----------------------------------------------------------------------------.
 """Functions to process raw text files into DISDRODB L0A Apache Parquet."""

-
 import logging
 import os
 from typing import Union

 import numpy as np
 import pandas as pd
+import pyarrow.parquet as pq

 from disdrodb.l0.check_standards import check_l0a_column_names, check_l0a_standards
 from disdrodb.l0.l0b_processing import infer_split_str
@@ -130,11 +130,15 @@ def read_raw_text_file(
     try:
         df = pd.read_csv(filepath, names=column_names, dtype=dtype, **reader_kwargs)
     except pd.errors.EmptyDataError:
+        # if isinstance(filepath, zipfile.ZipExtFile):
+        #     filepath = filepath.name
         msg = f"The following file is empty: {filepath}"
         raise ValueError(msg)

     # Check the dataframe is not empty
     if len(df.index) == 0:
+        # if isinstance(filepath, zipfile.ZipExtFile):
+        #     filepath = filepath.name
         msg = f"The following file is empty: {filepath}"
         raise ValueError(msg)

@@ -413,6 +417,8 @@ def is_raw_array_string_not_corrupted(string):
     """Check if the raw array is corrupted."""
     if not isinstance(string, str):
         return False
+    if string in ["", "NAN", "NaN"]:
+        return True
     split_str = infer_split_str(string=string)
     list_values = string.split(split_str)
     values = pd.to_numeric(list_values, errors="coerce")
@@ -625,6 +631,9 @@ def sanitize_df(
     # - Sort by time
     df = df.sort_values("time")

+    # - Drop index
+    df = df.reset_index(drop=True)
+
     # ------------------------------------------------------.
     # - Check column names agrees to DISDRODB standards
     check_l0a_column_names(df, sensor_name=sensor_name)
@@ -755,24 +764,8 @@ def concatenate_dataframe(list_df: list, logger=None, verbose: bool = False) ->
     return df


-def _read_l0a(filepath: str, verbose: bool = False, logger=None, debugging_mode: bool = False) -> pd.DataFrame:
-    # Log
-    msg = f"Reading L0 Apache Parquet file at {filepath} started."
-    log_info(logger=logger, msg=msg, verbose=verbose)
-    # Open file
-    df = pd.read_parquet(filepath)
-    if debugging_mode:
-        df = df.iloc[0:100]
-    # Log
-    msg = f"Reading L0 Apache Parquet file at {filepath} ended."
-    log_info(logger=logger, msg=msg, verbose=verbose)
-    return df
-
-
 def read_l0a_dataframe(
     filepaths: Union[str, list],
-    verbose: bool = False,
-    logger=None,
     debugging_mode: bool = False,
 ) -> pd.DataFrame:
     """Read DISDRODB L0A Apache Parquet file(s).
@@ -781,13 +774,10 @@
     ----------
     filepaths : str or list
         Either a list or a single filepath.
-    verbose : bool
-        Whether to print detailed processing information into terminal.
-        The default is ``False``.
     debugging_mode : bool
         If ``True``, it reduces the amount of data to process.
         If filepaths is a list, it reads only the first 3 files.
-
+        It selects only 100 rows sampled from the first 3 files.
         The default is ``False``.

     Returns
@@ -796,8 +786,6 @@ def read_l0a_dataframe(
         L0A Dataframe.

     """
-    from disdrodb.l0.l0a_processing import concatenate_dataframe
-
     # ----------------------------------------
     # Check filepaths validity
     if not isinstance(filepaths, (list, str)):
@@ -814,12 +802,15 @@

     # ---------------------------------------------------
     # Define the list of dataframe
-    list_df = [
-        _read_l0a(filepath, verbose=verbose, logger=logger, debugging_mode=debugging_mode) for filepath in filepaths
-    ]
+    df = pq.ParquetDataset(filepaths).read().to_pandas()

-    #
-    df =
+    # Ensure no index
+    df = df.reset_index(drop=True)
+
+    # Reduce rows
+    if debugging_mode:
+        n_rows = min(100, len(df))
+        df = df.sample(n=n_rows)

     # Ensure time is in nanoseconds
     df["time"] = df["time"].astype("M8[ns]")
@@ -833,14 +824,15 @@
 #### L0A Utility


-def read_raw_text_files(
+def generate_l0a(
     filepaths: Union[list, str],
     reader,
     sensor_name,
+    issue_dict=None,
     verbose=True,
     logger=None,
 ) -> pd.DataFrame:
-    """Read and parse a list
+    """Read and parse a list of raw files and generate a DISDRODB L0A dataframe.

     Parameters
     ----------
@@ -851,6 +843,13 @@ def read_raw_text_files(
         Format: reader(filepath, logger=None)
     sensor_name : str
         Name of the sensor.
+    issue_dict : dict, optional
+        Issue dictionary providing information on timesteps to remove.
+        The default is an empty dictionary ``{}``.
+        Valid issue_dict key are ``'timesteps'`` and ``'time_periods'``.
+        Valid issue_dict values are list of datetime64 values (with second accuracy).
+        To correctly format and check the validity of the ``issue_dict``, use
+        the ``disdrodb.l0.issue.check_issue_dict`` function.
     verbose : bool
         Whether to verbose the processing. The default is ``True``.

@@ -886,6 +885,7 @@ def read_raw_text_files(
         df = sanitize_df(
             df=df,
             sensor_name=sensor_name,
+            issue_dict=issue_dict,
             logger=logger,
             verbose=verbose,
         )
disdrodb/l0/l0b_nc_processing.py
CHANGED
@@ -19,6 +19,7 @@
 """Functions to process DISDRODB raw netCDF files into DISDRODB L0B netCDF files."""

 import logging
+from typing import Union

 import numpy as np

@@ -33,6 +34,7 @@ from disdrodb.l0.standards import (
     get_valid_variable_names,
 )
 from disdrodb.utils.logger import (
+    log_error,
     # log_warning,
     # log_debug,
     log_info,
@@ -169,6 +171,8 @@ def standardize_raw_dataset(ds, dict_names, sensor_name):

     # If missing variables, infill with NaN array
     missing_vars = _get_missing_variables(ds, dict_names, sensor_name)
+    if "raw_drop_number" in missing_vars:
+        raise ValueError("The raw drop spectrum is not present in the netCDF file!")
     if len(missing_vars) > 0:
         ds = add_dataset_missing_variables(ds=ds, missing_vars=missing_vars, sensor_name=sensor_name)

@@ -454,8 +458,8 @@ def sanitize_ds(
     ----------
     ds : xarray.Dataset
         Raw xarray dataset
-
-
+    metadata: dict
+        Station metadata to attach as global attributes to the xr.Dataset.
     sensor_name : str
         Name of the sensor.
     verbose : bool
@@ -525,3 +529,105 @@ def open_raw_netcdf_file(
     # Log information
     log_info(logger=logger, msg=f"netCDF file {filepath} has been loaded successively into xarray.", verbose=False)
     return ds
+
+
+def generate_l0b_from_nc(
+    filepaths: Union[list, str],
+    reader,
+    sensor_name,
+    metadata,
+    issue_dict=None,
+    verbose=True,
+    logger=None,
+):
+    """Read and parse a list of raw netCDF files and generate a DISDRODB L0B dataset.
+
+    Parameters
+    ----------
+    filepaths : Union[list,str]
+        File(s) path(s)
+    reader:
+        DISDRODB reader function.
+        Format: reader(filepath, logger=None)
+    sensor_name : str
+        Name of the sensor.
+    metadata: dict
+        Station metadata to attach as global attributes to the xr.Dataset.
+    issue_dict : dict, optional
+        Issue dictionary providing information on timesteps to remove.
+        The default is an empty dictionary ``{}``.
+        Valid issue_dict key are ``'timesteps'`` and ``'time_periods'``.
+        Valid issue_dict values are list of datetime64 values (with second accuracy).
+        To correctly format and check the validity of the ``issue_dict``, use
+        the ``disdrodb.l0.issue.check_issue_dict`` function.
+    verbose : bool
+        Whether to verbose the processing. The default is ``True``.
+
+    Returns
+    -------
+    xarray.Dataset
+        DISDRODB L0B Dataset.
+
+    Raises
+    ------
+    ValueError
+        Input parameters can not be used or the raw file can not be processed.
+
+    """
+    import xarray as xr
+
+    # Check input list
+    if isinstance(filepaths, str):
+        filepaths = [filepaths]
+    if len(filepaths) == 0:
+        raise ValueError("'filepaths' must contains at least 1 filepath.")
+
+    # ------------------------------------------------------.
+    # Loop over all raw files
+    n_files = len(filepaths)
+    processed_file_counter = 0
+    list_skipped_files_msg = []
+    list_ds = []
+    for filepath in filepaths:
+        # Try read the raw netCDF file
+        try:
+            ds = reader(filepath, logger=logger)
+            # Sanitize the dataframe
+            ds = sanitize_ds(
+                ds=ds,
+                sensor_name=sensor_name,
+                metadata=metadata,
+                issue_dict=issue_dict,
+                verbose=verbose,
+                logger=logger,
+            )
+            # Append dataframe to the list
+            list_ds.append(ds)
+            # Update the logger
+            processed_file_counter += 1
+            msg = f"Raw file '{filepath}' processed successfully ({processed_file_counter}/{n_files})."
+            log_info(logger=logger, msg=msg, verbose=verbose)
+
+        # Skip the file if the processing fails
+        except Exception as e:
+            # Update the logger
+            msg = f"{filepath} has been skipped. The error is: {e}."
+            log_error(logger=logger, msg=msg, verbose=verbose)
+            list_skipped_files_msg.append(msg)
+
+    # Update logger
+    msg = f"{len(list_skipped_files_msg)} of {n_files} have been skipped."
+    log_info(logger=logger, msg=msg, verbose=verbose)
+
+    # Check if there are files to concatenate
+    if len(list_ds) == 0:
+        raise ValueError("Any raw file could be read!")
+
+    ##----------------------------------------------------------------.
+    # Concatenate the datasets
+    list_ds = [ds.chunk({"time": -1}) for ds in list_ds]
+    ds = xr.concat(list_ds, dim="time", join="outer", compat="no_conflicts", combine_attrs="override").sortby("time")
+    ds = ds.compute()
+
+    # Return the dataframe
+    return ds
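The new `generate_l0b_from_nc` applies the station reader and `sanitize_ds` file by file, then concatenates the surviving datasets along `time` with an outer join. The snippet below isolates just that concatenation step on two toy in-memory datasets; it illustrates the xarray call used above and is not DISDRODB code.

    # Minimal sketch: outer-join concatenation along time, as used in generate_l0b_from_nc.
    import numpy as np
    import xarray as xr

    ds1 = xr.Dataset(
        {"rainfall_rate": ("time", np.array([0.1, 0.3]))},
        coords={"time": np.array(["2024-01-01T00:00", "2024-01-01T00:01"], dtype="M8[ns]")},
    )
    ds2 = xr.Dataset(
        {"rainfall_rate": ("time", np.array([0.0]))},
        coords={"time": np.array(["2024-01-01T00:02"], dtype="M8[ns]")},
    )
    ds = xr.concat(
        [ds1, ds2], dim="time", join="outer", compat="no_conflicts", combine_attrs="override",
    ).sortby("time")
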
disdrodb/l0/l0b_processing.py
CHANGED
@@ -386,13 +386,13 @@ def _define_dataset_variables(df, sensor_name, logger=None, verbose=False):
     return data_vars


-def
+def generate_l0b(
     df: pd.DataFrame,
     metadata: dict,
     logger=None,
     verbose: bool = False,
 ) -> xr.Dataset:
-    """Transform the L0A dataframe to the L0B xr.Dataset.
+    """Transform the DISDRODB L0A dataframe to the DISDRODB L0B xr.Dataset.

     Parameters
     ----------
@@ -503,8 +503,8 @@ def set_l0b_encodings(ds: xr.Dataset, sensor_name: str):
     xarray.Dataset
         Output xarray dataset.
     """
-
-    ds = set_encodings(ds=ds,
+    encodings_dict = get_l0b_encodings_dict(sensor_name)
+    ds = set_encodings(ds=ds, encodings_dict=encodings_dict)
     return ds

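`set_l0b_encodings` now fetches the per-sensor encodings dictionary explicitly before handing it to `set_encodings`. As a rough sketch of what such an encodings dictionary controls, the example below sets the encoding of a toy variable so that a later `to_netcdf` call picks it up; the variable name and encoding values are illustrative only, not taken from the DISDRODB configs.

    # Minimal sketch (illustrative values): applying a per-variable encodings dictionary.
    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"sensor_temperature": ("time", np.array([21.5, 22.0], dtype="float32"))})
    encodings = {
        "sensor_temperature": {"dtype": "float32", "zlib": True, "complevel": 3, "_FillValue": -9999.0},
    }
    for var, enc in encodings.items():
        ds[var].encoding.update(enc)   # honoured by a subsequent ds.to_netcdf(...)
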
disdrodb/l0/l0c_processing.py
CHANGED
@@ -388,11 +388,10 @@ def check_timesteps_regularity(ds, sample_interval, verbose=False, logger=None):
     return ds


-def finalize_l0c_dataset(ds, sample_interval, start_day, end_day, verbose=True, logger=None):
+def finalize_l0c_dataset(ds, sample_interval, verbose=True, logger=None):
     """Finalize a L0C dataset with unique sampling interval.

-    It adds the sampling_interval coordinate and it regularizes
-    the timesteps for trailing seconds.
+    It adds the sampling_interval coordinate and it regularizes the timesteps for trailing seconds.
     """
     # Add sample interval as coordinate
     ds = add_sample_interval(ds, sample_interval=sample_interval)
@@ -409,9 +408,6 @@ def finalize_l0c_dataset(ds, sample_interval, start_day, end_day, verbose=True,

     # Performs checks about timesteps regularity
     ds = check_timesteps_regularity(ds=ds, sample_interval=sample_interval, verbose=verbose, logger=logger)
-
-    # Slice for requested day
-    ds = ds.sel({"time": slice(start_day, end_day)})
     return ds


@@ -442,7 +438,7 @@ def create_daily_file(day, filepaths, measurement_intervals, ensure_variables_eq
     - The function adds a tolerance for searching timesteps
       before and after 00:00 to account for imprecise logging times.
     - It checks that duplicated timesteps have the same raw drop number values.
-    - The function infers the
+    - The function infers the sample interval and
       regularizes timesteps to handle trailing seconds.
     - The data is loaded into memory and connections to source files
       are closed before returning the dataset.
@@ -461,10 +457,8 @@ def create_daily_file(day, filepaths, measurement_intervals, ensure_variables_eq

     # ---------------------------------------------------------------------------------------.
     # Open files with data within the provided day and concatenate them
-    # list_ds = [xr.open_dataset(filepath, chunks={}).sel({"time": slice(start_day_tol, end_day_tol)})
-    #            for filepath in filepaths]
     list_ds = [
-        xr.open_dataset(filepath, decode_timedelta=False, chunks
+        xr.open_dataset(filepath, decode_timedelta=False, chunks=-1, cache=False).sortby("time")
         for filepath in filepaths
     ]
     list_ds = [ds.sel({"time": slice(start_day_tol, end_day_tol)}) for ds in list_ds]
@@ -533,11 +527,9 @@ def create_daily_file(day, filepaths, measurement_intervals, ensure_variables_eq
         sample_interval: finalize_l0c_dataset(
             ds=ds,
             sample_interval=sample_interval,
-            start_day=start_day,
-            end_day=end_day,
             verbose=verbose,
             logger=logger,
-        )
+        ).sel({"time": slice(start_day, end_day)})
         for sample_interval, ds in dict_ds.items()
     }
     return dict_ds
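Note that the day-slicing responsibility moves out of `finalize_l0c_dataset`: the caller in `create_daily_file` now selects the requested day with `.sel({"time": slice(start_day, end_day)})` right after finalizing each dataset. A small, self-contained illustration of that label-based time slicing (the dates and variable are made up):

    # Minimal sketch: selecting one day from a time-indexed dataset, as done after finalize_l0c_dataset.
    import numpy as np
    import xarray as xr

    times = np.arange("2024-01-01T23:59:50", "2024-01-02T00:00:10", dtype="M8[s]")
    ds = xr.Dataset({"rainfall_rate": ("time", np.zeros(times.size))}, coords={"time": times})
    day = ds.sel({"time": slice("2024-01-02T00:00:00", "2024-01-02T23:59:59")})
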
disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py
ADDED

@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Reader for DELFT Thies LPM sensor in netCDF format."""
+
+from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+from disdrodb.l0.l0b_nc_processing import open_raw_netcdf_file, standardize_raw_dataset
+
+
+@is_documented_by(reader_generic_docstring)
+def reader(
+    filepath,
+    logger=None,
+):
+    """Reader."""
+    ##------------------------------------------------------------------------.
+    #### Open the netCDF
+    ds = open_raw_netcdf_file(filepath=filepath, logger=logger)
+
+    ##------------------------------------------------------------------------.
+    #### Adapt the dataframe to adhere to DISDRODB L0 standards
+    # Add time coordinate
+    ds["time"] = ds["time_as_string"].astype("M8[s]")
+    ds["time"].attrs.pop("comment", None)
+    ds["time"].attrs.pop("units", None)
+    ds = ds.set_coords("time")
+
+    # Define dictionary mapping dataset variables to select and rename
+    dict_names = {
+        ### Dimensions
+        "diameter_classes": "diameter_bin_center",
+        "velocity_classes": "velocity_bin_center",
+        ### Variables
+        "liquid_precip_intensity": "rainfall_rate",
+        "solid_precip_intensity": "snowfall_rate",
+        "all_precip_intensity": "precipitation_rate",
+        "weather_code_synop_4680": "weather_code_synop_4680",
+        "weather_code_synop_4677": "weather_code_synop_4677",
+        "reflectivity": "reflectivity",
+        "visibility": "mor_visibility",
+        "total_number_particles": "number_particles",
+        "ambient_temperature": "temperature_ambient",
+        "status_laser": "laser_status",
+        "measurement_quality": "quality_index",
+        "raw_spectrum": "raw_drop_number",
+    }
+
+    # Rename dataset variables and columns and infill missing variables
+    ds = standardize_raw_dataset(ds=ds, dict_names=dict_names, sensor_name="LPM")
+
+    # Return the dataset adhering to DISDRODB L0B standards
+    return ds
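The heart of this new reader is the `dict_names` mapping passed to `standardize_raw_dataset`, which selects the raw netCDF variables and renames them to DISDRODB names (the helper also infills missing variables and applies further checks). A toy illustration of that select-and-rename step with plain xarray, using two made-up variables rather than a real DELFT LPM file:

    # Minimal sketch (toy data): the select-and-rename step that dict_names drives.
    import numpy as np
    import xarray as xr

    raw = xr.Dataset({
        "liquid_precip_intensity": ("time", np.array([0.2, 0.0])),
        "total_number_particles": ("time", np.array([15, 3])),
    })
    dict_names = {
        "liquid_precip_intensity": "rainfall_rate",
        "total_number_particles": "number_particles",
    }
    ds = raw[list(dict_names)].rename(dict_names)
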
@@ -62,6 +62,9 @@ def reader(
     # - Available: gzip, bz2, zip
     reader_kwargs["compression"] = "infer"

+    # - Skip rows with badly encoded data
+    reader_kwargs["encoding_errors"] = "replace"
+
     # - Strings to recognize as NA/NaN and replace with standard NA flags
     # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
     #                     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',