cloudnetpy 1.49.9__py3-none-any.whl → 1.87.3__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (116)
  1. cloudnetpy/categorize/__init__.py +1 -2
  2. cloudnetpy/categorize/atmos_utils.py +297 -67
  3. cloudnetpy/categorize/attenuation.py +31 -0
  4. cloudnetpy/categorize/attenuations/__init__.py +37 -0
  5. cloudnetpy/categorize/attenuations/gas_attenuation.py +30 -0
  6. cloudnetpy/categorize/attenuations/liquid_attenuation.py +84 -0
  7. cloudnetpy/categorize/attenuations/melting_attenuation.py +78 -0
  8. cloudnetpy/categorize/attenuations/rain_attenuation.py +84 -0
  9. cloudnetpy/categorize/categorize.py +332 -156
  10. cloudnetpy/categorize/classify.py +127 -125
  11. cloudnetpy/categorize/containers.py +107 -76
  12. cloudnetpy/categorize/disdrometer.py +40 -0
  13. cloudnetpy/categorize/droplet.py +23 -21
  14. cloudnetpy/categorize/falling.py +53 -24
  15. cloudnetpy/categorize/freezing.py +25 -12
  16. cloudnetpy/categorize/insects.py +35 -23
  17. cloudnetpy/categorize/itu.py +243 -0
  18. cloudnetpy/categorize/lidar.py +36 -41
  19. cloudnetpy/categorize/melting.py +34 -26
  20. cloudnetpy/categorize/model.py +84 -37
  21. cloudnetpy/categorize/mwr.py +18 -14
  22. cloudnetpy/categorize/radar.py +215 -102
  23. cloudnetpy/cli.py +578 -0
  24. cloudnetpy/cloudnetarray.py +43 -89
  25. cloudnetpy/concat_lib.py +218 -78
  26. cloudnetpy/constants.py +28 -10
  27. cloudnetpy/datasource.py +61 -86
  28. cloudnetpy/exceptions.py +49 -20
  29. cloudnetpy/instruments/__init__.py +5 -0
  30. cloudnetpy/instruments/basta.py +29 -12
  31. cloudnetpy/instruments/bowtie.py +135 -0
  32. cloudnetpy/instruments/ceilo.py +138 -115
  33. cloudnetpy/instruments/ceilometer.py +164 -80
  34. cloudnetpy/instruments/cl61d.py +21 -5
  35. cloudnetpy/instruments/cloudnet_instrument.py +74 -36
  36. cloudnetpy/instruments/copernicus.py +108 -30
  37. cloudnetpy/instruments/da10.py +54 -0
  38. cloudnetpy/instruments/disdrometer/common.py +126 -223
  39. cloudnetpy/instruments/disdrometer/parsivel.py +453 -94
  40. cloudnetpy/instruments/disdrometer/thies.py +254 -87
  41. cloudnetpy/instruments/fd12p.py +201 -0
  42. cloudnetpy/instruments/galileo.py +65 -23
  43. cloudnetpy/instruments/hatpro.py +123 -49
  44. cloudnetpy/instruments/instruments.py +113 -1
  45. cloudnetpy/instruments/lufft.py +39 -17
  46. cloudnetpy/instruments/mira.py +268 -61
  47. cloudnetpy/instruments/mrr.py +187 -0
  48. cloudnetpy/instruments/nc_lidar.py +19 -8
  49. cloudnetpy/instruments/nc_radar.py +109 -55
  50. cloudnetpy/instruments/pollyxt.py +135 -51
  51. cloudnetpy/instruments/radiometrics.py +313 -59
  52. cloudnetpy/instruments/rain_e_h3.py +171 -0
  53. cloudnetpy/instruments/rpg.py +321 -189
  54. cloudnetpy/instruments/rpg_reader.py +74 -40
  55. cloudnetpy/instruments/toa5.py +49 -0
  56. cloudnetpy/instruments/vaisala.py +95 -343
  57. cloudnetpy/instruments/weather_station.py +774 -105
  58. cloudnetpy/metadata.py +90 -19
  59. cloudnetpy/model_evaluation/file_handler.py +55 -52
  60. cloudnetpy/model_evaluation/metadata.py +46 -20
  61. cloudnetpy/model_evaluation/model_metadata.py +1 -1
  62. cloudnetpy/model_evaluation/plotting/plot_tools.py +32 -37
  63. cloudnetpy/model_evaluation/plotting/plotting.py +327 -117
  64. cloudnetpy/model_evaluation/products/advance_methods.py +92 -83
  65. cloudnetpy/model_evaluation/products/grid_methods.py +88 -63
  66. cloudnetpy/model_evaluation/products/model_products.py +43 -35
  67. cloudnetpy/model_evaluation/products/observation_products.py +41 -35
  68. cloudnetpy/model_evaluation/products/product_resampling.py +17 -7
  69. cloudnetpy/model_evaluation/products/tools.py +29 -20
  70. cloudnetpy/model_evaluation/statistics/statistical_methods.py +30 -20
  71. cloudnetpy/model_evaluation/tests/e2e/conftest.py +3 -3
  72. cloudnetpy/model_evaluation/tests/e2e/process_cf/main.py +9 -5
  73. cloudnetpy/model_evaluation/tests/e2e/process_cf/tests.py +15 -14
  74. cloudnetpy/model_evaluation/tests/e2e/process_iwc/main.py +9 -5
  75. cloudnetpy/model_evaluation/tests/e2e/process_iwc/tests.py +15 -14
  76. cloudnetpy/model_evaluation/tests/e2e/process_lwc/main.py +9 -5
  77. cloudnetpy/model_evaluation/tests/e2e/process_lwc/tests.py +15 -14
  78. cloudnetpy/model_evaluation/tests/unit/conftest.py +42 -41
  79. cloudnetpy/model_evaluation/tests/unit/test_advance_methods.py +41 -48
  80. cloudnetpy/model_evaluation/tests/unit/test_grid_methods.py +216 -194
  81. cloudnetpy/model_evaluation/tests/unit/test_model_products.py +23 -21
  82. cloudnetpy/model_evaluation/tests/unit/test_observation_products.py +37 -38
  83. cloudnetpy/model_evaluation/tests/unit/test_plot_tools.py +43 -40
  84. cloudnetpy/model_evaluation/tests/unit/test_plotting.py +30 -36
  85. cloudnetpy/model_evaluation/tests/unit/test_statistical_methods.py +68 -31
  86. cloudnetpy/model_evaluation/tests/unit/test_tools.py +33 -26
  87. cloudnetpy/model_evaluation/utils.py +2 -1
  88. cloudnetpy/output.py +170 -111
  89. cloudnetpy/plotting/__init__.py +2 -1
  90. cloudnetpy/plotting/plot_meta.py +562 -822
  91. cloudnetpy/plotting/plotting.py +1142 -704
  92. cloudnetpy/products/__init__.py +1 -0
  93. cloudnetpy/products/classification.py +370 -88
  94. cloudnetpy/products/der.py +85 -55
  95. cloudnetpy/products/drizzle.py +77 -34
  96. cloudnetpy/products/drizzle_error.py +15 -11
  97. cloudnetpy/products/drizzle_tools.py +79 -59
  98. cloudnetpy/products/epsilon.py +211 -0
  99. cloudnetpy/products/ier.py +27 -50
  100. cloudnetpy/products/iwc.py +55 -48
  101. cloudnetpy/products/lwc.py +96 -70
  102. cloudnetpy/products/mwr_tools.py +186 -0
  103. cloudnetpy/products/product_tools.py +170 -128
  104. cloudnetpy/utils.py +455 -240
  105. cloudnetpy/version.py +2 -2
  106. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/METADATA +44 -40
  107. cloudnetpy-1.87.3.dist-info/RECORD +127 -0
  108. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/WHEEL +1 -1
  109. cloudnetpy-1.87.3.dist-info/entry_points.txt +2 -0
  110. docs/source/conf.py +2 -2
  111. cloudnetpy/categorize/atmos.py +0 -361
  112. cloudnetpy/products/mwr_multi.py +0 -68
  113. cloudnetpy/products/mwr_single.py +0 -75
  114. cloudnetpy-1.49.9.dist-info/RECORD +0 -112
  115. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info/licenses}/LICENSE +0 -0
  116. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/top_level.txt +0 -0
cloudnetpy/cloudnetarray.py CHANGED
@@ -1,9 +1,10 @@
 """CloudnetArray class."""
-import math
-from collections.abc import Sequence
+
+from collections.abc import Callable, Sequence
 
 import netCDF4
 import numpy as np
+import numpy.typing as npt
 from numpy import ma
 
 from cloudnetpy import utils
@@ -20,23 +21,26 @@ class CloudnetArray:
         units_from_user: Explicit units, optional.
         dimensions: Explicit dimension names, optional.
         data_type: Explicit data type, optional.
+        source: Source attribute, optional.
 
     """
 
     def __init__(
         self,
-        variable: netCDF4.Variable | np.ndarray | float | int,
+        variable: netCDF4.Variable | npt.NDArray | float,
         name: str,
         units_from_user: str | None = None,
         dimensions: Sequence[str] | None = None,
         data_type: str | None = None,
-    ):
+        source: str | None = None,
+    ) -> None:
         self.variable = variable
         self.name = name
         self.data = self._init_data()
         self.units = units_from_user or self._init_units()
         self.data_type = data_type or self._init_data_type()
         self.dimensions = dimensions
+        self.source = source
 
     def lin2db(self) -> None:
         """Converts linear units to log."""
@@ -54,7 +58,7 @@ class CloudnetArray:
         """Masks data from given indices."""
         self.data[ind] = ma.masked
 
-    def rebin_data(self, time: np.ndarray, time_new: np.ndarray) -> list:
+    def rebin_data(self, time: npt.NDArray, time_new: npt.NDArray) -> npt.NDArray:
         """Rebins `data` in time.
 
         Args:
@@ -67,41 +71,59 @@ class CloudnetArray:
         """
         if self.data.ndim == 1:
             self.data = utils.rebin_1d(time, self.data, time_new)
-            bad_indices = list(np.where(self.data == ma.masked)[0])
+            bad_indices = np.nonzero(self.data.mask)[0]
         else:
-            assert isinstance(self.data, ma.MaskedArray)
             self.data, bad_indices = utils.rebin_2d(time, self.data, time_new)
         return bad_indices
 
     def fetch_attributes(self) -> list:
         """Returns list of user-defined attributes."""
         attributes = []
-        for attr in self.__dict__:
-            if attr not in (
-                "variable",
-                "name",
-                "data",
-                "data_type",
-                "dimensions",
+        for key, value in self.__dict__.items():
+            if (
+                key
+                not in (
+                    "variable",
+                    "name",
+                    "data",
+                    "data_type",
+                    "dimensions",
+                )
+                and value is not None
             ):
-                attributes.append(attr)
+                attributes.append(key)
         return attributes
 
     def set_attributes(self, attributes: MetaData) -> None:
         """Overwrites existing instance attributes."""
         for key in attributes._fields:  # To iterate namedtuple fields.
             data = getattr(attributes, key)
-            if data:
+            if key == "dimensions" or data:
                 setattr(self, key, data)
 
-    def _init_data(self) -> np.ndarray:
+    def filter_isolated_pixels(self) -> None:
+        """Filters hot pixels from radar data."""
+        self._filter(utils.filter_isolated_pixels)
+
+    def filter_vertical_stripes(self) -> None:
+        """Filters vertical artifacts from radar data."""
+        self._filter(utils.filter_x_pixels)
+
+    def _filter(self, fun: Callable[[npt.NDArray], npt.NDArray]) -> None:
+        if not isinstance(self.data, ma.MaskedArray):
+            self.data = ma.masked_array(self.data)
+        is_data = (~self.data.mask).astype(int)
+        is_data_filtered = fun(is_data)
+        self.data[is_data_filtered == 0] = ma.masked
+
+    def _init_data(self) -> npt.NDArray:
         if isinstance(self.variable, netCDF4.Variable):
             return self.variable[:]
         if isinstance(self.variable, np.ndarray):
             return self.variable
         if isinstance(
             self.variable,
-            (int, float, np.float32, np.int8, np.float64, np.int32, np.uint16),
+            int | float | np.float32 | np.int8 | np.float64 | np.int32 | np.uint16,
        ):
             return np.array(self.variable)
         if isinstance(self.variable, str):
@@ -110,7 +132,8 @@ class CloudnetArray:
                 return np.array(numeric_value)
             except ValueError:
                 pass
-        raise ValueError(f"Incorrect CloudnetArray input: {self.variable}")
+        msg = f"Incorrect CloudnetArray input: {self.variable}"
+        raise ValueError(msg)
 
     def _init_units(self) -> str:
         return getattr(self.variable, "units", "")
@@ -122,74 +145,5 @@ class CloudnetArray:
             return "i2"
         return "i4"
 
-    def __getitem__(self, ind: tuple) -> np.ndarray:
+    def __getitem__(self, ind: tuple) -> npt.NDArray:
         return self.data[ind]
-
-    def filter_isolated_pixels(self) -> None:
-        """Filters hot pixels from radar data."""
-        self._filter(utils.filter_isolated_pixels)
-
-    def filter_vertical_stripes(self) -> None:
-        """Filters vertical artifacts from radar data."""
-        self._filter(utils.filter_x_pixels)
-
-    def _filter(self, fun) -> None:
-        assert isinstance(self.data, ma.MaskedArray)
-        is_data = (~self.data.mask).astype(int)
-        is_data_filtered = fun(is_data)
-        self.data[is_data_filtered == 0] = ma.masked
-
-    def calc_linear_std(self, time: np.ndarray, time_new: np.ndarray) -> None:
-        """Calculates std of radar velocity.
-
-        Args:
-            time: 1D time array.
-            time_new: 1D new time array.
-
-        Notes:
-            The result is masked if the bin contains masked values.
-        """
-        data_as_float = self.data.astype(float)
-        assert isinstance(data_as_float, ma.MaskedArray)
-        self.data, _ = utils.rebin_2d(time, data_as_float, time_new, "std")
-
-    def rebin_velocity(
-        self,
-        time: np.ndarray,
-        time_new: np.ndarray,
-        folding_velocity: float | np.ndarray,
-        sequence_indices: list,
-    ) -> None:
-        """Rebins Doppler velocity in polar coordinates.
-
-        Args:
-            time: 1D time array.
-            time_new: 1D new time array.
-            folding_velocity: Folding velocity (m/s). Can be a float when
-                it's the same for all altitudes, or np.ndarray when it
-                matches difference altitude regions (defined in `sequence_indices`).
-            sequence_indices: List containing indices of different folding regions,
-                e.g. [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10]].
-
-        """
-
-        def _get_scaled_vfold() -> np.ndarray:
-            vfold_scaled = math.pi / folding_velocity
-            if isinstance(vfold_scaled, float):
-                vfold_scaled = np.array([float(vfold_scaled)])
-            return vfold_scaled
-
-        def _scale_by_vfold(data_in: np.ndarray, fun) -> np.ndarray:
-            data_out = ma.copy(data_in)
-            for i, ind in enumerate(sequence_indices):
-                data_out[:, ind] = fun(data_in[:, ind], folding_velocity_scaled[i])
-            return data_out
-
-        folding_velocity_scaled = _get_scaled_vfold()
-        data_scaled = _scale_by_vfold(self.data, np.multiply)
-        vel_x = ma.cos(data_scaled)
-        vel_y = ma.sin(data_scaled)
-        vel_x_mean, _ = utils.rebin_2d(time, vel_x, time_new)
-        vel_y_mean, _ = utils.rebin_2d(time, vel_y, time_new)
-        mean_vel_scaled = np.arctan2(vel_y_mean, vel_x_mean)
-        self.data = _scale_by_vfold(mean_vel_scaled, np.divide)
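Note on the removed rebin_velocity: it averaged folded Doppler velocities as angles on the unit circle rather than as plain numbers, so values near the folding boundary do not cancel to zero. Below is a minimal standalone sketch of the same circular-mean idea, using np.digitize in place of cloudnetpy's utils.rebin_2d and assuming a single folding velocity, evenly spaced target bins, and no empty bins (the function name rebin_folded_velocity is hypothetical):

    import numpy as np
    from numpy import ma

    def rebin_folded_velocity(
        time: np.ndarray,          # original sample times
        velocity: ma.MaskedArray,  # (n_time, n_range) folded Doppler velocity
        time_new: np.ndarray,      # evenly spaced new bin centers
        folding_velocity: float,
    ) -> ma.MaskedArray:
        # Map velocities in (-v_fold, +v_fold) to angles in (-pi, +pi).
        angle = velocity * (np.pi / folding_velocity)
        vel_x, vel_y = ma.cos(angle), ma.sin(angle)
        # Assign each original profile to a new time bin.
        half_step = np.median(np.diff(time_new)) / 2
        edges = np.concatenate(([time_new[0] - half_step], time_new + half_step))
        bins = np.digitize(time, edges) - 1
        # Average the unit vectors inside each bin.
        x_mean = ma.stack([vel_x[bins == i].mean(axis=0) for i in range(len(time_new))])
        y_mean = ma.stack([vel_y[bins == i].mean(axis=0) for i in range(len(time_new))])
        # Angle of the mean vector, mapped back to a velocity.
        return np.arctan2(y_mean, x_mean) * (folding_velocity / np.pi)

With a 10 m/s folding velocity, samples at +9.5 and -9.5 m/s average to the fold point (±10 m/s) instead of the misleading arithmetic mean of 0.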
cloudnetpy/concat_lib.py CHANGED
@@ -1,20 +1,36 @@
 """Module for concatenating netCDF files."""
+
+import datetime
+import logging
+import shutil
+from collections.abc import Iterable, Sequence
+from os import PathLike
+from pathlib import Path
+from types import TracebackType
+from typing import Literal
+
 import netCDF4
 import numpy as np
+import numpy.typing as npt
+from numpy import ma
+from typing_extensions import Self
 
-from cloudnetpy.exceptions import InconsistentDataError
+from cloudnetpy import utils
+from cloudnetpy.exceptions import ValidTimeStampError
 
 
-def truncate_netcdf_file(filename: str, output_file: str, n_profiles: int):
-    """Truncates netcdf file in 'time' dimension taking only n_profiles.
+def truncate_netcdf_file(
+    filename: str, output_file: str, n_profiles: int, dim_name: str = "time"
+) -> None:
+    """Truncates netcdf file in dim_name dimension taking only n_profiles.
     Useful for creating small files for tests.
     """
     with (
         netCDF4.Dataset(filename, "r") as nc,
         netCDF4.Dataset(output_file, "w", format=nc.data_model) as nc_new,
     ):
-        for dim in nc.dimensions.keys():
-            dim_len = None if dim == "time" else nc.dimensions[dim].size
+        for dim in nc.dimensions:
+            dim_len = None if dim == dim_name else nc.dimensions[dim].size
             nc_new.createDimension(dim, dim_len)
         for attr in nc.ncattrs():
             value = getattr(nc, attr)
@@ -24,9 +40,13 @@ def truncate_netcdf_file(filename: str, output_file: str, n_profiles: int):
             dimensions = nc.variables[key].dimensions
             fill_value = getattr(nc.variables[key], "_FillValue", None)
             var = nc_new.createVariable(
-                key, array.dtype, dimensions, zlib=True, fill_value=fill_value
+                key,
+                array.dtype,
+                dimensions,
+                zlib=True,
+                fill_value=fill_value,
             )
-            if dimensions and "time" in dimensions[0]:
+            if dimensions and dim_name in dimensions[0]:
                 if array.ndim == 1:
                     var[:] = array[:n_profiles]
                 if array.ndim == 2:
@@ -68,14 +88,14 @@ def update_nc(old_file: str, new_file: str) -> int:
 
 
 def concatenate_files(
-    filenames: list,
-    output_file: str,
+    filenames: Iterable[PathLike | str],
+    output_file: str | PathLike,
     concat_dimension: str = "time",
-    variables: list | None = None,
-    new_attributes: dict | None = None,
-    ignore: list | None = None,
-    allow_difference: list | None = None,
-) -> None:
+    variables: list[str] | None = None,
+    new_attributes: dict[str, str | int] | None = None,
+    ignore: list[str] | None = None,
+    interp_dimension: str = "range",
+) -> list[Path]:
     """Concatenate netCDF files in one dimension.
 
     Args:
@@ -86,39 +106,53 @@ def concatenate_files(
             Default is None when all variables with 'concat_dimension' will be saved.
         new_attributes: Optional new global attributes as {'attribute_name': value}.
         ignore: List of variables to be ignored.
-        allow_difference: Names of scalar variables that can differ from one file to
-            another (value from the first file is saved).
+        interp_dimension: Dimension name for interpolation if the dimensions
+            are not the same.
+
+    Returns:
+        List of filenames that were successfully concatenated.
 
     Notes:
-        Arrays without 'concat_dimension', scalars, and global attributes will be taken
-        from the first file. Groups, possibly present in a NETCDF4 formatted file,
-        are ignored.
+        Arrays without 'concat_dimension' and scalars are expanded to the
+        concat_dimension. Global attributes are taken from the first file.
+        Groups, possibly present in a NETCDF4 formatted file, are ignored.
 
     """
-    with _Concat(filenames, output_file, concat_dimension) as concat:
-        concat.get_common_variables()
+    with _Concat(filenames, output_file, concat_dimension, interp_dimension) as concat:
         concat.create_global_attributes(new_attributes)
-        concat.concat_data(variables, ignore, allow_difference)
+        return concat.concat_data(variables, ignore)
 
 
 class _Concat:
     common_variables: set[str]
 
     def __init__(
-        self, filenames: list, output_file: str, concat_dimension: str = "time"
-    ):
-        self.filenames = sorted(filenames)
+        self,
+        filenames: Iterable[str | PathLike],
+        output_file: str | PathLike,
+        concat_dimension: str = "time",
+        interp_dim: str = "range",
+    ) -> None:
         self.concat_dimension = concat_dimension
+        self.interp_dim = interp_dim
+        self.filenames = sorted(
+            [Path(filename) for filename in filenames if self._is_valid_file(filename)],
+            key=lambda f: f.name,
+        )
+        if not self.filenames:
+            msg = "No valid files to concatenate."
+            raise ValidTimeStampError(msg)
         self.first_filename = self.filenames[0]
         self.first_file = netCDF4.Dataset(self.first_filename)
         self.concatenated_file = self._init_output_file(output_file)
-        self.common_variables = set()
 
-    def get_common_variables(self):
-        """Finds variables which should have the same values in all files."""
-        for key, value in self.first_file.variables.items():
-            if self.concat_dimension not in value.dimensions:
-                self.common_variables.add(key)
+    def _is_valid_file(self, filename: str | PathLike) -> bool:
+        # Added to handle strange .znc files with no time and huge range
+        # dimension resulting in large memory usage (e.g. Jülich 2019-05-18).
+        with netCDF4.Dataset(filename) as nc:
+            return (
+                nc[self.concat_dimension].size > 0 and nc[self.interp_dim].size < 10_000
+            )
 
     def create_global_attributes(self, new_attributes: dict | None) -> None:
         """Copies global attributes from one of the source files."""
@@ -129,33 +163,47 @@ class _Concat:
 
     def concat_data(
         self,
-        variables: list | None,
-        ignore: list | None,
-        allow_vary: list | None,
-    ):
+        keep: list | None = None,
+        ignore: list | None = None,
+    ) -> list[Path]:
         """Concatenates data arrays."""
-        self._write_initial_data(variables, ignore)
+        self._write_initial_data(keep, ignore)
+        output = [self.first_filename]
         if len(self.filenames) > 1:
             for filename in self.filenames[1:]:
-                self._append_data(filename, allow_vary)
+                try:
+                    self._append_data(filename)
+                except RuntimeError as e:
+                    if "NetCDF: HDF error" in str(e):
+                        msg = f"Caught a NetCDF HDF error. Skipping file '{filename}'."
+                        logging.exception(msg)
+                        continue
+                    raise
+                output.append(filename)
+        return output
 
-    def _write_initial_data(self, variables: list | None, ignore: list | None) -> None:
-        for key in self.first_file.variables.keys():
+    def _write_initial_data(self, keep: list | None, ignore: list | None) -> None:
+        len_concat_dim = self.first_file[self.concat_dimension].size
+        auto_scale = False
+
+        for key, var in self.first_file.variables.items():
             if (
-                variables is not None
-                and key not in variables
-                and key not in self.common_variables
+                # This filtering only affects variables having the concat_dimension
+                keep is not None
+                and key not in keep
                 and key != self.concat_dimension
+                and self.concat_dimension in var.dimensions
             ):
                 continue
             if ignore and key in ignore:
                 continue
 
-            self.first_file[key].set_auto_scale(False)
-            array = self.first_file[key][:]
-            dimensions = self.first_file[key].dimensions
-            fill_value = getattr(self.first_file[key], "_FillValue", None)
-            var = self.concatenated_file.createVariable(
+            var.set_auto_scale(auto_scale)
+            array, dimensions = self._expand_array(var, len_concat_dim)
+
+            fill_value = var.get_fill_value()
+
+            var_new = self.concatenated_file.createVariable(
                 key,
                 array.dtype,
                 dimensions,
@@ -164,39 +212,56 @@ class _Concat:
                 shuffle=False,
                 fill_value=fill_value,
             )
-            var.set_auto_scale(False)
-            var[:] = array
-            _copy_attributes(self.first_file[key], var)
-
-    def _append_data(self, filename: str, allow_vary: list | None) -> None:
+            var_new.set_auto_scale(auto_scale)
+            var_new[:] = array
+            _copy_attributes(var, var_new)
+
+    def _expand_array(
+        self, var: netCDF4.Variable, n_data: int
+    ) -> tuple[ma.MaskedArray, tuple[str, ...]]:
+        dimensions = var.dimensions
+        arr = var[:]
+        if self.concat_dimension not in dimensions and var.name != self.interp_dim:
+            dimensions = (self.concat_dimension, *dimensions)
+            arr = np.repeat(arr[np.newaxis, ...], n_data, axis=0)
+
+        return arr, dimensions
+
+    def _append_data(self, filename: str | PathLike) -> None:
         with netCDF4.Dataset(filename) as file:
-            file.set_auto_scale(False)
+            auto_scale = False
+            file.set_auto_scale(auto_scale)
             ind0 = len(self.concatenated_file.variables[self.concat_dimension])
             ind1 = ind0 + len(file.variables[self.concat_dimension])
-            for key in self.concatenated_file.variables.keys():
-                array = file[key][:]
-                if key in self.common_variables:
-                    if allow_vary is not None and key in allow_vary:
-                        continue
-                    if not np.array_equal(self.first_file[key][:], array):
-                        raise InconsistentDataError(
-                            f"Inconsistent values in variable '{key}' between "
-                            f"files '{self.first_filename}' and '{filename}'"
-                        )
-                    continue
-                if array.ndim == 0:
+            n_points = ind1 - ind0
+
+            for key in self.concatenated_file.variables:
+                if key not in file.variables or key == self.interp_dim:
                     continue
-                if array.ndim == 1:
-                    self.concatenated_file.variables[key][ind0:ind1] = array
-                else:
-                    self.concatenated_file.variables[key][ind0:ind1, :] = array
 
-    def _init_output_file(self, output_file: str) -> netCDF4.Dataset:
-        data_model = (
+                array, dimensions = self._expand_array(file[key], n_points)
+
+                # Nearest neighbour interpolation in the interp_dim dimension
+                # if the dimensions are not the same between the files
+                if self.interp_dim in dimensions and (
+                    self.first_file[self.interp_dim].size != file[self.interp_dim].size
+                ):
+                    x = file.variables[self.interp_dim][:]
+                    x_target = self.first_file.variables[self.interp_dim][:]
+                    idx = np.abs(x[:, None] - x_target[None, :]).argmin(axis=0)
+                    array = array[:, idx]
+                    out_of_bounds = (x_target < x.min()) | (x_target > x.max())
+                    fill_value = self.first_file.variables[key].get_fill_value()
+                    array[:, out_of_bounds] = fill_value
+
+                self.concatenated_file.variables[key][ind0:ind1, ...] = array
+
+    def _init_output_file(self, output_file: str | PathLike) -> netCDF4.Dataset:
+        data_model: Literal["NETCDF4", "NETCDF4_CLASSIC"] = (
             "NETCDF4" if self.first_file.data_model == "NETCDF4" else "NETCDF4_CLASSIC"
         )
         nc = netCDF4.Dataset(output_file, "w", format=data_model)
-        for dim in self.first_file.dimensions.keys():
+        for dim in self.first_file.dimensions:
            dim_len = (
                 None
                 if dim == self.concat_dimension
@@ -205,29 +270,44 @@ class _Concat:
             nc.createDimension(dim, dim_len)
         return nc
 
-    def _close(self):
+    def _close(self) -> None:
         self.first_file.close()
         self.concatenated_file.close()
 
-    def __enter__(self):
+    def __enter__(self) -> Self:
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
         self._close()
 
 
-def _copy_attributes(source: netCDF4.Dataset, target: netCDF4.Dataset) -> None:
+def _copy_attributes(
+    source: netCDF4.Dataset | netCDF4.Variable,
+    target: netCDF4.Dataset | netCDF4.Variable,
+) -> None:
     for attr in source.ncattrs():
         if attr != "_FillValue":
             value = getattr(source, attr)
             setattr(target, attr, value)
 
 
-def _find_valid_time_indices(nc_old: netCDF4.Dataset, nc_new: netCDF4.Dataset):
+def _find_valid_time_indices(
+    nc_old: netCDF4.Dataset,
+    nc_new: netCDF4.Dataset,
+) -> npt.NDArray:
     return np.where(nc_new.variables["time"][:] > nc_old.variables["time"][-1])[0]
 
 
-def _update_fields(nc_old: netCDF4.Dataset, nc_new: netCDF4.Dataset, valid_ind: list):
+def _update_fields(
+    nc_old: netCDF4.Dataset,
+    nc_new: netCDF4.Dataset,
+    valid_ind: npt.NDArray,
+) -> None:
     ind0 = len(nc_old.variables["time"])
     idx = [ind0 + x for x in valid_ind]
     concat_dimension = nc_old.variables["time"].dimensions[0]
@@ -243,3 +323,63 @@ def _update_fields(nc_old: netCDF4.Dataset, nc_new: netCDF4.Dataset, valid_ind:
             nc_old.variables[field][idx, :] = nc_new.variables[field][valid_ind, :]
         elif len(dimensions) == 2 and concat_ind == 1:
             nc_old.variables[field][:, idx] = nc_new.variables[field][:, valid_ind]
+
+
+def concatenate_text_files(filenames: list, output_filename: str | PathLike) -> None:
+    """Concatenates text files."""
+    with open(output_filename, "wb") as target:
+        for filename in filenames:
+            with open(filename, "rb") as source:
+                shutil.copyfileobj(source, target)
+
+
+def bundle_netcdf_files(
+    files: Sequence[str | PathLike],
+    date: datetime.date,
+    output_file: str | PathLike,
+    concat_dimensions: tuple[str, ...] = ("time", "profile"),
+    variables: list | None = None,
+) -> list:
+    """Concatenates several netcdf files into daily file with
+    some extra data manipulation.
+    """
+    with netCDF4.Dataset(files[0]) as nc:
+        concat_dimension = None
+        for key in concat_dimensions:
+            if key in nc.dimensions:
+                concat_dimension = key
+                break
+        if concat_dimension is None:
+            msg = f"Dimension '{concat_dimensions}' not found in the files."
+            raise KeyError(msg)
+    if len(files) == 1:
+        shutil.copy(files[0], output_file)
+        return list(files)
+    valid_files = []
+    for file in files:
+        try:
+            with netCDF4.Dataset(file) as nc:
+                time = nc.variables["time"]
+                time_array = time[:]
+                time_units = time.units
+        except OSError:
+            continue
+        epoch = utils.get_epoch(time_units)
+        for timestamp in time_array:
+            if utils.seconds2date(timestamp, epoch).date() == date:
+                valid_files.append(file)
+                break
+    concatenate_files(
+        valid_files,
+        output_file,
+        concat_dimension=concat_dimension,
+        variables=variables,
+        ignore=[
+            "minimum",
+            "maximum",
+            "number_integrated_samples",
+            "Min_LWP",
+            "Max_LWP",
+        ],
+    )
+    return valid_files
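Note on the rewritten _append_data: when the range grids of two files differ in size, the appended data is regridded onto the first file's grid by nearest-neighbour matching, and target gates outside the source grid get the fill value. A self-contained sketch of that index trick (the helper name regrid_nearest is hypothetical; the real code operates on netCDF variables in place):

    import numpy as np

    def regrid_nearest(
        array: np.ndarray,     # (n_time, n_range) data from the appended file
        x: np.ndarray,         # range grid of the appended file
        x_target: np.ndarray,  # range grid of the first file
        fill_value: float,
    ) -> np.ndarray:
        # For every target gate, index of the nearest source gate.
        idx = np.abs(x[:, None] - x_target[None, :]).argmin(axis=0)
        out = array[:, idx]  # fancy indexing copies, so the input stays intact
        # Target gates outside the source grid cannot be matched sensibly.
        out_of_bounds = (x_target < x.min()) | (x_target > x.max())
        out[:, out_of_bounds] = fill_value
        return out

    data = np.arange(6.0).reshape(2, 3)  # two profiles, three gates
    print(regrid_nearest(data, np.array([0.0, 100.0, 200.0]),
                         np.array([0.0, 90.0, 210.0, 400.0]), np.nan))
    # [[ 0.  1. nan nan]
    #  [ 3.  4. nan nan]]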
cloudnetpy/constants.py CHANGED
@@ -1,19 +1,37 @@
 """Constants used in Cloudnet processing."""
 
+from typing import Final
+
 # Triple point of water
-T0 = 273.16
+T0: Final = 273.16
 
 # Ratio of the molecular weight of water vapor to dry air
-MW_RATIO = 0.62198
-
-# Specific heat capacity of air at around 275K (J kg-1 K-1)
-SPECIFIC_HEAT = 1004
-
-# Latent heat of evaporation (J kg-1)
-LATENT_HEAT = 2.26e6
+MW_RATIO: Final = 0.62198
 
 # Specific gas constant for dry air (J kg-1 K-1)
-RS = 287.058
+RS: Final = 287.058
 
 # ice density kg m-3
-RHO_ICE = 917
+RHO_ICE: Final = 917
+
+# Standard atmospheric pressure at sea level Pa
+P0: Final = 1013_25
+
+# other
+SPEED_OF_LIGHT: Final = 3.0e8
+SEC_IN_MINUTE: Final = 60
+SEC_IN_HOUR: Final = 3600
+SEC_IN_DAY: Final = 86400
+MM_TO_M: Final = 1e-3
+G_TO_KG: Final = 1e-3
+M_TO_KM: Final = 1e-3
+KG_TO_G: Final = 1e3
+M_TO_MM: Final = 1e3
+M_S_TO_MM_H: Final = SEC_IN_HOUR / MM_TO_M
+MM_H_TO_M_S: Final = 1 / M_S_TO_MM_H
+GHZ_TO_HZ: Final = 1e9
+HPA_TO_PA: Final = 100
+PA_TO_HPA: Final = 1 / HPA_TO_PA
+KM_H_TO_M_S: Final = 1000 / SEC_IN_HOUR
+TWO_WAY: Final = 2
+G: Final = 9.80665
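Note on the new constants: each conversion factor is defined so that multiplying a value in the source unit by the factor yields the target unit, and the typing.Final annotations let type checkers flag accidental reassignment. A quick standalone check of the rain-rate factors, mirroring the definitions above:

    from typing import Final

    SEC_IN_HOUR: Final = 3600
    MM_TO_M: Final = 1e-3
    M_S_TO_MM_H: Final = SEC_IN_HOUR / MM_TO_M  # 3.6e6
    MM_H_TO_M_S: Final = 1 / M_S_TO_MM_H

    rain_rate = 2.0 * MM_H_TO_M_S  # 2 mm/h expressed in m/s (~5.6e-7)
    assert round(rain_rate * M_S_TO_MM_H, 9) == 2.0  # round trip back to mm/h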