pycontrails-0.59.0-cp314-cp314-macosx_10_15_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycontrails/__init__.py +70 -0
- pycontrails/_version.py +34 -0
- pycontrails/core/__init__.py +30 -0
- pycontrails/core/aircraft_performance.py +679 -0
- pycontrails/core/airports.py +228 -0
- pycontrails/core/cache.py +889 -0
- pycontrails/core/coordinates.py +174 -0
- pycontrails/core/fleet.py +483 -0
- pycontrails/core/flight.py +2185 -0
- pycontrails/core/flightplan.py +228 -0
- pycontrails/core/fuel.py +140 -0
- pycontrails/core/interpolation.py +702 -0
- pycontrails/core/met.py +2936 -0
- pycontrails/core/met_var.py +387 -0
- pycontrails/core/models.py +1321 -0
- pycontrails/core/polygon.py +549 -0
- pycontrails/core/rgi_cython.cpython-314-darwin.so +0 -0
- pycontrails/core/vector.py +2249 -0
- pycontrails/datalib/__init__.py +12 -0
- pycontrails/datalib/_met_utils/metsource.py +746 -0
- pycontrails/datalib/ecmwf/__init__.py +73 -0
- pycontrails/datalib/ecmwf/arco_era5.py +345 -0
- pycontrails/datalib/ecmwf/common.py +114 -0
- pycontrails/datalib/ecmwf/era5.py +554 -0
- pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
- pycontrails/datalib/ecmwf/hres.py +804 -0
- pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
- pycontrails/datalib/ecmwf/ifs.py +287 -0
- pycontrails/datalib/ecmwf/model_levels.py +435 -0
- pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
- pycontrails/datalib/ecmwf/variables.py +268 -0
- pycontrails/datalib/geo_utils.py +261 -0
- pycontrails/datalib/gfs/__init__.py +28 -0
- pycontrails/datalib/gfs/gfs.py +656 -0
- pycontrails/datalib/gfs/variables.py +104 -0
- pycontrails/datalib/goes.py +764 -0
- pycontrails/datalib/gruan.py +343 -0
- pycontrails/datalib/himawari/__init__.py +27 -0
- pycontrails/datalib/himawari/header_struct.py +266 -0
- pycontrails/datalib/himawari/himawari.py +671 -0
- pycontrails/datalib/landsat.py +589 -0
- pycontrails/datalib/leo_utils/__init__.py +5 -0
- pycontrails/datalib/leo_utils/correction.py +266 -0
- pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
- pycontrails/datalib/leo_utils/search.py +250 -0
- pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
- pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
- pycontrails/datalib/leo_utils/vis.py +59 -0
- pycontrails/datalib/sentinel.py +650 -0
- pycontrails/datalib/spire/__init__.py +5 -0
- pycontrails/datalib/spire/exceptions.py +62 -0
- pycontrails/datalib/spire/spire.py +604 -0
- pycontrails/ext/bada.py +42 -0
- pycontrails/ext/cirium.py +14 -0
- pycontrails/ext/empirical_grid.py +140 -0
- pycontrails/ext/synthetic_flight.py +431 -0
- pycontrails/models/__init__.py +1 -0
- pycontrails/models/accf.py +425 -0
- pycontrails/models/apcemm/__init__.py +8 -0
- pycontrails/models/apcemm/apcemm.py +983 -0
- pycontrails/models/apcemm/inputs.py +226 -0
- pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
- pycontrails/models/apcemm/utils.py +437 -0
- pycontrails/models/cocip/__init__.py +29 -0
- pycontrails/models/cocip/cocip.py +2742 -0
- pycontrails/models/cocip/cocip_params.py +305 -0
- pycontrails/models/cocip/cocip_uncertainty.py +291 -0
- pycontrails/models/cocip/contrail_properties.py +1530 -0
- pycontrails/models/cocip/output_formats.py +2270 -0
- pycontrails/models/cocip/radiative_forcing.py +1260 -0
- pycontrails/models/cocip/radiative_heating.py +520 -0
- pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
- pycontrails/models/cocip/wake_vortex.py +396 -0
- pycontrails/models/cocip/wind_shear.py +120 -0
- pycontrails/models/cocipgrid/__init__.py +9 -0
- pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
- pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
- pycontrails/models/dry_advection.py +602 -0
- pycontrails/models/emissions/__init__.py +21 -0
- pycontrails/models/emissions/black_carbon.py +599 -0
- pycontrails/models/emissions/emissions.py +1353 -0
- pycontrails/models/emissions/ffm2.py +336 -0
- pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
- pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
- pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
- pycontrails/models/extended_k15.py +1327 -0
- pycontrails/models/humidity_scaling/__init__.py +37 -0
- pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
- pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
- pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
- pycontrails/models/issr.py +210 -0
- pycontrails/models/pcc.py +326 -0
- pycontrails/models/pcr.py +154 -0
- pycontrails/models/ps_model/__init__.py +18 -0
- pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
- pycontrails/models/ps_model/ps_grid.py +701 -0
- pycontrails/models/ps_model/ps_model.py +1000 -0
- pycontrails/models/ps_model/ps_operational_limits.py +525 -0
- pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
- pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
- pycontrails/models/sac.py +442 -0
- pycontrails/models/tau_cirrus.py +183 -0
- pycontrails/physics/__init__.py +1 -0
- pycontrails/physics/constants.py +117 -0
- pycontrails/physics/geo.py +1138 -0
- pycontrails/physics/jet.py +968 -0
- pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
- pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
- pycontrails/physics/thermo.py +551 -0
- pycontrails/physics/units.py +472 -0
- pycontrails/py.typed +0 -0
- pycontrails/utils/__init__.py +1 -0
- pycontrails/utils/dependencies.py +66 -0
- pycontrails/utils/iteration.py +13 -0
- pycontrails/utils/json.py +187 -0
- pycontrails/utils/temp.py +50 -0
- pycontrails/utils/types.py +163 -0
- pycontrails-0.59.0.dist-info/METADATA +179 -0
- pycontrails-0.59.0.dist-info/RECORD +123 -0
- pycontrails-0.59.0.dist-info/WHEEL +6 -0
- pycontrails-0.59.0.dist-info/licenses/LICENSE +178 -0
- pycontrails-0.59.0.dist-info/licenses/NOTICE +43 -0
- pycontrails-0.59.0.dist-info/top_level.txt +3 -0
pycontrails/core/vector.py
@@ -0,0 +1,2249 @@
"""Lightweight data structures for vector paths."""

from __future__ import annotations

import hashlib
import json
import logging
import sys
import warnings
from collections.abc import Generator, Iterable, Iterator, Sequence
from typing import Any, Self, overload

if sys.version_info >= (3, 12):
    from typing import override
else:
    from typing_extensions import override

import numpy as np
import numpy.typing as npt
import pandas as pd
import xarray as xr

from pycontrails.core import coordinates, interpolation
from pycontrails.core import met as met_module
from pycontrails.physics import units
from pycontrails.utils import dependencies
from pycontrails.utils import json as json_utils

logger = logging.getLogger(__name__)


class AttrDict(dict[str, Any]):
    """Thin wrapper around dict to warn when setting a key that already exists."""

    __slots__ = ()

    def __setitem__(self, k: str, v: Any) -> None:
        """Warn when setting values that already contain values.

        Parameters
        ----------
        k : str
            Key
        v : Any
            Value
        """
        if k in self and self[k] is not None and self[k] is not v:
            warnings.warn(
                f"Overwriting attr key `{k}`. Use `.update({k}=...)` to suppress warning."
            )

        super().__setitem__(k, v)

    def setdefault(self, k: str, default: Any = None) -> Any:
        """Thin wrapper around ``dict.setdefault``.

        Overwrites value if value is None.

        Parameters
        ----------
        k : str
            Key
        default : Any, optional
            Default value for key ``k``

        Returns
        -------
        Any
            Value at ``k``
        """
        ret = self.get(k, None)
        if ret is not None:
            return ret

        self[k] = default
        return default
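

# --- Editor's illustrative sketch (not part of the released vector.py) ---
# A minimal demonstration of the AttrDict semantics above: item assignment
# warns before overwriting a non-None value, while dict.update (not
# overridden, so it bypasses __setitem__) and setdefault do not.
_attrs_demo = AttrDict({"a": 1})
_attrs_demo["a"] = 2  # emits UserWarning: Overwriting attr key `a` ...
_attrs_demo.update(a=3)  # silent overwrite
assert _attrs_demo.setdefault("a", 4) == 3  # existing non-None value wins
# --- end sketch ---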


class VectorDataDict(dict[str, np.ndarray]):
    """Thin wrapper around ``dict[str, np.ndarray]`` to ensure consistency.

    Parameters
    ----------
    data : dict[str, np.ndarray] | None, optional
        Dictionary input. A shallow copy is always made.
    """

    __slots__ = ("_size",)

    #: Length of the data
    _size: int

    def __init__(self, data: dict[str, np.ndarray] | None = None) -> None:
        super().__init__(data or {})

        # validate any arrays, first one defines _size attribute
        for arr in self.values():
            self._validate_array(arr)

    def __setitem__(self, k: str, v: npt.ArrayLike) -> None:
        """Set new key-value pair to instance and warn when overwriting existing key.

        This method casts ``v`` to an :class:`numpy.ndarray` and ensures that the array size is
        consistent with the instance.

        Parameters
        ----------
        k : str
            Key
        v : npt.ArrayLike
            Values

        See Also
        --------
        :meth:`update`
        """
        v = np.asarray(v)  # asarray does NOT copy
        self._validate_array(v)

        if k in self and len(self[k]) and self[k] is not v:
            warnings.warn(
                f"Overwriting data in key `{k}`. Use `.update({k}=...)` to suppress warning."
            )

        super().__setitem__(k, v)

    def __delitem__(self, k: str) -> None:
        super().__delitem__(k)

        # if no keys remain, delete _size attribute
        if not self:
            del self._size

    def setdefault(self, k: str, default: npt.ArrayLike | None = None) -> np.ndarray:
        """Thin wrapper around ``dict.setdefault``.

        The main purpose of overriding is to run :meth:`_validate_array()` on set.

        Parameters
        ----------
        k : str
            Key
        default : npt.ArrayLike | None, optional
            Default value for key ``k``

        Returns
        -------
        np.ndarray
            Value at ``k``
        """
        ret = self.get(k, None)
        if ret is not None:
            return ret

        if default is None:
            default = np.array([])

        self[k] = default
        return self[k]

    def update(  # type: ignore[override]
        self, other: dict[str, npt.ArrayLike] | None = None, **kwargs: npt.ArrayLike
    ) -> None:
        """Update values without warning if overwriting.

        This method casts values in ``other`` to :class:`numpy.ndarray` and
        ensures that the array sizes are consistent with the instance.

        Parameters
        ----------
        other : dict[str, npt.ArrayLike] | None, optional
            Fields to update as dict
        **kwargs : npt.ArrayLike
            Fields to update as kwargs
        """
        other = other or {}
        other_arrs = {k: np.asarray(v) for k, v in other.items()}
        for arr in other_arrs.values():
            self._validate_array(arr)

        super().update(other_arrs)

        # validate any kwarg arrays
        kwargs_arr = {k: np.asarray(v) for k, v in kwargs.items()}
        for arr in kwargs_arr.values():
            self._validate_array(arr)

        super().update(kwargs_arr)

    def _validate_array(self, arr: np.ndarray) -> None:
        """Ensure that ``arr`` is compatible (1 dimensional of equal size) with instance.

        Set attribute ``_size`` if it has not yet been defined.

        Parameters
        ----------
        arr : np.ndarray
            Array to validate

        Raises
        ------
        ValueError
            If ``arr`` is not compatible with instance.
        """
        if arr.ndim != 1:
            raise ValueError("All np.arrays must have dimension 1.")

        size = getattr(self, "_size", 0)
        if not size:
            self._size = arr.size
            return

        if arr.size != size:
            raise ValueError(f"Incompatible array sizes: {arr.size} and {size}.")
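

# --- Editor's illustrative sketch (not part of the released vector.py) ---
# VectorDataDict coerces values to 1D numpy arrays and pins the common length
# (_size) from the first array it sees; later arrays must match that length.
_vdd_demo = VectorDataDict({"a": np.array([1.0, 2.0])})
_vdd_demo["b"] = [3.0, 4.0]  # ok: cast to ndarray, size 2 matches
try:
    _vdd_demo["c"] = [1.0, 2.0, 3.0]  # size 3 != 2
except ValueError as exc:
    print(exc)  # Incompatible array sizes: 3 and 2.
# --- end sketch ---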


def _empty_vector_dict(keys: Iterable[str]) -> dict[str, np.ndarray]:
    """Create a dictionary with keys defined by ``keys`` and empty arrays.

    Parameters
    ----------
    keys : Iterable[str]
        Keys to include in dictionary.

    Returns
    -------
    dict[str, np.ndarray]
        Dictionary with empty arrays.
    """
    data = {key: np.array([]) for key in keys}

    # The default dtype is float64
    # Time is special and should have a non-default dtype of datetime64[ns]
    if "time" in data:
        data.update(time=np.array([], dtype="datetime64[ns]"))

    return data


class VectorDataset:  # noqa: PLW1641
    """Base class to hold 1D arrays of consistent size.

    Parameters
    ----------
    data : dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataset | None, optional
        Initial data, by default None. A shallow copy is always made. Use the ``copy``
        parameter to copy the underlying array data.
    attrs : dict[str, Any] | None, optional
        Dictionary of attributes, by default None. A shallow copy is always made.
    copy : bool, optional
        Copy individual arrays on instantiation, by default True.
    **attrs_kwargs : Any
        Additional attributes passed as keyword arguments.

    Raises
    ------
    ValueError
        If "time" variable cannot be converted to numpy array.
    """

    __slots__ = ("attrs", "data")

    #: Generic dataset attributes
    attrs: AttrDict

    #: Vector data with labels as keys and :class:`numpy.ndarray` as values
    data: VectorDataDict

    def __init__(
        self,
        data: dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataset | None = None,
        *,
        attrs: dict[str, Any] | None = None,
        copy: bool = True,
        **attrs_kwargs: Any,
    ) -> None:
        # Set data: always shallow copy
        # -----------------------------

        # Casting from one VectorDataset type to another
        # e.g., flight = Flight(...); vector = VectorDataset(flight)
        if isinstance(data, VectorDataset):
            attrs = {**data.attrs, **(attrs or {})}
            if copy:
                self.data = VectorDataDict({k: v.copy() for k, v in data.data.items()})
            else:
                self.data = VectorDataDict(data.data)

        elif data is None:
            self.data = VectorDataDict()

        elif isinstance(data, pd.DataFrame):
            attrs = {**data.attrs, **(attrs or {})}

            # Take extra caution with a time column
            try:
                time = data["time"]
            except KeyError:
                self.data = VectorDataDict({k: v.to_numpy(copy=copy) for k, v in data.items()})
            else:
                time = _handle_time_column(time)
                data_np = {k: v.to_numpy(copy=copy) for k, v in data.items() if k != "time"}
                data_np["time"] = time.to_numpy(copy=copy)
                self.data = VectorDataDict(data_np)

        # For anything else, we assume it is a dictionary of array-like and attach it
        else:
            self.data = VectorDataDict({k: np.array(v, copy=copy) for k, v in data.items()})

        # Set attributes: always shallow copy
        # -----------------------------------

        self.attrs = AttrDict(attrs or {})
        self.attrs.update(attrs_kwargs)

    @classmethod
    def _from_fastpath(
        cls,
        data: dict[str, np.ndarray],
        attrs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Self:
        """Create new instance from consistent data.

        This is a low-level method that bypasses the standard constructor in certain
        special cases. It is intended for internal use only.

        In essence, this method skips any validation from __init__ and directly sets
        ``data`` and ``attrs``. This is useful when creating a new instance from an existing
        instance whose data has already been validated.
        """
        obj = cls.__new__(cls)

        obj.data = VectorDataDict(data)
        obj.attrs = AttrDict(attrs or {})

        for key, value in kwargs.items():
            try:
                setattr(obj, key, value)
            # If key not present in __slots__ of class (or parents), it's intended for attrs
            except AttributeError:
                obj.attrs[key] = value

        return obj
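
    # --- Editor's illustrative sketch (not part of the released vector.py) ---
    # The constructor accepts a dict of array-likes, a DataFrame, or another
    # VectorDataset; extra keyword arguments land in attrs:
    #
    #     v = VectorDataset({"a": [1.0, 2.0]}, attrs={"source": "demo"}, flag=True)
    #     v.attrs["source"]  # -> 'demo'
    #     v.attrs["flag"]    # -> True
    # --- end sketch ---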

    # ------------
    # dict-like methods
    # ------------
    def __getitem__(self, key: str) -> np.ndarray:
        """Get values from :attr:`data`.

        Parameters
        ----------
        key : str
            Key to get from :attr:`data`

        Returns
        -------
        np.ndarray
            Values at :attr:`data[key]`
        """
        return self.data[key]

    def get(self, key: str, default_value: Any = None) -> Any:
        """Get values from :attr:`data` with ``default_value`` if ``key`` not in :attr:`data`.

        Parameters
        ----------
        key : str
            Key to get from :attr:`data`
        default_value : Any, optional
            Return ``default_value`` if `key` not in :attr:`data`, by default ``None``

        Returns
        -------
        Any
            Values at :attr:`data[key]` or ``default_value``
        """
        return self.data.get(key, default_value)

    def __setitem__(self, key: str, values: npt.ArrayLike) -> None:
        """Set values at key `key` on :attr:`data`.

        Parameters
        ----------
        key : str
            Key name in :attr:`data`
        values : npt.ArrayLike
            Values to set to :attr:`data`. Array size must be compatible with existing data.
        """
        self.data[key] = values

    def __delitem__(self, key: str) -> None:
        """Delete values at key `key` on :attr:`data`.

        Parameters
        ----------
        key : str
            Key name in :attr:`data`
        """
        del self.data[key]

    def __iter__(self) -> Iterator[str]:
        """Iterate over keys in :attr:`data`.

        Returns
        -------
        Iterator[str]
            Keys in :attr:`data`
        """
        return iter(self.data)

    def __contains__(self, key: str) -> bool:
        """Check if key `key` is in :attr:`data`.

        Parameters
        ----------
        key : str
            Key to check

        Returns
        -------
        bool
            True if `key` is in :attr:`data`, False otherwise
        """
        return key in self.data

    def update(
        self,
        other: dict[str, npt.ArrayLike] | None = None,
        **kwargs: npt.ArrayLike,
    ) -> None:
        """Update values in :attr:`data` dict without warning if overwriting.

        Parameters
        ----------
        other : dict[str, npt.ArrayLike] | None, optional
            Fields to update as dict
        **kwargs : npt.ArrayLike
            Fields to update as kwargs
        """
        self.data.update(other, **kwargs)

    def setdefault(self, key: str, default: npt.ArrayLike | None = None) -> np.ndarray:
        """Shortcut to :meth:`VectorDataDict.setdefault`.

        Parameters
        ----------
        key : str
            Key in :attr:`data` dict.
        default : npt.ArrayLike, optional
            Values to use as default, if key is not defined

        Returns
        -------
        np.ndarray
            Values at ``key``
        """
        return self.data.setdefault(key, default)

    __marker = object()

    def get_data_or_attr(self, key: str, default: Any = __marker) -> Any:
        """Get value from :attr:`data` or :attr:`attrs`.

        This method first checks if ``key`` is in :attr:`data` and returns the value if so.
        If ``key`` is not in :attr:`data`, then this method checks if ``key`` is in :attr:`attrs`
        and returns the value if so. If ``key`` is not in :attr:`data` or :attr:`attrs`,
        then the ``default`` value is returned if provided. Otherwise a :class:`KeyError`
        is raised.

        Parameters
        ----------
        key : str
            Key to get from :attr:`data` or :attr:`attrs`
        default : Any, optional
            Default value to return if ``key`` is not in :attr:`data` or :attr:`attrs`.

        Returns
        -------
        Any
            Value at :attr:`data[key]` or :attr:`attrs[key]`

        Raises
        ------
        KeyError
            If ``key`` is not in :attr:`data` or :attr:`attrs` and ``default`` is not provided.

        Examples
        --------
        >>> vector = VectorDataset({"a": [1, 2, 3]}, attrs={"b": 4})
        >>> vector.get_data_or_attr("a")
        array([1, 2, 3])

        >>> vector.get_data_or_attr("b")
        4

        >>> vector.get_data_or_attr("c")
        Traceback (most recent call last):
        ...
        KeyError: "Key 'c' not found in data or attrs."

        >>> vector.get_data_or_attr("c", default=5)
        5

        See Also
        --------
        get_constant
        """
        marker = self.__marker

        out = self.get(key, marker)
        if out is not marker:
            return out

        out = self.attrs.get(key, marker)
        if out is not marker:
            return out

        if default is not marker:
            return default

        msg = f"Key '{key}' not found in data or attrs."
        raise KeyError(msg)

    # ------------

    def __len__(self) -> int:
        """Length of each array in :attr:`data`.

        Returns
        -------
        int
            Length of each array in :attr:`data`
        """
        return self.size

    def _display_attrs(self) -> dict[str, str]:
        """Return properties used in `repr` constructions.

        Returns
        -------
        dict[str, str]
            Properties used in :meth:`__repr__` and :meth:`_repr_html_`.
        """

        # Clip any attribute value that is too long
        def str_clip(v: Any) -> str:
            s = str(v)
            if len(s) < 80:
                return s
            return f"{s[:77]}..."

        return {k: str_clip(v) for k, v in self.attrs.items()}

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        n_attrs = len(self.attrs)
        n_keys = len(self.data)
        _repr = f"{class_name} [{n_keys} keys x {self.size} length, {n_attrs} attributes]"

        keys = list(self)
        keys = [*keys[0:5], "...", *keys[-1:]] if len(keys) > 5 else keys
        _repr += f"\n\tKeys: {', '.join(keys)}"

        attrs = self._display_attrs()
        _repr += "\n\tAttributes:\n"
        _repr += "\n".join([f"\t{k:20}{v}" for k, v in attrs.items()])

        return _repr

    def _repr_html_(self) -> str:
        name = type(self).__name__
        n_attrs = len(self.attrs)
        n_keys = len(self.data)
        attrs = self._display_attrs()
        size = self.size

        title = f"<b>{name}</b> [{n_keys} keys x {size} length, {n_attrs} attributes]<br/ ><br/>"

        # matching pd.DataFrame styling
        header = '<tr style="border-bottom:1px solid silver"><th colspan="2">Attributes</th></tr>'
        rows = [f"<tr><td>{k}</td><td>{v}</td></tr>" for k, v in attrs.items()]
        table = f"<table>{header + ''.join(rows)}</table>"
        return title + table + self.dataframe._repr_html_()

    def __bool__(self) -> bool:
        """Check if :attr:`data` is nonempty.

        Returns
        -------
        bool
            True if non-empty values are set in :attr:`data`
        """
        return self.size > 0

    def __add__(self, other: Self | None) -> Self:
        """Concatenate two compatible instances of VectorDataset.

        In this context, compatibility means that both have identical :attr:`data` keys.

        This operator behaves similarly to the ``__add__`` method on python lists.

        If self is an empty VectorDataset, return other. This is useful when
        calling :keyword:`sum` with an empty initial value.

        Parameters
        ----------
        other : Self | None
            Other values to concatenate

        Returns
        -------
        Self
            Concatenated values.

        Raises
        ------
        KeyError
            If `other` has different :attr:`data` keys than self.
        """
        # Short circuit: If other is empty or None, return self. The order here can matter.
        # We let self (so the left addend) take priority.
        if not other:
            return self
        if not self:
            return other

        return type(self).sum((self, other))

    @classmethod
    def sum(
        cls,
        vectors: Sequence[VectorDataset],
        infer_attrs: bool = True,
        fill_value: float | None = None,
    ) -> Self:
        """Sum a list of :class:`VectorDataset` instances.

        Parameters
        ----------
        vectors : Sequence[VectorDataset]
            List of :class:`VectorDataset` instances to concatenate.
        infer_attrs : bool, optional
            If True, infer attributes from the first element in the sequence.
        fill_value : float | None, optional
            Fill value to use when concatenating arrays. By default None, which raises
            an error if incompatible keys are found.

        Returns
        -------
        Self
            Sum of all instances in ``vectors``.

        Raises
        ------
        KeyError
            If incompatible :attr:`data` keys are found among ``vectors``.

        Examples
        --------
        >>> from pycontrails import VectorDataset
        >>> v1 = VectorDataset({"a": [1, 2, 3], "b": [4, 5, 6]})
        >>> v2 = VectorDataset({"a": [7, 8, 9], "b": [10, 11, 12]})
        >>> v3 = VectorDataset({"a": [13, 14, 15], "b": [16, 17, 18]})
        >>> v = VectorDataset.sum([v1, v2, v3])
        >>> v.dataframe
            a   b
        0   1   4
        1   2   5
        2   3   6
        3   7  10
        4   8  11
        5   9  12
        6  13  16
        7  14  17
        8  15  18

        """
        if cls not in (VectorDataset, GeoVectorDataset):
            msg = (
                "Method 'sum' is only available on 'VectorDataset' and 'GeoVectorDataset'. "
                "To sum 'Flight' instances, use 'Fleet.from_seq'."
            )
            raise TypeError(msg)

        vectors = [v for v in vectors if v is not None]  # remove None values

        if not vectors:
            return cls()

        keys: Iterable[str]
        if fill_value is None:
            keys = vectors[0].data.keys()
            for v in vectors[1:]:
                if v.data.keys() != keys:
                    diff = set(v).symmetric_difference(keys)
                    msg = f"Summands have incompatible keys. Difference: {diff}"
                    raise KeyError(msg)

        else:
            keys = set().union(*[v.data.keys() for v in vectors])

        def _get(k: str, v: VectorDataset) -> np.ndarray:
            # Could also use VectorDataset.get() here, but we want to avoid creating
            # an unused array if the key is present in the VectorDataset.
            try:
                return v[k]
            except KeyError:
                return np.full(v.size, fill_value)

        def concat(key: str) -> np.ndarray:
            values = [_get(key, v) for v in vectors]
            return np.concatenate(values)

        data = {key: concat(key) for key in keys}
        attrs = vectors[0].attrs if infer_attrs else None

        return cls._from_fastpath(data, attrs)
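
    # --- Editor's illustrative sketch (not part of the released vector.py) ---
    # With fill_value set, sum() takes the union of keys and pads missing
    # columns instead of raising a KeyError:
    #
    #     v1 = VectorDataset({"a": [1.0]})
    #     v2 = VectorDataset({"a": [2.0], "b": [3.0]})
    #     VectorDataset.sum([v1, v2], fill_value=np.nan)["b"]  # -> array([nan,  3.])
    # --- end sketch ---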

    def __eq__(self, other: object) -> bool:
        """Determine if two instances are equal.

        NaN values are considered equal in this comparison.

        Parameters
        ----------
        other : object
            VectorDataset to compare with

        Returns
        -------
        bool
            True if both instances have identical :attr:`data` and :attr:`attrs`.
        """
        if not isinstance(other, VectorDataset):
            return False

        # Check attrs
        if self.attrs.keys() != other.attrs.keys():
            return False

        for key, val in self.attrs.items():
            if isinstance(val, np.ndarray):
                # equal_nan not supported for non-numeric data
                equal_nan = not np.issubdtype(val.dtype, "O")
                if not np.array_equal(val, other.attrs[key], equal_nan=equal_nan):
                    return False
            elif val != other.attrs[key]:
                return False

        # Check data
        if self.data.keys() != other.data.keys():
            return False

        for key, val in self.data.items():
            # equal_nan not supported for non-numeric data (e.g. strings)
            equal_nan = not np.issubdtype(val.dtype, "O")
            if not np.array_equal(val, other[key], equal_nan=equal_nan):
                return False

        return True

    @property
    def size(self) -> int:
        """Length of each array in :attr:`data`.

        Returns
        -------
        int
            Length of each array in :attr:`data`.
        """
        return getattr(self.data, "_size", 0)

    @property
    def shape(self) -> tuple[int]:
        """Shape of each array in :attr:`data`.

        Returns
        -------
        tuple[int]
            Shape of each array in :attr:`data`.
        """
        return (self.size,)

    @property
    def dataframe(self) -> pd.DataFrame:
        """Shorthand property to access :meth:`to_dataframe` with ``copy=False``.

        Returns
        -------
        pd.DataFrame
            Equivalent to the output from :meth:`to_dataframe()`
        """
        return self.to_dataframe(copy=False)

    @property
    def hash(self) -> str:
        """Generate a unique hash for this class instance.

        Returns
        -------
        str
            Unique hash for this instance (sha1)
        """
        _hash = json.dumps(self.data, cls=json_utils.NumpyEncoder)
        return hashlib.sha1(bytes(_hash, "utf-8")).hexdigest()
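
    # --- Editor's illustrative sketch (not part of the released vector.py) ---
    # The hash digests only the data arrays (serialized via
    # json_utils.NumpyEncoder), so attrs do not affect it; assuming the
    # encoder is deterministic, identical data implies identical hashes:
    #
    #     VectorDataset({"a": [1.0]}).hash == VectorDataset({"a": [1.0]}, attrs={"x": 1}).hash
    #     # -> True
    # --- end sketch ---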

    # ------------
    # Utilities
    # ------------

    def copy(self, **kwargs: Any) -> Self:
        """Return a copy of this instance.

        Parameters
        ----------
        **kwargs : Any
            Additional keyword arguments passed into the constructor of the returned class.

        Returns
        -------
        Self
            Copy of class
        """
        data = {key: value.copy() for key, value in self.data.items()}
        return type(self)._from_fastpath(data, self.attrs, **kwargs)

    def select(self: VectorDataset, keys: Iterable[str], copy: bool = True) -> VectorDataset:
        """Return new class instance only containing specified keys.

        Parameters
        ----------
        keys : Iterable[str]
            An iterable of keys to filter by.
        copy : bool, optional
            Copy data on selection.
            Defaults to True.

        Returns
        -------
        VectorDataset
            VectorDataset containing only data associated to ``keys``.
            Note that this method always returns a :class:`VectorDataset`, even if
            the calling class is a proper subclass of :class:`VectorDataset`.
        """
        data = {key: np.array(self[key], copy=copy) for key in keys}
        return VectorDataset._from_fastpath(data, self.attrs)

    def filter(self, mask: npt.NDArray[np.bool_], copy: bool = True, **kwargs: Any) -> Self:
        """Filter :attr:`data` according to a boolean array ``mask``.

        Entries corresponding to ``mask == True`` are kept.

        Parameters
        ----------
        mask : npt.NDArray[np.bool_]
            Boolean array with compatible shape.
        copy : bool, optional
            Copy data on filter. Defaults to True. See
            `numpy best practices <https://numpy.org/doc/stable/user/basics.indexing.html#slicing-and-striding>`_
            for insight into whether copy is appropriate.
        **kwargs : Any
            Additional keyword arguments passed into the constructor of the returned class.

        Returns
        -------
        Self
            Containing filtered data

        Raises
        ------
        TypeError
            If ``mask`` is not a boolean array.
        """
        self.data._validate_array(mask)
        if mask.dtype != bool:
            raise TypeError("Parameter `mask` must be a boolean array.")

        data = {key: np.array(value[mask], copy=copy) for key, value in self.data.items()}
        return type(self)._from_fastpath(data, self.attrs, **kwargs)

    def sort(self, by: str | list[str]) -> Self:
        """Sort data by key(s).

        This method always creates a copy of the data by calling
        :meth:`pandas.DataFrame.sort_values`.

        Parameters
        ----------
        by : str | list[str]
            Key or list of keys to sort by.

        Returns
        -------
        Self
            Instance with sorted data.
        """
        return type(self)(data=self.dataframe.sort_values(by=by), attrs=self.attrs, copy=False)
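
    # --- Editor's illustrative sketch (not part of the released vector.py) ---
    # filter() keeps rows where the boolean mask is True; sort() reorders rows
    # by delegating to pandas.DataFrame.sort_values:
    #
    #     v = VectorDataset({"a": [3.0, 1.0, 2.0]})
    #     v.filter(v["a"] > 1.5)["a"]  # -> array([3., 2.])
    #     v.sort("a")["a"]             # -> array([1., 2., 3.])
    # --- end sketch ---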

    def ensure_vars(self, vars: str | Iterable[str], raise_error: bool = True) -> bool:
        """Ensure variables exist in column of :attr:`data` or :attr:`attrs`.

        Parameters
        ----------
        vars : str | Iterable[str]
            A single string variable name or a sequence of string variable names.
        raise_error : bool, optional
            Raise KeyError if data does not contain variables.
            Defaults to True.

        Returns
        -------
        bool
            True if all variables exist.
            False otherwise.

        Raises
        ------
        KeyError
            Raises when dataset does not contain variable in ``vars``
        """
        if isinstance(vars, str):
            vars = (vars,)

        for v in vars:
            if v in self or v in self.attrs:
                continue
            if raise_error:
                msg = f"{type(self).__name__} instance does not contain data or attr '{v}'"
                raise KeyError(msg)
            return False

        return True

    def broadcast_attrs(
        self,
        keys: str | Iterable[str],
        overwrite: bool = False,
        raise_error: bool = True,
    ) -> None:
        """Attach values from ``keys`` in :attr:`attrs` onto :attr:`data`.

        If possible, use ``dtype = np.float32`` when broadcasting. If not possible,
        use whatever ``dtype`` is inferred from the data by :func:`numpy.full`.

        Parameters
        ----------
        keys : str | Iterable[str]
            Keys to broadcast
        overwrite : bool, optional
            If True, overwrite existing values in :attr:`data`. By default False.
        raise_error : bool, optional
            Raise KeyError if :attr:`self.attrs` does not contain some of ``keys``.

        Raises
        ------
        KeyError
            Not all ``keys`` found in :attr:`attrs`.
        """
        if isinstance(keys, str):
            keys = (keys,)

        # Validate everything up front to avoid partial broadcasting
        for key in keys:
            try:
                scalar = self.attrs[key]
            except KeyError as exc:
                if raise_error:
                    raise KeyError(f"{type(self)} does not contain attr `{key}`") from exc
                continue

            if key in self.data and not overwrite:
                warnings.warn(
                    f"Found duplicate key {key} in attrs and data. "
                    "Set `overwrite=True` parameter to force overwrite."
                )
                continue

            min_dtype = np.min_scalar_type(scalar)
            dtype = np.float32 if np.can_cast(min_dtype, np.float32) else None
            self.data.update({key: np.full(self.size, scalar, dtype=dtype)})

    def broadcast_numeric_attrs(
        self, ignore_keys: str | Iterable[str] | None = None, overwrite: bool = False
    ) -> None:
        """Attach numeric values in :attr:`attrs` onto :attr:`data`.

        Iterate through values in :attr:`attrs` and attach :class:`float` and
        :class:`int` values to ``data``.

        This method modifies object in place.

        Parameters
        ----------
        ignore_keys : str | Iterable[str] | None, optional
            Do not broadcast selected keys.
            Defaults to None.
        overwrite : bool, optional
            If True, overwrite existing values in :attr:`data`. By default False.
        """
        if ignore_keys is None:
            ignore_keys = ()
        elif isinstance(ignore_keys, str):
            ignore_keys = (ignore_keys,)

        # Somewhat brittle: Only checking for int or float type
        numeric_attrs = (
            attr
            for attr, val in self.attrs.items()
            if (isinstance(val, int | float | np.number) and attr not in ignore_keys)
        )
        self.broadcast_attrs(numeric_attrs, overwrite)
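
    # --- Editor's illustrative sketch (not part of the released vector.py) ---
    # broadcast_attrs() expands a scalar attribute into a per-row column,
    # preferring float32 when the scalar can be cast to that precision:
    #
    #     v = VectorDataset({"a": [1.0, 2.0, 3.0]}, attrs={"mass": 500})
    #     v.broadcast_attrs("mass")
    #     v["mass"]  # -> array([500., 500., 500.], dtype=float32)
    # --- end sketch ---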

    def get_constant(self, key: str, default: Any = __marker) -> Any:
        """Get a constant value from :attr:`attrs` or :attr:`data`.

        - If ``key`` is found in :attr:`attrs`, the value is returned.
        - If ``key`` is found in :attr:`data`, the common value is returned if all
          values are equal.
        - If ``key`` is not found in :attr:`attrs` or :attr:`data` and a ``default`` is provided,
          the ``default`` is returned.
        - Otherwise, a KeyError is raised.

        Parameters
        ----------
        key : str
            Key to look for.
        default : Any, optional
            Default value to return if ``key`` is not found in :attr:`attrs` or :attr:`data`.

        Returns
        -------
        Any
            The constant value for ``key``.

        Raises
        ------
        KeyError
            If ``key`` is not found in :attr:`attrs` or the values in :attr:`data` are not equal
            and ``default`` is not provided.

        Examples
        --------
        >>> vector = VectorDataset({"a": [1, 1, 1], "b": [2, 2, 3]})
        >>> vector.get_constant("a")
        np.int64(1)
        >>> vector.get_constant("b")
        Traceback (most recent call last):
        ...
        KeyError: "A constant key 'b' not found in attrs or data"
        >>> vector.get_constant("b", 3)
        3

        See Also
        --------
        get_data_or_attr
        GeoVectorDataset.constants
        """
        marker = self.__marker

        out = self.attrs.get(key, marker)
        if out is not marker:
            return out

        arr: np.ndarray = self.data.get(key, marker)  # type: ignore[arg-type]
        if arr is not marker:
            try:
                vals = np.unique(arr)
            except TypeError:
                # A TypeError can occur if the arr has object dtype and contains None
                # Handle this case by returning None
                if arr.dtype == object and np.all(arr == None):  # noqa: E711
                    return None
                raise

            if len(vals) == 1:
                return vals[0]

        if default is not marker:
            return default

        msg = f"A constant key '{key}' not found in attrs or data"
        raise KeyError(msg)

    # ------------
    # I / O
    # ------------

    def to_dataframe(self, copy: bool = True) -> pd.DataFrame:
        """Create :class:`pd.DataFrame` in which each key-value pair in :attr:`data` is a column.

        Data is copied on DataFrame creation by default.
        Set the ``copy`` parameter to False to avoid copying data values on creation.

        Parameters
        ----------
        copy : bool, optional
            Copy data on DataFrame creation.

        Returns
        -------
        pd.DataFrame
            DataFrame holding key-values as columns.
        """
        df = pd.DataFrame(self.data, copy=copy)
        df.attrs = self.attrs
        return df

    def to_dict(self) -> dict[str, Any]:
        """Create dictionary with :attr:`data` and :attr:`attrs`.

        If geo-spatial coordinates (e.g. ``"latitude"``, ``"longitude"``, ``"altitude"``)
        are present, round to a reasonable precision. If a ``"time"`` variable is present,
        round to unix seconds. When the instance is a :class:`GeoVectorDataset`,
        disregard any ``"altitude"`` or ``"level"`` coordinate and only include
        ``"altitude_ft"`` in the output.

        Returns
        -------
        dict[str, Any]
            Dictionary with :attr:`data` and :attr:`attrs`.

        See Also
        --------
        :meth:`from_dict`

        Examples
        --------
        >>> import pprint
        >>> from pycontrails import Flight
        >>> fl = Flight(
        ...     longitude=[-100, -110],
        ...     latitude=[40, 50],
        ...     level=[200, 200],
        ...     time=[np.datetime64("2020-01-01T09"), np.datetime64("2020-01-01T09:30")],
        ...     aircraft_type="B737",
        ... )
        >>> fl = fl.resample_and_fill("5min")
        >>> pprint.pprint(fl.to_dict())
        {'aircraft_type': 'B737',
         'altitude_ft': [38661.0, 38661.0, 38661.0, 38661.0, 38661.0, 38661.0, 38661.0],
         'latitude': [40.0, 41.724, 43.428, 45.111, 46.769, 48.399, 50.0],
         'longitude': [-100.0,
                       -101.441,
                       -102.959,
                       -104.563,
                       -106.267,
                       -108.076,
                       -110.0],
         'time': [1577869200,
                  1577869500,
                  1577869800,
                  1577870100,
                  1577870400,
                  1577870700,
                  1577871000]}
        """
        np_encoder = json_utils.NumpyEncoder()

        # round latitude, longitude, and altitude
        precision = {"longitude": 3, "latitude": 3, "altitude_ft": 0}

        def encode(key: str, obj: Any) -> Any:
            # Try to handle some pandas objects
            if hasattr(obj, "to_numpy"):
                obj = obj.to_numpy()

            # Convert numpy objects to python objects
            if isinstance(obj, np.ndarray | np.generic):
                # round time to unix seconds
                if key == "time":
                    return np_encoder.default(obj.astype("datetime64[s]").astype(int))

                # round specific keys in precision
                try:
                    d = precision[key]
                except KeyError:
                    return np_encoder.default(obj)

                return np_encoder.default(obj.astype(float).round(d))

            # Pass through everything else
            return obj

        data = {k: encode(k, v) for k, v in self.data.items()}
        attrs = {k: encode(k, v) for k, v in self.attrs.items()}

        # Only include one of the vertical coordinate keys
        if isinstance(self, GeoVectorDataset):
            data.pop("altitude", None)
            data.pop("level", None)
            if "altitude_ft" not in data:
                data["altitude_ft"] = self.altitude_ft.round(precision["altitude_ft"]).tolist()

        # Issue warning if any keys are duplicated
        common_keys = data.keys() & attrs.keys()
        if common_keys:
            warnings.warn(
                f"Found duplicate keys in data and attrs: {common_keys}. "
                "Data keys will overwrite attrs keys in returned dictionary."
            )

        return {**attrs, **data}

    @classmethod
    def create_empty(
        cls,
        keys: Iterable[str],
        attrs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Self:
        """Create instance with variables defined by ``keys`` and size 0.

        If instance requires additional variables to be defined, these keys will automatically
        be attached to returned instance.

        Parameters
        ----------
        keys : Iterable[str]
            Keys to include in empty VectorDataset instance.
        attrs : dict[str, Any] | None, optional
            Attributes to attach to the instance.
        **kwargs : Any
            Additional keyword arguments passed into the constructor of the returned class.

        Returns
        -------
        Self
            Empty VectorDataset instance.
        """
        data = _empty_vector_dict(keys)
        return cls._from_fastpath(data, attrs, **kwargs)

    @classmethod
    def from_dict(cls, obj: dict[str, Any], copy: bool = True, **obj_kwargs: Any) -> Self:
        """Create instance from dict representation containing data and attrs.

        Parameters
        ----------
        obj : dict[str, Any]
            Dict representation of VectorDataset (e.g. :meth:`to_dict`)
        copy : bool, optional
            Passed to :class:`VectorDataset` constructor.
            Defaults to True.
        **obj_kwargs : Any
            Additional properties passed as keyword arguments.

        Returns
        -------
        Self
            VectorDataset instance.

        See Also
        --------
        :meth:`to_dict`
        """
        data = {}
        attrs = {}

        for k, v in {**obj, **obj_kwargs}.items():
            if isinstance(v, list | np.ndarray):
                data[k] = v
            else:
                attrs[k] = v

        return cls(data=data, attrs=attrs, copy=copy)
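
    # --- Editor's illustrative sketch (not part of the released vector.py) ---
    # from_dict() routes list/ndarray values into data and everything else
    # into attrs, so a to_dict() payload round-trips:
    #
    #     v = VectorDataset.from_dict({"a": [1.0, 2.0], "name": "demo"})
    #     v["a"]           # -> array([1., 2.])
    #     v.attrs["name"]  # -> 'demo'
    # --- end sketch ---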

    def generate_splits(self, n_splits: int, copy: bool = True) -> Generator[Self, None, None]:
        """Split instance into ``n_splits`` sub-vectors.

        Parameters
        ----------
        n_splits : int
            Number of splits.
        copy : bool, optional
            Passed into :meth:`filter`. Defaults to True. Recommend to keep as True
            based on `numpy best practices <https://numpy.org/doc/stable/user/basics.indexing.html#slicing-and-striding>`_.

        Yields
        ------
        Self
            Generator of split vectors.

        See Also
        --------
        :func:`numpy.array_split`
        """
        full_index = np.arange(self.size)
        index_splits = np.array_split(full_index, n_splits)
        for index in index_splits:
            filt = np.zeros(self.size, dtype=bool)
            filt[index] = True
            yield self.filter(filt, copy=copy)
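

# --- Editor's illustrative sketch (not part of the released vector.py) ---
# generate_splits() mirrors numpy.array_split: contiguous sub-vectors whose
# sizes differ by at most one.
_vs_demo = VectorDataset({"a": [1.0, 2.0, 3.0, 4.0, 5.0]})
assert [part.size for part in _vs_demo.generate_splits(2)] == [3, 2]
# --- end sketch ---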


class GeoVectorDataset(VectorDataset):
    """Base class to hold 1D geospatial arrays of consistent size.

    GeoVectorDataset is required to have geospatial coordinate keys defined
    in :attr:`required_keys`.

    Expect latitude-longitude CRS in WGS 84.
    Expect altitude in [:math:`m`].
    Expect level in [:math:`hPa`].

    Each spatial variable is expected to have "float32" or "float64" ``dtype``.
    The time variable is expected to have "datetime64[ns]" ``dtype``.

    Parameters
    ----------
    data : dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataset | None, optional
        Data dictionary or :class:`pandas.DataFrame`.
        Must include keys/columns ``time``, ``latitude``, ``longitude``, ``altitude`` or ``level``.
        Keyword arguments for ``time``, ``latitude``, ``longitude``, ``altitude`` or ``level``
        override ``data`` inputs. Expects ``altitude`` in meters and ``time``
        as a DatetimeLike (or array that can be processed with :meth:`pd.to_datetime`).
        Additional waypoint-specific data can be included as additional keys/columns.
    longitude : npt.ArrayLike | None, optional
        Longitude data.
        Defaults to None.
    latitude : npt.ArrayLike | None, optional
        Latitude data.
        Defaults to None.
    altitude : npt.ArrayLike | None, optional
        Altitude data, [:math:`m`].
        Defaults to None.
    altitude_ft : npt.ArrayLike | None, optional
        Altitude data, [:math:`ft`].
        Defaults to None.
    level : npt.ArrayLike | None, optional
        Level data, [:math:`hPa`].
        Defaults to None.
    time : npt.ArrayLike | None, optional
        Time data.
        Expects an array of DatetimeLike values,
        or array that can be processed with :meth:`pd.to_datetime`.
        Defaults to None.
    attrs : dict[str, Any] | None, optional
        Additional properties as a dictionary.
        Defaults to {}.
    copy : bool, optional
        Copy data on class creation.
        Defaults to True.
    **attrs_kwargs : Any
        Additional properties passed as keyword arguments.

    Raises
    ------
    KeyError
        Raises if ``data`` input does not contain at least ``time``, ``latitude``, ``longitude``,
        (``altitude`` or ``level``).
    """

    __slots__ = ()

    #: Required keys for creating GeoVectorDataset
    required_keys = "longitude", "latitude", "time"

    #: At least one of these vertical-coordinate keys must also be included
    vertical_keys = "altitude", "level", "altitude_ft"

    def __init__(
        self,
        data: dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataset | None = None,
        *,
        longitude: npt.ArrayLike | None = None,
        latitude: npt.ArrayLike | None = None,
        altitude: npt.ArrayLike | None = None,
        altitude_ft: npt.ArrayLike | None = None,
        level: npt.ArrayLike | None = None,
        time: npt.ArrayLike | None = None,
        attrs: dict[str, Any] | None = None,
        copy: bool = True,
        **attrs_kwargs: Any,
    ) -> None:
        # shortcut to `GeoVectorDataset.create_empty` by just using `GeoVectorDataset()`
        if (
            data is None
            and longitude is None
            and latitude is None
            and altitude is None
            and level is None
            and time is None
        ):
            keys = *self.required_keys, "altitude"
            self.data = VectorDataDict(_empty_vector_dict(keys))
            self.attrs = AttrDict(attrs or {})
            self.attrs.update(attrs_kwargs)
            return

        super().__init__(data=data, attrs=attrs, copy=copy, **attrs_kwargs)

        # using the self[key] syntax specifically to run qc on assignment
        if longitude is not None:
            self["longitude"] = np.array(longitude, copy=copy)

        if latitude is not None:
            self["latitude"] = np.array(latitude, copy=copy)

        if time is not None:
            self["time"] = np.array(time, copy=copy)

        if altitude is not None:
            self["altitude"] = np.array(altitude, copy=copy)
            if altitude_ft is not None or level is not None:
                warnings.warn(
                    "Altitude data provided. Ignoring altitude_ft and level inputs.",
                )
        elif altitude_ft is not None:
            self["altitude_ft"] = np.array(altitude_ft, copy=copy)
            if level is not None:
                warnings.warn(
                    "Altitude_ft data provided. Ignoring level input.",
                )
        elif level is not None:
            self["level"] = np.array(level, copy=copy)

        # Confirm that input has required keys
        if not all(key in self for key in self.required_keys):
            raise KeyError(
                f"{self.__class__.__name__} requires all of the following keys: "
                f"{', '.join(self.required_keys)}"
            )

        # Confirm that input has at least one vertical key
        if not any(key in self for key in self.vertical_keys):
            raise KeyError(
                f"{self.__class__.__name__} requires at least one of the following keys: "
                f"{', '.join(self.vertical_keys)}"
            )

        # Parse time: If time is not np.datetime64, we try to coerce it to be
        # by pumping it through pd.to_datetime.
        time = self["time"]
        if not np.issubdtype(time.dtype, np.datetime64):
            warnings.warn("Time data is not np.datetime64. Attempting to coerce.")
            try:
                pd_time = _handle_time_column(pd.Series(self["time"]))
            except ValueError as e:
                raise ValueError("Could not coerce time data to datetime64.") from e
            np_time = pd_time.to_numpy(dtype="datetime64[ns]")
            self.update(time=np_time)
        elif time.dtype != "datetime64[ns]":
            self.update(time=time.astype("datetime64[ns]"))

        # Ensure spatial coordinates are float32 or float64
        float_dtype = (np.float32, np.float64)
        for coord in ("longitude", "latitude", "altitude", "level", "altitude_ft"):
            try:
                arr = self[coord]
            except KeyError:
                continue
            if arr.dtype not in float_dtype:
                self.update({coord: arr.astype(np.float64)})

        longitude = self["longitude"]
        if np.any(longitude > 180.0) or np.any(longitude < -180.0):
            raise ValueError("EPSG:4326 longitude coordinates should lie between [-180, 180).")
        latitude = self["latitude"]
        if np.any(latitude > 90.0) or np.any(latitude < -90.0):
            raise ValueError("EPSG:4326 latitude coordinates should lie between [-90, 90].")
1463
|
+
@override
|
|
1464
|
+
def _display_attrs(self) -> dict[str, str]:
|
|
1465
|
+
try:
|
|
1466
|
+
time0 = pd.Timestamp(np.nanmin(self["time"]))
|
|
1467
|
+
time1 = pd.Timestamp(np.nanmax(self["time"]))
|
|
1468
|
+
lon0 = round(np.nanmin(self["longitude"]), 3)
|
|
1469
|
+
lon1 = round(np.nanmax(self["longitude"]), 3)
|
|
1470
|
+
lat0 = round(np.nanmin(self["latitude"]), 3)
|
|
1471
|
+
lat1 = round(np.nanmax(self["latitude"]), 3)
|
|
1472
|
+
alt0 = round(np.nanmin(self.altitude), 1)
|
|
1473
|
+
alt1 = round(np.nanmax(self.altitude), 1)
|
|
1474
|
+
|
|
1475
|
+
attrs = {
|
|
1476
|
+
"time": f"[{time0}, {time1}]",
|
|
1477
|
+
"longitude": f"[{lon0}, {lon1}]",
|
|
1478
|
+
"latitude": f"[{lat0}, {lat1}]",
|
|
1479
|
+
"altitude": f"[{alt0}, {alt1}]",
|
|
1480
|
+
}
|
|
1481
|
+
except Exception:
|
|
1482
|
+
attrs = {}
|
|
1483
|
+
|
|
1484
|
+
attrs.update(super()._display_attrs())
|
|
1485
|
+
return attrs
|
|
1486
|
+
|
|
1487
|
+
@property
|
|
1488
|
+
def level(self) -> npt.NDArray[np.floating]:
|
|
1489
|
+
"""Get pressure ``level`` values for points.
|
|
1490
|
+
|
|
1491
|
+
Automatically calculates pressure level using :func:`units.m_to_pl` using ``altitude`` key.
|
|
1492
|
+
|
|
1493
|
+
Note that if ``level`` key exists in :attr:`data`, the data at the ``level``
|
|
1494
|
+
key will be returned. This allows an override of the default calculation
|
|
1495
|
+
of pressure level from altitude.
|
|
1496
|
+
|
|
1497
|
+
Returns
|
|
1498
|
+
-------
|
|
1499
|
+
npt.NDArray[np.floating]
|
|
1500
|
+
Point pressure level values, [:math:`hPa`]
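
        Examples
        --------
        A small sketch (the coordinate values are illustrative). An explicit
        ``level`` key takes precedence over the altitude-derived value:

        >>> import numpy as np
        >>> vector = GeoVectorDataset(
        ...     longitude=[0.0],
        ...     latitude=[0.0],
        ...     level=[250.0],
        ...     time=[np.datetime64("2022-03-01T00")],
        ... )
        >>> vector.level
        array([250.])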
        """
        try:
            return self["level"]
        except KeyError:
            return units.m_to_pl(self.altitude)

    @property
    def altitude(self) -> npt.NDArray[np.floating]:
        """Get altitude.

        Automatically calculates altitude from the ``level`` key using
        :func:`units.pl_to_m`.

        Note that if the ``altitude`` key exists in :attr:`data`, the data at the
        ``altitude`` key will be returned. This allows an override of the default
        calculation of altitude from pressure level.

        Returns
        -------
        npt.NDArray[np.floating]
            Altitude, [:math:`m`]
        """
        try:
            return self["altitude"]
        except KeyError:
            # Implementation note: explicitly look for the "level" or "altitude_ft"
            # keys here to avoid getting stuck in an infinite loop when .level or
            # .altitude_ft are called.
            if (level := self.get("level")) is not None:
                return units.pl_to_m(level)
            return units.ft_to_m(self["altitude_ft"])

    @property
    def air_pressure(self) -> npt.NDArray[np.floating]:
        """Get ``air_pressure`` values for points.

        Returns
        -------
        npt.NDArray[np.floating]
            Point air pressure values, [:math:`Pa`]
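
        Examples
        --------
        A small sketch (the coordinate values are illustrative). With no explicit
        ``air_pressure`` key, the value is derived as ``100.0 * self.level``:

        >>> import numpy as np
        >>> vector = GeoVectorDataset(
        ...     longitude=[0.0],
        ...     latitude=[0.0],
        ...     level=[250.0],
        ...     time=[np.datetime64("2022-03-01T00")],
        ... )
        >>> vector.air_pressure
        array([25000.])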
        """
        try:
            return self["air_pressure"]
        except KeyError:
            return 100.0 * self.level

    @property
    def altitude_ft(self) -> npt.NDArray[np.floating]:
        """Get altitude in feet.

        Returns
        -------
        npt.NDArray[np.floating]
            Altitude, [:math:`ft`]
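
        Examples
        --------
        A small sketch (the coordinate values are illustrative):

        >>> import numpy as np
        >>> vector = GeoVectorDataset(
        ...     longitude=[0.0],
        ...     latitude=[0.0],
        ...     altitude_ft=[35000.0],
        ...     time=[np.datetime64("2022-03-01T00")],
        ... )
        >>> vector.altitude_ft
        array([35000.])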
        """
        try:
            return self["altitude_ft"]
        except KeyError:
            return units.m_to_ft(self.altitude)

    @property
    def constants(self) -> dict[str, Any]:
        """Return a dictionary of constant attributes and data values.

        Includes :attr:`attrs` and values from columns in :attr:`data` with a unique
        value.

        Returns
        -------
        dict[str, Any]
            Properties and their constant values
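
        Examples
        --------
        A small sketch (values are illustrative). Both the ``flight_id`` attribute
        and the constant ``altitude`` column appear in the output:

        >>> import numpy as np
        >>> vector = GeoVectorDataset(
        ...     longitude=[0.0, 1.0],
        ...     latitude=[0.0, 1.0],
        ...     altitude=[11000.0, 11000.0],
        ...     time=[np.datetime64("2022-03-01T00"), np.datetime64("2022-03-01T01")],
        ...     flight_id="BA123",
        ... )
        >>> vector.constants["flight_id"]
        'BA123'
        >>> vector.constants["altitude"]
        11000.0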
        """
        constants = {}

        # Get constant data values that are not NaN
        for key in set(self).difference(self.required_keys):
            unique = np.unique(self[key])
            if len(unique) == 1 and (isinstance(unique[0], str) or ~np.isnan(unique[0])):
                constants[key] = unique[0]

        # Add attributes
        constants.update(self.attrs)

        # Clean string values by removing whitespace and
        # convert any numpy items to python objects
        def _cleanup(v: Any) -> Any:
            if isinstance(v, str):
                return v.strip()
            if isinstance(v, np.integer):
                return int(v)
            if isinstance(v, np.floating):
                return float(v)
            if isinstance(v, np.bool_):
                return bool(v)
            return v

        return {k: _cleanup(v) for k, v in constants.items()}

    @property
    def coords(self) -> dict[str, np.ndarray]:
        """Get geospatial coordinates for compatibility with MetDataArray.

        Returns
        -------
        dict[str, np.ndarray]
            A dictionary with fields ``longitude``, ``latitude``, ``level``, and ``time``.
        """
        return {
            "longitude": self["longitude"],
            "latitude": self["latitude"],
            "level": self.level,
            "time": self["time"],
        }

    # ------------
    # Utilities
    # ------------

    def transform_crs(self, crs: str) -> tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]]:
        """Transform trajectory data from one coordinate reference system (CRS) to another.

        Parameters
        ----------
        crs : str
            Target CRS. Passed into :class:`pyproj.Transformer`. The source CRS
            is assumed to be EPSG:4326.

        Returns
        -------
        tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]]
            New x and y coordinates in the target CRS.
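
        Examples
        --------
        A sketch, assuming the optional ``pyproj`` dependency is installed and
        ``vector`` is any :class:`GeoVectorDataset`:

        >>> x, y = vector.transform_crs("EPSG:3857")  # doctest: +SKIP
        >>> x.shape == vector["longitude"].shape  # doctest: +SKIP
        True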
        """
        try:
            import pyproj
        except ModuleNotFoundError as exc:
            dependencies.raise_module_not_found_error(
                name="GeoVectorDataset.transform_crs method",
                package_name="pyproj",
                module_not_found_error=exc,
                pycontrails_optional_package="pyproj",
            )

        crs_from = "EPSG:4326"
        transformer = pyproj.Transformer.from_crs(crs_from, crs, always_xy=True)
        return transformer.transform(self["longitude"], self["latitude"])

    def T_isa(self) -> npt.NDArray[np.floating]:
        """Calculate the ICAO standard atmosphere temperature at each point.

        Returns
        -------
        npt.NDArray[np.floating]
            ISA temperature, [:math:`K`]

        See Also
        --------
        :func:`pycontrails.physics.units.m_to_T_isa`
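
        Examples
        --------
        A small sketch (the coordinate values are illustrative), assuming the
        standard ISA sea-level temperature of 288.15 K:

        >>> import numpy as np
        >>> vector = GeoVectorDataset(
        ...     longitude=[0.0],
        ...     latitude=[0.0],
        ...     altitude=[0.0],
        ...     time=[np.datetime64("2022-03-01T00")],
        ... )
        >>> vector.T_isa()
        array([288.15])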
        """
        return units.m_to_T_isa(self.altitude)

    # ------------
    # Met
    # ------------

    def coords_intersect_met(
        self, met: met_module.MetDataset | met_module.MetDataArray
    ) -> npt.NDArray[np.bool_]:
        """Return boolean mask of data inside the bounding box defined by ``met``.

        Parameters
        ----------
        met : met_module.MetDataset | met_module.MetDataArray
            MetDataset or MetDataArray to compare.

        Returns
        -------
        npt.NDArray[np.bool_]
            True if point is inside the bounding box defined by ``met``.
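
        Examples
        --------
        A sketch, assuming ``met`` is an already-opened :class:`MetDataset`:

        >>> mask = vector.coords_intersect_met(met)  # doctest: +SKIP
        >>> bool(mask.all())  # doctest: +SKIP
        True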
        """
        indexes = met.indexes

        lat_intersect = coordinates.intersect_domain(
            indexes["latitude"].to_numpy(),
            self["latitude"],
        )
        lon_intersect = coordinates.intersect_domain(
            indexes["longitude"].to_numpy(),
            self["longitude"],
        )
        level_intersect = coordinates.intersect_domain(
            indexes["level"].to_numpy(),
            self.level,
        )
        time_intersect = coordinates.intersect_domain(
            indexes["time"].to_numpy(),
            self["time"],
        )

        return lat_intersect & lon_intersect & level_intersect & time_intersect

    def intersect_met(
        self,
        mda: met_module.MetDataArray,
        *,
        longitude: npt.NDArray[np.floating] | None = None,
        latitude: npt.NDArray[np.floating] | None = None,
        level: npt.NDArray[np.floating] | None = None,
        time: npt.NDArray[np.datetime64] | None = None,
        use_indices: bool = False,
        **interp_kwargs: Any,
    ) -> npt.NDArray[np.floating]:
        """Intersect waypoints with MetDataArray.

        Parameters
        ----------
        mda : met_module.MetDataArray
            MetDataArray containing a meteorological variable at spatio-temporal coordinates.
        longitude : npt.NDArray[np.floating] | None, optional
            Override existing coordinates for met interpolation.
        latitude : npt.NDArray[np.floating] | None, optional
            Override existing coordinates for met interpolation.
        level : npt.NDArray[np.floating] | None, optional
            Override existing coordinates for met interpolation.
        time : npt.NDArray[np.datetime64] | None, optional
            Override existing coordinates for met interpolation.
        use_indices : bool, optional
            Experimental.
        **interp_kwargs : Any
            Additional keyword arguments to pass to :meth:`MetDataArray.interpolate`.
            Examples include ``method``, ``bounds_error``, and ``fill_value``. If an error such as

            .. code-block:: python

                ValueError: One of the requested xi is out of bounds in dimension 2

            occurs, try calling this function with ``bounds_error=False``. In addition,
            setting ``fill_value=0.0`` will replace NaN values with 0.0.

        Returns
        -------
        npt.NDArray[np.floating]
            Interpolated values

        Examples
        --------
        >>> from datetime import datetime
        >>> import pandas as pd
        >>> import numpy as np
        >>> from pycontrails.datalib.ecmwf import ERA5
        >>> from pycontrails import Flight

        >>> # Get met data
        >>> times = (datetime(2022, 3, 1, 0), datetime(2022, 3, 1, 3))
        >>> variables = ["air_temperature", "specific_humidity"]
        >>> levels = [300, 250, 200]
        >>> era5 = ERA5(time=times, variables=variables, pressure_levels=levels)
        >>> met = era5.open_metdataset()

        >>> # Example flight
        >>> df = pd.DataFrame()
        >>> df['longitude'] = np.linspace(0, 50, 10)
        >>> df['latitude'] = np.linspace(0, 10, 10)
        >>> df['altitude'] = 11000
        >>> df['time'] = pd.date_range("2022-03-01T00", "2022-03-01T02", periods=10)
        >>> fl = Flight(df)

        >>> # Intersect
        >>> fl.intersect_met(met['air_temperature'], method='nearest')
        array([231.62969892, 230.72604651, 232.24318771, 231.88338483,
               231.06429438, 231.59073409, 231.65125393, 231.93064004,
               232.03344087, 231.65954432])

        >>> fl.intersect_met(met['air_temperature'], method='linear')
        array([225.77794552, 225.13908414, 226.231218  , 226.31831528,
               225.56102321, 225.81192149, 226.03192642, 226.22056121,
               226.03770174, 225.63226188])

        >>> # Interpolate and attach to `Flight` instance
        >>> for key in met:
        ...     fl[key] = fl.intersect_met(met[key])

        >>> # Show the final three columns of the dataframe
        >>> fl.dataframe.iloc[:, -3:].head()
                         time  air_temperature  specific_humidity
        0 2022-03-01 00:00:00       225.777946           0.000132
        1 2022-03-01 00:13:20       225.139084           0.000132
        2 2022-03-01 00:26:40       226.231218           0.000107
        3 2022-03-01 00:40:00       226.318315           0.000171
        4 2022-03-01 00:53:20       225.561022           0.000109

        """
        # Override use_indices in certain situations
        if use_indices:
            # Often the single_level data we use has time shifted.
            # Don't allow it for now. We could do something smarter here!
            if mda.is_single_level:
                use_indices = False

            # Cannot both override some coordinate AND pass indices.
            elif any(c is not None for c in (longitude, latitude, level, time)):
                # Should we warn?! Or is this "convenience"?
                use_indices = False

        longitude = longitude if longitude is not None else self["longitude"]
        latitude = latitude if latitude is not None else self["latitude"]
        level = level if level is not None else self.level
        time = time if time is not None else self["time"]

        if not use_indices:
            return mda.interpolate(longitude, latitude, level, time, **interp_kwargs)

        indices = self._get_indices()
        already_has_indices = indices is not None
        out, indices = mda.interpolate(
            longitude,
            latitude,
            level,
            time,
            indices=indices,
            return_indices=True,
            **interp_kwargs,
        )
        if not already_has_indices:
            self._put_indices(indices)
        return out

    def _put_indices(self, indices: interpolation.RGIArtifacts) -> None:
        """Set entries of ``indices`` onto the underlying :attr:`data`.

        Each entry of ``indices`` is unpacked assuming certain conventions
        for its structure. A ValueError is raised if these conventions are not
        satisfied.

        .. versionadded:: 0.26.0

            Experimental

        Parameters
        ----------
        indices : interpolation.RGIArtifacts
            The indices to store.
        """
        indices_x, indices_y, indices_z, indices_t = indices.xi_indices
        distances_x, distances_y, distances_z, distances_t = indices.norm_distances
        out_of_bounds = indices.out_of_bounds

        self["_indices_x"] = indices_x
        self["_indices_y"] = indices_y
        self["_indices_z"] = indices_z
        self["_indices_t"] = indices_t
        self["_distances_x"] = distances_x
        self["_distances_y"] = distances_y
        self["_distances_z"] = distances_z
        self["_distances_t"] = distances_t
        self["_out_of_bounds"] = out_of_bounds

    def _get_indices(self) -> interpolation.RGIArtifacts | None:
        """Get entries from a previous call to :meth:`_put_indices`.

        .. versionadded:: 0.26.0

            Experimental

        Returns
        -------
        interpolation.RGIArtifacts | None
            Previously cached output of
            :meth:`scipy.interpolate.RegularGridInterpolator._find_indices`,
            or None if cached output is not present on the instance.
        """
        try:
            indices_x = self["_indices_x"]
            indices_y = self["_indices_y"]
            indices_z = self["_indices_z"]
            indices_t = self["_indices_t"]
            distances_x = self["_distances_x"]
            distances_y = self["_distances_y"]
            distances_z = self["_distances_z"]
            distances_t = self["_distances_t"]
            out_of_bounds = self["_out_of_bounds"]
        except KeyError:
            return None

        indices = np.asarray([indices_x, indices_y, indices_z, indices_t])
        distances = np.asarray([distances_x, distances_y, distances_z, distances_t])

        return interpolation.RGIArtifacts(indices, distances, out_of_bounds)

    def _invalidate_indices(self) -> None:
        """Remove any cached indices from :attr:`data`."""
        for key in (
            "_indices_x",
            "_indices_y",
            "_indices_z",
            "_indices_t",
            "_distances_x",
            "_distances_y",
            "_distances_z",
            "_distances_t",
            "_out_of_bounds",
        ):
            self.data.pop(key, None)

    @overload
    def downselect_met(
        self,
        met: met_module.MetDataset,
        *,
        longitude_buffer: tuple[float, float] = ...,
        latitude_buffer: tuple[float, float] = ...,
        level_buffer: tuple[float, float] = ...,
        time_buffer: tuple[np.timedelta64, np.timedelta64] = ...,
    ) -> met_module.MetDataset: ...

    @overload
    def downselect_met(
        self,
        met: met_module.MetDataArray,
        *,
        longitude_buffer: tuple[float, float] = ...,
        latitude_buffer: tuple[float, float] = ...,
        level_buffer: tuple[float, float] = ...,
        time_buffer: tuple[np.timedelta64, np.timedelta64] = ...,
    ) -> met_module.MetDataArray: ...

    def downselect_met(
        self,
        met: met_module.MetDataType,
        *,
        longitude_buffer: tuple[float, float] = (0.0, 0.0),
        latitude_buffer: tuple[float, float] = (0.0, 0.0),
        level_buffer: tuple[float, float] = (0.0, 0.0),
        time_buffer: tuple[np.timedelta64, np.timedelta64] = (
            np.timedelta64(0, "h"),
            np.timedelta64(0, "h"),
        ),
    ) -> met_module.MetDataType:
        """Downselect ``met`` to encompass a spatiotemporal region of the data.

        .. versionchanged:: 0.54.5

            Returned object is no longer copied.

        Parameters
        ----------
        met : met_module.MetDataType
            MetDataset or MetDataArray to downselect.
        longitude_buffer : tuple[float, float], optional
            Extend the longitude domain by ``longitude_buffer[0]`` on the low side
            and ``longitude_buffer[1]`` on the high side.
            Units must be the same as the class coordinates.
            Defaults to ``(0, 0)`` degrees.
        latitude_buffer : tuple[float, float], optional
            Extend the latitude domain by ``latitude_buffer[0]`` on the low side
            and ``latitude_buffer[1]`` on the high side.
            Units must be the same as the class coordinates.
            Defaults to ``(0, 0)`` degrees.
        level_buffer : tuple[float, float], optional
            Extend the level domain by ``level_buffer[0]`` on the low side
            and ``level_buffer[1]`` on the high side.
            Units must be the same as the class coordinates.
            Defaults to ``(0, 0)`` [:math:`hPa`].
        time_buffer : tuple[np.timedelta64, np.timedelta64], optional
            Extend the time domain by ``time_buffer[0]`` on the low side
            and ``time_buffer[1]`` on the high side.
            Units must be the same as the class coordinates.
            Defaults to ``(np.timedelta64(0, "h"), np.timedelta64(0, "h"))``.

        Returns
        -------
        met_module.MetDataType
            The downselected MetDataset or MetDataArray.
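
        Examples
        --------
        A sketch, assuming ``met`` is an already-opened :class:`MetDataset` and
        ``np`` is ``numpy``:

        >>> met = vector.downselect_met(
        ...     met,
        ...     longitude_buffer=(1.0, 1.0),
        ...     latitude_buffer=(1.0, 1.0),
        ...     time_buffer=(np.timedelta64(1, "h"), np.timedelta64(1, "h")),
        ... )  # doctest: +SKIP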
        """
        indexes = met.indexes
        lon_slice = coordinates.slice_domain(
            indexes["longitude"].to_numpy(),
            self["longitude"],
            buffer=longitude_buffer,
        )
        lat_slice = coordinates.slice_domain(
            indexes["latitude"].to_numpy(),
            self["latitude"],
            buffer=latitude_buffer,
        )
        time_slice = coordinates.slice_domain(
            indexes["time"].to_numpy(),
            self["time"],
            buffer=time_buffer,
        )

        # Single level data have "level" == [-1]
        if met.is_single_level:
            level_slice = slice(None)
        else:
            level_slice = coordinates.slice_domain(
                indexes["level"].to_numpy(),
                self.level,
                buffer=level_buffer,
            )
        logger.debug("Downselect met at %s %s %s %s", lon_slice, lat_slice, level_slice, time_slice)

        data = met.data.isel(
            longitude=lon_slice,
            latitude=lat_slice,
            level=level_slice,
            time=time_slice,
        )
        return type(met)._from_fastpath(data)

    # ------------
    # I / O
    # ------------

    @classmethod
    @override
    def create_empty(
        cls,
        keys: Iterable[str] | None = None,
        attrs: dict[str, Any] | None = None,
        **attrs_kwargs: Any,
    ) -> Self:
        keys = *cls.required_keys, "altitude", *(keys or ())
        return super().create_empty(keys, attrs, **attrs_kwargs)

    def to_geojson_points(self) -> dict[str, Any]:
        """Return dataset as GeoJSON FeatureCollection of Points.

        Each Feature has a properties attribute that includes ``time`` and
        other data besides ``latitude``, ``longitude``, and ``altitude`` in :attr:`data`.

        Returns
        -------
        dict[str, Any]
            Python representation of GeoJSON FeatureCollection
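
        Examples
        --------
        A small sketch (the coordinate values are illustrative):

        >>> import numpy as np
        >>> vector = GeoVectorDataset(
        ...     longitude=[0.0],
        ...     latitude=[0.0],
        ...     altitude=[11000.0],
        ...     time=[np.datetime64("2022-03-01T00")],
        ... )
        >>> geojson = vector.to_geojson_points()
        >>> geojson["type"]
        'FeatureCollection'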
        """
        return json_utils.dataframe_to_geojson_points(self.dataframe)

    # ------------
    # Vector to grid
    # ------------

    def to_lon_lat_grid(
        self,
        agg: dict[str, str],
        *,
        spatial_bbox: tuple[float, float, float, float] = (-180.0, -90.0, 180.0, 90.0),
        spatial_grid_res: float = 0.5,
    ) -> xr.Dataset:
        """
        Convert vectors to a longitude-latitude grid.

        See Also
        --------
        vector_to_lon_lat_grid
        """
        return vector_to_lon_lat_grid(
            self, agg=agg, spatial_bbox=spatial_bbox, spatial_grid_res=spatial_grid_res
        )


def vector_to_lon_lat_grid(
    vector: GeoVectorDataset,
    agg: dict[str, str],
    *,
    spatial_bbox: tuple[float, float, float, float] = (-180.0, -90.0, 180.0, 90.0),
    spatial_grid_res: float = 0.5,
) -> xr.Dataset:
    r"""
    Convert vectors to a longitude-latitude grid.

    Parameters
    ----------
    vector : GeoVectorDataset
        Contains the longitude, latitude, and variables for aggregation.
    agg : dict[str, str]
        Variable name and the function selected for aggregation,
        e.g. ``{"segment_length": "sum"}``.
    spatial_bbox : tuple[float, float, float, float]
        Spatial bounding box, ``(lon_min, lat_min, lon_max, lat_max)``, [:math:`\deg`].
        By default, the entire globe is used.
    spatial_grid_res : float
        Spatial grid resolution, [:math:`\deg`]

    Returns
    -------
    xr.Dataset
        Aggregated variables in a longitude-latitude grid.

    Examples
    --------
    >>> rng = np.random.default_rng(234)
    >>> vector = GeoVectorDataset(
    ...     longitude=rng.uniform(-10, 10, 10000),
    ...     latitude=rng.uniform(-10, 10, 10000),
    ...     altitude=np.zeros(10000),
    ...     time=np.zeros(10000).astype("datetime64[ns]"),
    ... )
    >>> vector["foo"] = rng.uniform(0, 1, 10000)
    >>> ds = vector.to_lon_lat_grid({"foo": "sum"}, spatial_bbox=(-10, -10, 9.5, 9.5))
    >>> da = ds["foo"]
    >>> da.coords
    Coordinates:
      * longitude  (longitude) float64 320B -10.0 -9.5 -9.0 -8.5 ... 8.0 8.5 9.0 9.5
      * latitude   (latitude) float64 320B -10.0 -9.5 -9.0 -8.5 ... 8.0 8.5 9.0 9.5

    >>> da.values.round(2)
    array([[2.23, 0.67, 1.29, ..., 4.66, 3.91, 1.93],
           [4.1 , 3.84, 1.34, ..., 3.24, 1.71, 4.55],
           [0.78, 3.25, 2.33, ..., 3.78, 2.93, 2.33],
           ...,
           [1.97, 3.02, 1.84, ..., 2.37, 3.87, 2.09],
           [3.74, 1.6 , 4.01, ..., 4.6 , 4.27, 3.4 ],
           [2.97, 0.12, 1.33, ..., 3.54, 0.74, 2.59]], shape=(40, 40))

    >>> da.sum().item() == vector["foo"].sum()
    np.True_

    """
    df = vector.select(("longitude", "latitude", *agg), copy=False).dataframe

    # Create longitude and latitude coordinates
    assert spatial_grid_res > 0.01, "spatial_grid_res must be greater than 0.01"
    west, south, east, north = spatial_bbox
    lon_coords = np.arange(west, east + 0.01, spatial_grid_res)
    lat_coords = np.arange(south, north + 0.01, spatial_grid_res)
    shape = lon_coords.size, lat_coords.size

    # Convert vector to lon-lat grid
    idx_lon = np.searchsorted(lon_coords, df["longitude"]) - 1
    idx_lat = np.searchsorted(lat_coords, df["latitude"]) - 1

    df_agg = df.groupby([idx_lon, idx_lat]).agg(agg)
    index = df_agg.index.get_level_values(0), df_agg.index.get_level_values(1)

    out = xr.Dataset(coords={"longitude": lon_coords, "latitude": lat_coords})
    for name, col in df_agg.items():
        arr = np.zeros(shape, dtype=col.dtype)
        arr[index] = col
        out[name] = (("longitude", "latitude"), arr)

    return out


def _handle_time_column(time: pd.Series) -> pd.Series:
    """Ensure that pd.Series has compatible Timestamps.

    Parameters
    ----------
    time : pd.Series
        Pandas dataframe column labeled "time".

    Returns
    -------
    pd.Series
        Parsed pandas time series.

    Raises
    ------
    ValueError
        When the time series can't be parsed as datetime-like values.
    """
    if not hasattr(time, "dt"):
        time = _parse_pandas_time(time)

    # Translate all times to UTC and then remove the timezone.
    # If the time column contains a timezone, the call to `to_numpy`
    # will convert it to an array of object.
    # Note `.tz_convert(None)` automatically converts to UTC first.
    if time.dt.tz is not None:
        time = time.dt.tz_convert(None)

    return time


def _parse_pandas_time(time: pd.Series) -> pd.Series:
    """Parse pandas dataframe column labeled "time".

    Parameters
    ----------
    time : pd.Series
        Time series

    Returns
    -------
    pd.Series
        Parsed time series

    Raises
    ------
    ValueError
        When series values can't be inferred.
    """
    try:
        # If the time series is a string, try to convert it to a datetime
        if time.dtype == "O":
            return pd.to_datetime(time)

        # If the time is an int, try to parse it as unix time
        if np.issubdtype(time.dtype, np.integer):
            return _parse_unix_time(time)

    except ValueError as exc:
        msg = (
            "The 'time' field must hold datetime-like values. "
            'Try data["time"] = pd.to_datetime(data["time"], unit=...) '
            "with the appropriate unit."
        )
        raise ValueError(msg) from exc

    raise ValueError("Unsupported time format")


def _parse_unix_time(time: list[int] | npt.NDArray[np.int_] | pd.Series) -> pd.Series:
    """Parse array of int times as unix epoch timestamps.

    Attempts to parse the time in "s", "ms", "us", and "ns" units.

    Parameters
    ----------
    time : list[int] | npt.NDArray[np.int_] | pd.Series
        Sequence of unix timestamps

    Returns
    -------
    pd.Series
        Series of timezone naive pandas Timestamps

    Raises
    ------
    ValueError
        When unable to parse time as unix epoch timestamp
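
    Examples
    --------
    A small sketch (the timestamps below are illustrative second-resolution
    unix times):

    >>> import pandas as pd
    >>> _parse_unix_time(pd.Series([1646092800, 1646096400])).tolist()
    [Timestamp('2022-03-01 00:00:00'), Timestamp('2022-03-01 01:00:00')]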
    """
    units = "s", "ms", "us", "ns"
    for unit in units:
        try:
            out = pd.to_datetime(time, unit=unit, utc=True)
        except ValueError:
            continue

        # Make timezone naive
        out = out.dt.tz_convert(None)

        # Make sure the parsed time is reasonable
        if (pd.Timestamp("1980-01-01") <= out).all() and (out <= pd.Timestamp("2030-01-01")).all():
            return out

    raise ValueError(
        f"Unable to parse time parameter '{time}' as unix epoch timestamp between "
        "1980-01-01 and 2030-01-01"
    )