pycontrails 0.58.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pycontrails might be problematic.
Files changed (122)
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +34 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +679 -0
  5. pycontrails/core/airports.py +228 -0
  6. pycontrails/core/cache.py +889 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +483 -0
  9. pycontrails/core/flight.py +2185 -0
  10. pycontrails/core/flightplan.py +228 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +702 -0
  13. pycontrails/core/met.py +2931 -0
  14. pycontrails/core/met_var.py +387 -0
  15. pycontrails/core/models.py +1321 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cp314-win_amd64.pyd +0 -0
  18. pycontrails/core/vector.py +2249 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_met_utils/metsource.py +746 -0
  21. pycontrails/datalib/ecmwf/__init__.py +73 -0
  22. pycontrails/datalib/ecmwf/arco_era5.py +345 -0
  23. pycontrails/datalib/ecmwf/common.py +114 -0
  24. pycontrails/datalib/ecmwf/era5.py +554 -0
  25. pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
  26. pycontrails/datalib/ecmwf/hres.py +804 -0
  27. pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
  28. pycontrails/datalib/ecmwf/ifs.py +287 -0
  29. pycontrails/datalib/ecmwf/model_levels.py +435 -0
  30. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  31. pycontrails/datalib/ecmwf/variables.py +268 -0
  32. pycontrails/datalib/geo_utils.py +261 -0
  33. pycontrails/datalib/gfs/__init__.py +28 -0
  34. pycontrails/datalib/gfs/gfs.py +656 -0
  35. pycontrails/datalib/gfs/variables.py +104 -0
  36. pycontrails/datalib/goes.py +757 -0
  37. pycontrails/datalib/himawari/__init__.py +27 -0
  38. pycontrails/datalib/himawari/header_struct.py +266 -0
  39. pycontrails/datalib/himawari/himawari.py +667 -0
  40. pycontrails/datalib/landsat.py +589 -0
  41. pycontrails/datalib/leo_utils/__init__.py +5 -0
  42. pycontrails/datalib/leo_utils/correction.py +266 -0
  43. pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
  44. pycontrails/datalib/leo_utils/search.py +250 -0
  45. pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
  46. pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
  47. pycontrails/datalib/leo_utils/vis.py +59 -0
  48. pycontrails/datalib/sentinel.py +650 -0
  49. pycontrails/datalib/spire/__init__.py +5 -0
  50. pycontrails/datalib/spire/exceptions.py +62 -0
  51. pycontrails/datalib/spire/spire.py +604 -0
  52. pycontrails/ext/bada.py +42 -0
  53. pycontrails/ext/cirium.py +14 -0
  54. pycontrails/ext/empirical_grid.py +140 -0
  55. pycontrails/ext/synthetic_flight.py +431 -0
  56. pycontrails/models/__init__.py +1 -0
  57. pycontrails/models/accf.py +425 -0
  58. pycontrails/models/apcemm/__init__.py +8 -0
  59. pycontrails/models/apcemm/apcemm.py +983 -0
  60. pycontrails/models/apcemm/inputs.py +226 -0
  61. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  62. pycontrails/models/apcemm/utils.py +437 -0
  63. pycontrails/models/cocip/__init__.py +29 -0
  64. pycontrails/models/cocip/cocip.py +2742 -0
  65. pycontrails/models/cocip/cocip_params.py +305 -0
  66. pycontrails/models/cocip/cocip_uncertainty.py +291 -0
  67. pycontrails/models/cocip/contrail_properties.py +1530 -0
  68. pycontrails/models/cocip/output_formats.py +2270 -0
  69. pycontrails/models/cocip/radiative_forcing.py +1260 -0
  70. pycontrails/models/cocip/radiative_heating.py +520 -0
  71. pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
  72. pycontrails/models/cocip/wake_vortex.py +396 -0
  73. pycontrails/models/cocip/wind_shear.py +120 -0
  74. pycontrails/models/cocipgrid/__init__.py +9 -0
  75. pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
  76. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  77. pycontrails/models/dry_advection.py +602 -0
  78. pycontrails/models/emissions/__init__.py +21 -0
  79. pycontrails/models/emissions/black_carbon.py +599 -0
  80. pycontrails/models/emissions/emissions.py +1353 -0
  81. pycontrails/models/emissions/ffm2.py +336 -0
  82. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  83. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  84. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  85. pycontrails/models/extended_k15.py +1327 -0
  86. pycontrails/models/humidity_scaling/__init__.py +37 -0
  87. pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
  88. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  89. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  90. pycontrails/models/issr.py +210 -0
  91. pycontrails/models/pcc.py +326 -0
  92. pycontrails/models/pcr.py +154 -0
  93. pycontrails/models/ps_model/__init__.py +18 -0
  94. pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
  95. pycontrails/models/ps_model/ps_grid.py +701 -0
  96. pycontrails/models/ps_model/ps_model.py +1000 -0
  97. pycontrails/models/ps_model/ps_operational_limits.py +525 -0
  98. pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
  99. pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
  100. pycontrails/models/sac.py +442 -0
  101. pycontrails/models/tau_cirrus.py +183 -0
  102. pycontrails/physics/__init__.py +1 -0
  103. pycontrails/physics/constants.py +117 -0
  104. pycontrails/physics/geo.py +1138 -0
  105. pycontrails/physics/jet.py +968 -0
  106. pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
  107. pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
  108. pycontrails/physics/thermo.py +551 -0
  109. pycontrails/physics/units.py +472 -0
  110. pycontrails/py.typed +0 -0
  111. pycontrails/utils/__init__.py +1 -0
  112. pycontrails/utils/dependencies.py +66 -0
  113. pycontrails/utils/iteration.py +13 -0
  114. pycontrails/utils/json.py +187 -0
  115. pycontrails/utils/temp.py +50 -0
  116. pycontrails/utils/types.py +163 -0
  117. pycontrails-0.58.0.dist-info/METADATA +180 -0
  118. pycontrails-0.58.0.dist-info/RECORD +122 -0
  119. pycontrails-0.58.0.dist-info/WHEEL +5 -0
  120. pycontrails-0.58.0.dist-info/licenses/LICENSE +178 -0
  121. pycontrails-0.58.0.dist-info/licenses/NOTICE +43 -0
  122. pycontrails-0.58.0.dist-info/top_level.txt +3 -0
pycontrails/core/vector.py
@@ -0,0 +1,2249 @@
+ """Lightweight data structures for vector paths."""
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ import logging
+ import sys
+ import warnings
+ from collections.abc import Generator, Iterable, Iterator, Sequence
+ from typing import Any, Self, overload
+
+ if sys.version_info >= (3, 12):
+     from typing import override
+ else:
+     from typing_extensions import override
+
+ import numpy as np
+ import numpy.typing as npt
+ import pandas as pd
+ import xarray as xr
+
+ from pycontrails.core import coordinates, interpolation
+ from pycontrails.core import met as met_module
+ from pycontrails.physics import units
+ from pycontrails.utils import dependencies
+ from pycontrails.utils import json as json_utils
+
+ logger = logging.getLogger(__name__)
+
+
+ class AttrDict(dict[str, Any]):
+     """Thin wrapper around dict to warn when setting a key that already exists."""
+
+     __slots__ = ()
+
+     def __setitem__(self, k: str, v: Any) -> None:
+         """Warn when setting a key that already holds a value.
+
+         Parameters
+         ----------
+         k : str
+             Key
+         v : Any
+             Value
+         """
+         if k in self and self[k] is not None and self[k] is not v:
+             warnings.warn(
+                 f"Overwriting attr key `{k}`. Use `.update({k}=...)` to suppress warning."
+             )
+
+         super().__setitem__(k, v)
+
+     def setdefault(self, k: str, default: Any = None) -> Any:
+         """Thin wrapper around ``dict.setdefault``.
+
+         Overwrites the value if the existing value is None.
+
+         Parameters
+         ----------
+         k : str
+             Key
+         default : Any, optional
+             Default value for key ``k``
+
+         Returns
+         -------
+         Any
+             Value at ``k``
+         """
+         ret = self.get(k, None)
+         if ret is not None:
+             return ret
+
+         self[k] = default
+         return default
+
+
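The overwrite warning is easiest to see in a short script. A minimal sketch, assuming the import path implied by the package layout above; the keys and values are illustrative:

```python
import warnings

from pycontrails.core.vector import AttrDict

attrs = AttrDict()
attrs["project"] = "cocip-run"

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    attrs["project"] = "other-run"  # re-assigning a non-None key warns
    print(caught[0].message)

attrs.update(project="other-run")  # plain dict.update() bypasses __setitem__, so no warning
```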
+ class VectorDataDict(dict[str, np.ndarray]):
+     """Thin wrapper around ``dict[str, np.ndarray]`` to ensure consistency.
+
+     Parameters
+     ----------
+     data : dict[str, np.ndarray] | None, optional
+         Dictionary input. A shallow copy is always made.
+     """
+
+     __slots__ = ("_size",)
+
+     #: Length of the data
+     _size: int
+
+     def __init__(self, data: dict[str, np.ndarray] | None = None) -> None:
+         super().__init__(data or {})
+
+         # validate any arrays, first one defines _size attribute
+         for arr in self.values():
+             self._validate_array(arr)
+
+     def __setitem__(self, k: str, v: npt.ArrayLike) -> None:
+         """Set new key-value pair to instance and warn when overwriting existing key.
+
+         This method casts ``v`` to an :class:`numpy.ndarray` and ensures that the array size is
+         consistent with the instance.
+
+         Parameters
+         ----------
+         k : str
+             Key
+         v : npt.ArrayLike
+             Values
+
+         See Also
+         --------
+         :meth:`update`
+         """
+         v = np.asarray(v)  # asarray does NOT copy
+         self._validate_array(v)
+
+         if k in self and len(self[k]) and self[k] is not v:
+             warnings.warn(
+                 f"Overwriting data in key `{k}`. Use `.update({k}=...)` to suppress warning."
+             )
+
+         super().__setitem__(k, v)
+
+     def __delitem__(self, k: str) -> None:
+         super().__delitem__(k)
+
+         # if no keys remain, delete _size attribute
+         if not self:
+             del self._size
+
+     def setdefault(self, k: str, default: npt.ArrayLike | None = None) -> np.ndarray:
+         """Thin wrapper around ``dict.setdefault``.
+
+         The main purpose of overriding is to run :meth:`_validate_array()` on set.
+
+         Parameters
+         ----------
+         k : str
+             Key
+         default : npt.ArrayLike | None, optional
+             Default value for key ``k``
+
+         Returns
+         -------
+         np.ndarray
+             Value at ``k``
+         """
+         ret = self.get(k, None)
+         if ret is not None:
+             return ret
+
+         if default is None:
+             default = np.array([])
+
+         self[k] = default
+         return self[k]
+
+     def update(  # type: ignore[override]
+         self, other: dict[str, npt.ArrayLike] | None = None, **kwargs: npt.ArrayLike
+     ) -> None:
+         """Update values without warning if overwriting.
+
+         This method casts values in ``other`` to :class:`numpy.ndarray` and
+         ensures that the array sizes are consistent with the instance.
+
+         Parameters
+         ----------
+         other : dict[str, npt.ArrayLike] | None, optional
+             Fields to update as dict
+         **kwargs : npt.ArrayLike
+             Fields to update as kwargs
+         """
+         other = other or {}
+         other_arrs = {k: np.asarray(v) for k, v in other.items()}
+         for arr in other_arrs.values():
+             self._validate_array(arr)
+
+         super().update(other_arrs)
+
+         # validate any kwarg arrays
+         kwargs_arr = {k: np.asarray(v) for k, v in kwargs.items()}
+         for arr in kwargs_arr.values():
+             self._validate_array(arr)
+
+         super().update(kwargs_arr)
+
+     def _validate_array(self, arr: np.ndarray) -> None:
+         """Ensure that ``arr`` is compatible (1 dimensional of equal size) with instance.
+
+         Set attribute ``_size`` if it has not yet been defined.
+
+         Parameters
+         ----------
+         arr : np.ndarray
+             Array to validate
+
+         Raises
+         ------
+         ValueError
+             If ``arr`` is not compatible with instance.
+         """
+         if arr.ndim != 1:
+             raise ValueError("All np.arrays must have dimension 1.")
+
+         size = getattr(self, "_size", 0)
+         if not size:
+             self._size = arr.size
+             return
+
+         if arr.size != size:
+             raise ValueError(f"Incompatible array sizes: {arr.size} and {size}.")
+
+
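The size bookkeeping above means the first array stored fixes the length of every later assignment. A minimal sketch with illustrative keys and values:

```python
import numpy as np

from pycontrails.core.vector import VectorDataDict

d = VectorDataDict({"longitude": np.array([10.0, 20.0, 30.0])})
d["latitude"] = [1.0, 2.0, 3.0]  # cast to ndarray; size 3 matches _size

try:
    d["altitude"] = [1.0, 2.0]  # wrong size is rejected by _validate_array
except ValueError as exc:
    print(exc)  # Incompatible array sizes: 2 and 3.
```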
+ def _empty_vector_dict(keys: Iterable[str]) -> dict[str, np.ndarray]:
+     """Create a dictionary with keys defined by ``keys`` and empty arrays.
+
+     Parameters
+     ----------
+     keys : Iterable[str]
+         Keys to include in dictionary.
+
+     Returns
+     -------
+     dict[str, np.ndarray]
+         Dictionary with empty arrays.
+     """
+     data = {key: np.array([]) for key in keys}
+
+     # The default dtype is float64
+     # Time is special and should have a non-default dtype of datetime64[ns]
+     if "time" in data:
+         data.update(time=np.array([], dtype="datetime64[ns]"))
+
+     return data
+
+
+ class VectorDataset:  # noqa: PLW1641
+     """Base class to hold 1D arrays of consistent size.
+
+     Parameters
+     ----------
+     data : dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataset | None, optional
+         Initial data, by default None. A shallow copy is always made. Use the ``copy``
+         parameter to copy the underlying array data.
+     attrs : dict[str, Any] | None, optional
+         Dictionary of attributes, by default None. A shallow copy is always made.
+     copy : bool, optional
+         Copy individual arrays on instantiation, by default True.
+     **attrs_kwargs : Any
+         Additional attributes passed as keyword arguments.
+
+     Raises
+     ------
+     ValueError
+         If "time" variable cannot be converted to numpy array.
+     """
+
+     __slots__ = ("attrs", "data")
+
+     #: Generic dataset attributes
+     attrs: AttrDict
+
+     #: Vector data with labels as keys and :class:`numpy.ndarray` as values
+     data: VectorDataDict
+
+     def __init__(
+         self,
+         data: dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataset | None = None,
+         *,
+         attrs: dict[str, Any] | None = None,
+         copy: bool = True,
+         **attrs_kwargs: Any,
+     ) -> None:
+         # Set data: always shallow copy
+         # -----------------------------
+
+         # Casting from one VectorDataset type to another
+         # e.g., flight = Flight(...); vector = VectorDataset(flight)
+         if isinstance(data, VectorDataset):
+             attrs = {**data.attrs, **(attrs or {})}
+             if copy:
+                 self.data = VectorDataDict({k: v.copy() for k, v in data.data.items()})
+             else:
+                 self.data = VectorDataDict(data.data)
+
+         elif data is None:
+             self.data = VectorDataDict()
+
+         elif isinstance(data, pd.DataFrame):
+             attrs = {**data.attrs, **(attrs or {})}
+
+             # Take extra caution with a time column
+             try:
+                 time = data["time"]
+             except KeyError:
+                 self.data = VectorDataDict({k: v.to_numpy(copy=copy) for k, v in data.items()})
+             else:
+                 time = _handle_time_column(time)
+                 data_np = {k: v.to_numpy(copy=copy) for k, v in data.items() if k != "time"}
+                 data_np["time"] = time.to_numpy(copy=copy)
+                 self.data = VectorDataDict(data_np)
+
+         # For anything else, we assume it is a dictionary of array-like and attach it
+         else:
+             self.data = VectorDataDict({k: np.array(v, copy=copy) for k, v in data.items()})
+
+         # Set attributes: always shallow copy
+         # -----------------------------------
+
+         self.attrs = AttrDict(attrs or {})
+         self.attrs.update(attrs_kwargs)
+
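A minimal sketch of the three constructor paths (dict, DataFrame, and copy semantics); the column names and values are illustrative:

```python
import numpy as np
import pandas as pd

from pycontrails import VectorDataset

# From a dictionary of array-likes
v1 = VectorDataset({"a": [1.0, 2.0, 3.0]}, attrs={"source": "demo"})

# From a DataFrame: columns become data keys and df.attrs merge into attrs
df = pd.DataFrame({"a": [1.0, 2.0, 3.0], "time": pd.date_range("2022-01-01", periods=3)})
v2 = VectorDataset(df)
print(v2["time"].dtype)  # datetime64[ns]

# copy=False keeps views of the underlying arrays where possible
arr = np.arange(3.0)
v3 = VectorDataset({"a": arr}, copy=False)
```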
+     @classmethod
+     def _from_fastpath(
+         cls,
+         data: dict[str, np.ndarray],
+         attrs: dict[str, Any] | None = None,
+         **kwargs: Any,
+     ) -> Self:
+         """Create new instance from consistent data.
+
+         This is a low-level method that bypasses the standard constructor in certain
+         special cases. It is intended for internal use only.
+
+         In essence, this method skips any validation from __init__ and directly sets
+         ``data`` and ``attrs``. This is useful when creating a new instance from an existing
+         instance whose data has already been validated.
+         """
+         obj = cls.__new__(cls)
+
+         obj.data = VectorDataDict(data)
+         obj.attrs = AttrDict(attrs or {})
+
+         for key, value in kwargs.items():
+             try:
+                 setattr(obj, key, value)
+             # If key not present in __slots__ of class (or parents), it's intended for attrs
+             except AttributeError:
+                 obj.attrs[key] = value
+
+         return obj
+
+     # ------------
+     # dict-like methods
+     # ------------
+     def __getitem__(self, key: str) -> np.ndarray:
+         """Get values from :attr:`data`.
+
+         Parameters
+         ----------
+         key : str
+             Key to get from :attr:`data`
+
+         Returns
+         -------
+         np.ndarray
+             Values at :attr:`data[key]`
+         """
+         return self.data[key]
+
+     def get(self, key: str, default_value: Any = None) -> Any:
+         """Get values from :attr:`data` with ``default_value`` if ``key`` not in :attr:`data`.
+
+         Parameters
+         ----------
+         key : str
+             Key to get from :attr:`data`
+         default_value : Any, optional
+             Return ``default_value`` if `key` not in :attr:`data`, by default ``None``
+
+         Returns
+         -------
+         Any
+             Values at :attr:`data[key]` or ``default_value``
+         """
+         return self.data.get(key, default_value)
+
+     def __setitem__(self, key: str, values: npt.ArrayLike) -> None:
+         """Set values at key `key` on :attr:`data`.
+
+         Parameters
+         ----------
+         key : str
+             Key name in :attr:`data`
+         values : npt.ArrayLike
+             Values to set to :attr:`data`. Array size must be compatible with existing data.
+         """
+         self.data[key] = values
+
+     def __delitem__(self, key: str) -> None:
+         """Delete values at key `key` on :attr:`data`.
+
+         Parameters
+         ----------
+         key : str
+             Key name in :attr:`data`
+         """
+         del self.data[key]
+
+     def __iter__(self) -> Iterator[str]:
+         """Iterate over keys in :attr:`data`.
+
+         Returns
+         -------
+         Iterator[str]
+             Keys in :attr:`data`
+         """
+         return iter(self.data)
+
+     def __contains__(self, key: str) -> bool:
+         """Check if key `key` is in :attr:`data`.
+
+         Parameters
+         ----------
+         key : str
+             Key to check
+
+         Returns
+         -------
+         bool
+             True if `key` is in :attr:`data`, False otherwise
+         """
+         return key in self.data
+
+     def update(
+         self,
+         other: dict[str, npt.ArrayLike] | None = None,
+         **kwargs: npt.ArrayLike,
+     ) -> None:
+         """Update values in :attr:`data` dict without warning if overwriting.
+
+         Parameters
+         ----------
+         other : dict[str, npt.ArrayLike] | None, optional
+             Fields to update as dict
+         **kwargs : npt.ArrayLike
+             Fields to update as kwargs
+         """
+         self.data.update(other, **kwargs)
+
+     def setdefault(self, key: str, default: npt.ArrayLike | None = None) -> np.ndarray:
+         """Shortcut to :meth:`VectorDataDict.setdefault`.
+
+         Parameters
+         ----------
+         key : str
+             Key in :attr:`data` dict.
+         default : npt.ArrayLike, optional
+             Values to use as default, if key is not defined
+
+         Returns
+         -------
+         np.ndarray
+             Values at ``key``
+         """
+         return self.data.setdefault(key, default)
+
+     __marker = object()
+
+     def get_data_or_attr(self, key: str, default: Any = __marker) -> Any:
+         """Get value from :attr:`data` or :attr:`attrs`.
+
+         This method first checks if ``key`` is in :attr:`data` and returns the value if so.
+         If ``key`` is not in :attr:`data`, then this method checks if ``key`` is in :attr:`attrs`
+         and returns the value if so. If ``key`` is not in :attr:`data` or :attr:`attrs`,
+         then the ``default`` value is returned if provided. Otherwise a :class:`KeyError` is raised.
+
+         Parameters
+         ----------
+         key : str
+             Key to get from :attr:`data` or :attr:`attrs`
+         default : Any, optional
+             Default value to return if ``key`` is not in :attr:`data` or :attr:`attrs`.
+
+         Returns
+         -------
+         Any
+             Value at :attr:`data[key]` or :attr:`attrs[key]`
+
+         Raises
+         ------
+         KeyError
+             If ``key`` is not in :attr:`data` or :attr:`attrs` and ``default`` is not provided.
+
+         Examples
+         --------
+         >>> vector = VectorDataset({"a": [1, 2, 3]}, attrs={"b": 4})
+         >>> vector.get_data_or_attr("a")
+         array([1, 2, 3])
+
+         >>> vector.get_data_or_attr("b")
+         4
+
+         >>> vector.get_data_or_attr("c")
+         Traceback (most recent call last):
+         ...
+         KeyError: "Key 'c' not found in data or attrs."
+
+         >>> vector.get_data_or_attr("c", default=5)
+         5
+
+         See Also
+         --------
+         get_constant
+         """
+         marker = self.__marker
+
+         out = self.get(key, marker)
+         if out is not marker:
+             return out
+
+         out = self.attrs.get(key, marker)
+         if out is not marker:
+             return out
+
+         if default is not marker:
+             return default
+
+         msg = f"Key '{key}' not found in data or attrs."
+         raise KeyError(msg)
+
+     # ------------
+
+     def __len__(self) -> int:
+         """Length of each array in :attr:`data`.
+
+         Returns
+         -------
+         int
+             Length of each array in :attr:`data`
+         """
+         return self.size
+
+     def _display_attrs(self) -> dict[str, str]:
+         """Return properties used in `repr` constructions.
+
+         Returns
+         -------
+         dict[str, str]
+             Properties used in :meth:`__repr__` and :meth:`_repr_html_`.
+         """
+
+         # Clip any attribute value that is too long
+         def str_clip(v: Any) -> str:
+             s = str(v)
+             if len(s) < 80:
+                 return s
+             return f"{s[:77]}..."
+
+         return {k: str_clip(v) for k, v in self.attrs.items()}
+
+     def __repr__(self) -> str:
+         class_name = self.__class__.__name__
+         n_attrs = len(self.attrs)
+         n_keys = len(self.data)
+         _repr = f"{class_name} [{n_keys} keys x {self.size} length, {n_attrs} attributes]"
+
+         keys = list(self)
+         keys = [*keys[0:5], "...", *keys[-1:]] if len(keys) > 5 else keys
+         _repr += f"\n\tKeys: {', '.join(keys)}"
+
+         attrs = self._display_attrs()
+         _repr += "\n\tAttributes:\n"
+         _repr += "\n".join([f"\t{k:20}{v}" for k, v in attrs.items()])
+
+         return _repr
+
+     def _repr_html_(self) -> str:
+         name = type(self).__name__
+         n_attrs = len(self.attrs)
+         n_keys = len(self.data)
+         attrs = self._display_attrs()
+         size = self.size
+
+         title = f"<b>{name}</b> [{n_keys} keys x {size} length, {n_attrs} attributes]<br/ ><br/>"
+
+         # matching pd.DataFrame styling
+         header = '<tr style="border-bottom:1px solid silver"><th colspan="2">Attributes</th></tr>'
+         rows = [f"<tr><td>{k}</td><td>{v}</td></tr>" for k, v in attrs.items()]
+         table = f"<table>{header + ''.join(rows)}</table>"
+         return title + table + self.dataframe._repr_html_()
+
+     def __bool__(self) -> bool:
+         """Check if :attr:`data` is nonempty.
+
+         Returns
+         -------
+         bool
+             True if non-empty values are set in :attr:`data`
+         """
+         return self.size > 0
+
+     def __add__(self, other: Self | None) -> Self:
+         """Concatenate two compatible instances of VectorDataset.
+
+         In this context, compatibility means that both have identical :attr:`data` keys.
+
+         This operator behaves similarly to the ``__add__`` method on python lists.
+
+         If self is an empty VectorDataset, return other. This is useful when
+         calling :keyword:`sum` with an empty initial value.
+
+         Parameters
+         ----------
+         other : Self | None
+             Other values to concatenate
+
+         Returns
+         -------
+         Self
+             Concatenated values.
+
+         Raises
+         ------
+         KeyError
+             If `other` has different :attr:`data` keys than self.
+         """
+         # Short circuit: If other is empty or None, return self. The order here can matter.
+         # We let self (so the left addend) take priority.
+         if not other:
+             return self
+         if not self:
+             return other
+
+         return type(self).sum((self, other))
+
+     @classmethod
+     def sum(
+         cls,
+         vectors: Sequence[VectorDataset],
+         infer_attrs: bool = True,
+         fill_value: float | None = None,
+     ) -> Self:
+         """Sum a list of :class:`VectorDataset` instances.
+
+         Parameters
+         ----------
+         vectors : Sequence[VectorDataset]
+             List of :class:`VectorDataset` instances to concatenate.
+         infer_attrs : bool, optional
+             If True, infer attributes from the first element in the sequence.
+         fill_value : float | None, optional
+             Fill value to use when concatenating arrays. By default None, which raises
+             an error if incompatible keys are found.
+
+         Returns
+         -------
+         Self
+             Sum of all instances in ``vectors``.
+
+         Raises
+         ------
+         KeyError
+             If incompatible :attr:`data` keys are found among ``vectors``.
+
+         Examples
+         --------
+         >>> from pycontrails import VectorDataset
+         >>> v1 = VectorDataset({"a": [1, 2, 3], "b": [4, 5, 6]})
+         >>> v2 = VectorDataset({"a": [7, 8, 9], "b": [10, 11, 12]})
+         >>> v3 = VectorDataset({"a": [13, 14, 15], "b": [16, 17, 18]})
+         >>> v = VectorDataset.sum([v1, v2, v3])
+         >>> v.dataframe
+             a   b
+         0   1   4
+         1   2   5
+         2   3   6
+         3   7  10
+         4   8  11
+         5   9  12
+         6  13  16
+         7  14  17
+         8  15  18
+
+         """
+         if cls not in (VectorDataset, GeoVectorDataset):
+             msg = (
+                 "Method 'sum' is only available on 'VectorDataset' and 'GeoVectorDataset'. "
+                 "To sum 'Flight' instances, use 'Fleet.from_seq'."
+             )
+             raise TypeError(msg)
+
+         vectors = [v for v in vectors if v is not None]  # remove None values
+
+         if not vectors:
+             return cls()
+
+         keys: Iterable[str]
+         if fill_value is None:
+             keys = vectors[0].data.keys()
+             for v in vectors[1:]:
+                 if v.data.keys() != keys:
+                     diff = set(v).symmetric_difference(keys)
+                     msg = f"Summands have incompatible keys. Difference: {diff}"
+                     raise KeyError(msg)
+
+         else:
+             keys = set().union(*[v.data.keys() for v in vectors])
+
+         def _get(k: str, v: VectorDataset) -> np.ndarray:
+             # Could also use VectorDataset.get() here, but we want to avoid creating
+             # an unused array if the key is present in the VectorDataset.
+             try:
+                 return v[k]
+             except KeyError:
+                 return np.full(v.size, fill_value)
+
+         def concat(key: str) -> np.ndarray:
+             values = [_get(key, v) for v in vectors]
+             return np.concatenate(values)
+
+         data = {key: concat(key) for key in keys}
+         attrs = vectors[0].attrs if infer_attrs else None
+
+         return cls._from_fastpath(data, attrs)
+
+     def __eq__(self, other: object) -> bool:
+         """Determine if two instances are equal.
+
+         NaN values are considered equal in this comparison.
+
+         Parameters
+         ----------
+         other : object
+             VectorDataset to compare with
+
+         Returns
+         -------
+         bool
+             True if both instances have identical :attr:`data` and :attr:`attrs`.
+         """
+         if not isinstance(other, VectorDataset):
+             return False
+
+         # Check attrs
+         if self.attrs.keys() != other.attrs.keys():
+             return False
+
+         for key, val in self.attrs.items():
+             if isinstance(val, np.ndarray):
+                 # equal_nan not supported for non-numeric data
+                 equal_nan = not np.issubdtype(val.dtype, "O")
+                 if not np.array_equal(val, other.attrs[key], equal_nan=equal_nan):
+                     return False
+             elif val != other.attrs[key]:
+                 return False
+
+         # Check data
+         if self.data.keys() != other.data.keys():
+             return False
+
+         for key, val in self.data.items():
+             # equal_nan not supported for non-numeric data (e.g. strings)
+             equal_nan = not np.issubdtype(val.dtype, "O")
+             if not np.array_equal(val, other[key], equal_nan=equal_nan):
+                 return False
+
+         return True
+
+     @property
+     def size(self) -> int:
+         """Length of each array in :attr:`data`.
+
+         Returns
+         -------
+         int
+             Length of each array in :attr:`data`.
+         """
+         return getattr(self.data, "_size", 0)
+
+     @property
+     def shape(self) -> tuple[int]:
+         """Shape of each array in :attr:`data`.
+
+         Returns
+         -------
+         tuple[int]
+             Shape of each array in :attr:`data`.
+         """
+         return (self.size,)
+
+     @property
+     def dataframe(self) -> pd.DataFrame:
+         """Shorthand property to access :meth:`to_dataframe` with ``copy=False``.
+
+         Returns
+         -------
+         pd.DataFrame
+             Equivalent to the output from :meth:`to_dataframe()`
+         """
+         return self.to_dataframe(copy=False)
+
+     @property
+     def hash(self) -> str:
+         """Generate a unique hash for this class instance.
+
+         Returns
+         -------
+         str
+             Unique hash for flight instance (sha1)
+         """
+         _hash = json.dumps(self.data, cls=json_utils.NumpyEncoder)
+         return hashlib.sha1(bytes(_hash, "utf-8")).hexdigest()
+
+     # ------------
+     # Utilities
+     # ------------
+
+     def copy(self, **kwargs: Any) -> Self:
+         """Return a copy of this instance.
+
+         Parameters
+         ----------
+         **kwargs : Any
+             Additional keyword arguments passed into the constructor of the returned class.
+
+         Returns
+         -------
+         Self
+             Copy of class
+         """
+         data = {key: value.copy() for key, value in self.data.items()}
+         return type(self)._from_fastpath(data, self.attrs, **kwargs)
+
+     def select(self: VectorDataset, keys: Iterable[str], copy: bool = True) -> VectorDataset:
+         """Return new class instance only containing specified keys.
+
+         Parameters
+         ----------
+         keys : Iterable[str]
+             An iterable of keys to filter by.
+         copy : bool, optional
+             Copy data on selection.
+             Defaults to True.
+
+         Returns
+         -------
+         VectorDataset
+             VectorDataset containing only data associated to ``keys``.
+             Note that this method always returns a :class:`VectorDataset`, even if
+             the calling class is a proper subclass of :class:`VectorDataset`.
+         """
+         data = {key: np.array(self[key], copy=copy) for key in keys}
+         return VectorDataset._from_fastpath(data, self.attrs)
+
+     def filter(self, mask: npt.NDArray[np.bool_], copy: bool = True, **kwargs: Any) -> Self:
+         """Filter :attr:`data` according to a boolean array ``mask``.
+
+         Entries corresponding to ``mask == True`` are kept.
+
+         Parameters
+         ----------
+         mask : npt.NDArray[np.bool_]
+             Boolean array with compatible shape.
+         copy : bool, optional
+             Copy data on filter. Defaults to True. See
+             `numpy best practices <https://numpy.org/doc/stable/user/basics.indexing.html#slicing-and-striding>`_
+             for insight into whether copy is appropriate.
+         **kwargs : Any
+             Additional keyword arguments passed into the constructor of the returned class.
+
+         Returns
+         -------
+         Self
+             Containing filtered data
+
+         Raises
+         ------
+         TypeError
+             If ``mask`` is not a boolean array.
+         """
+         self.data._validate_array(mask)
+         if mask.dtype != bool:
+             raise TypeError("Parameter `mask` must be a boolean array.")
+
+         data = {key: np.array(value[mask], copy=copy) for key, value in self.data.items()}
+         return type(self)._from_fastpath(data, self.attrs, **kwargs)
+
+     def sort(self, by: str | list[str]) -> Self:
+         """Sort data by key(s).
+
+         This method always creates a copy of the data by calling
+         :meth:`pandas.DataFrame.sort_values`.
+
+         Parameters
+         ----------
+         by : str | list[str]
+             Key or list of keys to sort by.
+
+         Returns
+         -------
+         Self
+             Instance with sorted data.
+         """
+         return type(self)(data=self.dataframe.sort_values(by=by), attrs=self.attrs, copy=False)
+
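A minimal sketch of masking and sorting; the key and threshold are illustrative:

```python
import numpy as np

from pycontrails import VectorDataset

v = VectorDataset({"a": np.array([1.0, 2.0, 3.0, 4.0])})
mask = v["a"] > 2.0       # boolean array with compatible shape
filtered = v.filter(mask)  # keeps entries where mask is True
print(filtered["a"])       # [3. 4.]

sorted_v = v.sort("a")     # always copies via pandas sort_values
```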
+     def ensure_vars(self, vars: str | Iterable[str], raise_error: bool = True) -> bool:
+         """Ensure variables exist in column of :attr:`data` or :attr:`attrs`.
+
+         Parameters
+         ----------
+         vars : str | Iterable[str]
+             A single string variable name or a sequence of string variable names.
+         raise_error : bool, optional
+             Raise KeyError if data does not contain variables.
+             Defaults to True.
+
+         Returns
+         -------
+         bool
+             True if all variables exist.
+             False otherwise.
+
+         Raises
+         ------
+         KeyError
+             Raises when dataset does not contain variable in ``vars``
+         """
+         if isinstance(vars, str):
+             vars = (vars,)
+
+         for v in vars:
+             if v in self or v in self.attrs:
+                 continue
+             if raise_error:
+                 msg = f"{type(self).__name__} instance does not contain data or attr '{v}'"
+                 raise KeyError(msg)
+             return False
+
+         return True
+
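Since both :attr:`data` and :attr:`attrs` are consulted, a key stored either way satisfies the check. A short illustrative sketch:

```python
from pycontrails import VectorDataset

v = VectorDataset({"a": [1.0]}, attrs={"b": 2})
v.ensure_vars(("a", "b"))              # True: "a" in data, "b" in attrs
v.ensure_vars("c", raise_error=False)  # False instead of raising KeyError
```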
+     def broadcast_attrs(
+         self,
+         keys: str | Iterable[str],
+         overwrite: bool = False,
+         raise_error: bool = True,
+     ) -> None:
+         """Attach values from ``keys`` in :attr:`attrs` onto :attr:`data`.
+
+         If possible, use ``dtype = np.float32`` when broadcasting. If not possible,
+         use whatever ``dtype`` is inferred from the data by :func:`numpy.full`.
+
+         Parameters
+         ----------
+         keys : str | Iterable[str]
+             Keys to broadcast
+         overwrite : bool, optional
+             If True, overwrite existing values in :attr:`data`. By default False.
+         raise_error : bool, optional
+             Raise KeyError if :attr:`self.attrs` does not contain some of ``keys``.
+
+         Raises
+         ------
+         KeyError
+             Not all ``keys`` found in :attr:`attrs`.
+         """
+         if isinstance(keys, str):
+             keys = (keys,)
+
+         # Validate everything up front to avoid partial broadcasting
+         for key in keys:
+             try:
+                 scalar = self.attrs[key]
+             except KeyError as exc:
+                 if raise_error:
+                     raise KeyError(f"{type(self)} does not contain attr `{key}`") from exc
+                 continue
+
+             if key in self.data and not overwrite:
+                 warnings.warn(
+                     f"Found duplicate key {key} in attrs and data. "
+                     "Set `overwrite=True` parameter to force overwrite."
+                 )
+                 continue
+
+             min_dtype = np.min_scalar_type(scalar)
+             dtype = np.float32 if np.can_cast(min_dtype, np.float32) else None
+             self.data.update({key: np.full(self.size, scalar, dtype=dtype)})
+
+     def broadcast_numeric_attrs(
+         self, ignore_keys: str | Iterable[str] | None = None, overwrite: bool = False
+     ) -> None:
+         """Attach numeric values in :attr:`attrs` onto :attr:`data`.
+
+         Iterate through values in :attr:`attrs` and attach :class:`float` and
+         :class:`int` values to ``data``.
+
+         This method modifies object in place.
+
+         Parameters
+         ----------
+         ignore_keys: str | Iterable[str] | None, optional
+             Do not broadcast selected keys.
+             Defaults to None.
+         overwrite : bool, optional
+             If True, overwrite existing values in :attr:`data`. By default False.
+         """
+         if ignore_keys is None:
+             ignore_keys = ()
+         elif isinstance(ignore_keys, str):
+             ignore_keys = (ignore_keys,)
+
+         # Somewhat brittle: Only checking for int or float type
+         numeric_attrs = (
+             attr
+             for attr, val in self.attrs.items()
+             if (isinstance(val, int | float | np.number) and attr not in ignore_keys)
+         )
+         self.broadcast_attrs(numeric_attrs, overwrite)
+
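A minimal sketch of broadcasting scalar attributes onto the per-point arrays; the attribute names are illustrative:

```python
from pycontrails import VectorDataset

v = VectorDataset({"a": [1.0, 2.0, 3.0]}, attrs={"engine_count": 2, "label": "x"})
v.broadcast_attrs("engine_count")
print(v["engine_count"])  # [2. 2. 2.] -- float32, since 2 casts safely to float32

# Broadcast every int/float attr; non-numeric attrs like "label" stay in attrs
v2 = VectorDataset({"a": [1.0, 2.0]}, attrs={"load_factor": 0.8, "name": "y"})
v2.broadcast_numeric_attrs()
print("load_factor" in v2.data)  # True
```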
+     def get_constant(self, key: str, default: Any = __marker) -> Any:
+         """Get a constant value from :attr:`attrs` or :attr:`data`.
+
+         - If ``key`` is found in :attr:`attrs`, the value is returned.
+         - If ``key`` is found in :attr:`data`, the common value is returned if all
+           values are equal.
+         - If ``key`` is not found in :attr:`attrs` or :attr:`data` and a ``default`` is provided,
+           the ``default`` is returned.
+         - Otherwise, a KeyError is raised.
+
+         Parameters
+         ----------
+         key : str
+             Key to look for.
+         default : Any, optional
+             Default value to return if ``key`` is not found in :attr:`attrs` or :attr:`data`.
+
+         Returns
+         -------
+         Any
+             The constant value for ``key``.
+
+         Raises
+         ------
+         KeyError
+             If ``key`` is not found in :attr:`attrs` or the values in :attr:`data` are not equal
+             and ``default`` is not provided.
+
+         Examples
+         --------
+         >>> vector = VectorDataset({"a": [1, 1, 1], "b": [2, 2, 3]})
+         >>> vector.get_constant("a")
+         np.int64(1)
+         >>> vector.get_constant("b")
+         Traceback (most recent call last):
+         ...
+         KeyError: "A constant key 'b' not found in attrs or data"
+         >>> vector.get_constant("b", 3)
+         3
+
+         See Also
+         --------
+         get_data_or_attr
+         GeoVectorDataset.constants
+         """
+         marker = self.__marker
+
+         out = self.attrs.get(key, marker)
+         if out is not marker:
+             return out
+
+         arr: np.ndarray = self.data.get(key, marker)  # type: ignore[arg-type]
+         if arr is not marker:
+             try:
+                 vals = np.unique(arr)
+             except TypeError:
+                 # A TypeError can occur if the arr has object dtype and contains None
+                 # Handle this case by returning None
+                 if arr.dtype == object and np.all(arr == None):  # noqa: E711
+                     return None
+                 raise
+
+             if len(vals) == 1:
+                 return vals[0]
+
+         if default is not marker:
+             return default
+
+         msg = f"A constant key '{key}' not found in attrs or data"
+         raise KeyError(msg)
+
1084
+
1085
+ # ------------
1086
+ # I / O
1087
+ # ------------
1088
+
1089
+ def to_dataframe(self, copy: bool = True) -> pd.DataFrame:
1090
+ """Create :class:`pd.DataFrame` in which each key-value pair in :attr:`data` is a column.
1091
+
1092
+ DataFrame does **not** copy data by default.
1093
+ Use the ``copy`` parameter to copy data values on creation.
1094
+
1095
+ Parameters
1096
+ ----------
1097
+ copy : bool, optional
1098
+ Copy data on DataFrame creation.
1099
+
1100
+ Returns
1101
+ -------
1102
+ pd.DataFrame
1103
+ DataFrame holding key-values as columns.
1104
+ """
1105
+ df = pd.DataFrame(self.data, copy=copy)
1106
+ df.attrs = self.attrs
1107
+ return df
1108
+
1109
+ def to_dict(self) -> dict[str, Any]:
1110
+ """Create dictionary with :attr:`data` and :attr:`attrs`.
1111
+
1112
+ If geo-spatial coordinates (e.g. ``"latitude"``, ``"longitude"``, ``"altitude"``)
1113
+ are present, round to a reasonable precision. If a ``"time"`` variable is present,
1114
+ round to unix seconds. When the instance is a :class:`GeoVectorDataset`,
1115
+ disregard any ``"altitude"`` or ``"level"`` coordinate and only include
1116
+ ``"altitude_ft"`` in the output.
1117
+
1118
+ Returns
1119
+ -------
1120
+ dict[str, Any]
1121
+ Dictionary with :attr:`data` and :attr:`attrs`.
1122
+
1123
+ See Also
1124
+ --------
1125
+ :meth:`from_dict`
1126
+
1127
+ Examples
1128
+ --------
1129
+ >>> import pprint
1130
+ >>> from pycontrails import Flight
1131
+ >>> fl = Flight(
1132
+ ... longitude=[-100, -110],
1133
+ ... latitude=[40, 50],
1134
+ ... level=[200, 200],
1135
+ ... time=[np.datetime64("2020-01-01T09"), np.datetime64("2020-01-01T09:30")],
1136
+ ... aircraft_type="B737",
1137
+ ... )
1138
+ >>> fl = fl.resample_and_fill("5min")
1139
+ >>> pprint.pprint(fl.to_dict())
1140
+ {'aircraft_type': 'B737',
1141
+ 'altitude_ft': [38661.0, 38661.0, 38661.0, 38661.0, 38661.0, 38661.0, 38661.0],
1142
+ 'latitude': [40.0, 41.724, 43.428, 45.111, 46.769, 48.399, 50.0],
1143
+ 'longitude': [-100.0,
1144
+ -101.441,
1145
+ -102.959,
1146
+ -104.563,
1147
+ -106.267,
1148
+ -108.076,
1149
+ -110.0],
1150
+ 'time': [1577869200,
1151
+ 1577869500,
1152
+ 1577869800,
1153
+ 1577870100,
1154
+ 1577870400,
1155
+ 1577870700,
1156
+ 1577871000]}
1157
+ """
1158
+ np_encoder = json_utils.NumpyEncoder()
1159
+
1160
+ # round latitude, longitude, and altitude
1161
+ precision = {"longitude": 3, "latitude": 3, "altitude_ft": 0}
1162
+
1163
+ def encode(key: str, obj: Any) -> Any:
1164
+ # Try to handle some pandas objects
1165
+ if hasattr(obj, "to_numpy"):
1166
+ obj = obj.to_numpy()
1167
+
1168
+ # Convert numpy objects to python objects
1169
+ if isinstance(obj, np.ndarray | np.generic):
1170
+ # round time to unix seconds
1171
+ if key == "time":
1172
+ return np_encoder.default(obj.astype("datetime64[s]").astype(int))
1173
+
1174
+ # round specific keys in precision
1175
+ try:
1176
+ d = precision[key]
1177
+ except KeyError:
1178
+ return np_encoder.default(obj)
1179
+
1180
+ return np_encoder.default(obj.astype(float).round(d))
1181
+
1182
+ # Pass through everything else
1183
+ return obj
1184
+
1185
+ data = {k: encode(k, v) for k, v in self.data.items()}
1186
+ attrs = {k: encode(k, v) for k, v in self.attrs.items()}
1187
+
1188
+ # Only include one of the vertical coordinate keys
1189
+ if isinstance(self, GeoVectorDataset):
1190
+ data.pop("altitude", None)
1191
+ data.pop("level", None)
1192
+ if "altitude_ft" not in data:
1193
+ data["altitude_ft"] = self.altitude_ft.round(precision["altitude_ft"]).tolist()
1194
+
1195
+ # Issue warning if any keys are duplicated
1196
+ common_keys = data.keys() & attrs.keys()
1197
+ if common_keys:
1198
+ warnings.warn(
1199
+ f"Found duplicate keys in data and attrs: {common_keys}. "
1200
+ "Data keys will overwrite attrs keys in returned dictionary."
1201
+ )
1202
+
1203
+ return {**attrs, **data}
1204
+
+     @classmethod
+     def create_empty(
+         cls,
+         keys: Iterable[str],
+         attrs: dict[str, Any] | None = None,
+         **kwargs: Any,
+     ) -> Self:
+         """Create instance with variables defined by ``keys`` and size 0.
+
+         If instance requires additional variables to be defined, these keys will automatically
+         be attached to returned instance.
+
+         Parameters
+         ----------
+         keys : Iterable[str]
+             Keys to include in empty VectorDataset instance.
+         attrs : dict[str, Any] | None, optional
+             Attributes to attach to the instance.
+         **kwargs : Any
+             Additional keyword arguments passed into the constructor of the returned class.
+
+         Returns
+         -------
+         Self
+             Empty VectorDataset instance.
+         """
+         data = _empty_vector_dict(keys)
+         return cls._from_fastpath(data, attrs, **kwargs)
+
+     @classmethod
+     def from_dict(cls, obj: dict[str, Any], copy: bool = True, **obj_kwargs: Any) -> Self:
+         """Create instance from dict representation containing data and attrs.
+
+         Parameters
+         ----------
+         obj : dict[str, Any]
+             Dict representation of VectorDataset (e.g. :meth:`to_dict`)
+         copy : bool, optional
+             Passed to :class:`VectorDataset` constructor.
+             Defaults to True.
+         **obj_kwargs : Any
+             Additional properties passed as keyword arguments.
+
+         Returns
+         -------
+         Self
+             VectorDataset instance.
+
+         See Also
+         --------
+         :meth:`to_dict`
+         """
+         data = {}
+         attrs = {}
+
+         for k, v in {**obj, **obj_kwargs}.items():
+             if isinstance(v, list | np.ndarray):
+                 data[k] = v
+             else:
+                 attrs[k] = v
+
+         return cls(data=data, attrs=attrs, copy=copy)
+
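Together, :meth:`to_dict` and :meth:`from_dict` give a simple round trip: on the way back, lists and arrays are routed to :attr:`data` and everything else to :attr:`attrs`. A short sketch with illustrative values:

```python
from pycontrails import VectorDataset

v = VectorDataset({"a": [1.0, 2.0]}, attrs={"source": "demo"})
d = v.to_dict()                    # arrays encoded as lists, attrs merged in
v2 = VectorDataset.from_dict(d)    # lists -> data, scalars -> attrs
print(v2["a"], v2.attrs["source"])
```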
+     def generate_splits(self, n_splits: int, copy: bool = True) -> Generator[Self, None, None]:
+         """Split instance into ``n_splits`` sub-vectors.
+
+         Parameters
+         ----------
+         n_splits : int
+             Number of splits.
+         copy : bool, optional
+             Passed into :meth:`filter`. Defaults to True. Recommend to keep as True
+             based on `numpy best practices <https://numpy.org/doc/stable/user/basics.indexing.html#slicing-and-striding>`_.
+
+         Yields
+         ------
+         Self
+             Generator of split vectors.
+
+         See Also
+         --------
+         :func:`numpy.array_split`
+         """
+         full_index = np.arange(self.size)
+         index_splits = np.array_split(full_index, n_splits)
+         for index in index_splits:
+             filt = np.zeros(self.size, dtype=bool)
+             filt[index] = True
+             yield self.filter(filt, copy=copy)
+
+
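Because the splits come from :func:`numpy.array_split`, uneven divisions are allowed. A short illustrative sketch:

```python
from pycontrails import VectorDataset

v = VectorDataset({"a": list(range(10))})
for chunk in v.generate_splits(3):
    print(chunk.size)  # 4, 3, 3
```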
+ class GeoVectorDataset(VectorDataset):
+     """Base class to hold 1D geospatial arrays of consistent size.
+
+     GeoVectorDataset is required to have geospatial coordinate keys defined
+     in :attr:`required_keys`.
+
+     Expect latitude-longitude CRS in WGS 84.
+     Expect altitude in [:math:`m`].
+     Expect level in [:math:`hPa`].
+
+     Each spatial variable is expected to have "float32" or "float64" ``dtype``.
+     The time variable is expected to have "datetime64[ns]" ``dtype``.
+
+     Parameters
+     ----------
+     data : dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataset | None, optional
+         Data dictionary or :class:`pandas.DataFrame`.
+         Must include keys/columns ``time``, ``latitude``, ``longitude``, ``altitude`` or ``level``.
+         Keyword arguments for ``time``, ``latitude``, ``longitude``, ``altitude`` or ``level``
+         override ``data`` inputs. Expects ``altitude`` in meters and ``time``
+         as a DatetimeLike (or array that can be processed with :meth:`pd.to_datetime`).
+         Additional waypoint-specific data can be included as additional keys/columns.
+     longitude : npt.ArrayLike | None, optional
+         Longitude data.
+         Defaults to None.
+     latitude : npt.ArrayLike | None, optional
+         Latitude data.
+         Defaults to None.
+     altitude : npt.ArrayLike | None, optional
+         Altitude data, [:math:`m`].
+         Defaults to None.
+     altitude_ft : npt.ArrayLike | None, optional
+         Altitude data, [:math:`ft`].
+         Defaults to None.
+     level : npt.ArrayLike | None, optional
+         Level data, [:math:`hPa`].
+         Defaults to None.
+     time : npt.ArrayLike | None, optional
+         Time data.
+         Expects an array of DatetimeLike values,
+         or an array that can be processed with :meth:`pd.to_datetime`.
+         Defaults to None.
+     attrs : dict[str, Any] | None, optional
+         Additional properties as a dictionary.
+         Defaults to {}.
+     copy : bool, optional
+         Copy data on class creation.
+         Defaults to True.
+     **attrs_kwargs : Any
+         Additional properties passed as keyword arguments.
+
+     Raises
+     ------
+     KeyError
+         Raises if ``data`` input does not contain at least ``time``, ``latitude``, ``longitude``,
+         (``altitude`` or ``level``).
+     """
+
+     __slots__ = ()
+
+     #: Required keys for creating GeoVectorDataset
+     required_keys = "longitude", "latitude", "time"
+
+     #: At least one of these vertical-coordinate keys must also be included
+     vertical_keys = "altitude", "level", "altitude_ft"
+
+     def __init__(
+         self,
+         data: dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataset | None = None,
+         *,
+         longitude: npt.ArrayLike | None = None,
+         latitude: npt.ArrayLike | None = None,
+         altitude: npt.ArrayLike | None = None,
+         altitude_ft: npt.ArrayLike | None = None,
+         level: npt.ArrayLike | None = None,
+         time: npt.ArrayLike | None = None,
+         attrs: dict[str, Any] | None = None,
+         copy: bool = True,
+         **attrs_kwargs: Any,
+     ) -> None:
+         # shortcut to `GeoVectorDataset.create_empty` by just using `GeoVectorDataset()`
+         if (
+             data is None
+             and longitude is None
+             and latitude is None
+             and altitude is None
+             and level is None
+             and time is None
+         ):
+             keys = *self.required_keys, "altitude"
+             self.data = VectorDataDict(_empty_vector_dict(keys))
+             self.attrs = AttrDict(attrs or {})
+             self.attrs.update(attrs_kwargs)
+             return
+
+         super().__init__(data=data, attrs=attrs, copy=copy, **attrs_kwargs)
+
+         # using the self[key] syntax specifically to run qc on assignment
+         if longitude is not None:
+             self["longitude"] = np.array(longitude, copy=copy)
+
+         if latitude is not None:
+             self["latitude"] = np.array(latitude, copy=copy)
+
+         if time is not None:
+             self["time"] = np.array(time, copy=copy)
+
+         if altitude is not None:
+             self["altitude"] = np.array(altitude, copy=copy)
+             if altitude_ft is not None or level is not None:
+                 warnings.warn(
+                     "Altitude data provided. Ignoring altitude_ft and level inputs.",
+                 )
+         elif altitude_ft is not None:
+             self["altitude_ft"] = np.array(altitude_ft, copy=copy)
+             if level is not None:
+                 warnings.warn(
+                     "Altitude_ft data provided. Ignoring level input.",
+                 )
+         elif level is not None:
+             self["level"] = np.array(level, copy=copy)
+
+         # Confirm that input has required keys
+         if not all(key in self for key in self.required_keys):
+             raise KeyError(
+                 f"{self.__class__.__name__} requires all of the following keys: "
+                 f"{', '.join(self.required_keys)}"
+             )
+
+         # Confirm that input has at least one vertical key
+         if not any(key in self for key in self.vertical_keys):
+             raise KeyError(
+                 f"{self.__class__.__name__} requires at least one of the following keys: "
+                 f"{', '.join(self.vertical_keys)}"
+             )
+
+         # Parse time: If time is not np.datetime64, we try to coerce it to be
+         # by pumping it through pd.to_datetime.
+         time = self["time"]
+         if not np.issubdtype(time.dtype, np.datetime64):
+             warnings.warn("Time data is not np.datetime64. Attempting to coerce.")
+             try:
+                 pd_time = _handle_time_column(pd.Series(self["time"]))
+             except ValueError as e:
+                 raise ValueError("Could not coerce time data to datetime64.") from e
+             np_time = pd_time.to_numpy(dtype="datetime64[ns]")
+             self.update(time=np_time)
+         elif time.dtype != "datetime64[ns]":
+             self.update(time=time.astype("datetime64[ns]"))
+
+         # Ensure spatial coordinates are float32 or float64
+         float_dtype = (np.float32, np.float64)
+         for coord in ("longitude", "latitude", "altitude", "level", "altitude_ft"):
+             try:
+                 arr = self[coord]
+             except KeyError:
+                 continue
+             if arr.dtype not in float_dtype:
+                 self.update({coord: arr.astype(np.float64)})
+
+         longitude = self["longitude"]
+         if np.any(longitude > 180.0) or np.any(longitude < -180.0):
+             raise ValueError("EPSG:4326 longitude coordinates should lie between [-180, 180).")
+         latitude = self["latitude"]
+         if np.any(latitude > 90.0) or np.any(latitude < -90.0):
+             raise ValueError("EPSG:4326 latitude coordinates should lie between [-90, 90].")
+
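A minimal construction sketch exercising the keyword path and the required/vertical key validation; the coordinates are illustrative:

```python
import numpy as np

from pycontrails import GeoVectorDataset

gv = GeoVectorDataset(
    longitude=[-100.0, -99.5],
    latitude=[40.0, 40.2],
    altitude=[11000.0, 11000.0],  # meters; altitude_ft or level also accepted
    time=[np.datetime64("2022-03-01T00"), np.datetime64("2022-03-01T01")],
)
print(gv.level)  # pressure levels derived from altitude via units.m_to_pl
```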
+     @override
+     def _display_attrs(self) -> dict[str, str]:
+         try:
+             time0 = pd.Timestamp(np.nanmin(self["time"]))
+             time1 = pd.Timestamp(np.nanmax(self["time"]))
+             lon0 = round(np.nanmin(self["longitude"]), 3)
+             lon1 = round(np.nanmax(self["longitude"]), 3)
+             lat0 = round(np.nanmin(self["latitude"]), 3)
+             lat1 = round(np.nanmax(self["latitude"]), 3)
+             alt0 = round(np.nanmin(self.altitude), 1)
+             alt1 = round(np.nanmax(self.altitude), 1)
+
+             attrs = {
+                 "time": f"[{time0}, {time1}]",
+                 "longitude": f"[{lon0}, {lon1}]",
+                 "latitude": f"[{lat0}, {lat1}]",
+                 "altitude": f"[{alt0}, {alt1}]",
+             }
+         except Exception:
+             attrs = {}
+
+         attrs.update(super()._display_attrs())
+         return attrs
+
+     @property
+     def level(self) -> npt.NDArray[np.floating]:
+         """Get pressure ``level`` values for points.
+
+         Automatically calculates pressure level from the ``altitude`` key using
+         :func:`units.m_to_pl`.
+
+         Note that if ``level`` key exists in :attr:`data`, the data at the ``level``
+         key will be returned. This allows an override of the default calculation
+         of pressure level from altitude.
+
+         Returns
+         -------
+         npt.NDArray[np.floating]
+             Point pressure level values, [:math:`hPa`]
+         """
+         try:
+             return self["level"]
+         except KeyError:
+             return units.m_to_pl(self.altitude)
+
+     @property
+     def altitude(self) -> npt.NDArray[np.floating]:
+         """Get altitude.
+
+         Automatically calculates altitude from the ``level`` key using
+         :func:`units.pl_to_m`.
+
+         Note that if ``altitude`` key exists in :attr:`data`, the data at the ``altitude``
+         key will be returned. This allows an override of the default calculation of altitude
+         from pressure level.
+
+         Returns
+         -------
+         npt.NDArray[np.floating]
+             Altitude, [:math:`m`]
+         """
+         try:
+             return self["altitude"]
+         except KeyError:
+             # Implementation note: explicitly look for "level" or "altitude_ft" key
+             # here to avoid getting stuck in an infinite loop when .level or .altitude_ft
+             # are called.
+             if (level := self.get("level")) is not None:
+                 return units.pl_to_m(level)
+             return units.ft_to_m(self["altitude_ft"])
+
+     @property
+     def air_pressure(self) -> npt.NDArray[np.floating]:
+         """Get ``air_pressure`` values for points.
+
+         Returns
+         -------
+         npt.NDArray[np.floating]
+             Point air pressure values, [:math:`Pa`]
+         """
+         try:
+             return self["air_pressure"]
+         except KeyError:
+             return 100.0 * self.level
+
+     @property
+     def altitude_ft(self) -> npt.NDArray[np.floating]:
+         """Get altitude in feet.
+
+         Returns
+         -------
+         npt.NDArray[np.floating]
+             Altitude, [:math:`ft`]
+         """
+         try:
+             return self["altitude_ft"]
+         except KeyError:
+             return units.m_to_ft(self.altitude)
+
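The vertical-coordinate properties above fall back to unit conversion when only one of ``altitude``, ``altitude_ft``, or ``level`` is stored. A short sketch, assuming a dataset created with ``level`` only; values are illustrative:

```python
import numpy as np

from pycontrails import GeoVectorDataset

gv = GeoVectorDataset(
    longitude=[0.0],
    latitude=[0.0],
    level=[250.0],  # hPa; the only stored vertical coordinate
    time=[np.datetime64("2022-03-01T00")],
)
print(gv.level)         # [250.] -- returned directly from data
print(gv.altitude)      # derived via units.pl_to_m
print(gv.air_pressure)  # [25000.] Pa, i.e. 100 * level
```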
+     @property
+     def constants(self) -> dict[str, Any]:
+         """Return a dictionary of constant attributes and data values.
+
+         Includes :attr:`attrs` and values from columns in :attr:`data` with a unique
+         value.
+
+         Returns
+         -------
+         dict[str, Any]
+             Properties and their constant values
+         """
+         constants = {}
+
+         # get constant data values that are not nan
+         for key in set(self).difference(self.required_keys):
+             unique = np.unique(self[key])
+             if len(unique) == 1 and (isinstance(unique[0], str) or ~np.isnan(unique[0])):
+                 constants[key] = unique[0]
+
+         # add attributes
+         constants.update(self.attrs)
+
+         # clean string values by removing whitespace
+         # convert any numpy items to python objects
+         def _cleanup(v: Any) -> Any:
+             if isinstance(v, str):
+                 return v.strip()
+             if isinstance(v, np.integer):
+                 return int(v)
+             if isinstance(v, np.floating):
+                 return float(v)
+             if isinstance(v, np.bool_):
+                 return bool(v)
+             return v
+
+         return {k: _cleanup(v) for k, v in constants.items()}
+
+     @property
+     def coords(self) -> dict[str, np.ndarray]:
+         """Get geospatial coordinates for compatibility with MetDataArray.
+
+         Returns
+         -------
+         dict[str, np.ndarray]
+             A dictionary with fields `longitude`, `latitude`, `level`, and `time`.
+         """
+         return {
+             "longitude": self["longitude"],
+             "latitude": self["latitude"],
+             "level": self.level,
+             "time": self["time"],
+         }
+
1614
+ # ------------
1615
+ # Utilities
1616
+ # ------------
1617
+
+     def transform_crs(self, crs: str) -> tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]]:
+         """Transform trajectory data from one coordinate reference system (CRS) to another.
+
+         Parameters
+         ----------
+         crs : str
+             Target CRS. Passed to :class:`pyproj.Transformer`. The source CRS
+             is assumed to be EPSG:4326.
+
+         Returns
+         -------
+         tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]]
+             New x and y coordinates in the target CRS.
+         """
+         try:
+             import pyproj
+         except ModuleNotFoundError as exc:
+             dependencies.raise_module_not_found_error(
+                 name="GeoVectorDataset.transform_crs method",
+                 package_name="pyproj",
+                 module_not_found_error=exc,
+                 pycontrails_optional_package="pyproj",
+             )
+
+         crs_from = "EPSG:4326"
+         transformer = pyproj.Transformer.from_crs(crs_from, crs, always_xy=True)
+         return transformer.transform(self["longitude"], self["latitude"])
+
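A sketch of the underlying pyproj call, assuming the optional ``pyproj`` dependency is installed. ``always_xy=True`` fixes lon-first axis order regardless of the CRS definition; EPSG:3857 (Web Mercator) is just an example target.

.. code-block:: python

    import numpy as np
    import pyproj

    transformer = pyproj.Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
    x, y = transformer.transform(np.array([0.0, 10.0]), np.array([0.0, 45.0]))
    print(x, y)  # projected coordinates in metres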
+     def T_isa(self) -> npt.NDArray[np.floating]:
+         """Calculate the ICAO standard atmosphere temperature at each point.
+
+         Returns
+         -------
+         npt.NDArray[np.floating]
+             ISA temperature, [:math:`K`]
+
+         See Also
+         --------
+         :func:`pycontrails.physics.units.m_to_T_isa`
+         """
+         return units.m_to_T_isa(self.altitude)
+
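For reference, in the ICAO standard atmosphere troposphere (below roughly 11 km) temperature decreases linearly from 288.15 K at a lapse rate of 6.5 K/km, so ``units.m_to_T_isa`` should reproduce the closed form there. A quick sanity-check sketch:

.. code-block:: python

    import numpy as np

    from pycontrails.physics import units

    h = np.array([0.0, 5000.0, 11000.0])  # altitude [m]
    expected = 288.15 - 0.0065 * h  # ISA troposphere: ~[288.15, 255.65, 216.65] K
    print(units.m_to_T_isa(h), expected)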
+     # ------------
+     # Met
+     # ------------
+
+     def coords_intersect_met(
+         self, met: met_module.MetDataset | met_module.MetDataArray
+     ) -> npt.NDArray[np.bool_]:
+         """Return boolean mask of data inside the bounding box defined by ``met``.
+
+         Parameters
+         ----------
+         met : met_module.MetDataset | met_module.MetDataArray
+             MetDataset or MetDataArray to compare.
+
+         Returns
+         -------
+         npt.NDArray[np.bool_]
+             True if point is inside the bounding box defined by ``met``.
+         """
+         indexes = met.indexes
+
+         lat_intersect = coordinates.intersect_domain(
+             indexes["latitude"].to_numpy(),
+             self["latitude"],
+         )
+         lon_intersect = coordinates.intersect_domain(
+             indexes["longitude"].to_numpy(),
+             self["longitude"],
+         )
+         level_intersect = coordinates.intersect_domain(
+             indexes["level"].to_numpy(),
+             self.level,
+         )
+         time_intersect = coordinates.intersect_domain(
+             indexes["time"].to_numpy(),
+             self["time"],
+         )
+
+         return lat_intersect & lon_intersect & level_intersect & time_intersect
+
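A sketch of using the mask, assuming a ``GeoVectorDataset`` named ``vector`` and a ``MetDataset`` named ``met`` are already in scope; the ``filter`` method on the vector (assumed here to select rows where the mask is True) keeps only in-domain points.

.. code-block:: python

    mask = vector.coords_intersect_met(met)
    inside = vector.filter(mask)  # retain only points within the met bounding box
    print(f"{mask.sum()} of {len(vector)} points intersect the met domain")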
+     def intersect_met(
+         self,
+         mda: met_module.MetDataArray,
+         *,
+         longitude: npt.NDArray[np.floating] | None = None,
+         latitude: npt.NDArray[np.floating] | None = None,
+         level: npt.NDArray[np.floating] | None = None,
+         time: npt.NDArray[np.datetime64] | None = None,
+         use_indices: bool = False,
+         **interp_kwargs: Any,
+     ) -> npt.NDArray[np.floating]:
+         """Intersect waypoints with MetDataArray.
+
+         Parameters
+         ----------
+         mda : met_module.MetDataArray
+             MetDataArray containing a meteorological variable at spatio-temporal coordinates.
+         longitude : npt.NDArray[np.floating] | None, optional
+             Override existing coordinates for met interpolation
+         latitude : npt.NDArray[np.floating] | None, optional
+             Override existing coordinates for met interpolation
+         level : npt.NDArray[np.floating] | None, optional
+             Override existing coordinates for met interpolation
+         time : npt.NDArray[np.datetime64] | None, optional
+             Override existing coordinates for met interpolation
+         use_indices : bool, optional
+             Experimental.
+         **interp_kwargs : Any
+             Additional keyword arguments to pass to :meth:`MetDataArray.intersect_met`.
+             Examples include ``method``, ``bounds_error``, and ``fill_value``. If an error such as
+
+             .. code-block:: python
+
+                 ValueError: One of the requested xi is out of bounds in dimension 2
+
+             occurs, try calling this function with ``bounds_error=False``. In addition,
+             setting ``fill_value=0.0`` will replace NaN values with 0.0.
+
+         Returns
+         -------
+         npt.NDArray[np.floating]
+             Interpolated values
+
+         Examples
+         --------
+         >>> from datetime import datetime
+         >>> import pandas as pd
+         >>> import numpy as np
+         >>> from pycontrails.datalib.ecmwf import ERA5
+         >>> from pycontrails import Flight
+
+         >>> # Get met data
+         >>> times = (datetime(2022, 3, 1, 0), datetime(2022, 3, 1, 3))
+         >>> variables = ["air_temperature", "specific_humidity"]
+         >>> levels = [300, 250, 200]
+         >>> era5 = ERA5(time=times, variables=variables, pressure_levels=levels)
+         >>> met = era5.open_metdataset()
+
+         >>> # Example flight
+         >>> df = pd.DataFrame()
+         >>> df['longitude'] = np.linspace(0, 50, 10)
+         >>> df['latitude'] = np.linspace(0, 10, 10)
+         >>> df['altitude'] = 11000
+         >>> df['time'] = pd.date_range("2022-03-01T00", "2022-03-01T02", periods=10)
+         >>> fl = Flight(df)
+
+         >>> # Intersect
+         >>> fl.intersect_met(met['air_temperature'], method='nearest')
+         array([231.62969892, 230.72604651, 232.24318771, 231.88338483,
+                231.06429438, 231.59073409, 231.65125393, 231.93064004,
+                232.03344087, 231.65954432])
+
+         >>> fl.intersect_met(met['air_temperature'], method='linear')
+         array([225.77794552, 225.13908414, 226.231218  , 226.31831528,
+                225.56102321, 225.81192149, 226.03192642, 226.22056121,
+                226.03770174, 225.63226188])
+
+         >>> # Interpolate and attach to `Flight` instance
+         >>> for key in met:
+         ...     fl[key] = fl.intersect_met(met[key])
+
+         >>> # Show the final three columns of the dataframe
+         >>> fl.dataframe.iloc[:, -3:].head()
+                          time  air_temperature  specific_humidity
+         0 2022-03-01 00:00:00       225.777946           0.000132
+         1 2022-03-01 00:13:20       225.139084           0.000132
+         2 2022-03-01 00:26:40       226.231218           0.000107
+         3 2022-03-01 00:40:00       226.318315           0.000171
+         4 2022-03-01 00:53:20       225.561022           0.000109
+
+         """
+         # Override use_indices in certain situations
+         if use_indices:
+             # Often the single_level data we use has time shifted
+             # Don't allow it for now. We could do something smarter here!
+             if mda.is_single_level:
+                 use_indices = False
+
+             # Cannot both override some coordinate AND pass indices.
+             elif any(c is not None for c in (longitude, latitude, level, time)):
+                 # Should we warn?! Or is this "convenience"?
+                 use_indices = False
+
+         longitude = longitude if longitude is not None else self["longitude"]
+         latitude = latitude if latitude is not None else self["latitude"]
+         level = level if level is not None else self.level
+         time = time if time is not None else self["time"]
+
+         if not use_indices:
+             return mda.interpolate(longitude, latitude, level, time, **interp_kwargs)
+
+         indices = self._get_indices()
+         already_has_indices = indices is not None
+         out, indices = mda.interpolate(
+             longitude,
+             latitude,
+             level,
+             time,
+             indices=indices,
+             return_indices=True,
+             **interp_kwargs,
+         )
+         if not already_has_indices:
+             self._put_indices(indices)
+         return out
+
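Following the docstring's advice on out-of-bounds points, a sketch that reuses the ``fl`` and ``met`` objects from the doctest above:

.. code-block:: python

    # Do not raise when waypoints fall outside the met grid;
    # out-of-bounds NaN values are replaced with 0.0
    vals = fl.intersect_met(
        met["air_temperature"],
        bounds_error=False,
        fill_value=0.0,
    )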
+     def _put_indices(self, indices: interpolation.RGIArtifacts) -> None:
+         """Set entries of ``indices`` onto underlying :attr:`data`.
+
+         Each entry of ``indices`` is unpacked assuming certain conventions
+         for its structure. A ValueError is raised if these conventions are not
+         satisfied.
+
+         .. versionadded:: 0.26.0
+
+         Experimental
+
+         Parameters
+         ----------
+         indices : interpolation.RGIArtifacts
+             The indices to store.
+         """
+         indices_x, indices_y, indices_z, indices_t = indices.xi_indices
+         distances_x, distances_y, distances_z, distances_t = indices.norm_distances
+         out_of_bounds = indices.out_of_bounds
+
+         self["_indices_x"] = indices_x
+         self["_indices_y"] = indices_y
+         self["_indices_z"] = indices_z
+         self["_indices_t"] = indices_t
+         self["_distances_x"] = distances_x
+         self["_distances_y"] = distances_y
+         self["_distances_z"] = distances_z
+         self["_distances_t"] = distances_t
+         self["_out_of_bounds"] = out_of_bounds
+
+     def _get_indices(self) -> interpolation.RGIArtifacts | None:
+         """Get entries from call to :meth:`_put_indices`.
+
+         .. versionadded:: 0.26.0
+
+         Experimental
+
+         Returns
+         -------
+         interpolation.RGIArtifacts | None
+             Previously cached output of
+             :meth:`scipy.interpolate.RegularGridInterpolator._find_indices`,
+             or None if cached output is not present on instance.
+         """
+         try:
+             indices_x = self["_indices_x"]
+             indices_y = self["_indices_y"]
+             indices_z = self["_indices_z"]
+             indices_t = self["_indices_t"]
+             distances_x = self["_distances_x"]
+             distances_y = self["_distances_y"]
+             distances_z = self["_distances_z"]
+             distances_t = self["_distances_t"]
+             out_of_bounds = self["_out_of_bounds"]
+         except KeyError:
+             return None
+
+         indices = np.asarray([indices_x, indices_y, indices_z, indices_t])
+         distances = np.asarray([distances_x, distances_y, distances_z, distances_t])
+
+         return interpolation.RGIArtifacts(indices, distances, out_of_bounds)
+
+     def _invalidate_indices(self) -> None:
+         """Remove any cached indices from :attr:`data`."""
+         for key in (
+             "_indices_x",
+             "_indices_y",
+             "_indices_z",
+             "_indices_t",
+             "_distances_x",
+             "_distances_y",
+             "_distances_z",
+             "_distances_t",
+             "_out_of_bounds",
+         ):
+             self.data.pop(key, None)
+
+     @overload
+     def downselect_met(
+         self,
+         met: met_module.MetDataset,
+         *,
+         longitude_buffer: tuple[float, float] = ...,
+         latitude_buffer: tuple[float, float] = ...,
+         level_buffer: tuple[float, float] = ...,
+         time_buffer: tuple[np.timedelta64, np.timedelta64] = ...,
+     ) -> met_module.MetDataset: ...
+
+     @overload
+     def downselect_met(
+         self,
+         met: met_module.MetDataArray,
+         *,
+         longitude_buffer: tuple[float, float] = ...,
+         latitude_buffer: tuple[float, float] = ...,
+         level_buffer: tuple[float, float] = ...,
+         time_buffer: tuple[np.timedelta64, np.timedelta64] = ...,
+     ) -> met_module.MetDataArray: ...
+
+     def downselect_met(
+         self,
+         met: met_module.MetDataType,
+         *,
+         longitude_buffer: tuple[float, float] = (0.0, 0.0),
+         latitude_buffer: tuple[float, float] = (0.0, 0.0),
+         level_buffer: tuple[float, float] = (0.0, 0.0),
+         time_buffer: tuple[np.timedelta64, np.timedelta64] = (
+             np.timedelta64(0, "h"),
+             np.timedelta64(0, "h"),
+         ),
+     ) -> met_module.MetDataType:
+         """Downselect ``met`` to encompass a spatiotemporal region of the data.
+
+         .. versionchanged:: 0.54.5
+
+             Returned object is no longer copied.
+
+         Parameters
+         ----------
+         met : met_module.MetDataType
+             MetDataset or MetDataArray to downselect.
+         longitude_buffer : tuple[float, float], optional
+             Extend the longitude domain by ``longitude_buffer[0]`` on the low side
+             and ``longitude_buffer[1]`` on the high side.
+             Units must be the same as class coordinates.
+             Defaults to ``(0, 0)`` degrees.
+         latitude_buffer : tuple[float, float], optional
+             Extend the latitude domain by ``latitude_buffer[0]`` on the low side
+             and ``latitude_buffer[1]`` on the high side.
+             Units must be the same as class coordinates.
+             Defaults to ``(0, 0)`` degrees.
+         level_buffer : tuple[float, float], optional
+             Extend the level domain by ``level_buffer[0]`` on the low side
+             and ``level_buffer[1]`` on the high side.
+             Units must be the same as class coordinates.
+             Defaults to ``(0, 0)`` [:math:`hPa`].
+         time_buffer : tuple[np.timedelta64, np.timedelta64], optional
+             Extend the time domain by ``time_buffer[0]`` on the low side
+             and ``time_buffer[1]`` on the high side.
+             Units must be the same as class coordinates.
+             Defaults to ``(np.timedelta64(0, "h"), np.timedelta64(0, "h"))``.
+
+         Returns
+         -------
+         met_module.MetDataType
+             Downselected MetDataset or MetDataArray.
+         """
+         indexes = met.indexes
+         lon_slice = coordinates.slice_domain(
+             indexes["longitude"].to_numpy(),
+             self["longitude"],
+             buffer=longitude_buffer,
+         )
+         lat_slice = coordinates.slice_domain(
+             indexes["latitude"].to_numpy(),
+             self["latitude"],
+             buffer=latitude_buffer,
+         )
+         time_slice = coordinates.slice_domain(
+             indexes["time"].to_numpy(),
+             self["time"],
+             buffer=time_buffer,
+         )
+
+         # single level data have "level" == [-1]
+         if met.is_single_level:
+             level_slice = slice(None)
+         else:
+             level_slice = coordinates.slice_domain(
+                 indexes["level"].to_numpy(),
+                 self.level,
+                 buffer=level_buffer,
+             )
+         logger.debug("Downselect met at %s %s %s %s", lon_slice, lat_slice, level_slice, time_slice)
+
+         data = met.data.isel(
+             longitude=lon_slice,
+             latitude=lat_slice,
+             level=level_slice,
+             time=time_slice,
+         )
+         return type(met)._from_fastpath(data)
+
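A sketch of buffered downselection, again assuming ``vector`` and ``met`` in scope: pad the horizontal domain by 2 degrees, the vertical domain by 20 hPa, and the time domain by one hour so that edge waypoints keep interpolation neighbors.

.. code-block:: python

    import numpy as np

    met_small = vector.downselect_met(
        met,
        longitude_buffer=(2.0, 2.0),
        latitude_buffer=(2.0, 2.0),
        level_buffer=(20.0, 20.0),
        time_buffer=(np.timedelta64(1, "h"), np.timedelta64(1, "h")),
    )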
+     # ------------
+     # I / O
+     # ------------
+
+     @classmethod
+     @override
+     def create_empty(
+         cls,
+         keys: Iterable[str] | None = None,
+         attrs: dict[str, Any] | None = None,
+         **attrs_kwargs: Any,
+     ) -> Self:
+         keys = *cls.required_keys, "altitude", *(keys or ())
+         return super().create_empty(keys, attrs, **attrs_kwargs)
+
+     def to_geojson_points(self) -> dict[str, Any]:
+         """Return dataset as GeoJSON FeatureCollection of Points.
+
+         Each Feature has a properties attribute that includes ``time`` and
+         other data besides ``latitude``, ``longitude``, and ``altitude`` in :attr:`data`.
+
+         Returns
+         -------
+         dict[str, Any]
+             Python representation of GeoJSON FeatureCollection
+         """
+         return json_utils.dataframe_to_geojson_points(self.dataframe)
+
+     # ------------
+     # Vector to grid
+     # ------------
+     def to_lon_lat_grid(
+         self,
+         agg: dict[str, str],
+         *,
+         spatial_bbox: tuple[float, float, float, float] = (-180.0, -90.0, 180.0, 90.0),
+         spatial_grid_res: float = 0.5,
+     ) -> xr.Dataset:
+         """
+         Convert vectors to a longitude-latitude grid.
+
+         See Also
+         --------
+         vector_to_lon_lat_grid
+         """
+         return vector_to_lon_lat_grid(
+             self, agg=agg, spatial_bbox=spatial_bbox, spatial_grid_res=spatial_grid_res
+         )
+
+
+ def vector_to_lon_lat_grid(
+     vector: GeoVectorDataset,
+     agg: dict[str, str],
+     *,
+     spatial_bbox: tuple[float, float, float, float] = (-180.0, -90.0, 180.0, 90.0),
+     spatial_grid_res: float = 0.5,
+ ) -> xr.Dataset:
+     r"""
+     Convert vectors to a longitude-latitude grid.
+
+     Parameters
+     ----------
+     vector : GeoVectorDataset
+         Contains the longitude, latitude and variables for aggregation.
+     agg : dict[str, str]
+         Variable name and the function selected for aggregation,
+         i.e. ``{"segment_length": "sum"}``.
+     spatial_bbox : tuple[float, float, float, float]
+         Spatial bounding box, ``(lon_min, lat_min, lon_max, lat_max)``, [:math:`\deg`].
+         By default, the entire globe is used.
+     spatial_grid_res : float
+         Spatial grid resolution, [:math:`\deg`]
+
+     Returns
+     -------
+     xr.Dataset
+         Aggregated variables in a longitude-latitude grid.
+
+     Examples
+     --------
+     >>> rng = np.random.default_rng(234)
+     >>> vector = GeoVectorDataset(
+     ...     longitude=rng.uniform(-10, 10, 10000),
+     ...     latitude=rng.uniform(-10, 10, 10000),
+     ...     altitude=np.zeros(10000),
+     ...     time=np.zeros(10000).astype("datetime64[ns]"),
+     ... )
+     >>> vector["foo"] = rng.uniform(0, 1, 10000)
+     >>> ds = vector.to_lon_lat_grid({"foo": "sum"}, spatial_bbox=(-10, -10, 9.5, 9.5))
+     >>> da = ds["foo"]
+     >>> da.coords
+     Coordinates:
+       * longitude  (longitude) float64 320B -10.0 -9.5 -9.0 -8.5 ... 8.0 8.5 9.0 9.5
+       * latitude   (latitude) float64 320B -10.0 -9.5 -9.0 -8.5 ... 8.0 8.5 9.0 9.5
+
+     >>> da.values.round(2)
+     array([[2.23, 0.67, 1.29, ..., 4.66, 3.91, 1.93],
+            [4.1 , 3.84, 1.34, ..., 3.24, 1.71, 4.55],
+            [0.78, 3.25, 2.33, ..., 3.78, 2.93, 2.33],
+            ...,
+            [1.97, 3.02, 1.84, ..., 2.37, 3.87, 2.09],
+            [3.74, 1.6 , 4.01, ..., 4.6 , 4.27, 3.4 ],
+            [2.97, 0.12, 1.33, ..., 3.54, 0.74, 2.59]], shape=(40, 40))
+
+     >>> da.sum().item() == vector["foo"].sum()
+     np.True_
+
+     """
+     df = vector.select(("longitude", "latitude", *agg), copy=False).dataframe
+
+     # Create longitude and latitude coordinates
+     assert spatial_grid_res > 0.01, "spatial_grid_res must be greater than 0.01"
+     west, south, east, north = spatial_bbox
+     lon_coords = np.arange(west, east + 0.01, spatial_grid_res)
+     lat_coords = np.arange(south, north + 0.01, spatial_grid_res)
+     shape = lon_coords.size, lat_coords.size
+
+     # Convert vector to lon-lat grid
+     idx_lon = np.searchsorted(lon_coords, df["longitude"]) - 1
+     idx_lat = np.searchsorted(lat_coords, df["latitude"]) - 1
+
+     df_agg = df.groupby([idx_lon, idx_lat]).agg(agg)
+     index = df_agg.index.get_level_values(0), df_agg.index.get_level_values(1)
+
+     out = xr.Dataset(coords={"longitude": lon_coords, "latitude": lat_coords})
+     for name, col in df_agg.items():
+         arr = np.zeros(shape, dtype=col.dtype)
+         arr[index] = col
+         out[name] = (("longitude", "latitude"), arr)
+
+     return out
+
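The cell assignment above hinges on ``np.searchsorted(coords, values) - 1`` mapping each coordinate to the grid edge at or below it. A standalone sketch of that binning:

.. code-block:: python

    import numpy as np

    lon_coords = np.arange(-10.0, 10.0, 0.5)  # cell edges: -10.0, -9.5, ..., 9.5
    lon = np.array([-9.9, 0.26, 9.74])
    idx = np.searchsorted(lon_coords, lon) - 1
    print(idx)              # [ 0 20 39]
    print(lon_coords[idx])  # [-10.   0.   9.5] -- the edge at or below each value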
+
+ def _handle_time_column(time: pd.Series) -> pd.Series:
+     """Ensure that pd.Series has compatible Timestamps.
+
+     Parameters
+     ----------
+     time : pd.Series
+         Pandas dataframe column labeled "time".
+
+     Returns
+     -------
+     pd.Series
+         Parsed pandas time series.
+
+     Raises
+     ------
+     ValueError
+         When the time series cannot be parsed, or is not timezone naive.
+     """
+     if not hasattr(time, "dt"):
+         time = _parse_pandas_time(time)
+
+     # Translate all times to UTC and then remove timezone.
+     # If the time column contains a timezone, the call to `to_numpy`
+     # will convert it to an array of object.
+     # Note `.tz_convert(None)` automatically converts to UTC first.
+     if time.dt.tz is not None:
+         time = time.dt.tz_convert(None)
+
+     return time
+
+
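A sketch of the timezone handling: an aware series is converted to UTC and made naive by ``.dt.tz_convert(None)``.

.. code-block:: python

    import pandas as pd

    t = pd.Series(pd.to_datetime(["2022-03-01 00:00"]).tz_localize("US/Eastern"))
    print(t.dt.tz_convert(None))  # 2022-03-01 05:00:00, UTC and timezone naive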
+ def _parse_pandas_time(time: pd.Series) -> pd.Series:
+     """Parse pandas dataframe column labeled "time".
+
+     Parameters
+     ----------
+     time : pd.Series
+         Time series
+
+     Returns
+     -------
+     pd.Series
+         Parsed time series
+
+     Raises
+     ------
+     ValueError
+         When series values can't be inferred.
+     """
+     try:
+         # If the time series has object dtype (e.g. strings), try to convert to datetime
+         if time.dtype == "O":
+             return pd.to_datetime(time)
+
+         # If the time series has integer dtype, try to parse it as unix time
+         if np.issubdtype(time.dtype, np.integer):
+             return _parse_unix_time(time)
+
+     except ValueError as exc:
+         msg = (
+             "The 'time' field must hold datetime-like values. "
+             'Try data["time"] = pd.to_datetime(data["time"], unit=...) '
+             "with the appropriate unit."
+         )
+         raise ValueError(msg) from exc
+
+     raise ValueError("Unsupported time format")
+
+
+ def _parse_unix_time(time: list[int] | npt.NDArray[np.int_] | pd.Series) -> pd.Series:
+     """Parse array of int times as unix epoch timestamps.
+
+     Attempts to parse the time in units of "s", "ms", "us", and "ns".
+
+     Parameters
+     ----------
+     time : list[int] | npt.NDArray[np.int_] | pd.Series
+         Sequence of unix timestamps
+
+     Returns
+     -------
+     pd.Series
+         Series of timezone naive pandas Timestamps
+
+     Raises
+     ------
+     ValueError
+         When unable to parse time as unix epoch timestamp
+     """
+     units = "s", "ms", "us", "ns"
+     for unit in units:
+         try:
+             out = pd.to_datetime(time, unit=unit, utc=True)
+         except ValueError:
+             continue
+
+         # make timezone naive
+         out = out.dt.tz_convert(None)
+
+         # make sure time is reasonable
+         if (pd.Timestamp("1980-01-01") <= out).all() and (out <= pd.Timestamp("2030-01-01")).all():
+             return out
+
+     raise ValueError(
+         f"Unable to parse time parameter '{time}' as unix epoch timestamp between "
+         "1980-01-01 and 2030-01-01"
+     )
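A sketch of the probing loop's first, most common hit: integer seconds since the unix epoch parse under ``unit="s"`` and land inside the 1980-2030 sanity window.

.. code-block:: python

    import pandas as pd

    ts = pd.Series([1_646_092_800])  # seconds since the unix epoch
    out = pd.to_datetime(ts, unit="s", utc=True).dt.tz_convert(None)
    print(out[0])  # 2022-03-01 00:00:00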