pycontrails 0.53.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pycontrails might be problematic.
Files changed (109)
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +16 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +641 -0
  5. pycontrails/core/airports.py +226 -0
  6. pycontrails/core/cache.py +881 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +470 -0
  9. pycontrails/core/flight.py +2312 -0
  10. pycontrails/core/flightplan.py +220 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +721 -0
  13. pycontrails/core/met.py +2833 -0
  14. pycontrails/core/met_var.py +307 -0
  15. pycontrails/core/models.py +1181 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cp313-win_amd64.pyd +0 -0
  18. pycontrails/core/vector.py +2191 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_leo_utils/search.py +250 -0
  21. pycontrails/datalib/_leo_utils/static/bq_roi_query.sql +6 -0
  22. pycontrails/datalib/_leo_utils/vis.py +59 -0
  23. pycontrails/datalib/_met_utils/metsource.py +743 -0
  24. pycontrails/datalib/ecmwf/__init__.py +53 -0
  25. pycontrails/datalib/ecmwf/arco_era5.py +527 -0
  26. pycontrails/datalib/ecmwf/common.py +109 -0
  27. pycontrails/datalib/ecmwf/era5.py +538 -0
  28. pycontrails/datalib/ecmwf/era5_model_level.py +482 -0
  29. pycontrails/datalib/ecmwf/hres.py +782 -0
  30. pycontrails/datalib/ecmwf/hres_model_level.py +495 -0
  31. pycontrails/datalib/ecmwf/ifs.py +284 -0
  32. pycontrails/datalib/ecmwf/model_levels.py +79 -0
  33. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  34. pycontrails/datalib/ecmwf/variables.py +256 -0
  35. pycontrails/datalib/gfs/__init__.py +28 -0
  36. pycontrails/datalib/gfs/gfs.py +646 -0
  37. pycontrails/datalib/gfs/variables.py +100 -0
  38. pycontrails/datalib/goes.py +772 -0
  39. pycontrails/datalib/landsat.py +568 -0
  40. pycontrails/datalib/sentinel.py +512 -0
  41. pycontrails/datalib/spire.py +739 -0
  42. pycontrails/ext/bada.py +41 -0
  43. pycontrails/ext/cirium.py +14 -0
  44. pycontrails/ext/empirical_grid.py +140 -0
  45. pycontrails/ext/synthetic_flight.py +426 -0
  46. pycontrails/models/__init__.py +1 -0
  47. pycontrails/models/accf.py +406 -0
  48. pycontrails/models/apcemm/__init__.py +8 -0
  49. pycontrails/models/apcemm/apcemm.py +983 -0
  50. pycontrails/models/apcemm/inputs.py +226 -0
  51. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  52. pycontrails/models/apcemm/utils.py +437 -0
  53. pycontrails/models/cocip/__init__.py +29 -0
  54. pycontrails/models/cocip/cocip.py +2617 -0
  55. pycontrails/models/cocip/cocip_params.py +299 -0
  56. pycontrails/models/cocip/cocip_uncertainty.py +285 -0
  57. pycontrails/models/cocip/contrail_properties.py +1517 -0
  58. pycontrails/models/cocip/output_formats.py +2261 -0
  59. pycontrails/models/cocip/radiative_forcing.py +1262 -0
  60. pycontrails/models/cocip/radiative_heating.py +520 -0
  61. pycontrails/models/cocip/unterstrasser_wake_vortex.py +403 -0
  62. pycontrails/models/cocip/wake_vortex.py +396 -0
  63. pycontrails/models/cocip/wind_shear.py +120 -0
  64. pycontrails/models/cocipgrid/__init__.py +9 -0
  65. pycontrails/models/cocipgrid/cocip_grid.py +2573 -0
  66. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  67. pycontrails/models/dry_advection.py +486 -0
  68. pycontrails/models/emissions/__init__.py +21 -0
  69. pycontrails/models/emissions/black_carbon.py +594 -0
  70. pycontrails/models/emissions/emissions.py +1353 -0
  71. pycontrails/models/emissions/ffm2.py +336 -0
  72. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  73. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  74. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  75. pycontrails/models/humidity_scaling/__init__.py +37 -0
  76. pycontrails/models/humidity_scaling/humidity_scaling.py +1025 -0
  77. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  78. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  79. pycontrails/models/issr.py +210 -0
  80. pycontrails/models/pcc.py +327 -0
  81. pycontrails/models/pcr.py +154 -0
  82. pycontrails/models/ps_model/__init__.py +17 -0
  83. pycontrails/models/ps_model/ps_aircraft_params.py +376 -0
  84. pycontrails/models/ps_model/ps_grid.py +505 -0
  85. pycontrails/models/ps_model/ps_model.py +1017 -0
  86. pycontrails/models/ps_model/ps_operational_limits.py +540 -0
  87. pycontrails/models/ps_model/static/ps-aircraft-params-20240524.csv +68 -0
  88. pycontrails/models/ps_model/static/ps-synonym-list-20240524.csv +103 -0
  89. pycontrails/models/sac.py +459 -0
  90. pycontrails/models/tau_cirrus.py +168 -0
  91. pycontrails/physics/__init__.py +1 -0
  92. pycontrails/physics/constants.py +116 -0
  93. pycontrails/physics/geo.py +989 -0
  94. pycontrails/physics/jet.py +837 -0
  95. pycontrails/physics/thermo.py +451 -0
  96. pycontrails/physics/units.py +472 -0
  97. pycontrails/py.typed +0 -0
  98. pycontrails/utils/__init__.py +1 -0
  99. pycontrails/utils/dependencies.py +66 -0
  100. pycontrails/utils/iteration.py +13 -0
  101. pycontrails/utils/json.py +188 -0
  102. pycontrails/utils/temp.py +50 -0
  103. pycontrails/utils/types.py +165 -0
  104. pycontrails-0.53.0.dist-info/LICENSE +178 -0
  105. pycontrails-0.53.0.dist-info/METADATA +181 -0
  106. pycontrails-0.53.0.dist-info/NOTICE +43 -0
  107. pycontrails-0.53.0.dist-info/RECORD +109 -0
  108. pycontrails-0.53.0.dist-info/WHEEL +5 -0
  109. pycontrails-0.53.0.dist-info/top_level.txt +3 -0
pycontrails/core/vector.py
@@ -0,0 +1,2191 @@
+ """Lightweight data structures for vector paths."""
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ import logging
+ import warnings
+ from collections.abc import Generator, Iterable, Iterator, Sequence
+ from typing import Any, TypeVar, overload
+
+ import numpy as np
+ import numpy.typing as npt
+ import pandas as pd
+ import xarray as xr
+ from overrides import overrides
+
+ from pycontrails.core import coordinates, interpolation
+ from pycontrails.core import met as met_module
+ from pycontrails.physics import units
+ from pycontrails.utils import dependencies
+ from pycontrails.utils import json as json_utils
+
+ logger = logging.getLogger(__name__)
+
+ #: Vector types
+ VectorDatasetType = TypeVar("VectorDatasetType", bound="VectorDataset")
+ GeoVectorDatasetType = TypeVar("GeoVectorDatasetType", bound="GeoVectorDataset")
+
+
+ class AttrDict(dict[str, Any]):
+     """Thin wrapper around dict to warn when setting a key that already exists."""
+
+     def __setitem__(self, k: str, v: Any) -> None:
+         """Warn when setting a key that already holds a different value.
+
+         Parameters
+         ----------
+         k : str
+             Key
+         v : Any
+             Value
+         """
+         if k in self and self[k] is not None and self[k] is not v:
+             warnings.warn(
+                 f"Overwriting attr key `{k}`. Use `.update({k}=...)` to suppress warning."
+             )
+
+         super().__setitem__(k, v)
+
+     def setdefault(self, k: str, default: Any = None) -> Any:
+         """Thin wrapper around ``dict.setdefault``.
+
+         The value is overwritten if the existing value is None.
+
+         Parameters
+         ----------
+         k : str
+             Key
+         default : Any, optional
+             Default value for key ``k``
+
+         Returns
+         -------
+         Any
+             Value at ``k``
+         """
+         ret = self.get(k, None)
+         if ret is not None:
+             return ret
+
+         self[k] = default
+         return default
+
+
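# Illustrative sketch (annotation, not part of the diff above): AttrDict warns only
# when a key already holds a different non-None value; update() assigns silently.
from pycontrails.core.vector import AttrDict

attrs = AttrDict()
attrs["source"] = "ERA5"              # new key: no warning
attrs["source"] = "GFS"               # warns: Overwriting attr key `source`...
attrs.update(source="GFS")            # plain dict.update path: no warning
assert attrs.setdefault("source", "HRES") == "GFS"   # existing value kept
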
+ class VectorDataDict(dict[str, np.ndarray]):
+     """Thin wrapper around ``dict[str, np.ndarray]`` to ensure consistency.
+
+     Parameters
+     ----------
+     data : dict[str, np.ndarray], optional
+         Dictionary input
+     """
+
+     __slots__ = ("_size",)
+
+     #: Length of the data
+     _size: int
+
+     def __init__(self, data: dict[str, np.ndarray] | None = None) -> None:
+         super().__init__(data or {})
+
+         # validate any arrays, first one defines _size attribute
+         for arr in self.values():
+             self._validate_array(arr)
+
+     def __setitem__(self, k: str, v: npt.ArrayLike) -> None:
+         """Set new key-value pair to instance and warn when overwriting existing key.
+
+         This method casts ``v`` to an :class:`numpy.ndarray` and ensures that the array size is
+         consistent with the instance.
+
+         Parameters
+         ----------
+         k : str
+             Key
+         v : np.ndarray
+             Values
+
+         See Also
+         --------
+         :meth:`update`
+         """
+         v = np.asarray(v)  # asarray does NOT copy
+         self._validate_array(v)
+
+         if k in self and len(self[k]) and self[k] is not v:
+             warnings.warn(
+                 f"Overwriting data in key `{k}`. Use `.update({k}=...)` to suppress warning."
+             )
+
+         super().__setitem__(k, v)
+
+     def __delitem__(self, k: str) -> None:
+         super().__delitem__(k)
+
+         # if no data keys remain, remove the _size attribute so it can be redefined
+         if not len(self):
+             del self._size
+
+     def setdefault(self, k: str, default: npt.ArrayLike | None = None) -> np.ndarray:
+         """Thin wrapper around ``dict.setdefault``.
+
+         The main purpose of overriding is to run :meth:`_validate_array()` on set.
+
+         Parameters
+         ----------
+         k : str
+             Key
+         default : npt.ArrayLike, optional
+             Default value for key ``k``
+
+         Returns
+         -------
+         np.ndarray
+             Values at ``k``
+         """
+         ret = self.get(k, None)
+         if ret is not None:
+             return ret
+
+         if default is None:
+             default = np.array([])
+
+         self[k] = default
+         return self[k]
+
+     def update(  # type: ignore[override]
+         self, other: dict[str, npt.ArrayLike] | None = None, **kwargs: npt.ArrayLike
+     ) -> None:
+         """Update values without warning if overwriting.
+
+         This method casts values in ``other`` to :class:`numpy.ndarray` and
+         ensures that the array sizes are consistent with the instance.
+
+         Parameters
+         ----------
+         other : dict[str, npt.ArrayLike] | None, optional
+             Fields to update as dict
+         **kwargs : npt.ArrayLike
+             Fields to update as kwargs
+         """
+         other = other or {}
+         other_arrs = {k: np.asarray(v) for k, v in other.items()}
+         for arr in other_arrs.values():
+             self._validate_array(arr)
+
+         super().update(other_arrs)
+
+         # validate any kwarg arrays
+         kwargs_arr = {k: np.asarray(v) for k, v in kwargs.items()}
+         for arr in kwargs_arr.values():
+             self._validate_array(arr)
+
+         super().update(kwargs_arr)
+
+     def _validate_array(self, arr: np.ndarray) -> None:
+         """Ensure that ``arr`` is compatible with instance.
+
+         Set attribute ``_size`` if it has not yet been defined.
+
+         Parameters
+         ----------
+         arr : np.ndarray
+             Array to validate
+
+         Raises
+         ------
+         ValueError
+             If ``arr`` is not compatible with instance.
+         """
+         if arr.ndim != 1:
+             raise ValueError("All np.arrays must have dimension 1.")
+
+         size = getattr(self, "_size", 0)
+         if size != 0:
+             if arr.size != size:
+                 raise ValueError(f"Incompatible array sizes: {arr.size} and {size}.")
+         else:
+             self._size = arr.size
+
+
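# Illustrative sketch (annotation, not part of the diff above): the first non-empty
# array fixes the dict size; later arrays must match and must be one-dimensional.
import numpy as np
from pycontrails.core.vector import VectorDataDict

d = VectorDataDict({"a": np.array([1.0, 2.0, 3.0])})
d["b"] = [4.0, 5.0, 6.0]   # cast with np.asarray; size 3 matches
try:
    d["c"] = [7.0, 8.0]    # size 2 != 3
except ValueError as exc:
    print(exc)             # Incompatible array sizes: 2 and 3.
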
+ def _empty_vector_dict(keys: Iterable[str]) -> VectorDataDict:
+     """Create instance of VectorDataDict with variables defined by ``keys`` and size 0.
+
+     Parameters
+     ----------
+     keys : Iterable[str]
+         Keys to include in empty VectorDataset instance.
+
+     Returns
+     -------
+     VectorDataDict
+         Empty :class:`VectorDataDict` instance.
+     """
+     keys = keys or ()
+     data = VectorDataDict({key: np.array([]) for key in keys})
+
+     # The default dtype is float64
+     # Time is special and should have a non-default dtype of datetime64[ns]
+     if "time" in data:
+         data.update(time=np.array([], dtype="datetime64[ns]"))
+
+     return data
+
+
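# Illustrative sketch (annotation, not part of the diff above): empty columns default
# to float64, while "time" is special-cased to datetime64[ns].
from pycontrails.core.vector import _empty_vector_dict

d = _empty_vector_dict(("longitude", "time"))
print(d["longitude"].dtype, d["time"].dtype)   # float64 datetime64[ns]
print(len(d["time"]))                          # 0
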
+ class VectorDataset:
+     """Base class to hold 1D arrays of consistent size.
+
+     Parameters
+     ----------
+     data : dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataDict | VectorDataset | None, optional
+         Initial data, by default None
+     attrs : dict[str, Any] | AttrDict, optional
+         Dictionary of attributes, by default None
+     copy : bool, optional
+         Copy data on class creation, by default True
+     **attrs_kwargs : Any
+         Additional attributes passed as keyword arguments
+
+     Raises
+     ------
+     ValueError
+         If "time" variable cannot be converted to numpy array.
+     """
+
+     __slots__ = ("data", "attrs")
+
+     #: Vector data with labels as keys and :class:`numpy.ndarray` as values
+     data: VectorDataDict
+
+     #: Generic dataset attributes
+     attrs: AttrDict
+
+     def __init__(
+         self,
+         data: (
+             dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataDict | VectorDataset | None
+         ) = None,
+         *,
+         attrs: dict[str, Any] | AttrDict | None = None,
+         copy: bool = True,
+         **attrs_kwargs: Any,
+     ) -> None:
+         # Set data
+         # --------
+
+         # Casting from one VectorDataset type to another
+         # e.g., flight = Flight(...); vector = VectorDataset(flight)
+         if isinstance(data, VectorDataset):
+             attrs = {**data.attrs, **(attrs or {})}
+             if copy:
+                 self.data = VectorDataDict({k: v.copy() for k, v in data.data.items()})
+             else:
+                 self.data = data.data
+
+         elif data is None:
+             self.data = VectorDataDict()
+
+         elif isinstance(data, pd.DataFrame):
+             attrs = {**data.attrs, **(attrs or {})}
+
+             # Take extra caution with a time column
+             try:
+                 time = data["time"]
+             except KeyError:
+                 self.data = VectorDataDict({k: v.to_numpy(copy=copy) for k, v in data.items()})
+             else:
+                 time = _handle_time_column(time)
+                 data = {k: v.to_numpy(copy=copy) for k, v in data.items() if k != "time"}
+                 data["time"] = time.to_numpy(copy=copy)
+                 self.data = VectorDataDict(data)
+
+         elif isinstance(data, VectorDataDict):
+             if copy:
+                 self.data = VectorDataDict({k: v.copy() for k, v in data.items()})
+             else:
+                 self.data = data
+
+         # For anything else, we assume it is a dictionary of array-like and attach it
+         else:
+             self.data = VectorDataDict({k: np.array(v, copy=copy) for k, v in data.items()})
+
+         # Set attributes
+         # --------------
+
+         if attrs is None:
+             self.attrs = AttrDict()
+
+         elif isinstance(attrs, AttrDict) and not copy:
+             self.attrs = attrs
+
+         # shallow copy if dict
+         else:
+             self.attrs = AttrDict(attrs.copy())
+
+         # update with kwargs
+         self.attrs.update(attrs_kwargs)
+
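# Illustrative sketch (annotation, not part of the diff above): the constructor
# normalizes a dict, DataFrame, or existing VectorDataset into a VectorDataDict,
# merging DataFrame/instance attrs with any attrs passed explicitly.
import pandas as pd
from pycontrails import VectorDataset

v = VectorDataset({"a": [1.0, 2.0]}, attrs={"source": "demo"})
df = pd.DataFrame({"a": [1.0, 2.0], "time": pd.date_range("2022-01-01", periods=2)})
w = VectorDataset(df, copy=False)   # time column coerced to datetime64[ns]
u = VectorDataset(v)                # cast/copy from another instance; attrs carried over
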
+     # ------------
+     # dict-like methods
+     # ------------
+     def __getitem__(self, key: str) -> np.ndarray:
+         """Get values from :attr:`data`.
+
+         Parameters
+         ----------
+         key : str
+             Key to get from :attr:`data`
+
+         Returns
+         -------
+         np.ndarray
+             Values at :attr:`data[key]`
+         """
+         return self.data[key]
+
+     def get(self, key: str, default_value: Any = None) -> Any:
+         """Get values from :attr:`data` with ``default_value`` if ``key`` not in :attr:`data`.
+
+         Parameters
+         ----------
+         key : str
+             Key to get from :attr:`data`
+         default_value : Any, optional
+             Return ``default_value`` if ``key`` not in :attr:`data`, by default ``None``
+
+         Returns
+         -------
+         Any
+             Values at :attr:`data[key]` or ``default_value``
+         """
+         return self.data.get(key, default_value)
+
+     def __setitem__(self, key: str, values: npt.ArrayLike) -> None:
+         """Set values at key ``key`` on :attr:`data`.
+
+         Parameters
+         ----------
+         key : str
+             Key name in :attr:`data`
+         values : npt.ArrayLike
+             Values to set to :attr:`data`. Array size must be compatible with existing data.
+         """
+         self.data[key] = values
+
+     def __delitem__(self, key: str) -> None:
+         """Delete values at key ``key`` on :attr:`data`.
+
+         Parameters
+         ----------
+         key : str
+             Key name in :attr:`data`
+         """
+         del self.data[key]
+
+     def __iter__(self) -> Iterator[str]:
+         """Iterate over keys in :attr:`data`.
+
+         Returns
+         -------
+         Iterator[str]
+             Keys in :attr:`data`
+         """
+         return iter(self.data)
+
+     def __contains__(self, key: str) -> bool:
+         """Check if key ``key`` is in :attr:`data`.
+
+         Parameters
+         ----------
+         key : str
+             Key to check
+
+         Returns
+         -------
+         bool
+             True if ``key`` is in :attr:`data`, False otherwise
+         """
+         return key in self.data
+
+     def update(
+         self,
+         other: dict[str, npt.ArrayLike] | None = None,
+         **kwargs: npt.ArrayLike,
+     ) -> None:
+         """Update values in :attr:`data` dict without warning if overwriting.
+
+         Parameters
+         ----------
+         other : dict[str, npt.ArrayLike] | None, optional
+             Fields to update as dict
+         **kwargs : npt.ArrayLike
+             Fields to update as kwargs
+         """
+         self.data.update(other, **kwargs)
+
+     def setdefault(self, key: str, default: npt.ArrayLike | None = None) -> np.ndarray:
+         """Shortcut to :meth:`VectorDataDict.setdefault`.
+
+         Parameters
+         ----------
+         key : str
+             Key in :attr:`data` dict.
+         default : npt.ArrayLike, optional
+             Values to use as default, if key is not defined
+
+         Returns
+         -------
+         np.ndarray
+             Values at ``key``
+         """
+         return self.data.setdefault(key, default)
+
+     __marker = object()
+
+     def get_data_or_attr(self, key: str, default: Any = __marker) -> Any:
+         """Get value from :attr:`data` or :attr:`attrs`.
+
+         This method first checks if ``key`` is in :attr:`data` and returns the value if so.
+         If ``key`` is not in :attr:`data`, then this method checks if ``key`` is in :attr:`attrs`
+         and returns the value if so. If ``key`` is not in :attr:`data` or :attr:`attrs`,
+         then the ``default`` value is returned if provided. Otherwise a :class:`KeyError` is raised.
+
+         Parameters
+         ----------
+         key : str
+             Key to get from :attr:`data` or :attr:`attrs`
+         default : Any, optional
+             Default value to return if ``key`` is not in :attr:`data` or :attr:`attrs`.
+
+         Returns
+         -------
+         Any
+             Value at :attr:`data[key]` or :attr:`attrs[key]`
+
+         Raises
+         ------
+         KeyError
+             If ``key`` is not in :attr:`data` or :attr:`attrs` and ``default`` is not provided.
+
+         Examples
+         --------
+         >>> vector = VectorDataset({"a": [1, 2, 3]}, attrs={"b": 4})
+         >>> vector.get_data_or_attr("a")
+         array([1, 2, 3])
+
+         >>> vector.get_data_or_attr("b")
+         4
+
+         >>> vector.get_data_or_attr("c")
+         Traceback (most recent call last):
+         ...
+         KeyError: "Key 'c' not found in data or attrs."
+
+         >>> vector.get_data_or_attr("c", default=5)
+         5
+
+         """
+         marker = self.__marker
+
+         out = self.get(key, marker)
+         if out is not marker:
+             return out
+
+         out = self.attrs.get(key, marker)
+         if out is not marker:
+             return out
+
+         if default is not marker:
+             return default
+
+         msg = f"Key '{key}' not found in data or attrs."
+         raise KeyError(msg)
+
+     # ------------
+
+     def __len__(self) -> int:
+         """Length of each array in :attr:`data`.
+
+         Returns
+         -------
+         int
+             Length of each array in :attr:`data`
+         """
+         return self.size
+
+     def _display_attrs(self) -> dict[str, str]:
+         """Return properties used in ``repr`` constructions.
+
+         Returns
+         -------
+         dict[str, str]
+             Properties used in :meth:`__repr__` and :meth:`_repr_html_`.
+         """
+
+         # Clip any attribute value that is too long
+         def str_clip(v: Any) -> str:
+             s = str(v)
+             if len(s) < 80:
+                 return s
+             return f"{s[:77]}..."
+
+         return {k: str_clip(v) for k, v in self.attrs.items()}
+
+     def __repr__(self) -> str:
+         class_name = self.__class__.__name__
+         n_attrs = len(self.attrs)
+         n_keys = len(self.data)
+         _repr = f"{class_name} [{n_keys} keys x {self.size} length, {n_attrs} attributes]"
+
+         keys = list(self)
+         keys = keys[0:5] + ["..."] + keys[-1:] if len(keys) > 5 else keys
+         _repr += f"\n\tKeys: {', '.join(keys)}"
+
+         attrs = self._display_attrs()
+         _repr += "\n\tAttributes:\n"
+         _repr += "\n".join([f"\t{k:20}{v}" for k, v in attrs.items()])
+
+         return _repr
+
+     def _repr_html_(self) -> str:
+         name = type(self).__name__
+         n_attrs = len(self.attrs)
+         n_keys = len(self.data)
+         attrs = self._display_attrs()
+         size = self.size
+
+         title = f"<b>{name}</b> [{n_keys} keys x {size} length, {n_attrs} attributes]<br/><br/>"
+
+         # matching pd.DataFrame styling
+         header = '<tr style="border-bottom:1px solid silver"><th colspan="2">Attributes</th></tr>'
+         rows = [f"<tr><td>{k}</td><td>{v}</td></tr>" for k, v in attrs.items()]
+         table = f"<table>{header + ''.join(rows)}</table>"
+         return title + table + self.dataframe._repr_html_()
+
+     def __bool__(self) -> bool:
+         """Check if :attr:`data` is non-empty.
+
+         Returns
+         -------
+         bool
+             True if non-empty values are set in :attr:`data`
+         """
+         return self.size > 0
+
+     def __add__(self: VectorDatasetType, other: VectorDatasetType | None) -> VectorDatasetType:
+         """Concatenate two compatible instances of VectorDataset.
+
+         In this context, compatibility means that both have identical :attr:`data` keys.
+
+         This operator behaves similarly to the ``__add__`` method on python lists.
+
+         If self is an empty VectorDataset, return other. This is useful when
+         calling :func:`sum` with an empty initial value.
+
+         Parameters
+         ----------
+         other : VectorDatasetType
+             Other values to concatenate
+
+         Returns
+         -------
+         VectorDatasetType
+             Concatenated values.
+
+         Raises
+         ------
+         KeyError
+             If ``other`` has different :attr:`data` keys than self.
+         """
+         # Short circuit: If other is empty or None, return self. The order here can matter.
+         # We let self (so the left addend) take priority.
+         if not other:
+             return self
+         if not self:
+             return other
+
+         return type(self).sum((self, other))
+
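# Illustrative sketch (annotation, not part of the diff above): because __add__ returns
# the non-empty operand when one side is empty, the built-in sum() accepts an empty
# VectorDataset as its start value.
from pycontrails import VectorDataset

v1 = VectorDataset({"a": [1.0]})
v2 = VectorDataset({"a": [2.0]})
combined = sum([v1, v2], VectorDataset())
print(combined["a"])   # [1. 2.]
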
+     @classmethod
+     def sum(
+         cls: type[VectorDatasetType],
+         vectors: Sequence[VectorDataset],
+         infer_attrs: bool = True,
+         fill_value: float | None = None,
+     ) -> VectorDatasetType:
+         """Sum a list of :class:`VectorDataset` instances.
+
+         Parameters
+         ----------
+         vectors : Sequence[VectorDataset]
+             List of :class:`VectorDataset` instances to concatenate.
+         infer_attrs : bool, optional
+             If True, infer attributes from the first element in the sequence.
+         fill_value : float, optional
+             Fill value to use when concatenating arrays. By default None, which raises
+             an error if incompatible keys are found.
+
+         Returns
+         -------
+         VectorDataset
+             Sum of all instances in ``vectors``.
+
+         Raises
+         ------
+         KeyError
+             If incompatible :attr:`data` keys are found among ``vectors``.
+
+         Examples
+         --------
+         >>> from pycontrails import VectorDataset
+         >>> v1 = VectorDataset({"a": [1, 2, 3], "b": [4, 5, 6]})
+         >>> v2 = VectorDataset({"a": [7, 8, 9], "b": [10, 11, 12]})
+         >>> v3 = VectorDataset({"a": [13, 14, 15], "b": [16, 17, 18]})
+         >>> v = VectorDataset.sum([v1, v2, v3])
+         >>> v.dataframe
+             a   b
+         0   1   4
+         1   2   5
+         2   3   6
+         3   7  10
+         4   8  11
+         5   9  12
+         6  13  16
+         7  14  17
+         8  15  18
+
+         """
+         vectors = [v for v in vectors if v is not None]  # remove None values
+
+         if not vectors:
+             return cls()
+
+         keys: Iterable[str]
+         if fill_value is None:
+             keys = vectors[0].data.keys()
+             for v in vectors[1:]:
+                 if v.data.keys() != keys:
+                     diff = set(v).symmetric_difference(keys)
+                     msg = f"Summands have incompatible keys. Difference: {diff}"
+                     raise KeyError(msg)
+
+         else:
+             keys = set().union(*[v.data.keys() for v in vectors])
+
+         def _get(k: str, v: VectorDataset) -> np.ndarray:
+             # Could also use VectorDataset.get() here, but we want to avoid creating
+             # an unused array if the key is present in the VectorDataset.
+             try:
+                 return v[k]
+             except KeyError:
+                 return np.full(v.size, fill_value)
+
+         def concat(key: str) -> np.ndarray:
+             values = [_get(key, v) for v in vectors]
+             return np.concatenate(values)
+
+         data = {key: concat(key) for key in keys}
+
+         if infer_attrs:
+             return cls(data, attrs=vectors[0].attrs, copy=False)
+         return cls(data, copy=False)
+
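# Illustrative sketch (annotation, not part of the diff above): with fill_value set,
# sum() takes the union of keys and pads missing arrays instead of raising KeyError.
import numpy as np
from pycontrails import VectorDataset

v1 = VectorDataset({"a": [1.0, 2.0]})
v2 = VectorDataset({"a": [3.0], "b": [9.0]})
out = VectorDataset.sum([v1, v2], fill_value=np.nan)
print(out["b"])   # [nan nan  9.]
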
+     def __eq__(self: VectorDatasetType, other: object) -> bool:
+         """Determine if two instances are equal.
+
+         NaN values are considered equal in this comparison.
+
+         Parameters
+         ----------
+         other : object
+             VectorDatasetType to compare with
+
+         Returns
+         -------
+         bool
+             True if both instances have identical :attr:`data` and :attr:`attrs`.
+         """
+         if not isinstance(other, VectorDataset):
+             return False
+
+         # Check attrs
+         if self.attrs.keys() != other.attrs.keys():
+             return False
+
+         for key, val in self.attrs.items():
+             if isinstance(val, np.ndarray):
+                 # equal_nan not supported for non-numeric data
+                 equal_nan = not np.issubdtype(val.dtype, "O")
+                 if not np.array_equal(val, other.attrs[key], equal_nan=equal_nan):
+                     return False
+             elif val != other.attrs[key]:
+                 return False
+
+         # Check data
+         if self.data.keys() != other.data.keys():
+             return False
+
+         for key, val in self.data.items():
+             # equal_nan not supported for non-numeric data (e.g. strings)
+             equal_nan = not np.issubdtype(val.dtype, "O")
+             if not np.array_equal(val, other[key], equal_nan=equal_nan):
+                 return False
+
+         return True
+
+     @property
+     def size(self) -> int:
+         """Length of each array in :attr:`data`.
+
+         Returns
+         -------
+         int
+             Length of each array in :attr:`data`.
+         """
+         return getattr(self.data, "_size", 0)
+
+     @property
+     def shape(self) -> tuple[int]:
+         """Shape of each array in :attr:`data`.
+
+         Returns
+         -------
+         tuple[int]
+             Shape of each array in :attr:`data`.
+         """
+         return (self.size,)
+
+     @property
+     def dataframe(self) -> pd.DataFrame:
+         """Shorthand property to access :meth:`to_dataframe` with ``copy=False``.
+
+         Returns
+         -------
+         pd.DataFrame
+             Equivalent to the output from :meth:`to_dataframe()`
+         """
+         return self.to_dataframe(copy=False)
+
+     @property
+     def hash(self) -> str:
+         """Generate a unique hash for this class instance.
+
+         Returns
+         -------
+         str
+             Unique hash for this class instance (sha1)
+         """
+         _hash = json.dumps(self.data, cls=json_utils.NumpyEncoder)
+         return hashlib.sha1(bytes(_hash, "utf-8")).hexdigest()
+
+     # ------------
+     # Utilities
+     # ------------
+
+     def copy(self: VectorDatasetType, **kwargs: Any) -> VectorDatasetType:
+         """Return a copy of this VectorDatasetType class.
+
+         Parameters
+         ----------
+         **kwargs : Any
+             Additional keyword arguments passed into the constructor of the returned class.
+
+         Returns
+         -------
+         VectorDatasetType
+             Copy of class
+         """
+         return type(self)(data=self.data, attrs=self.attrs, copy=True, **kwargs)
+
+     def select(self: VectorDataset, keys: Iterable[str], copy: bool = True) -> VectorDataset:
+         """Return new class instance only containing specified keys.
+
+         Parameters
+         ----------
+         keys : Iterable[str]
+             An iterable of keys to filter by.
+         copy : bool, optional
+             Copy data on selection.
+             Defaults to True.
+
+         Returns
+         -------
+         VectorDataset
+             VectorDataset containing only data associated to ``keys``.
+             Note that this method always returns a :class:`VectorDataset`, even if
+             the calling class is a proper subclass of :class:`VectorDataset`.
+         """
+         data = {key: self[key] for key in keys}
+         return VectorDataset(data=data, attrs=self.attrs, copy=copy)
+
+     def filter(
+         self: VectorDatasetType, mask: npt.NDArray[np.bool_], copy: bool = True, **kwargs: Any
+     ) -> VectorDatasetType:
+         """Filter :attr:`data` according to a boolean array ``mask``.
+
+         Entries corresponding to ``mask == True`` are kept.
+
+         Parameters
+         ----------
+         mask : npt.NDArray[np.bool_]
+             Boolean array with compatible shape.
+         copy : bool, optional
+             Copy data on filter. Defaults to True. See
+             `numpy best practices <https://numpy.org/doc/stable/user/basics.indexing.html#slicing-and-striding>`_
+             for insight into whether copy is appropriate.
+         **kwargs : Any
+             Additional keyword arguments passed into the constructor of the returned class.
+
+         Returns
+         -------
+         VectorDatasetType
+             Containing filtered data
+
+         Raises
+         ------
+         TypeError
+             If ``mask`` is not a boolean array.
+         """
+         self.data._validate_array(mask)
+         if mask.dtype != bool:
+             raise TypeError("Parameter `mask` must be a boolean array.")
+
+         data = {key: value[mask] for key, value in self.data.items()}
+         return type(self)(data=data, attrs=self.attrs, copy=copy, **kwargs)
+
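# Illustrative sketch (annotation, not part of the diff above): filter() keeps rows
# where the boolean mask is True and preserves the calling (sub)class.
import numpy as np
from pycontrails import VectorDataset

v = VectorDataset({"a": np.array([1.0, 2.0, 3.0])})
kept = v.filter(v["a"] > 1.5)
print(kept["a"])   # [2. 3.]
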
+     def sort(self: VectorDatasetType, by: str | list[str]) -> VectorDatasetType:
+         """Sort data by key(s).
+
+         This method always creates a copy of the data by calling
+         :meth:`pandas.DataFrame.sort_values`.
+
+         Parameters
+         ----------
+         by : str | list[str]
+             Key or list of keys to sort by.
+
+         Returns
+         -------
+         VectorDatasetType
+             Instance with sorted data.
+         """
+         return type(self)(data=self.dataframe.sort_values(by=by), attrs=self.attrs, copy=False)
+
+     def ensure_vars(self, vars: str | Iterable[str], raise_error: bool = True) -> bool:
+         """Ensure that variables exist in :attr:`data` or :attr:`attrs`.
+
+         Parameters
+         ----------
+         vars : str | Iterable[str]
+             A single string variable name or a sequence of string variable names.
+         raise_error : bool, optional
+             Raise KeyError if data does not contain variables.
+             Defaults to True.
+
+         Returns
+         -------
+         bool
+             True if all variables exist.
+             False otherwise.
+
+         Raises
+         ------
+         KeyError
+             Raises when dataset does not contain variable in ``vars``
+         """
+         if isinstance(vars, str):
+             vars = (vars,)
+
+         for v in vars:
+             if v in self or v in self.attrs:
+                 continue
+             if raise_error:
+                 msg = f"{type(self).__name__} instance does not contain data or attr '{v}'"
+                 raise KeyError(msg)
+             return False
+
+         return True
+
+     def broadcast_attrs(
+         self,
+         keys: str | Iterable[str],
+         overwrite: bool = False,
+         raise_error: bool = True,
+     ) -> None:
+         """Attach values from ``keys`` in :attr:`attrs` onto :attr:`data`.
+
+         If possible, use ``dtype = np.float32`` when broadcasting. If not possible,
+         use whatever ``dtype`` is inferred from the data by :func:`numpy.full`.
+
+         Parameters
+         ----------
+         keys : str | Iterable[str]
+             Keys to broadcast
+         overwrite : bool, optional
+             If True, overwrite existing values in :attr:`data`. By default False.
+         raise_error : bool, optional
+             Raise KeyError if :attr:`attrs` does not contain some of ``keys``.
+
+         Raises
+         ------
+         KeyError
+             Not all ``keys`` found in :attr:`attrs`.
+         """
+         if isinstance(keys, str):
+             keys = (keys,)
+
+         # Look up and broadcast each key, skipping (with a warning) rather than
+         # overwriting existing data
+         for key in keys:
+             try:
+                 scalar = self.attrs[key]
+             except KeyError as exc:
+                 if raise_error:
+                     raise KeyError(f"{type(self)} does not contain attr `{key}`") from exc
+                 continue
+
+             if key in self.data and not overwrite:
+                 warnings.warn(
+                     f"Found duplicate key {key} in attrs and data. "
+                     "Set `overwrite=True` parameter to force overwrite."
+                 )
+                 continue
+
+             min_dtype = np.min_scalar_type(scalar)
+             dtype = np.float32 if np.can_cast(min_dtype, np.float32) else None
+             self.data.update({key: np.full(self.size, scalar, dtype=dtype)})
+
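# Illustrative sketch (annotation, not part of the diff above): broadcasting expands a
# scalar attribute into a full-length data column, preferring float32 when it fits.
from pycontrails import VectorDataset

v = VectorDataset({"a": [1.0, 2.0, 3.0]}, attrs={"load_factor": 0.7})
v.broadcast_attrs("load_factor")
print(v["load_factor"].dtype, v["load_factor"])   # float32 [0.7 0.7 0.7]
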
+     def broadcast_numeric_attrs(
+         self, ignore_keys: str | Iterable[str] | None = None, overwrite: bool = False
+     ) -> None:
+         """Attach numeric values in :attr:`attrs` onto :attr:`data`.
+
+         Iterate through values in :attr:`attrs` and attach :class:`float` and
+         :class:`int` values to ``data``.
+
+         This method modifies object in place.
+
+         Parameters
+         ----------
+         ignore_keys : str | Iterable[str], optional
+             Do not broadcast selected keys.
+             Defaults to None.
+         overwrite : bool, optional
+             If True, overwrite existing values in :attr:`data`. By default False.
+         """
+         if ignore_keys is None:
+             ignore_keys = ()
+         elif isinstance(ignore_keys, str):
+             ignore_keys = (ignore_keys,)
+
+         # Somewhat brittle: Only checking for int or float type
+         numeric_attrs = (
+             attr
+             for attr, val in self.attrs.items()
+             if (isinstance(val, int | float | np.number) and attr not in ignore_keys)
+         )
+         self.broadcast_attrs(numeric_attrs, overwrite)
+
+     # ------------
+     # I / O
+     # ------------
+
+     def to_dataframe(self, copy: bool = True) -> pd.DataFrame:
+         """Create :class:`pd.DataFrame` in which each key-value pair in :attr:`data` is a column.
+
+         Data is copied by default. Set the ``copy`` parameter to False to create the
+         DataFrame from views of the underlying arrays where possible.
+
+         Parameters
+         ----------
+         copy : bool, optional
+             Copy data on DataFrame creation.
+
+         Returns
+         -------
+         pd.DataFrame
+             DataFrame holding key-values as columns.
+         """
+         df = pd.DataFrame(self.data, copy=copy)
+         df.attrs = self.attrs
+         return df
+
+     def to_dict(self) -> dict[str, Any]:
+         """Create dictionary with :attr:`data` and :attr:`attrs`.
+
+         If geo-spatial coordinates (e.g. ``"latitude"``, ``"longitude"``, ``"altitude"``)
+         are present, round to a reasonable precision. If a ``"time"`` variable is present,
+         round to unix seconds. When the instance is a :class:`GeoVectorDataset`,
+         disregard any ``"altitude"`` or ``"level"`` coordinate and only include
+         ``"altitude_ft"`` in the output.
+
+         Returns
+         -------
+         dict[str, Any]
+             Dictionary with :attr:`data` and :attr:`attrs`.
+
+         See Also
+         --------
+         :meth:`from_dict`
+
+         Examples
+         --------
+         >>> import pprint
+         >>> import numpy as np
+         >>> from pycontrails import Flight
+         >>> fl = Flight(
+         ...     longitude=[-100, -110],
+         ...     latitude=[40, 50],
+         ...     level=[200, 200],
+         ...     time=[np.datetime64("2020-01-01T09"), np.datetime64("2020-01-01T09:30")],
+         ...     aircraft_type="B737",
+         ... )
+         >>> fl = fl.resample_and_fill("5min")
+         >>> pprint.pprint(fl.to_dict())
+         {'aircraft_type': 'B737',
+          'altitude_ft': [38661.0, 38661.0, 38661.0, 38661.0, 38661.0, 38661.0, 38661.0],
+          'crs': 'EPSG:4326',
+          'latitude': [40.0, 41.724, 43.428, 45.111, 46.769, 48.399, 50.0],
+          'longitude': [-100.0,
+                        -101.441,
+                        -102.959,
+                        -104.563,
+                        -106.267,
+                        -108.076,
+                        -110.0],
+          'time': [1577869200,
+                   1577869500,
+                   1577869800,
+                   1577870100,
+                   1577870400,
+                   1577870700,
+                   1577871000]}
+         """
+         np_encoder = json_utils.NumpyEncoder()
+
+         # round latitude, longitude, and altitude
+         precision = {"longitude": 3, "latitude": 3, "altitude_ft": 0}
+
+         def encode(key: str, obj: Any) -> Any:
+             # Try to handle some pandas objects
+             if hasattr(obj, "to_numpy"):
+                 obj = obj.to_numpy()
+
+             # Convert numpy objects to python objects
+             if isinstance(obj, np.ndarray | np.generic):
+
+                 # round time to unix seconds
+                 if key == "time":
+                     return np_encoder.default(obj.astype("datetime64[s]").astype(int))
+
+                 # round specific keys in precision
+                 try:
+                     d = precision[key]
+                 except KeyError:
+                     return np_encoder.default(obj)
+
+                 return np_encoder.default(obj.astype(float).round(d))
+
+             # Pass through everything else
+             return obj
+
+         data = {k: encode(k, v) for k, v in self.data.items()}
+         attrs = {k: encode(k, v) for k, v in self.attrs.items()}
+
+         # Only include one of the vertical coordinate keys
+         if isinstance(self, GeoVectorDataset):
+             data.pop("altitude", None)
+             data.pop("level", None)
+             if "altitude_ft" not in data:
+                 data["altitude_ft"] = self.altitude_ft.round(precision["altitude_ft"]).tolist()
+
+         # Issue warning if any keys are duplicated
+         common_keys = data.keys() & attrs.keys()
+         if common_keys:
+             warnings.warn(
+                 f"Found duplicate keys in data and attrs: {common_keys}. "
+                 "Data keys will overwrite attrs keys in returned dictionary."
+             )
+
+         return {**attrs, **data}
+
+     @classmethod
+     def create_empty(
+         cls: type[VectorDatasetType],
+         keys: Iterable[str],
+         attrs: dict[str, Any] | None = None,
+         **attrs_kwargs: Any,
+     ) -> VectorDatasetType:
+         """Create instance with variables defined by ``keys`` and size 0.
+
+         If instance requires additional variables to be defined, these keys will automatically
+         be attached to the returned instance.
+
+         Parameters
+         ----------
+         keys : Iterable[str]
+             Keys to include in empty VectorDataset instance.
+         attrs : dict[str, Any] | None, optional
+             Attributes to attach to the instance.
+         **attrs_kwargs : Any
+             Define attributes as keyword arguments.
+
+         Returns
+         -------
+         VectorDatasetType
+             Empty VectorDataset instance.
+         """
+         return cls(data=_empty_vector_dict(keys or set()), attrs=attrs, copy=False, **attrs_kwargs)
+
+     @classmethod
+     def from_dict(
+         cls: type[VectorDatasetType], obj: dict[str, Any], copy: bool = True, **obj_kwargs: Any
+     ) -> VectorDatasetType:
+         """Create instance from dict representation containing data and attrs.
+
+         Parameters
+         ----------
+         obj : dict[str, Any]
+             Dict representation of VectorDataset (e.g. :meth:`to_dict`)
+         copy : bool, optional
+             Passed to :class:`VectorDataset` constructor.
+             Defaults to True.
+         **obj_kwargs : Any
+             Additional properties passed as keyword arguments.
+
+         Returns
+         -------
+         VectorDatasetType
+             VectorDataset instance.
+
+         See Also
+         --------
+         :meth:`to_dict`
+         """
+         data = {}
+         attrs = {}
+
+         for k, v in {**obj, **obj_kwargs}.items():
+             if isinstance(v, list | np.ndarray):
+                 data[k] = v
+             else:
+                 attrs[k] = v
+
+         return cls(data=data, attrs=attrs, copy=copy)
+
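# Illustrative sketch (annotation, not part of the diff above): from_dict() sorts
# list/array values into data and everything else into attrs, so to_dict() round-trips.
from pycontrails import VectorDataset

v = VectorDataset({"a": [1.0, 2.0]}, attrs={"source": "demo"})
w = VectorDataset.from_dict(v.to_dict())
print(list(w), w.attrs["source"])   # ['a'] demo
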
+     def generate_splits(
+         self: VectorDatasetType, n_splits: int, copy: bool = True
+     ) -> Generator[VectorDatasetType, None, None]:
+         """Split instance into ``n_splits`` sub-vectors.
+
+         Parameters
+         ----------
+         n_splits : int
+             Number of splits.
+         copy : bool, optional
+             Passed into :meth:`filter`. Defaults to True. Recommend to keep as True
+             based on `numpy best practices <https://numpy.org/doc/stable/user/basics.indexing.html#slicing-and-striding>`_.
+
+         Returns
+         -------
+         Generator[VectorDatasetType, None, None]
+             Generator of split vectors.
+
+         See Also
+         --------
+         :func:`numpy.array_split`
+         """
+         full_index = np.arange(self.size)
+         index_splits = np.array_split(full_index, n_splits)
+         for index in index_splits:
+             filt = np.zeros(self.size, dtype=bool)
+             filt[index] = True
+             yield self.filter(filt, copy=copy)
+
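# Illustrative sketch (annotation, not part of the diff above): generate_splits() masks
# np.array_split index blocks through filter(), yielding nearly equal-sized pieces.
import numpy as np
from pycontrails import VectorDataset

v = VectorDataset({"a": np.arange(5.0)})
print([list(part["a"]) for part in v.generate_splits(2)])
# [[0.0, 1.0, 2.0], [3.0, 4.0]]
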
+
+ class GeoVectorDataset(VectorDataset):
+     """Base class to hold 1D geospatial arrays of consistent size.
+
+     GeoVectorDataset is required to have geospatial coordinate keys defined
+     in :attr:`required_keys`.
+
+     Expect latitude-longitude CRS in WGS 84.
+     Expect altitude in [:math:`m`].
+     Expect level in [:math:`hPa`].
+
+     Each spatial variable is expected to have "float32" or "float64" ``dtype``.
+     The time variable is expected to have "datetime64[ns]" ``dtype``.
+
+     Use the attribute :attr:`attrs["crs"]` to specify the coordinate reference system
+     using `PROJ <https://proj.org/>`_ or `EPSG <https://epsg.org/home.html>`_ syntax.
+
+     Parameters
+     ----------
+     data : dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataDict | VectorDataset | None, optional
+         Data dictionary or :class:`pandas.DataFrame`.
+         Must include keys/columns ``time``, ``latitude``, ``longitude``, ``altitude`` or ``level``.
+         Keyword arguments for ``time``, ``latitude``, ``longitude``, ``altitude`` or ``level``
+         override ``data`` inputs. Expects ``altitude`` in meters and ``time``
+         as a DatetimeLike (or array that can be processed with :meth:`pd.to_datetime`).
+         Additional waypoint-specific data can be included as additional keys/columns.
+     longitude : npt.ArrayLike, optional
+         Longitude data.
+         Defaults to None.
+     latitude : npt.ArrayLike, optional
+         Latitude data.
+         Defaults to None.
+     altitude : npt.ArrayLike, optional
+         Altitude data, [:math:`m`].
+         Defaults to None.
+     altitude_ft : npt.ArrayLike, optional
+         Altitude data, [:math:`ft`].
+         Defaults to None.
+     level : npt.ArrayLike, optional
+         Level data, [:math:`hPa`].
+         Defaults to None.
+     time : npt.ArrayLike, optional
+         Time data.
+         Expects an array of DatetimeLike values,
+         or array that can be processed with :meth:`pd.to_datetime`.
+         Defaults to None.
+     attrs : dict[str, Any] | AttrDict, optional
+         Additional properties as a dictionary.
+         Defaults to {}.
+     copy : bool, optional
+         Copy data on class creation.
+         Defaults to True.
+     **attrs_kwargs : Any
+         Additional properties passed as keyword arguments.
+
+     Raises
+     ------
+     KeyError
+         Raises if ``data`` input does not contain at least ``time``, ``latitude``, ``longitude``,
+         (``altitude`` or ``level``).
+     """
+
+     __slots__ = ()
+
+     #: Required keys for creating GeoVectorDataset
+     required_keys = "longitude", "latitude", "time"
+
+     #: At least one of these vertical-coordinate keys must also be included
+     vertical_keys = "altitude", "level", "altitude_ft"
+
+     def __init__(
+         self,
+         data: (
+             dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataDict | VectorDataset | None
+         ) = None,
+         *,
+         longitude: npt.ArrayLike | None = None,
+         latitude: npt.ArrayLike | None = None,
+         altitude: npt.ArrayLike | None = None,
+         altitude_ft: npt.ArrayLike | None = None,
+         level: npt.ArrayLike | None = None,
+         time: npt.ArrayLike | None = None,
+         attrs: dict[str, Any] | AttrDict | None = None,
+         copy: bool = True,
+         **attrs_kwargs: Any,
+     ) -> None:
+         # shortcut to `GeoVectorDataset.create_empty` by just using `GeoVectorDataset()`
+         if (
+             data is None
+             and longitude is None
+             and latitude is None
+             and altitude is None
+             and altitude_ft is None
+             and level is None
+             and time is None
+         ):
+             keys = *self.required_keys, "altitude"
+             data = _empty_vector_dict(keys)
+
+         super().__init__(data=data, attrs=attrs, copy=copy, **attrs_kwargs)
+
+         # using the self[key] syntax specifically to run qc on assignment
+         if longitude is not None:
+             self["longitude"] = np.array(longitude, copy=copy)
+
+         if latitude is not None:
+             self["latitude"] = np.array(latitude, copy=copy)
+
+         if time is not None:
+             self["time"] = np.array(time, copy=copy)
+
+         if altitude is not None:
+             self["altitude"] = np.array(altitude, copy=copy)
+             if altitude_ft is not None or level is not None:
+                 warnings.warn(
+                     "Altitude data provided. Ignoring altitude_ft and level inputs.",
+                 )
+         elif altitude_ft is not None:
+             self["altitude_ft"] = np.array(altitude_ft, copy=copy)
+             if level is not None:
+                 warnings.warn(
+                     "Altitude_ft data provided. Ignoring level input.",
+                 )
+         elif level is not None:
+             self["level"] = np.array(level, copy=copy)
+
+         # Confirm that input has required keys
+         if not all(key in self for key in self.required_keys):
+             raise KeyError(
+                 f"{self.__class__.__name__} requires all of the following keys: "
+                 f"{', '.join(self.required_keys)}"
+             )
+
+         # Confirm that input has at least one vertical key
+         if not any(key in self for key in self.vertical_keys):
+             raise KeyError(
+                 f"{self.__class__.__name__} requires at least one of the following keys: "
+                 f"{', '.join(self.vertical_keys)}"
+             )
+
+         # Parse time: If time is not np.datetime64, we try to coerce it to be
+         # by pumping it through pd.to_datetime.
+         time = self["time"]
+         if not np.issubdtype(time.dtype, np.datetime64):
+             warnings.warn("Time data is not np.datetime64. Attempting to coerce.")
+             try:
+                 pd_time = _handle_time_column(pd.Series(self["time"]))
+             except ValueError as e:
+                 raise ValueError("Could not coerce time data to datetime64.") from e
+             np_time = pd_time.to_numpy(dtype="datetime64[ns]")
+             self.update(time=np_time)
+         elif time.dtype != "datetime64[ns]":
+             self.update(time=time.astype("datetime64[ns]"))
+
+         # Ensure spatial coordinates are float32 or float64
+         float_dtype = (np.float32, np.float64)
+         for coord in ("longitude", "latitude", "altitude", "level", "altitude_ft"):
+             try:
+                 arr = self[coord]
+             except KeyError:
+                 continue
+             if arr.dtype not in float_dtype:
+                 self.update({coord: arr.astype(np.float64)})
+
+         # set CRS to "EPSG:4326" by default
+         crs = self.attrs.setdefault("crs", "EPSG:4326")
+
+         if crs == "EPSG:4326":
+             longitude = self["longitude"]
+             if np.any(longitude > 180.0) or np.any(longitude < -180.0):
+                 raise ValueError("EPSG:4326 longitude coordinates should lie between [-180, 180].")
+             latitude = self["latitude"]
+             if np.any(latitude > 90.0) or np.any(latitude < -90.0):
+                 raise ValueError("EPSG:4326 latitude coordinates should lie between [-90, 90].")
+
+     @overrides
+     def _display_attrs(self) -> dict[str, str]:
+         try:
+             time0 = pd.Timestamp(np.nanmin(self["time"]))
+             time1 = pd.Timestamp(np.nanmax(self["time"]))
+             lon0 = round(np.nanmin(self["longitude"]), 3)
+             lon1 = round(np.nanmax(self["longitude"]), 3)
+             lat0 = round(np.nanmin(self["latitude"]), 3)
+             lat1 = round(np.nanmax(self["latitude"]), 3)
+             alt0 = round(np.nanmin(self.altitude), 1)
+             alt1 = round(np.nanmax(self.altitude), 1)
+
+             attrs = {
+                 "time": f"[{time0}, {time1}]",
+                 "longitude": f"[{lon0}, {lon1}]",
+                 "latitude": f"[{lat0}, {lat1}]",
+                 "altitude": f"[{alt0}, {alt1}]",
+             }
+         except Exception:
+             attrs = {}
+
+         attrs.update(super()._display_attrs())
+         return attrs
+
+     @property
+     def level(self) -> npt.NDArray[np.float64]:
+         """Get pressure ``level`` values for points.
+
+         Automatically calculates pressure level with :func:`units.m_to_pl` from the
+         ``altitude`` key.
+
+         Note that if ``level`` key exists in :attr:`data`, the data at the ``level``
+         key will be returned. This allows an override of the default calculation
+         of pressure level from altitude.
+
+         Returns
+         -------
+         npt.NDArray[np.float64]
+             Point pressure level values, [:math:`hPa`]
+         """
+         try:
+             return self["level"]
+         except KeyError:
+             return units.m_to_pl(self.altitude)
+
+     @property
+     def altitude(self) -> npt.NDArray[np.float64]:
+         """Get altitude.
+
+         Automatically calculates altitude with :func:`units.pl_to_m` from the ``level`` key.
+
+         Note that if ``altitude`` key exists in :attr:`data`, the data at the ``altitude``
+         key will be returned. This allows an override of the default calculation of altitude
+         from pressure level.
+
+         Returns
+         -------
+         npt.NDArray[np.float64]
+             Altitude, [:math:`m`]
+         """
+         try:
+             return self["altitude"]
+         except KeyError:
+             # Implementation note: explicitly look for "level" or "altitude_ft" key
+             # here to avoid getting stuck in an infinite loop when .level or .altitude_ft
+             # are called.
+             if (level := self.get("level")) is not None:
+                 return units.pl_to_m(level)
+             return units.ft_to_m(self["altitude_ft"])
+
+     @property
+     def air_pressure(self) -> npt.NDArray[np.float64]:
+         """Get ``air_pressure`` values for points.
+
+         Returns
+         -------
+         npt.NDArray[np.float64]
+             Point air pressure values, [:math:`Pa`]
+         """
+         try:
+             return self["air_pressure"]
+         except KeyError:
+             return 100.0 * self.level
+
+     @property
+     def altitude_ft(self) -> npt.NDArray[np.float64]:
+         """Get altitude in feet.
+
+         Returns
+         -------
+         npt.NDArray[np.float64]
+             Altitude, [:math:`ft`]
+         """
+         try:
+             return self["altitude_ft"]
+         except KeyError:
+             return units.m_to_ft(self.altitude)
+
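# Illustrative sketch (annotation, not part of the diff above): only one vertical
# coordinate is stored; the others are derived on access (altitude -> level ->
# air_pressure), unless an explicit key overrides the calculation.
import numpy as np
from pycontrails import GeoVectorDataset

gv = GeoVectorDataset(
    longitude=[10.0],
    latitude=[50.0],
    altitude=[11000.0],
    time=[np.datetime64("2022-03-01T00")],
)
level = gv.level                                      # computed via units.m_to_pl
assert np.allclose(gv.air_pressure, 100.0 * level)   # hPa -> Pa
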
+     @property
+     def constants(self) -> dict[str, Any]:
+         """Return a dictionary of constant attributes and data values.
+
+         Includes :attr:`attrs` and values from columns in :attr:`data` with a unique
+         value.
+
+         Returns
+         -------
+         dict[str, Any]
+             Properties and their constant values
+         """
+         constants = {}
+
+         # get constant data values that are not nan
+         for key in set(self).difference(self.required_keys):
+             unique = np.unique(self[key])
+             if len(unique) == 1 and (isinstance(unique[0], str) or ~np.isnan(unique[0])):
+                 constants[key] = unique[0]
+
+         # add attributes
+         constants.update(self.attrs)
+
+         # clean string values by removing whitespace
+         # convert any numpy items to python objects
+         def _cleanup(v: Any) -> Any:
+             if isinstance(v, str):
+                 return v.strip()
+             if isinstance(v, np.integer):
+                 return int(v)
+             if isinstance(v, np.floating):
+                 return float(v)
+             if isinstance(v, np.bool_):
+                 return bool(v)
+             return v
+
+         return {k: _cleanup(v) for k, v in constants.items()}
+
+     @property
+     def coords(self) -> dict[str, np.ndarray]:
+         """Get geospatial coordinates for compatibility with MetDataArray.
+
+         Returns
+         -------
+         dict[str, np.ndarray]
+             Dictionary with keys ``longitude``, ``latitude``, ``level``, and ``time``.
+         """
+         return {
+             "longitude": self["longitude"],
+             "latitude": self["latitude"],
+             "level": self.level,
+             "time": self["time"],
+         }
+
+     # ------------
+     # Utilities
+     # ------------
+
+     def transform_crs(
+         self: GeoVectorDatasetType, crs: str, copy: bool = True
+     ) -> GeoVectorDatasetType:
+         """Transform trajectory data from one coordinate reference system (CRS) to another.
+
+         Parameters
+         ----------
+         crs : str
+             Target CRS. Passed into :class:`pyproj.Transformer`. The source CRS
+             is inferred from the :attr:`attrs["crs"]` attribute.
+         copy : bool, optional
+             Copy data on transformation. Defaults to True.
+
+         Returns
+         -------
+         GeoVectorDatasetType
+             Converted dataset with new coordinate reference system.
+             :attr:`attrs["crs"]` reflects the new CRS.
+         """
+         try:
+             import pyproj
+         except ModuleNotFoundError as exc:
+             dependencies.raise_module_not_found_error(
+                 name="GeoVectorDataset.transform_crs method",
+                 package_name="pyproj",
+                 module_not_found_error=exc,
+                 pycontrails_optional_package="pyproj",
+             )
+
+         transformer = pyproj.Transformer.from_crs(self.attrs["crs"], crs, always_xy=True)
+         lon, lat = transformer.transform(self["longitude"], self["latitude"])
+
+         ret = self.copy() if copy else self
+
+         ret.update(longitude=lon, latitude=lat)
+         ret.attrs.update(crs=crs)
+         return ret
+
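# Illustrative sketch (annotation, not part of the diff above): transform_crs requires
# the optional pyproj dependency; "EPSG:32633" (UTM zone 33N) is just an example target.
import numpy as np
from pycontrails import GeoVectorDataset

gv = GeoVectorDataset(
    longitude=[10.0], latitude=[50.0], altitude=[11000.0],
    time=[np.datetime64("2022-03-01T00")],
)
projected = gv.transform_crs("EPSG:32633")   # projected x/y stored in longitude/latitude
print(projected.attrs["crs"])                # EPSG:32633
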
1572
+ def T_isa(self) -> npt.NDArray[np.float64]:
1573
+ """Calculate the ICAO standard atmosphere temperature at each point.
1574
+
1575
+ Returns
1576
+ -------
1577
+ npt.NDArray[np.float64]
1578
+ ISA temperature, [:math:`K`]
1579
+
1580
+ See Also
1581
+ --------
1582
+ :func:`pycontrails.physics.units.m_to_T_isa`
1583
+ """
1584
+ return units.m_to_T_isa(self.altitude)
1585
+
1586
+ # ------------
1587
+ # Met
1588
+ # ------------
1589
+
1590
+ def coords_intersect_met(
1591
+ self, met: met_module.MetDataset | met_module.MetDataArray
1592
+ ) -> npt.NDArray[np.bool_]:
1593
+ """Return boolean mask of data inside the bounding box defined by ``met``.
1594
+
1595
+ Parameters
1596
+ ----------
1597
+ met : MetDataset | MetDataArray
1598
+ MetDataset or MetDataArray to compare.
1599
+
1600
+ Returns
1601
+ -------
1602
+ npt.NDArray[np.bool_]
1603
+ True if point is inside the bounding box defined by ``met``.
1604
+ """
1605
+ indexes = met.indexes
1606
+
1607
+ lat_intersect = coordinates.intersect_domain(
1608
+ indexes["latitude"].to_numpy(),
1609
+ self["latitude"],
1610
+ )
1611
+ lon_intersect = coordinates.intersect_domain(
1612
+ indexes["longitude"].to_numpy(),
1613
+ self["longitude"],
1614
+ )
1615
+ level_intersect = coordinates.intersect_domain(
1616
+ indexes["level"].to_numpy(),
1617
+ self.level,
1618
+ )
1619
+ time_intersect = coordinates.intersect_domain(
1620
+ indexes["time"].to_numpy(),
1621
+ self["time"],
1622
+ )
1623
+
1624
+ return lat_intersect & lon_intersect & level_intersect & time_intersect
1625
+
1626
+    def intersect_met(
+        self,
+        mda: met_module.MetDataArray,
+        *,
+        longitude: npt.NDArray[np.float64] | None = None,
+        latitude: npt.NDArray[np.float64] | None = None,
+        level: npt.NDArray[np.float64] | None = None,
+        time: npt.NDArray[np.datetime64] | None = None,
+        use_indices: bool = False,
+        **interp_kwargs: Any,
+    ) -> npt.NDArray[np.float64]:
+        """Intersect waypoints with MetDataArray.
+
+        Parameters
+        ----------
+        mda : MetDataArray
+            MetDataArray containing a meteorological variable at spatio-temporal coordinates.
+        longitude : npt.NDArray[np.float64], optional
+            Override existing coordinates for met interpolation.
+        latitude : npt.NDArray[np.float64], optional
+            Override existing coordinates for met interpolation.
+        level : npt.NDArray[np.float64], optional
+            Override existing coordinates for met interpolation.
+        time : npt.NDArray[np.datetime64], optional
+            Override existing coordinates for met interpolation.
+        use_indices : bool, optional
+            Experimental. Cache and reuse interpolation indices across calls.
+        **interp_kwargs : Any
+            Additional keyword arguments to pass to :meth:`MetDataArray.interpolate`.
+            Examples include ``method``, ``bounds_error``, and ``fill_value``. If an error such as
+
+            .. code-block:: python
+
+                ValueError: One of the requested xi is out of bounds in dimension 2
+
+            occurs, try calling this function with ``bounds_error=False``. In addition,
+            setting ``fill_value=0.0`` will replace NaN values with 0.0.
+
+        Returns
+        -------
+        npt.NDArray[np.float64]
+            Interpolated values
+
+        Examples
+        --------
+        >>> from datetime import datetime
+        >>> import pandas as pd
+        >>> import numpy as np
+        >>> from pycontrails.datalib.ecmwf import ERA5
+        >>> from pycontrails import Flight
+
+        >>> # Get met data
+        >>> times = (datetime(2022, 3, 1, 0), datetime(2022, 3, 1, 3))
+        >>> variables = ["air_temperature", "specific_humidity"]
+        >>> levels = [300, 250, 200]
+        >>> era5 = ERA5(time=times, variables=variables, pressure_levels=levels)
+        >>> met = era5.open_metdataset()
+
+        >>> # Example flight
+        >>> df = pd.DataFrame()
+        >>> df['longitude'] = np.linspace(0, 50, 10)
+        >>> df['latitude'] = np.linspace(0, 10, 10)
+        >>> df['altitude'] = 11000
+        >>> df['time'] = pd.date_range("2022-03-01T00", "2022-03-01T02", periods=10)
+        >>> fl = Flight(df)
+
+        >>> # Intersect
+        >>> fl.intersect_met(met['air_temperature'], method='nearest')
+        array([231.62969892, 230.72604651, 232.24318771, 231.88338483,
+               231.06429438, 231.59073409, 231.65125393, 231.93064004,
+               232.03344087, 231.65954432])
+
+        >>> fl.intersect_met(met['air_temperature'], method='linear')
+        array([225.77794552, 225.13908414, 226.231218  , 226.31831528,
+               225.56102321, 225.81192149, 226.03192642, 226.22056121,
+               226.03770174, 225.63226188])
+
+        >>> # Interpolate and attach to `Flight` instance
+        >>> for key in met:
+        ...     fl[key] = fl.intersect_met(met[key])
+
+        >>> # Show the final three columns of the dataframe
+        >>> fl.dataframe.iloc[:, -3:].head()
+                         time  air_temperature  specific_humidity
+        0 2022-03-01 00:00:00       225.777946           0.000132
+        1 2022-03-01 00:13:20       225.139084           0.000132
+        2 2022-03-01 00:26:40       226.231218           0.000107
+        3 2022-03-01 00:40:00       226.318315           0.000171
+        4 2022-03-01 00:53:20       225.561022           0.000109
+        """
+        # Override use_indices in certain situations
+        if use_indices:
+            # Often the single_level data we use has time shifted.
+            # Don't allow it for now. We could do something smarter here!
+            if mda.is_single_level:
+                use_indices = False
+
+            # Cannot both override some coordinate AND pass indices.
+            elif any(c is not None for c in (longitude, latitude, level, time)):
+                # Should we warn?! Or is this "convenience"?
+                use_indices = False
+
+        longitude = longitude if longitude is not None else self["longitude"]
+        latitude = latitude if latitude is not None else self["latitude"]
+        level = level if level is not None else self.level
+        time = time if time is not None else self["time"]
+
+        if not use_indices:
+            return mda.interpolate(longitude, latitude, level, time, **interp_kwargs)
+
+        indices = self._get_indices()
+        already_has_indices = indices is not None
+        out, indices = mda.interpolate(
+            longitude,
+            latitude,
+            level,
+            time,
+            indices=indices,
+            return_indices=True,
+            **interp_kwargs,
+        )
+        if not already_has_indices:
+            self._put_indices(indices)
+        return out
+
+    def _put_indices(self, indices: interpolation.RGIArtifacts) -> None:
+        """Set entries of ``indices`` onto underlying :attr:`data`.
+
+        Each entry of ``indices`` is unpacked assuming certain conventions
+        for its structure. A ValueError is raised if these conventions are not
+        satisfied.
+
+        .. versionadded:: 0.26.0
+
+            Experimental
+
+        Parameters
+        ----------
+        indices : interpolation.RGIArtifacts
+            The indices to store.
+        """
+        indices_x, indices_y, indices_z, indices_t = indices.xi_indices
+        distances_x, distances_y, distances_z, distances_t = indices.norm_distances
+        out_of_bounds = indices.out_of_bounds
+
+        self["_indices_x"] = indices_x
+        self["_indices_y"] = indices_y
+        self["_indices_z"] = indices_z
+        self["_indices_t"] = indices_t
+        self["_distances_x"] = distances_x
+        self["_distances_y"] = distances_y
+        self["_distances_z"] = distances_z
+        self["_distances_t"] = distances_t
+        self["_out_of_bounds"] = out_of_bounds
+
+    def _get_indices(self) -> interpolation.RGIArtifacts | None:
+        """Get entries from call to :meth:`_put_indices`.
+
+        .. versionadded:: 0.26.0
+
+            Experimental
+
+        Returns
+        -------
+        interpolation.RGIArtifacts | None
+            Previously cached output of
+            :meth:`scipy.interpolate.RegularGridInterpolator._find_indices`,
+            or None if cached output is not present on instance.
+        """
+        try:
+            indices_x = self["_indices_x"]
+            indices_y = self["_indices_y"]
+            indices_z = self["_indices_z"]
+            indices_t = self["_indices_t"]
+            distances_x = self["_distances_x"]
+            distances_y = self["_distances_y"]
+            distances_z = self["_distances_z"]
+            distances_t = self["_distances_t"]
+            out_of_bounds = self["_out_of_bounds"]
+        except KeyError:
+            return None
+
+        indices = np.asarray([indices_x, indices_y, indices_z, indices_t])
+        distances = np.asarray([distances_x, distances_y, distances_z, distances_t])
+
+        return interpolation.RGIArtifacts(indices, distances, out_of_bounds)
+
+    def _invalidate_indices(self) -> None:
+        """Remove any cached indices from :attr:`data`."""
+        for key in (
+            "_indices_x",
+            "_indices_y",
+            "_indices_z",
+            "_indices_t",
+            "_distances_x",
+            "_distances_y",
+            "_distances_z",
+            "_distances_t",
+            "_out_of_bounds",
+        ):
+            self.data.pop(key, None)
+
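A sketch of the index-caching round trip these private helpers support (``fl`` and ``met`` as in the intersect_met example above; the performance benefit is an expectation from the code path, not a measured claim):

    # First call computes RegularGridInterpolator indices and stores them
    # in the hidden "_indices_*" / "_distances_*" columns via _put_indices.
    t = fl.intersect_met(met["air_temperature"], use_indices=True)
    # Second call reuses the cached indices via _get_indices, skipping the
    # grid search for a variable on the same grid.
    q = fl.intersect_met(met["specific_humidity"], use_indices=True)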
+    @overload
+    def downselect_met(
+        self,
+        met: met_module.MetDataset,
+        *,
+        longitude_buffer: tuple[float, float] = ...,
+        latitude_buffer: tuple[float, float] = ...,
+        level_buffer: tuple[float, float] = ...,
+        time_buffer: tuple[np.timedelta64, np.timedelta64] = ...,
+        copy: bool = ...,
+    ) -> met_module.MetDataset: ...
+
+    @overload
+    def downselect_met(
+        self,
+        met: met_module.MetDataArray,
+        *,
+        longitude_buffer: tuple[float, float] = ...,
+        latitude_buffer: tuple[float, float] = ...,
+        level_buffer: tuple[float, float] = ...,
+        time_buffer: tuple[np.timedelta64, np.timedelta64] = ...,
+        copy: bool = ...,
+    ) -> met_module.MetDataArray: ...
+
+    def downselect_met(
+        self,
+        met: met_module.MetDataType,
+        *,
+        longitude_buffer: tuple[float, float] = (0.0, 0.0),
+        latitude_buffer: tuple[float, float] = (0.0, 0.0),
+        level_buffer: tuple[float, float] = (0.0, 0.0),
+        time_buffer: tuple[np.timedelta64, np.timedelta64] = (
+            np.timedelta64(0, "h"),
+            np.timedelta64(0, "h"),
+        ),
+        copy: bool = True,
+    ) -> met_module.MetDataType:
+        """Downselect ``met`` to encompass a spatiotemporal region of the data.
+
+        Parameters
+        ----------
+        met : MetDataset | MetDataArray
+            MetDataset or MetDataArray to downselect.
+        longitude_buffer : tuple[float, float], optional
+            Extend the longitude domain by ``longitude_buffer[0]`` on the low side
+            and ``longitude_buffer[1]`` on the high side.
+            Units must be the same as class coordinates.
+            Defaults to ``(0, 0)`` degrees.
+        latitude_buffer : tuple[float, float], optional
+            Extend the latitude domain by ``latitude_buffer[0]`` on the low side
+            and ``latitude_buffer[1]`` on the high side.
+            Units must be the same as class coordinates.
+            Defaults to ``(0, 0)`` degrees.
+        level_buffer : tuple[float, float], optional
+            Extend the level domain by ``level_buffer[0]`` on the low side
+            and ``level_buffer[1]`` on the high side.
+            Units must be the same as class coordinates.
+            Defaults to ``(0, 0)`` [:math:`hPa`].
+        time_buffer : tuple[np.timedelta64, np.timedelta64], optional
+            Extend the time domain by ``time_buffer[0]`` on the low side
+            and ``time_buffer[1]`` on the high side.
+            Units must be the same as class coordinates.
+            Defaults to ``(np.timedelta64(0, "h"), np.timedelta64(0, "h"))``.
+        copy : bool
+            Whether the returned object is a copy or a view of the original. True by default.
+
+        Returns
+        -------
+        MetDataset | MetDataArray
+            Downselected MetDataset or MetDataArray.
+        """
+        indexes = met.indexes
+        lon_slice = coordinates.slice_domain(
+            indexes["longitude"].to_numpy(),
+            self["longitude"],
+            buffer=longitude_buffer,
+        )
+        lat_slice = coordinates.slice_domain(
+            indexes["latitude"].to_numpy(),
+            self["latitude"],
+            buffer=latitude_buffer,
+        )
+        time_slice = coordinates.slice_domain(
+            indexes["time"].to_numpy(),
+            self["time"],
+            buffer=time_buffer,
+        )
+
+        # single level data have "level" == [-1]
+        if met.is_single_level:
+            level_slice = slice(None)
+        else:
+            level_slice = coordinates.slice_domain(
+                indexes["level"].to_numpy(),
+                self.level,
+                buffer=level_buffer,
+            )
+        logger.debug("Downselect met at %s %s %s %s", lon_slice, lat_slice, level_slice, time_slice)
+
+        data = met.data.isel(
+            longitude=lon_slice,
+            latitude=lat_slice,
+            level=level_slice,
+            time=time_slice,
+        )
+        return type(met)(data, copy=copy)
+
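A minimal sketch of the intended workflow, trimming ``met`` to the trajectory's bounding box before interpolation (``fl`` and ``met`` as in the intersect_met example above):

    import numpy as np

    # Pad one degree horizontally and one hour in time so that
    # interpolation near the trajectory edges stays in bounds.
    met_small = fl.downselect_met(
        met,
        longitude_buffer=(1.0, 1.0),
        latitude_buffer=(1.0, 1.0),
        time_buffer=(np.timedelta64(1, "h"), np.timedelta64(1, "h")),
    )
    t = fl.intersect_met(met_small["air_temperature"])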
+    # ------------
+    # I / O
+    # ------------
+
+    @classmethod
+    @overrides
+    def create_empty(
+        cls: type[GeoVectorDatasetType],
+        keys: Iterable[str] | None = None,
+        attrs: dict[str, Any] | None = None,
+        **attrs_kwargs: Any,
+    ) -> GeoVectorDatasetType:
+        keys = *cls.required_keys, "altitude", *(keys or ())
+        return super().create_empty(keys, attrs, **attrs_kwargs)
+
+    def to_geojson_points(self) -> dict[str, Any]:
+        """Return dataset as GeoJSON FeatureCollection of Points.
+
+        Each Feature has a ``properties`` attribute that includes ``time`` and
+        any other :attr:`data` fields besides ``latitude``, ``longitude``, and ``altitude``.
+
+        Returns
+        -------
+        dict[str, Any]
+            Python representation of GeoJSON FeatureCollection
+        """
+        return json_utils.dataframe_to_geojson_points(self.dataframe)
+
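Since the return value is a plain dict, it serializes directly with the standard library (``gvd`` is any GeoVectorDataset):

    import json

    with open("waypoints.geojson", "w") as f:
        json.dump(gvd.to_geojson_points(), f)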
+    def to_pseudo_mercator(self: GeoVectorDatasetType, copy: bool = True) -> GeoVectorDatasetType:
+        """Convert data from :attr:`attrs["crs"]` to Pseudo Mercator (EPSG:3857).
+
+        Parameters
+        ----------
+        copy : bool, optional
+            Copy data on transformation.
+            Defaults to True.
+
+        Returns
+        -------
+        GeoVectorDatasetType
+            Converted dataset with :attr:`attrs["crs"]` set to "EPSG:3857".
+        """
+        return self.transform_crs("EPSG:3857", copy=copy)
+
+    # ------------
+    # Vector to grid
+    # ------------
+    def to_lon_lat_grid(
+        self,
+        agg: dict[str, str],
+        *,
+        spatial_bbox: tuple[float, float, float, float] = (-180.0, -90.0, 180.0, 90.0),
+        spatial_grid_res: float = 0.5,
+    ) -> xr.Dataset:
+        """
+        Convert vectors to a longitude-latitude grid.
+
+        See Also
+        --------
+        vector_to_lon_lat_grid
+        """
+        return vector_to_lon_lat_grid(
+            self, agg=agg, spatial_bbox=spatial_bbox, spatial_grid_res=spatial_grid_res
+        )
+
+
+def vector_to_lon_lat_grid(
+    vector: GeoVectorDataset,
+    agg: dict[str, str],
+    *,
+    spatial_bbox: tuple[float, float, float, float] = (-180.0, -90.0, 180.0, 90.0),
+    spatial_grid_res: float = 0.5,
+) -> xr.Dataset:
+    r"""
+    Convert vectors to a longitude-latitude grid.
+
+    Parameters
+    ----------
+    vector : GeoVectorDataset
+        Contains the longitude, latitude and variables for aggregation.
+    agg : dict[str, str]
+        Variable name and the function selected for aggregation,
+        e.g. ``{"segment_length": "sum"}``.
+    spatial_bbox : tuple[float, float, float, float]
+        Spatial bounding box, ``(lon_min, lat_min, lon_max, lat_max)``, [:math:`\deg`].
+        By default, the entire globe is used.
+    spatial_grid_res : float
+        Spatial grid resolution, [:math:`\deg`]
+
+    Returns
+    -------
+    xr.Dataset
+        Aggregated variables in a longitude-latitude grid.
+
+    Examples
+    --------
+    >>> rng = np.random.default_rng(234)
+    >>> vector = GeoVectorDataset(
+    ...     longitude=rng.uniform(-10, 10, 10000),
+    ...     latitude=rng.uniform(-10, 10, 10000),
+    ...     altitude=np.zeros(10000),
+    ...     time=np.zeros(10000).astype("datetime64[ns]"),
+    ... )
+    >>> vector["foo"] = rng.uniform(0, 1, 10000)
+    >>> ds = vector.to_lon_lat_grid({"foo": "sum"}, spatial_bbox=(-10, -10, 9.5, 9.5))
+    >>> da = ds["foo"]
+    >>> da.coords
+    Coordinates:
+      * longitude  (longitude) float64 320B -10.0 -9.5 -9.0 -8.5 ... 8.0 8.5 9.0 9.5
+      * latitude   (latitude) float64 320B -10.0 -9.5 -9.0 -8.5 ... 8.0 8.5 9.0 9.5
+
+    >>> da.values.round(2)
+    array([[2.23, 0.67, 1.29, ..., 4.66, 3.91, 1.93],
+           [4.1 , 3.84, 1.34, ..., 3.24, 1.71, 4.55],
+           [0.78, 3.25, 2.33, ..., 3.78, 2.93, 2.33],
+           ...,
+           [1.97, 3.02, 1.84, ..., 2.37, 3.87, 2.09],
+           [3.74, 1.6 , 4.01, ..., 4.6 , 4.27, 3.4 ],
+           [2.97, 0.12, 1.33, ..., 3.54, 0.74, 2.59]])
+
+    >>> da.sum().item() == vector["foo"].sum()
+    np.True_
+    """
+    df = vector.select(("longitude", "latitude", *agg), copy=False).dataframe
+
+    # Create longitude and latitude coordinates
+    assert spatial_grid_res > 0.01, "spatial_grid_res must be greater than 0.01"
+    west, south, east, north = spatial_bbox
+    lon_coords = np.arange(west, east + 0.01, spatial_grid_res)
+    lat_coords = np.arange(south, north + 0.01, spatial_grid_res)
+    shape = lon_coords.size, lat_coords.size
+
+    # Convert vector to lon-lat grid: bin each point into the grid cell
+    # whose coordinate lies immediately below it
+    idx_lon = np.searchsorted(lon_coords, df["longitude"]) - 1
+    idx_lat = np.searchsorted(lat_coords, df["latitude"]) - 1
+
+    df_agg = df.groupby([idx_lon, idx_lat]).agg(agg)
+    index = df_agg.index.get_level_values(0), df_agg.index.get_level_values(1)
+
+    out = xr.Dataset(coords={"longitude": lon_coords, "latitude": lat_coords})
+    for name, col in df_agg.items():
+        arr = np.zeros(shape, dtype=col.dtype)
+        arr[index] = col
+        out[name] = (("longitude", "latitude"), arr)
+
+    return out
+
+
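The binning above relies on a small ``np.searchsorted`` idiom that is easy to check in isolation; a self-contained toy version:

    import numpy as np

    # Cell lower edges at 0.5 degree resolution. searchsorted returns each
    # value's insertion point in the sorted edges; subtracting 1 gives the
    # index of the cell containing the value.
    edges = np.arange(0.0, 10.0, 0.5)
    values = np.array([0.1, 4.75, 9.9])
    cells = np.searchsorted(edges, values) - 1  # -> array([ 0,  9, 19])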
+def _handle_time_column(time: pd.Series) -> pd.Series:
+    """Ensure that pd.Series has compatible Timestamps.
+
+    Parameters
+    ----------
+    time : pd.Series
+        Pandas dataframe column labeled "time".
+
+    Returns
+    -------
+    pd.Series
+        Parsed pandas time series.
+
+    Raises
+    ------
+    ValueError
+        When the time series can't be parsed as datetime-like values.
+    """
+    if not hasattr(time, "dt"):
+        time = _parse_pandas_time(time)
+
+    # Translate all times to UTC and then remove timezone.
+    # If the time column contains a timezone, the call to `to_numpy`
+    # will convert it to an array of object.
+    # Note `.tz_convert(None)` automatically converts to UTC first.
+    if time.dt.tz is not None:
+        time = time.dt.tz_convert(None)
+
+    return time
+
+
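An illustration of the timezone handling above: a tz-aware series is converted to UTC and then made naive, so the later ``to_numpy`` call yields datetime64 values rather than object arrays:

    import pandas as pd

    s = pd.Series(pd.date_range("2022-03-01", periods=3, tz="US/Eastern"))
    naive_utc = s.dt.tz_convert(None)  # UTC wall times, tz removed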
+def _parse_pandas_time(time: pd.Series) -> pd.Series:
+    """Parse pandas dataframe column labeled "time".
+
+    Parameters
+    ----------
+    time : pd.Series
+        Time series
+
+    Returns
+    -------
+    pd.Series
+        Parsed time series
+
+    Raises
+    ------
+    ValueError
+        When series values can't be inferred.
+    """
+    try:
+        # If the time series is a string, try to convert it to a datetime
+        if time.dtype == "O":
+            return pd.to_datetime(time)
+
+        # If the time is an int, try to parse it as unix time
+        if np.issubdtype(time.dtype, np.integer):
+            return _parse_unix_time(time)
+
+    except ValueError as exc:
+        msg = (
+            "The 'time' field must hold datetime-like values. "
+            'Try data["time"] = pd.to_datetime(data["time"], unit=...) '
+            "with the appropriate unit."
+        )
+        raise ValueError(msg) from exc
+
+    raise ValueError("Unsupported time format")
+
+
+def _parse_unix_time(time: list[int] | npt.NDArray[np.int_] | pd.Series) -> pd.Series:
+    """Parse array of int times as unix epoch timestamps.
+
+    Attempts to parse the time in "s", "ms", "us", and "ns" units.
+
+    Parameters
+    ----------
+    time : list[int] | npt.NDArray[np.int_] | pd.Series
+        Sequence of unix timestamps
+
+    Returns
+    -------
+    pd.Series
+        Series of timezone naive pandas Timestamps
+
+    Raises
+    ------
+    ValueError
+        When unable to parse time as unix epoch timestamp
+    """
+    units = "s", "ms", "us", "ns"
+    for unit in units:
+        try:
+            out = pd.to_datetime(time, unit=unit, utc=True)
+        except ValueError:
+            continue
+
+        # make timezone naive
+        out = out.dt.tz_convert(None)
+
+        # make sure time is reasonable
+        if (pd.Timestamp("1980-01-01") <= out).all() and (out <= pd.Timestamp("2030-01-01")).all():
+            return out
+
+    raise ValueError(
+        f"Unable to parse time parameter '{time}' as unix epoch timestamp between "
+        "1980-01-01 and 2030-01-01"
+    )
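A quick illustration of the unit search (these values are seconds since the epoch, so the "s" parse is the one that lands inside the 1980-2030 sanity window and is returned):

    import pandas as pd

    s = pd.Series([1646092800, 1646096400])  # 2022-03-01T00 and T01 UTC
    parsed = pd.to_datetime(s, unit="s", utc=True).dt.tz_convert(None)
    # Interpreted as "ns", the same integers would map to 1970 timestamps
    # and fail the range check, so that unit would be rejected.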