pycontrails 0.54.0__cp312-cp312-macosx_10_13_x86_64.whl

Files changed (109)
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +16 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +641 -0
  5. pycontrails/core/airports.py +226 -0
  6. pycontrails/core/cache.py +881 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +470 -0
  9. pycontrails/core/flight.py +2314 -0
  10. pycontrails/core/flightplan.py +220 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +721 -0
  13. pycontrails/core/met.py +2833 -0
  14. pycontrails/core/met_var.py +307 -0
  15. pycontrails/core/models.py +1181 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cpython-312-darwin.so +0 -0
  18. pycontrails/core/vector.py +2190 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_leo_utils/search.py +250 -0
  21. pycontrails/datalib/_leo_utils/static/bq_roi_query.sql +6 -0
  22. pycontrails/datalib/_leo_utils/vis.py +59 -0
  23. pycontrails/datalib/_met_utils/metsource.py +746 -0
  24. pycontrails/datalib/ecmwf/__init__.py +73 -0
  25. pycontrails/datalib/ecmwf/arco_era5.py +340 -0
  26. pycontrails/datalib/ecmwf/common.py +109 -0
  27. pycontrails/datalib/ecmwf/era5.py +550 -0
  28. pycontrails/datalib/ecmwf/era5_model_level.py +487 -0
  29. pycontrails/datalib/ecmwf/hres.py +782 -0
  30. pycontrails/datalib/ecmwf/hres_model_level.py +459 -0
  31. pycontrails/datalib/ecmwf/ifs.py +284 -0
  32. pycontrails/datalib/ecmwf/model_levels.py +434 -0
  33. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  34. pycontrails/datalib/ecmwf/variables.py +267 -0
  35. pycontrails/datalib/gfs/__init__.py +28 -0
  36. pycontrails/datalib/gfs/gfs.py +646 -0
  37. pycontrails/datalib/gfs/variables.py +100 -0
  38. pycontrails/datalib/goes.py +772 -0
  39. pycontrails/datalib/landsat.py +569 -0
  40. pycontrails/datalib/sentinel.py +511 -0
  41. pycontrails/datalib/spire.py +739 -0
  42. pycontrails/ext/bada.py +41 -0
  43. pycontrails/ext/cirium.py +14 -0
  44. pycontrails/ext/empirical_grid.py +140 -0
  45. pycontrails/ext/synthetic_flight.py +430 -0
  46. pycontrails/models/__init__.py +1 -0
  47. pycontrails/models/accf.py +406 -0
  48. pycontrails/models/apcemm/__init__.py +8 -0
  49. pycontrails/models/apcemm/apcemm.py +982 -0
  50. pycontrails/models/apcemm/inputs.py +226 -0
  51. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  52. pycontrails/models/apcemm/utils.py +437 -0
  53. pycontrails/models/cocip/__init__.py +29 -0
  54. pycontrails/models/cocip/cocip.py +2616 -0
  55. pycontrails/models/cocip/cocip_params.py +299 -0
  56. pycontrails/models/cocip/cocip_uncertainty.py +285 -0
  57. pycontrails/models/cocip/contrail_properties.py +1517 -0
  58. pycontrails/models/cocip/output_formats.py +2261 -0
  59. pycontrails/models/cocip/radiative_forcing.py +1262 -0
  60. pycontrails/models/cocip/radiative_heating.py +520 -0
  61. pycontrails/models/cocip/unterstrasser_wake_vortex.py +403 -0
  62. pycontrails/models/cocip/wake_vortex.py +396 -0
  63. pycontrails/models/cocip/wind_shear.py +120 -0
  64. pycontrails/models/cocipgrid/__init__.py +9 -0
  65. pycontrails/models/cocipgrid/cocip_grid.py +2573 -0
  66. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  67. pycontrails/models/dry_advection.py +494 -0
  68. pycontrails/models/emissions/__init__.py +21 -0
  69. pycontrails/models/emissions/black_carbon.py +594 -0
  70. pycontrails/models/emissions/emissions.py +1353 -0
  71. pycontrails/models/emissions/ffm2.py +336 -0
  72. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  73. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  74. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  75. pycontrails/models/humidity_scaling/__init__.py +37 -0
  76. pycontrails/models/humidity_scaling/humidity_scaling.py +1025 -0
  77. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  78. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  79. pycontrails/models/issr.py +210 -0
  80. pycontrails/models/pcc.py +327 -0
  81. pycontrails/models/pcr.py +154 -0
  82. pycontrails/models/ps_model/__init__.py +17 -0
  83. pycontrails/models/ps_model/ps_aircraft_params.py +376 -0
  84. pycontrails/models/ps_model/ps_grid.py +505 -0
  85. pycontrails/models/ps_model/ps_model.py +1017 -0
  86. pycontrails/models/ps_model/ps_operational_limits.py +540 -0
  87. pycontrails/models/ps_model/static/ps-aircraft-params-20240524.csv +68 -0
  88. pycontrails/models/ps_model/static/ps-synonym-list-20240524.csv +103 -0
  89. pycontrails/models/sac.py +459 -0
  90. pycontrails/models/tau_cirrus.py +168 -0
  91. pycontrails/physics/__init__.py +1 -0
  92. pycontrails/physics/constants.py +116 -0
  93. pycontrails/physics/geo.py +989 -0
  94. pycontrails/physics/jet.py +837 -0
  95. pycontrails/physics/thermo.py +451 -0
  96. pycontrails/physics/units.py +472 -0
  97. pycontrails/py.typed +0 -0
  98. pycontrails/utils/__init__.py +1 -0
  99. pycontrails/utils/dependencies.py +66 -0
  100. pycontrails/utils/iteration.py +13 -0
  101. pycontrails/utils/json.py +188 -0
  102. pycontrails/utils/temp.py +50 -0
  103. pycontrails/utils/types.py +165 -0
  104. pycontrails-0.54.0.dist-info/LICENSE +178 -0
  105. pycontrails-0.54.0.dist-info/METADATA +179 -0
  106. pycontrails-0.54.0.dist-info/NOTICE +43 -0
  107. pycontrails-0.54.0.dist-info/RECORD +109 -0
  108. pycontrails-0.54.0.dist-info/WHEEL +5 -0
  109. pycontrails-0.54.0.dist-info/top_level.txt +3 -0
pycontrails/core/vector.py
@@ -0,0 +1,2190 @@
"""Lightweight data structures for vector paths."""

from __future__ import annotations

import hashlib
import json
import logging
import warnings
from collections.abc import Generator, Iterable, Iterator, Sequence
from typing import Any, TypeVar, overload

import numpy as np
import numpy.typing as npt
import pandas as pd
import xarray as xr
from overrides import overrides

from pycontrails.core import coordinates, interpolation
from pycontrails.core import met as met_module
from pycontrails.physics import units
from pycontrails.utils import dependencies
from pycontrails.utils import json as json_utils

logger = logging.getLogger(__name__)

#: Vector types
VectorDatasetType = TypeVar("VectorDatasetType", bound="VectorDataset")
GeoVectorDatasetType = TypeVar("GeoVectorDatasetType", bound="GeoVectorDataset")


class AttrDict(dict[str, Any]):
    """Thin wrapper around dict to warn when setting a key that already exists."""

    def __setitem__(self, k: str, v: Any) -> None:
        """Warn when setting a key that already has a value.

        Parameters
        ----------
        k : str
            Key
        v : Any
            Value
        """
        if k in self and self[k] is not None and self[k] is not v:
            warnings.warn(
                f"Overwriting attr key `{k}`. Use `.update({k}=...)` to suppress warning."
            )

        super().__setitem__(k, v)

    def setdefault(self, k: str, default: Any = None) -> Any:
        """Thin wrapper around ``dict.setdefault``.

        Overwrites the existing value if it is None.

        Parameters
        ----------
        k : str
            Key
        default : Any, optional
            Default value for key ``k``

        Returns
        -------
        Any
            Value at ``k``
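
        Examples
        --------
        A minimal illustration of the None-overwriting behavior (hypothetical values):

        >>> d = AttrDict({"a": None})
        >>> d.setdefault("a", 1)
        1
        >>> d["a"]
        1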
        """
        ret = self.get(k, None)
        if ret is not None:
            return ret

        self[k] = default
        return default


class VectorDataDict(dict[str, np.ndarray]):
    """Thin wrapper around ``dict[str, np.ndarray]`` to ensure consistency.

    Parameters
    ----------
    data : dict[str, np.ndarray], optional
        Dictionary input
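
    Examples
    --------
    A short sketch of the size-consistency check (assuming ``numpy`` is imported as ``np``):

    >>> import numpy as np
    >>> d = VectorDataDict({"a": np.array([1.0, 2.0])})
    >>> d["b"] = np.array([3.0])
    Traceback (most recent call last):
    ...
    ValueError: Incompatible array sizes: 1 and 2.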
    """

    __slots__ = ("_size",)

    #: Length of the data
    _size: int

    def __init__(self, data: dict[str, np.ndarray] | None = None) -> None:
        super().__init__(data or {})

        # validate any arrays, first one defines _size attribute
        for arr in self.values():
            self._validate_array(arr)

    def __setitem__(self, k: str, v: npt.ArrayLike) -> None:
        """Set new key-value pair to instance and warn when overwriting existing key.

        This method casts ``v`` to an :class:`numpy.ndarray` and ensures that the array size is
        consistent with the instance.

        Parameters
        ----------
        k : str
            Key
        v : np.ndarray
            Values

        See Also
        --------
        :meth:`update`
        """
        v = np.asarray(v)  # asarray does NOT copy
        self._validate_array(v)

        if k in self and len(self[k]) and self[k] is not v:
            warnings.warn(
                f"Overwriting data in key `{k}`. Use `.update({k}=...)` to suppress warning."
            )

        super().__setitem__(k, v)

    def __delitem__(self, k: str) -> None:
        super().__delitem__(k)

        # if no data keys are left, remove the size attribute
        if not len(self):
            del self._size

    def setdefault(self, k: str, default: npt.ArrayLike | None = None) -> np.ndarray:
        """Thin wrapper around ``dict.setdefault``.

        The main purpose of overriding is to run :meth:`_validate_array()` on set.

        Parameters
        ----------
        k : str
            Key
        default : npt.ArrayLike, optional
            Default value for key ``k``

        Returns
        -------
        np.ndarray
            Values at ``k``
        """
        ret = self.get(k, None)
        if ret is not None:
            return ret

        if default is None:
            default = np.array([])

        self[k] = default
        return self[k]

    def update(  # type: ignore[override]
        self, other: dict[str, npt.ArrayLike] | None = None, **kwargs: npt.ArrayLike
    ) -> None:
        """Update values without warning if overwriting.

        This method casts values in ``other`` to :class:`numpy.ndarray` and
        ensures that the array sizes are consistent with the instance.

        Parameters
        ----------
        other : dict[str, npt.ArrayLike] | None, optional
            Fields to update as dict
        **kwargs : npt.ArrayLike
            Fields to update as kwargs
        """
        other = other or {}
        other_arrs = {k: np.asarray(v) for k, v in other.items()}
        for arr in other_arrs.values():
            self._validate_array(arr)

        super().update(other_arrs)

        # validate any kwarg arrays
        kwargs_arr = {k: np.asarray(v) for k, v in kwargs.items()}
        for arr in kwargs_arr.values():
            self._validate_array(arr)

        super().update(kwargs_arr)

    def _validate_array(self, arr: np.ndarray) -> None:
        """Ensure that `arr` is compatible with instance.

        Set attribute `_size` if it has not yet been defined.

        Parameters
        ----------
        arr : np.ndarray
            Array to validate

        Raises
        ------
        ValueError
            If `arr` is not compatible with instance.
        """
        if arr.ndim != 1:
            raise ValueError("All np.arrays must have dimension 1.")

        size = getattr(self, "_size", 0)
        if size != 0:
            if arr.size != size:
                raise ValueError(f"Incompatible array sizes: {arr.size} and {size}.")
        else:
            self._size = arr.size


def _empty_vector_dict(keys: Iterable[str]) -> VectorDataDict:
    """Create instance of VectorDataDict with variables defined by `keys` and size 0.

    Parameters
    ----------
    keys : Iterable[str]
        Keys to include in empty VectorDataset instance.

    Returns
    -------
    VectorDataDict
        Empty :class:`VectorDataDict` instance.
    """
    keys = keys or ()
    data = VectorDataDict({key: np.array([]) for key in keys})

    # The default dtype is float64
    # Time is special and should have a non-default dtype of datetime64[ns]
    if "time" in data:
        data.update(time=np.array([], dtype="datetime64[ns]"))

    return data


class VectorDataset:
    """Base class to hold 1D arrays of consistent size.

    Parameters
    ----------
    data : dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataDict | VectorDataset | None, optional
        Initial data, by default None
    attrs : dict[str, Any] | AttrDict, optional
        Dictionary of attributes, by default None
    copy : bool, optional
        Copy data on class creation, by default True
    **attrs_kwargs : Any
        Additional attributes passed as keyword arguments

    Raises
    ------
    ValueError
        If "time" variable cannot be converted to numpy array.
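
    Examples
    --------
    An illustrative construction (hypothetical values):

    >>> v = VectorDataset({"a": [1, 2, 3]}, attrs={"source": "synthetic"})
    >>> v.size
    3
    >>> v.attrs["source"]
    'synthetic'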
    """

    __slots__ = ("data", "attrs")

    #: Vector data with labels as keys and :class:`numpy.ndarray` as values
    data: VectorDataDict

    #: Generic dataset attributes
    attrs: AttrDict

    def __init__(
        self,
        data: (
            dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataDict | VectorDataset | None
        ) = None,
        *,
        attrs: dict[str, Any] | AttrDict | None = None,
        copy: bool = True,
        **attrs_kwargs: Any,
    ) -> None:
        # Set data
        # --------

        # Casting from one VectorDataset type to another
        # e.g., flight = Flight(...); vector = VectorDataset(flight)
        if isinstance(data, VectorDataset):
            attrs = {**data.attrs, **(attrs or {})}
            if copy:
                self.data = VectorDataDict({k: v.copy() for k, v in data.data.items()})
            else:
                self.data = data.data

        elif data is None:
            self.data = VectorDataDict()

        elif isinstance(data, pd.DataFrame):
            attrs = {**data.attrs, **(attrs or {})}

            # Take extra caution with a time column
            try:
                time = data["time"]
            except KeyError:
                self.data = VectorDataDict({k: v.to_numpy(copy=copy) for k, v in data.items()})
            else:
                time = _handle_time_column(time)
                data = {k: v.to_numpy(copy=copy) for k, v in data.items() if k != "time"}
                data["time"] = time.to_numpy(copy=copy)
                self.data = VectorDataDict(data)

        elif isinstance(data, VectorDataDict):
            if copy:
                self.data = VectorDataDict({k: v.copy() for k, v in data.items()})
            else:
                self.data = data

        # For anything else, we assume it is a dictionary of array-like and attach it
        else:
            self.data = VectorDataDict({k: np.array(v, copy=copy) for k, v in data.items()})

        # Set attributes
        # --------------

        if attrs is None:
            self.attrs = AttrDict()

        elif isinstance(attrs, AttrDict) and not copy:
            self.attrs = attrs

        # shallow copy if dict
        else:
            self.attrs = AttrDict(attrs.copy())

        # update with kwargs
        self.attrs.update(attrs_kwargs)

    # ------------
    # dict-like methods
    # ------------
    def __getitem__(self, key: str) -> np.ndarray:
        """Get values from :attr:`data`.

        Parameters
        ----------
        key : str
            Key to get from :attr:`data`

        Returns
        -------
        np.ndarray
            Values at :attr:`data[key]`
        """
        return self.data[key]

    def get(self, key: str, default_value: Any = None) -> Any:
        """Get values from :attr:`data` with ``default_value`` if ``key`` not in :attr:`data`.

        Parameters
        ----------
        key : str
            Key to get from :attr:`data`
        default_value : Any, optional
            Return ``default_value`` if `key` not in :attr:`data`, by default ``None``

        Returns
        -------
        Any
            Values at :attr:`data[key]` or ``default_value``
        """
        return self.data.get(key, default_value)

    def __setitem__(self, key: str, values: npt.ArrayLike) -> None:
        """Set values at key `key` on :attr:`data`.

        Parameters
        ----------
        key : str
            Key name in :attr:`data`
        values : npt.ArrayLike
            Values to set to :attr:`data`. Array size must be compatible with existing data.
        """
        self.data[key] = values

    def __delitem__(self, key: str) -> None:
        """Delete values at key `key` on :attr:`data`.

        Parameters
        ----------
        key : str
            Key name in :attr:`data`
        """
        del self.data[key]

    def __iter__(self) -> Iterator[str]:
        """Iterate over keys in :attr:`data`.

        Returns
        -------
        Iterator[str]
            Keys in :attr:`data`
        """
        return iter(self.data)

    def __contains__(self, key: str) -> bool:
        """Check if key `key` is in :attr:`data`.

        Parameters
        ----------
        key : str
            Key to check

        Returns
        -------
        bool
            True if `key` is in :attr:`data`, False otherwise
        """
        return key in self.data

    def update(
        self,
        other: dict[str, npt.ArrayLike] | None = None,
        **kwargs: npt.ArrayLike,
    ) -> None:
        """Update values in :attr:`data` dict without warning if overwriting.

        Parameters
        ----------
        other : dict[str, npt.ArrayLike] | None, optional
            Fields to update as dict
        **kwargs : npt.ArrayLike
            Fields to update as kwargs
        """
        self.data.update(other, **kwargs)

    def setdefault(self, key: str, default: npt.ArrayLike | None = None) -> np.ndarray:
        """Shortcut to :meth:`VectorDataDict.setdefault`.

        Parameters
        ----------
        key : str
            Key in :attr:`data` dict.
        default : npt.ArrayLike, optional
            Values to use as default, if key is not defined

        Returns
        -------
        np.ndarray
            Values at ``key``
        """
        return self.data.setdefault(key, default)

    __marker = object()

    def get_data_or_attr(self, key: str, default: Any = __marker) -> Any:
        """Get value from :attr:`data` or :attr:`attrs`.

        This method first checks if ``key`` is in :attr:`data` and returns the value if so.
        If ``key`` is not in :attr:`data`, then this method checks if ``key`` is in :attr:`attrs`
        and returns the value if so. If ``key`` is not in :attr:`data` or :attr:`attrs`,
        then the ``default`` value is returned if provided. Otherwise a :class:`KeyError` is raised.

        Parameters
        ----------
        key : str
            Key to get from :attr:`data` or :attr:`attrs`
        default : Any, optional
            Default value to return if ``key`` is not in :attr:`data` or :attr:`attrs`.

        Returns
        -------
        Any
            Value at :attr:`data[key]` or :attr:`attrs[key]`

        Raises
        ------
        KeyError
            If ``key`` is not in :attr:`data` or :attr:`attrs` and ``default`` is not provided.

        Examples
        --------
        >>> vector = VectorDataset({"a": [1, 2, 3]}, attrs={"b": 4})
        >>> vector.get_data_or_attr("a")
        array([1, 2, 3])

        >>> vector.get_data_or_attr("b")
        4

        >>> vector.get_data_or_attr("c")
        Traceback (most recent call last):
        ...
        KeyError: "Key 'c' not found in data or attrs."

        >>> vector.get_data_or_attr("c", default=5)
        5

        """
        marker = self.__marker

        out = self.get(key, marker)
        if out is not marker:
            return out

        out = self.attrs.get(key, marker)
        if out is not marker:
            return out

        if default is not marker:
            return default

        msg = f"Key '{key}' not found in data or attrs."
        raise KeyError(msg)

    # ------------

    def __len__(self) -> int:
        """Length of each array in :attr:`data`.

        Returns
        -------
        int
            Length of each array in :attr:`data`
        """
        return self.size

    def _display_attrs(self) -> dict[str, str]:
        """Return properties used in `repr` constructions.

        Returns
        -------
        dict[str, str]
            Properties used in :meth:`__repr__` and :meth:`_repr_html_`.
        """

        # Clip any attribute value that is too long
        def str_clip(v: Any) -> str:
            s = str(v)
            if len(s) < 80:
                return s
            return f"{s[:77]}..."

        return {k: str_clip(v) for k, v in self.attrs.items()}

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        n_attrs = len(self.attrs)
        n_keys = len(self.data)
        _repr = f"{class_name} [{n_keys} keys x {self.size} length, {n_attrs} attributes]"

        keys = list(self)
        keys = keys[0:5] + ["..."] + keys[-1:] if len(keys) > 5 else keys
        _repr += f"\n\tKeys: {', '.join(keys)}"

        attrs = self._display_attrs()
        _repr += "\n\tAttributes:\n"
        _repr += "\n".join([f"\t{k:20}{v}" for k, v in attrs.items()])

        return _repr

    def _repr_html_(self) -> str:
        name = type(self).__name__
        n_attrs = len(self.attrs)
        n_keys = len(self.data)
        attrs = self._display_attrs()
        size = self.size

        title = f"<b>{name}</b> [{n_keys} keys x {size} length, {n_attrs} attributes]<br/><br/>"

        # matching pd.DataFrame styling
        header = '<tr style="border-bottom:1px solid silver"><th colspan="2">Attributes</th></tr>'
        rows = [f"<tr><td>{k}</td><td>{v}</td></tr>" for k, v in attrs.items()]
        table = f"<table>{header + ''.join(rows)}</table>"
        return title + table + self.dataframe._repr_html_()

    def __bool__(self) -> bool:
        """Check if :attr:`data` is nonempty.

        Returns
        -------
        bool
            True if non-empty values are set in :attr:`data`
        """
        return self.size > 0

    def __add__(self: VectorDatasetType, other: VectorDatasetType | None) -> VectorDatasetType:
        """Concatenate two compatible instances of VectorDataset.

        In this context, compatibility means that both have identical :attr:`data` keys.

        This operator behaves similarly to the ``__add__`` method on python lists.

        If self is an empty VectorDataset, return other. This is useful when
        calling :func:`sum` with an empty initial value.

        Parameters
        ----------
        other : VectorDatasetType
            Other values to concatenate

        Returns
        -------
        VectorDatasetType
            Concatenated values.

        Raises
        ------
        KeyError
            If `other` has different :attr:`data` keys than self.
        """
        # Short circuit: If other is empty or None, return self. The order here can matter.
        # We let self (so the left addend) take priority.
        if not other:
            return self
        if not self:
            return other

        return type(self).sum((self, other))

    @classmethod
    def sum(
        cls: type[VectorDatasetType],
        vectors: Sequence[VectorDataset],
        infer_attrs: bool = True,
        fill_value: float | None = None,
    ) -> VectorDatasetType:
        """Sum a list of :class:`VectorDataset` instances.

        Parameters
        ----------
        vectors : Sequence[VectorDataset]
            List of :class:`VectorDataset` instances to concatenate.
        infer_attrs : bool, optional
            If True, infer attributes from the first element in the sequence.
        fill_value : float, optional
            Fill value to use when concatenating arrays. By default None, which raises
            an error if incompatible keys are found.

        Returns
        -------
        VectorDataset
            Sum of all instances in ``vectors``.

        Raises
        ------
        KeyError
            If incompatible :attr:`data` keys are found among ``vectors``.

        Examples
        --------
        >>> from pycontrails import VectorDataset
        >>> v1 = VectorDataset({"a": [1, 2, 3], "b": [4, 5, 6]})
        >>> v2 = VectorDataset({"a": [7, 8, 9], "b": [10, 11, 12]})
        >>> v3 = VectorDataset({"a": [13, 14, 15], "b": [16, 17, 18]})
        >>> v = VectorDataset.sum([v1, v2, v3])
        >>> v.dataframe
            a   b
        0   1   4
        1   2   5
        2   3   6
        3   7  10
        4   8  11
        5   9  12
        6  13  16
        7  14  17
        8  15  18

        """
        vectors = [v for v in vectors if v is not None]  # remove None values

        if not vectors:
            return cls()

        keys: Iterable[str]
        if fill_value is None:
            keys = vectors[0].data.keys()
            for v in vectors[1:]:
                if v.data.keys() != keys:
                    diff = set(v).symmetric_difference(keys)
                    msg = f"Summands have incompatible keys. Difference: {diff}"
                    raise KeyError(msg)

        else:
            keys = set().union(*[v.data.keys() for v in vectors])

        def _get(k: str, v: VectorDataset) -> np.ndarray:
            # Could also use VectorDataset.get() here, but we want to avoid creating
            # an unused array if the key is present in the VectorDataset.
            try:
                return v[k]
            except KeyError:
                return np.full(v.size, fill_value)

        def concat(key: str) -> np.ndarray:
            values = [_get(key, v) for v in vectors]
            return np.concatenate(values)

        data = {key: concat(key) for key in keys}

        if infer_attrs:
            return cls(data, attrs=vectors[0].attrs, copy=False)
        return cls(data, copy=False)

    def __eq__(self: VectorDatasetType, other: object) -> bool:
        """Determine if two instances are equal.

        NaN values are considered equal in this comparison.

        Parameters
        ----------
        other : object
            VectorDatasetType to compare with

        Returns
        -------
        bool
            True if both instances have identical :attr:`data` and :attr:`attrs`.
        """
        if not isinstance(other, VectorDataset):
            return False

        # Check attrs
        if self.attrs.keys() != other.attrs.keys():
            return False

        for key, val in self.attrs.items():
            if isinstance(val, np.ndarray):
                # equal_nan not supported for non-numeric data
                equal_nan = not np.issubdtype(val.dtype, "O")
                if not np.array_equal(val, other.attrs[key], equal_nan=equal_nan):
                    return False
            elif val != other.attrs[key]:
                return False

        # Check data
        if self.data.keys() != other.data.keys():
            return False

        for key, val in self.data.items():
            # equal_nan not supported for non-numeric data (e.g. strings)
            equal_nan = not np.issubdtype(val.dtype, "O")
            if not np.array_equal(val, other[key], equal_nan=equal_nan):
                return False

        return True

    @property
    def size(self) -> int:
        """Length of each array in :attr:`data`.

        Returns
        -------
        int
            Length of each array in :attr:`data`.
        """
        return getattr(self.data, "_size", 0)

    @property
    def shape(self) -> tuple[int]:
        """Shape of each array in :attr:`data`.

        Returns
        -------
        tuple[int]
            Shape of each array in :attr:`data`.
        """
        return (self.size,)

    @property
    def dataframe(self) -> pd.DataFrame:
        """Shorthand property to access :meth:`to_dataframe` with ``copy=False``.

        Returns
        -------
        pd.DataFrame
            Equivalent to the output from :meth:`to_dataframe()`
        """
        return self.to_dataframe(copy=False)

    @property
    def hash(self) -> str:
        """Generate a unique hash for this class instance.

        Returns
        -------
        str
            Unique hash for this instance (sha1)
        """
        _hash = json.dumps(self.data, cls=json_utils.NumpyEncoder)
        return hashlib.sha1(bytes(_hash, "utf-8")).hexdigest()

    # ------------
    # Utilities
    # ------------

    def copy(self: VectorDatasetType, **kwargs: Any) -> VectorDatasetType:
        """Return a copy of this VectorDatasetType class.

        Parameters
        ----------
        **kwargs : Any
            Additional keyword arguments passed into the constructor of the returned class.

        Returns
        -------
        VectorDatasetType
            Copy of class
        """
        return type(self)(data=self.data, attrs=self.attrs, copy=True, **kwargs)

    def select(self: VectorDataset, keys: Iterable[str], copy: bool = True) -> VectorDataset:
        """Return new class instance only containing specified keys.

        Parameters
        ----------
        keys : Iterable[str]
            An iterable of keys to filter by.
        copy : bool, optional
            Copy data on selection.
            Defaults to True.

        Returns
        -------
        VectorDataset
            VectorDataset containing only data associated to ``keys``.
            Note that this method always returns a :class:`VectorDataset`, even if
            the calling class is a proper subclass of :class:`VectorDataset`.
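
        Examples
        --------
        A minimal sketch (hypothetical values):

        >>> v = VectorDataset({"a": [1, 2], "b": [3, 4]})
        >>> list(v.select(["a"]))
        ['a']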
        """
        data = {key: self[key] for key in keys}
        return VectorDataset(data=data, attrs=self.attrs, copy=copy)

    def filter(
        self: VectorDatasetType, mask: npt.NDArray[np.bool_], copy: bool = True, **kwargs: Any
    ) -> VectorDatasetType:
        """Filter :attr:`data` according to a boolean array ``mask``.

        Entries corresponding to ``mask == True`` are kept.

        Parameters
        ----------
        mask : npt.NDArray[np.bool_]
            Boolean array with compatible shape.
        copy : bool, optional
            Copy data on filter. Defaults to True. See
            `numpy best practices <https://numpy.org/doc/stable/user/basics.indexing.html#slicing-and-striding>`_
            for insight into whether copy is appropriate.
        **kwargs : Any
            Additional keyword arguments passed into the constructor of the returned class.

        Returns
        -------
        VectorDatasetType
            Containing filtered data

        Raises
        ------
        TypeError
            If ``mask`` is not a boolean array.
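
        Examples
        --------
        A short illustration (assuming ``numpy`` is imported as ``np``):

        >>> import numpy as np
        >>> v = VectorDataset({"a": np.array([1, 2, 3])})
        >>> v.filter(np.array([True, False, True]))["a"]
        array([1, 3])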
        """
        self.data._validate_array(mask)
        if mask.dtype != bool:
            raise TypeError("Parameter `mask` must be a boolean array.")

        data = {key: value[mask] for key, value in self.data.items()}
        return type(self)(data=data, attrs=self.attrs, copy=copy, **kwargs)

    def sort(self: VectorDatasetType, by: str | list[str]) -> VectorDatasetType:
        """Sort data by key(s).

        This method always creates a copy of the data by calling
        :meth:`pandas.DataFrame.sort_values`.

        Parameters
        ----------
        by : str | list[str]
            Key or list of keys to sort by.

        Returns
        -------
        VectorDatasetType
            Instance with sorted data.
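
        Examples
        --------
        A minimal sketch (hypothetical values):

        >>> v = VectorDataset({"a": [3, 1, 2]})
        >>> v.sort("a")["a"]
        array([1, 2, 3])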
        """
        return type(self)(data=self.dataframe.sort_values(by=by), attrs=self.attrs, copy=False)

    def ensure_vars(self, vars: str | Iterable[str], raise_error: bool = True) -> bool:
        """Ensure variables exist in column of :attr:`data` or :attr:`attrs`.

        Parameters
        ----------
        vars : str | Iterable[str]
            A single string variable name or a sequence of string variable names.
        raise_error : bool, optional
            Raise KeyError if data does not contain variables.
            Defaults to True.

        Returns
        -------
        bool
            True if all variables exist.
            False otherwise.

        Raises
        ------
        KeyError
            Raises when dataset does not contain variable in ``vars``
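
        Examples
        --------
        A minimal sketch (hypothetical values):

        >>> v = VectorDataset({"a": [1.0]}, attrs={"b": 2.0})
        >>> v.ensure_vars(("a", "b"))
        True
        >>> v.ensure_vars("c", raise_error=False)
        False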
        """
        if isinstance(vars, str):
            vars = (vars,)

        for v in vars:
            if v in self or v in self.attrs:
                continue
            if raise_error:
                msg = f"{type(self).__name__} instance does not contain data or attr '{v}'"
                raise KeyError(msg)
            return False

        return True

    def broadcast_attrs(
        self,
        keys: str | Iterable[str],
        overwrite: bool = False,
        raise_error: bool = True,
    ) -> None:
        """Attach values from ``keys`` in :attr:`attrs` onto :attr:`data`.

        If possible, use ``dtype = np.float32`` when broadcasting. If not possible,
        use whatever ``dtype`` is inferred from the data by :func:`numpy.full`.

        Parameters
        ----------
        keys : str | Iterable[str]
            Keys to broadcast
        overwrite : bool, optional
            If True, overwrite existing values in :attr:`data`. By default False.
        raise_error : bool, optional
            Raise KeyError if :attr:`self.attrs` does not contain some of ``keys``.

        Raises
        ------
        KeyError
            Not all ``keys`` found in :attr:`attrs`.
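
        Examples
        --------
        A minimal sketch of broadcasting a scalar attr (hypothetical values; the float32
        cast follows the implementation below):

        >>> v = VectorDataset({"a": [1, 2]}, attrs={"c": 5.0})
        >>> v.broadcast_attrs("c")
        >>> v["c"]
        array([5., 5.], dtype=float32)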
        """
        if isinstance(keys, str):
            keys = (keys,)

        # Validate everything up front to avoid partial broadcasting
        for key in keys:
            try:
                scalar = self.attrs[key]
            except KeyError as exc:
                if raise_error:
                    raise KeyError(f"{type(self)} does not contain attr `{key}`") from exc
                continue

            if key in self.data and not overwrite:
                warnings.warn(
                    f"Found duplicate key {key} in attrs and data. "
                    "Set `overwrite=True` parameter to force overwrite."
                )
                continue

            min_dtype = np.min_scalar_type(scalar)
            dtype = np.float32 if np.can_cast(min_dtype, np.float32) else None
            self.data.update({key: np.full(self.size, scalar, dtype=dtype)})

    def broadcast_numeric_attrs(
        self, ignore_keys: str | Iterable[str] | None = None, overwrite: bool = False
    ) -> None:
        """Attach numeric values in :attr:`attrs` onto :attr:`data`.

        Iterate through values in :attr:`attrs` and attach :class:`float` and
        :class:`int` values to ``data``.

        This method modifies object in place.

        Parameters
        ----------
        ignore_keys : str | Iterable[str], optional
            Do not broadcast selected keys.
            Defaults to None.
        overwrite : bool, optional
            If True, overwrite existing values in :attr:`data`. By default False.
        """
        if ignore_keys is None:
            ignore_keys = ()
        elif isinstance(ignore_keys, str):
            ignore_keys = (ignore_keys,)

        # Somewhat brittle: Only checking for int or float type
        numeric_attrs = (
            attr
            for attr, val in self.attrs.items()
            if (isinstance(val, int | float | np.number) and attr not in ignore_keys)
        )
        self.broadcast_attrs(numeric_attrs, overwrite)

    # ------------
    # I / O
    # ------------

    def to_dataframe(self, copy: bool = True) -> pd.DataFrame:
        """Create :class:`pd.DataFrame` in which each key-value pair in :attr:`data` is a column.

        The underlying data is copied by default.
        Set the ``copy`` parameter to False to share data values on creation.

        Parameters
        ----------
        copy : bool, optional
            Copy data on DataFrame creation.

        Returns
        -------
        pd.DataFrame
            DataFrame holding key-values as columns.
        """
        df = pd.DataFrame(self.data, copy=copy)
        df.attrs = self.attrs
        return df

    def to_dict(self) -> dict[str, Any]:
        """Create dictionary with :attr:`data` and :attr:`attrs`.

        If geo-spatial coordinates (e.g. ``"latitude"``, ``"longitude"``, ``"altitude"``)
        are present, round to a reasonable precision. If a ``"time"`` variable is present,
        round to unix seconds. When the instance is a :class:`GeoVectorDataset`,
        disregard any ``"altitude"`` or ``"level"`` coordinate and only include
        ``"altitude_ft"`` in the output.

        Returns
        -------
        dict[str, Any]
            Dictionary with :attr:`data` and :attr:`attrs`.

        See Also
        --------
        :meth:`from_dict`

        Examples
        --------
        >>> import pprint
        >>> from pycontrails import Flight
        >>> fl = Flight(
        ...     longitude=[-100, -110],
        ...     latitude=[40, 50],
        ...     level=[200, 200],
        ...     time=[np.datetime64("2020-01-01T09"), np.datetime64("2020-01-01T09:30")],
        ...     aircraft_type="B737",
        ... )
        >>> fl = fl.resample_and_fill("5min")
        >>> pprint.pprint(fl.to_dict())
        {'aircraft_type': 'B737',
         'altitude_ft': [38661.0, 38661.0, 38661.0, 38661.0, 38661.0, 38661.0, 38661.0],
         'crs': 'EPSG:4326',
         'latitude': [40.0, 41.724, 43.428, 45.111, 46.769, 48.399, 50.0],
         'longitude': [-100.0,
                       -101.441,
                       -102.959,
                       -104.563,
                       -106.267,
                       -108.076,
                       -110.0],
         'time': [1577869200,
                  1577869500,
                  1577869800,
                  1577870100,
                  1577870400,
                  1577870700,
                  1577871000]}
        """
        np_encoder = json_utils.NumpyEncoder()

        # round latitude, longitude, and altitude
        precision = {"longitude": 3, "latitude": 3, "altitude_ft": 0}

        def encode(key: str, obj: Any) -> Any:
            # Try to handle some pandas objects
            if hasattr(obj, "to_numpy"):
                obj = obj.to_numpy()

            # Convert numpy objects to python objects
            if isinstance(obj, np.ndarray | np.generic):
                # round time to unix seconds
                if key == "time":
                    return np_encoder.default(obj.astype("datetime64[s]").astype(int))

                # round specific keys in precision
                try:
                    d = precision[key]
                except KeyError:
                    return np_encoder.default(obj)

                return np_encoder.default(obj.astype(float).round(d))

            # Pass through everything else
            return obj

        data = {k: encode(k, v) for k, v in self.data.items()}
        attrs = {k: encode(k, v) for k, v in self.attrs.items()}

        # Only include one of the vertical coordinate keys
        if isinstance(self, GeoVectorDataset):
            data.pop("altitude", None)
            data.pop("level", None)
            if "altitude_ft" not in data:
                data["altitude_ft"] = self.altitude_ft.round(precision["altitude_ft"]).tolist()

        # Issue warning if any keys are duplicated
        common_keys = data.keys() & attrs.keys()
        if common_keys:
            warnings.warn(
                f"Found duplicate keys in data and attrs: {common_keys}. "
                "Data keys will overwrite attrs keys in returned dictionary."
            )

        return {**attrs, **data}

    @classmethod
    def create_empty(
        cls: type[VectorDatasetType],
        keys: Iterable[str],
        attrs: dict[str, Any] | None = None,
        **attrs_kwargs: Any,
    ) -> VectorDatasetType:
        """Create instance with variables defined by `keys` and size 0.

        If instance requires additional variables to be defined, these keys will automatically
        be attached to returned instance.

        Parameters
        ----------
        keys : Iterable[str]
            Keys to include in empty VectorDataset instance.
        attrs : dict[str, Any] | None, optional
            Attributes to attach to the instance.
        **attrs_kwargs : Any
            Define attributes as keyword arguments.

        Returns
        -------
        VectorDatasetType
            Empty VectorDataset instance.
        """
        return cls(data=_empty_vector_dict(keys or set()), attrs=attrs, copy=False, **attrs_kwargs)

    @classmethod
    def from_dict(
        cls: type[VectorDatasetType], obj: dict[str, Any], copy: bool = True, **obj_kwargs: Any
    ) -> VectorDatasetType:
        """Create instance from dict representation containing data and attrs.

        Parameters
        ----------
        obj : dict[str, Any]
            Dict representation of VectorDataset (e.g. :meth:`to_dict`)
        copy : bool, optional
            Passed to :class:`VectorDataset` constructor.
            Defaults to True.
        **obj_kwargs : Any
            Additional properties passed as keyword arguments.

        Returns
        -------
        VectorDatasetType
            VectorDataset instance.

        See Also
        --------
        :meth:`to_dict`
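
        Examples
        --------
        A minimal sketch: array-like values become :attr:`data` and scalars become :attr:`attrs`.

        >>> v = VectorDataset.from_dict({"a": [1, 2], "b": "attr"})
        >>> v["a"]
        array([1, 2])
        >>> v.attrs["b"]
        'attr'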
        """
        data = {}
        attrs = {}

        for k, v in {**obj, **obj_kwargs}.items():
            if isinstance(v, list | np.ndarray):
                data[k] = v
            else:
                attrs[k] = v

        return cls(data=data, attrs=attrs, copy=copy)

    def generate_splits(
        self: VectorDatasetType, n_splits: int, copy: bool = True
    ) -> Generator[VectorDatasetType, None, None]:
        """Split instance into ``n_splits`` sub-vectors.

        Parameters
        ----------
        n_splits : int
            Number of splits.
        copy : bool, optional
            Passed into :meth:`filter`. Defaults to True. Recommend to keep as True
            based on `numpy best practices <https://numpy.org/doc/stable/user/basics.indexing.html#slicing-and-striding>`_.

        Returns
        -------
        Generator[VectorDatasetType, None, None]
            Generator of split vectors.

        See Also
        --------
        :func:`numpy.array_split`
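
        Examples
        --------
        A short illustration (assuming ``numpy`` is imported as ``np``):

        >>> import numpy as np
        >>> v = VectorDataset({"a": np.arange(5)})
        >>> [w.size for w in v.generate_splits(2)]
        [3, 2]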
        """
        full_index = np.arange(self.size)
        index_splits = np.array_split(full_index, n_splits)
        for index in index_splits:
            filt = np.zeros(self.size, dtype=bool)
            filt[index] = True
            yield self.filter(filt, copy=copy)


class GeoVectorDataset(VectorDataset):
    """Base class to hold 1D geospatial arrays of consistent size.

    GeoVectorDataset is required to have geospatial coordinate keys defined
    in :attr:`required_keys`.

    Expect latitude-longitude CRS in WGS 84.
    Expect altitude in [:math:`m`].
    Expect level in [:math:`hPa`].

    Each spatial variable is expected to have "float32" or "float64" ``dtype``.
    The time variable is expected to have "datetime64[ns]" ``dtype``.

    Use the attribute :attr:`attrs["crs"]` to specify the coordinate reference system
    using `PROJ <https://proj.org/>`_ or `EPSG <https://epsg.org/home.html>`_ syntax.

    Parameters
    ----------
    data : dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataDict | VectorDataset | None, optional
        Data dictionary or :class:`pandas.DataFrame`.
        Must include keys/columns ``time``, ``latitude``, ``longitude``, ``altitude`` or ``level``.
        Keyword arguments for ``time``, ``latitude``, ``longitude``, ``altitude`` or ``level``
        override ``data`` inputs. Expects ``altitude`` in meters and ``time``
        as a DatetimeLike (or array that can be processed with :meth:`pd.to_datetime`).
        Additional waypoint-specific data can be included as additional keys/columns.
    longitude : npt.ArrayLike, optional
        Longitude data.
        Defaults to None.
    latitude : npt.ArrayLike, optional
        Latitude data.
        Defaults to None.
    altitude : npt.ArrayLike, optional
        Altitude data, [:math:`m`].
        Defaults to None.
    altitude_ft : npt.ArrayLike, optional
        Altitude data, [:math:`ft`].
        Defaults to None.
    level : npt.ArrayLike, optional
        Level data, [:math:`hPa`].
        Defaults to None.
    time : npt.ArrayLike, optional
        Time data.
        Expects an array of DatetimeLike values,
        or array that can be processed with :meth:`pd.to_datetime`.
        Defaults to None.
    attrs : dict[Hashable, Any] | AttrDict, optional
        Additional properties as a dictionary.
        Defaults to {}.
    copy : bool, optional
        Copy data on class creation.
        Defaults to True.
    **attrs_kwargs : Any
        Additional properties passed as keyword arguments.

    Raises
    ------
    KeyError
        Raises if ``data`` input does not contain at least ``time``, ``latitude``, ``longitude``,
        (``altitude`` or ``level``).
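
    Examples
    --------
    An illustrative construction (hypothetical values; assuming ``numpy`` is imported as ``np``):

    >>> import numpy as np
    >>> gv = GeoVectorDataset(
    ...     longitude=[10.0, 20.0],
    ...     latitude=[40.0, 50.0],
    ...     altitude=[11000.0, 11000.0],
    ...     time=[np.datetime64("2020-01-01T00"), np.datetime64("2020-01-01T01")],
    ... )
    >>> gv.size
    2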
    """

    __slots__ = ()

    #: Required keys for creating GeoVectorDataset
    required_keys = "longitude", "latitude", "time"

    #: At least one of these vertical-coordinate keys must also be included
    vertical_keys = "altitude", "level", "altitude_ft"

    def __init__(
        self,
        data: (
            dict[str, npt.ArrayLike] | pd.DataFrame | VectorDataDict | VectorDataset | None
        ) = None,
        *,
        longitude: npt.ArrayLike | None = None,
        latitude: npt.ArrayLike | None = None,
        altitude: npt.ArrayLike | None = None,
        altitude_ft: npt.ArrayLike | None = None,
        level: npt.ArrayLike | None = None,
        time: npt.ArrayLike | None = None,
        attrs: dict[str, Any] | AttrDict | None = None,
        copy: bool = True,
        **attrs_kwargs: Any,
    ) -> None:
        # shortcut to `GeoVectorDataset.create_empty` by just using `GeoVectorDataset()`
        if (
            data is None
            and longitude is None
            and latitude is None
            and altitude is None
            and level is None
            and time is None
        ):
            keys = *self.required_keys, "altitude"
            data = _empty_vector_dict(keys)

        super().__init__(data=data, attrs=attrs, copy=copy, **attrs_kwargs)

        # using the self[key] syntax specifically to run qc on assignment
        if longitude is not None:
            self["longitude"] = np.array(longitude, copy=copy)

        if latitude is not None:
            self["latitude"] = np.array(latitude, copy=copy)

        if time is not None:
            self["time"] = np.array(time, copy=copy)

        if altitude is not None:
            self["altitude"] = np.array(altitude, copy=copy)
            if altitude_ft is not None or level is not None:
                warnings.warn(
                    "Altitude data provided. Ignoring altitude_ft and level inputs.",
                )
        elif altitude_ft is not None:
            self["altitude_ft"] = np.array(altitude_ft, copy=copy)
            if level is not None:
                warnings.warn(
                    "Altitude_ft data provided. Ignoring level input.",
                )
        elif level is not None:
            self["level"] = np.array(level, copy=copy)

        # Confirm that input has required keys
        if not all(key in self for key in self.required_keys):
            raise KeyError(
                f"{self.__class__.__name__} requires all of the following keys: "
                f"{', '.join(self.required_keys)}"
            )

        # Confirm that input has at least one vertical key
        if not any(key in self for key in self.vertical_keys):
            raise KeyError(
                f"{self.__class__.__name__} requires at least one of the following keys: "
                f"{', '.join(self.vertical_keys)}"
            )

        # Parse time: If time is not np.datetime64, we try to coerce it to be
        # by pumping it through pd.to_datetime.
        time = self["time"]
        if not np.issubdtype(time.dtype, np.datetime64):
            warnings.warn("Time data is not np.datetime64. Attempting to coerce.")
            try:
                pd_time = _handle_time_column(pd.Series(self["time"]))
            except ValueError as e:
                raise ValueError("Could not coerce time data to datetime64.") from e
            np_time = pd_time.to_numpy(dtype="datetime64[ns]")
            self.update(time=np_time)
        elif time.dtype != "datetime64[ns]":
            self.update(time=time.astype("datetime64[ns]"))

        # Ensure spatial coordinates are float32 or float64
        float_dtype = (np.float32, np.float64)
        for coord in ("longitude", "latitude", "altitude", "level", "altitude_ft"):
            try:
                arr = self[coord]
            except KeyError:
                continue
            if arr.dtype not in float_dtype:
                self.update({coord: arr.astype(np.float64)})

        # set CRS to "EPSG:4326" by default
        crs = self.attrs.setdefault("crs", "EPSG:4326")

        if crs == "EPSG:4326":
            longitude = self["longitude"]
            if np.any(longitude > 180.0) or np.any(longitude < -180.0):
                raise ValueError("EPSG:4326 longitude coordinates should lie between [-180, 180].")
            latitude = self["latitude"]
            if np.any(latitude > 90.0) or np.any(latitude < -90.0):
                raise ValueError("EPSG:4326 latitude coordinates should lie between [-90, 90].")

    @overrides
    def _display_attrs(self) -> dict[str, str]:
        try:
            time0 = pd.Timestamp(np.nanmin(self["time"]))
            time1 = pd.Timestamp(np.nanmax(self["time"]))
            lon0 = round(np.nanmin(self["longitude"]), 3)
            lon1 = round(np.nanmax(self["longitude"]), 3)
            lat0 = round(np.nanmin(self["latitude"]), 3)
            lat1 = round(np.nanmax(self["latitude"]), 3)
            alt0 = round(np.nanmin(self.altitude), 1)
            alt1 = round(np.nanmax(self.altitude), 1)

            attrs = {
                "time": f"[{time0}, {time1}]",
                "longitude": f"[{lon0}, {lon1}]",
                "latitude": f"[{lat0}, {lat1}]",
                "altitude": f"[{alt0}, {alt1}]",
            }
        except Exception:
            attrs = {}

        attrs.update(super()._display_attrs())
        return attrs

    @property
    def level(self) -> npt.NDArray[np.float64]:
        """Get pressure ``level`` values for points.

        Automatically calculates pressure level using :func:`units.m_to_pl` using ``altitude`` key.

        Note that if ``level`` key exists in :attr:`data`, the data at the ``level``
        key will be returned. This allows an override of the default calculation
        of pressure level from altitude.

        Returns
        -------
        npt.NDArray[np.float64]
            Point pressure level values, [:math:`hPa`]
        """
        try:
            return self["level"]
        except KeyError:
            return units.m_to_pl(self.altitude)

    @property
    def altitude(self) -> npt.NDArray[np.float64]:
        """Get altitude.

        Automatically calculates altitude using :func:`units.pl_to_m` using ``level`` key.

        Note that if ``altitude`` key exists in :attr:`data`, the data at the ``altitude``
        key will be returned. This allows an override of the default calculation of altitude
        from pressure level.

        Returns
        -------
        npt.NDArray[np.float64]
            Altitude, [:math:`m`]
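
        Examples
        --------
        A short illustration of the fallback conversion, assuming ``units.ft_to_m`` applies
        the exact 0.3048 factor:

        >>> import numpy as np
        >>> gv = GeoVectorDataset(
        ...     longitude=[0.0],
        ...     latitude=[0.0],
        ...     altitude_ft=[30000.0],
        ...     time=[np.datetime64("2020-01-01")],
        ... )
        >>> gv.altitude
        array([9144.])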
+ """
1437
+ try:
1438
+ return self["altitude"]
1439
+ except KeyError:
1440
+ # Implementation note: explicitly look for "level" or "altitude_ft" key
1441
+ # here to avoid getting stuck in an infinite loop when .level or .altitude_ft
1442
+ # are called.
1443
+ if (level := self.get("level")) is not None:
1444
+ return units.pl_to_m(level)
1445
+ return units.ft_to_m(self["altitude_ft"])
1446
+
1447
+ @property
1448
+ def air_pressure(self) -> npt.NDArray[np.float64]:
1449
+ """Get ``air_pressure`` values for points.
1450
+
1451
+ Returns
1452
+ -------
1453
+ npt.NDArray[np.float64]
1454
+ Point air pressure values, [:math:`Pa`]
1455
+ """
1456
+ try:
1457
+ return self["air_pressure"]
1458
+ except KeyError:
1459
+ return 100.0 * self.level
1460
+
1461
+ @property
1462
+ def altitude_ft(self) -> npt.NDArray[np.float64]:
1463
+ """Get altitude in feet.
1464
+
1465
+ Returns
1466
+ -------
1467
+ npt.NDArray[np.float64]
1468
+ Altitude, [:math:`ft`]
1469
+ """
1470
+ try:
1471
+ return self["altitude_ft"]
1472
+ except KeyError:
1473
+ return units.m_to_ft(self.altitude)
1474
+
1475
+ @property
1476
+ def constants(self) -> dict[str, Any]:
1477
+ """Return a dictionary of constant attributes and data values.
1478
+
1479
+ Includes :attr:`attrs` and values from columns in :attr:`data` with a unique
1480
+ value.
1481
+
1482
+ Returns
1483
+ -------
1484
+ dict[str, Any]
1485
+ Properties and their constant values
1486
+ """
1487
+ constants = {}
1488
+
1489
+ # get constant data values that are not nan
1490
+ for key in set(self).difference(self.required_keys):
1491
+ unique = np.unique(self[key])
1492
+ if len(unique) == 1 and (isinstance(unique[0], str) or ~np.isnan(unique[0])):
1493
+ constants[key] = unique[0]
1494
+
1495
+ # add attributes
1496
+ constants.update(self.attrs)
1497
+
1498
+ # clean strings values by removing whitespace
1499
+ # convert any numpy items to python objects
1500
+ def _cleanup(v: Any) -> Any:
1501
+ if isinstance(v, str):
1502
+ return v.strip()
1503
+ if isinstance(v, np.integer):
1504
+ return int(v)
1505
+ if isinstance(v, np.floating):
1506
+ return float(v)
1507
+ if isinstance(v, np.bool_):
1508
+ return bool(v)
1509
+ return v
1510
+
1511
+ return {k: _cleanup(v) for k, v in constants.items()}
1512
+
1513
+ @property
1514
+ def coords(self) -> dict[str, np.ndarray]:
1515
+ """Get geospatial coordinates for compatibility with MetDataArray.
1516
+
1517
+ Returns
1518
+ -------
1519
+ pd.DataFrame
1520
+ :class:`pd.DataFrame` with columns `longitude`, `latitude`, `level`, and `time`.
1521
+ """
1522
+ return {
1523
+ "longitude": self["longitude"],
1524
+ "latitude": self["latitude"],
1525
+ "level": self.level,
1526
+ "time": self["time"],
1527
+ }
1528
+
1529
+ # ------------
1530
+ # Utilities
1531
+ # ------------
1532
+
1533
+ def transform_crs(
1534
+ self: GeoVectorDatasetType, crs: str, copy: bool = True
1535
+ ) -> GeoVectorDatasetType:
1536
+ """Transform trajectory data from one coordinate reference system (CRS) to another.
1537
+
1538
+ Parameters
1539
+ ----------
1540
+ crs : str
1541
+ Target CRS. Passed into to :class:`pyproj.Transformer`. The source CRS
1542
+ is inferred from the :attr:`attrs["crs"]` attribute.
1543
+ copy : bool, optional
1544
+ Copy data on transformation. Defaults to True.
1545
+
1546
+ Returns
1547
+ -------
1548
+ GeoVectorDatasetType
1549
+ Converted dataset with new coordinate reference system.
1550
+ :attr:`attrs["crs"]` reflects new crs.
1551
+ """
1552
+ try:
1553
+ import pyproj
1554
+ except ModuleNotFoundError as exc:
1555
+ dependencies.raise_module_not_found_error(
1556
+ name="GeoVectorDataset.transform_crs method",
1557
+ package_name="pyproj",
1558
+ module_not_found_error=exc,
1559
+ pycontrails_optional_package="pyproj",
1560
+ )
1561
+
1562
+ transformer = pyproj.Transformer.from_crs(self.attrs["crs"], crs, always_xy=True)
1563
+ lon, lat = transformer.transform(self["longitude"], self["latitude"])
1564
+
1565
+ ret = self.copy() if copy else self
1566
+
1567
+ ret.update(longitude=lon, latitude=lat)
1568
+ ret.attrs.update(crs=crs)
1569
+ return ret
1570
+
1571
+ def T_isa(self) -> npt.NDArray[np.float64]:
1572
+ """Calculate the ICAO standard atmosphere temperature at each point.
1573
+
1574
+ Returns
1575
+ -------
1576
+ npt.NDArray[np.float64]
1577
+ ISA temperature, [:math:`K`]
1578
+
1579
+ See Also
1580
+ --------
1581
+ :func:`pycontrails.physics.units.m_to_T_isa`
1582
+ """
1583
+ return units.m_to_T_isa(self.altitude)
1584
+
1585
+ # ------------
1586
+ # Met
1587
+ # ------------
1588
+
1589
+ def coords_intersect_met(
1590
+ self, met: met_module.MetDataset | met_module.MetDataArray
1591
+ ) -> npt.NDArray[np.bool_]:
1592
+ """Return boolean mask of data inside the bounding box defined by ``met``.
1593
+
1594
+ Parameters
1595
+ ----------
1596
+ met : MetDataset | MetDataArray
1597
+ MetDataset or MetDataArray to compare.
1598
+
1599
+ Returns
1600
+ -------
1601
+ npt.NDArray[np.bool_]
1602
+ True if point is inside the bounding box defined by ``met``.
+         """
+         indexes = met.indexes
+
+         lat_intersect = coordinates.intersect_domain(
+             indexes["latitude"].to_numpy(),
+             self["latitude"],
+         )
+         lon_intersect = coordinates.intersect_domain(
+             indexes["longitude"].to_numpy(),
+             self["longitude"],
+         )
+         level_intersect = coordinates.intersect_domain(
+             indexes["level"].to_numpy(),
+             self.level,
+         )
+         time_intersect = coordinates.intersect_domain(
+             indexes["time"].to_numpy(),
+             self["time"],
+         )
+
+         return lat_intersect & lon_intersect & level_intersect & time_intersect
+
+     def intersect_met(
+         self,
+         mda: met_module.MetDataArray,
+         *,
+         longitude: npt.NDArray[np.float64] | None = None,
+         latitude: npt.NDArray[np.float64] | None = None,
+         level: npt.NDArray[np.float64] | None = None,
+         time: npt.NDArray[np.datetime64] | None = None,
+         use_indices: bool = False,
+         **interp_kwargs: Any,
+     ) -> npt.NDArray[np.float64]:
+         """Intersect waypoints with MetDataArray.
+
+         Parameters
+         ----------
+         mda : MetDataArray
+             MetDataArray containing a meteorological variable at spatio-temporal coordinates.
+         longitude : npt.NDArray[np.float64], optional
+             Override existing coordinates for met interpolation.
+         latitude : npt.NDArray[np.float64], optional
+             Override existing coordinates for met interpolation.
+         level : npt.NDArray[np.float64], optional
+             Override existing coordinates for met interpolation.
+         time : npt.NDArray[np.datetime64], optional
+             Override existing coordinates for met interpolation.
+         use_indices : bool, optional
+             Experimental. If True, cache the interpolation indices on this
+             instance and reuse them in subsequent calls against met data on
+             the same grid.
+         **interp_kwargs : Any
+             Additional keyword arguments to pass to :meth:`MetDataArray.interpolate`.
+             Examples include ``method``, ``bounds_error``, and ``fill_value``. If an error such as
+
+             .. code-block:: python
+
+                 ValueError: One of the requested xi is out of bounds in dimension 2
+
+             occurs, try calling this function with ``bounds_error=False``. In addition,
+             setting ``fill_value=0.0`` will replace NaN values with 0.0.
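+
+             For instance (an illustrative sketch):
+
+             .. code-block:: python
+
+                 # Treat out-of-bounds waypoints as 0.0 rather than raising
+                 values = fl.intersect_met(
+                     met["air_temperature"], bounds_error=False, fill_value=0.0
+                 )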
+
+         Returns
+         -------
+         npt.NDArray[np.float64]
+             Interpolated values
+
+         Examples
+         --------
+         >>> from datetime import datetime
+         >>> import pandas as pd
+         >>> import numpy as np
+         >>> from pycontrails.datalib.ecmwf import ERA5
+         >>> from pycontrails import Flight
+
+         >>> # Get met data
+         >>> times = (datetime(2022, 3, 1, 0), datetime(2022, 3, 1, 3))
+         >>> variables = ["air_temperature", "specific_humidity"]
+         >>> levels = [300, 250, 200]
+         >>> era5 = ERA5(time=times, variables=variables, pressure_levels=levels)
+         >>> met = era5.open_metdataset()
+
+         >>> # Example flight
+         >>> df = pd.DataFrame()
+         >>> df['longitude'] = np.linspace(0, 50, 10)
+         >>> df['latitude'] = np.linspace(0, 10, 10)
+         >>> df['altitude'] = 11000
+         >>> df['time'] = pd.date_range("2022-03-01T00", "2022-03-01T02", periods=10)
+         >>> fl = Flight(df)
+
+         >>> # Intersect
+         >>> fl.intersect_met(met['air_temperature'], method='nearest')
+         array([231.62969892, 230.72604651, 232.24318771, 231.88338483,
+                231.06429438, 231.59073409, 231.65125393, 231.93064004,
+                232.03344087, 231.65954432])
+
+         >>> fl.intersect_met(met['air_temperature'], method='linear')
+         array([225.77794552, 225.13908414, 226.231218  , 226.31831528,
+                225.56102321, 225.81192149, 226.03192642, 226.22056121,
+                226.03770174, 225.63226188])
+
+         >>> # Interpolate and attach to `Flight` instance
+         >>> for key in met:
+         ...     fl[key] = fl.intersect_met(met[key])
+
+         >>> # Show the final three columns of the dataframe
+         >>> fl.dataframe.iloc[:, -3:].head()
+                          time  air_temperature  specific_humidity
+         0 2022-03-01 00:00:00       225.777946           0.000132
+         1 2022-03-01 00:13:20       225.139084           0.000132
+         2 2022-03-01 00:26:40       226.231218           0.000107
+         3 2022-03-01 00:40:00       226.318315           0.000171
+         4 2022-03-01 00:53:20       225.561022           0.000109
+
+         """
+         # Override use_indices in certain situations
+         if use_indices:
+             # Often the single_level data we use has time shifted
+             # Don't allow it for now. We could do something smarter here!
+             if mda.is_single_level:
+                 use_indices = False
+
+             # Cannot both override some coordinate AND pass indices.
+             elif any(c is not None for c in (longitude, latitude, level, time)):
+                 # Should we warn?! Or is this "convenience"?
+                 use_indices = False
+
+         longitude = longitude if longitude is not None else self["longitude"]
+         latitude = latitude if latitude is not None else self["latitude"]
+         level = level if level is not None else self.level
+         time = time if time is not None else self["time"]
+
+         if not use_indices:
+             return mda.interpolate(longitude, latitude, level, time, **interp_kwargs)
+
+         indices = self._get_indices()
+         already_has_indices = indices is not None
+         out, indices = mda.interpolate(
+             longitude,
+             latitude,
+             level,
+             time,
+             indices=indices,
+             return_indices=True,
+             **interp_kwargs,
+         )
+         if not already_has_indices:
+             self._put_indices(indices)
+         return out
+
+     def _put_indices(self, indices: interpolation.RGIArtifacts) -> None:
+         """Set entries of ``indices`` onto underlying :attr:`data`.
+
+         Each entry of ``indices`` is unpacked assuming certain conventions
+         for its structure. A ValueError is raised if these conventions are not
+         satisfied.
+
+         .. versionadded:: 0.26.0
+
+         Experimental
+
+         Parameters
+         ----------
+         indices : interpolation.RGIArtifacts
+             The indices to store.
+         """
+         indices_x, indices_y, indices_z, indices_t = indices.xi_indices
+         distances_x, distances_y, distances_z, distances_t = indices.norm_distances
+         out_of_bounds = indices.out_of_bounds
+
+         self["_indices_x"] = indices_x
+         self["_indices_y"] = indices_y
+         self["_indices_z"] = indices_z
+         self["_indices_t"] = indices_t
+         self["_distances_x"] = distances_x
+         self["_distances_y"] = distances_y
+         self["_distances_z"] = distances_z
+         self["_distances_t"] = distances_t
+         self["_out_of_bounds"] = out_of_bounds
+
+     def _get_indices(self) -> interpolation.RGIArtifacts | None:
+         """Get entries from call to :meth:`_put_indices`.
+
+         .. versionadded:: 0.26.0
+
+         Experimental
+
+         Returns
+         -------
+         interpolation.RGIArtifacts | None
+             Previously cached output of
+             :meth:`scipy.interpolate.RegularGridInterpolator._find_indices`,
+             or None if cached output is not present on instance.
+         """
+         try:
+             indices_x = self["_indices_x"]
+             indices_y = self["_indices_y"]
+             indices_z = self["_indices_z"]
+             indices_t = self["_indices_t"]
+             distances_x = self["_distances_x"]
+             distances_y = self["_distances_y"]
+             distances_z = self["_distances_z"]
+             distances_t = self["_distances_t"]
+             out_of_bounds = self["_out_of_bounds"]
+         except KeyError:
+             return None
+
+         indices = np.asarray([indices_x, indices_y, indices_z, indices_t])
+         distances = np.asarray([distances_x, distances_y, distances_z, distances_t])
+
+         return interpolation.RGIArtifacts(indices, distances, out_of_bounds)
+
+     def _invalidate_indices(self) -> None:
+         """Remove any cached indices from :attr:`data`."""
+         for key in (
+             "_indices_x",
+             "_indices_y",
+             "_indices_z",
+             "_indices_t",
+             "_distances_x",
+             "_distances_y",
+             "_distances_z",
+             "_distances_t",
+             "_out_of_bounds",
+         ):
+             self.data.pop(key, None)
+
+     @overload
+     def downselect_met(
+         self,
+         met: met_module.MetDataset,
+         *,
+         longitude_buffer: tuple[float, float] = ...,
+         latitude_buffer: tuple[float, float] = ...,
+         level_buffer: tuple[float, float] = ...,
+         time_buffer: tuple[np.timedelta64, np.timedelta64] = ...,
+         copy: bool = ...,
+     ) -> met_module.MetDataset: ...
+
+     @overload
+     def downselect_met(
+         self,
+         met: met_module.MetDataArray,
+         *,
+         longitude_buffer: tuple[float, float] = ...,
+         latitude_buffer: tuple[float, float] = ...,
+         level_buffer: tuple[float, float] = ...,
+         time_buffer: tuple[np.timedelta64, np.timedelta64] = ...,
+         copy: bool = ...,
+     ) -> met_module.MetDataArray: ...
+
+     def downselect_met(
+         self,
+         met: met_module.MetDataType,
+         *,
+         longitude_buffer: tuple[float, float] = (0.0, 0.0),
+         latitude_buffer: tuple[float, float] = (0.0, 0.0),
+         level_buffer: tuple[float, float] = (0.0, 0.0),
+         time_buffer: tuple[np.timedelta64, np.timedelta64] = (
+             np.timedelta64(0, "h"),
+             np.timedelta64(0, "h"),
+         ),
+         copy: bool = True,
+     ) -> met_module.MetDataType:
+         """Downselect ``met`` to encompass a spatiotemporal region of the data.
+
+         Parameters
+         ----------
+         met : MetDataset | MetDataArray
+             MetDataset or MetDataArray to downselect.
+         longitude_buffer : tuple[float, float], optional
+             Extend the longitude domain by ``longitude_buffer[0]`` on the low side
+             and ``longitude_buffer[1]`` on the high side.
+             Units must be the same as class coordinates.
+             Defaults to ``(0, 0)`` degrees.
+         latitude_buffer : tuple[float, float], optional
+             Extend the latitude domain by ``latitude_buffer[0]`` on the low side
+             and ``latitude_buffer[1]`` on the high side.
+             Units must be the same as class coordinates.
+             Defaults to ``(0, 0)`` degrees.
+         level_buffer : tuple[float, float], optional
+             Extend the level domain by ``level_buffer[0]`` on the low side
+             and ``level_buffer[1]`` on the high side.
+             Units must be the same as class coordinates.
+             Defaults to ``(0, 0)`` [:math:`hPa`].
+         time_buffer : tuple[np.timedelta64, np.timedelta64], optional
+             Extend the time domain by ``time_buffer[0]`` on the low side
+             and ``time_buffer[1]`` on the high side.
+             Units must be the same as class coordinates.
+             Defaults to ``(np.timedelta64(0, "h"), np.timedelta64(0, "h"))``.
+         copy : bool
+             Whether the returned object is a copy (True, the default) or a view
+             of the original.
+
+         Returns
+         -------
+         MetDataset | MetDataArray
+             Copy of downselected MetDataset or MetDataArray.
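+
+         Examples
+         --------
+         A sketch (not run), assuming ``fl`` is a :class:`Flight` whose domain
+         is covered by ``met``; the buffers pad the cropped region by one degree
+         and one hour on each side:
+
+         >>> met_small = fl.downselect_met(
+         ...     met,
+         ...     longitude_buffer=(1.0, 1.0),
+         ...     latitude_buffer=(1.0, 1.0),
+         ...     time_buffer=(np.timedelta64(1, "h"), np.timedelta64(1, "h")),
+         ... )  # doctest: +SKIP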
+         """
+         indexes = met.indexes
+         lon_slice = coordinates.slice_domain(
+             indexes["longitude"].to_numpy(),
+             self["longitude"],
+             buffer=longitude_buffer,
+         )
+         lat_slice = coordinates.slice_domain(
+             indexes["latitude"].to_numpy(),
+             self["latitude"],
+             buffer=latitude_buffer,
+         )
+         time_slice = coordinates.slice_domain(
+             indexes["time"].to_numpy(),
+             self["time"],
+             buffer=time_buffer,
+         )
+
+         # single level data have "level" == [-1]
+         if met.is_single_level:
+             level_slice = slice(None)
+         else:
+             level_slice = coordinates.slice_domain(
+                 indexes["level"].to_numpy(),
+                 self.level,
+                 buffer=level_buffer,
+             )
+         logger.debug("Downselect met at %s %s %s %s", lon_slice, lat_slice, level_slice, time_slice)
+
+         data = met.data.isel(
+             longitude=lon_slice,
+             latitude=lat_slice,
+             level=level_slice,
+             time=time_slice,
+         )
+         return type(met)(data, copy=copy)
+
+     # ------------
+     # I / O
+     # ------------
+
+     @classmethod
+     @overrides
+     def create_empty(
+         cls: type[GeoVectorDatasetType],
+         keys: Iterable[str] | None = None,
+         attrs: dict[str, Any] | None = None,
+         **attrs_kwargs: Any,
+     ) -> GeoVectorDatasetType:
+         keys = *cls.required_keys, "altitude", *(keys or ())
+         return super().create_empty(keys, attrs, **attrs_kwargs)
+
+     def to_geojson_points(self) -> dict[str, Any]:
+         """Return dataset as GeoJSON FeatureCollection of Points.
+
+         Each Feature has a ``properties`` attribute that includes ``time`` and
+         any other :attr:`data` fields besides ``latitude``, ``longitude``, and
+         ``altitude``.
+
+         Returns
+         -------
+         dict[str, Any]
+             Python representation of GeoJSON FeatureCollection
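+
+         Examples
+         --------
+         A sketch (not run), assuming ``fl`` is a :class:`Flight`:
+
+         >>> geojson = fl.to_geojson_points()  # doctest: +SKIP
+         >>> geojson["type"]  # doctest: +SKIP
+         'FeatureCollection'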
+         """
+         return json_utils.dataframe_to_geojson_points(self.dataframe)
+
+     def to_pseudo_mercator(self: GeoVectorDatasetType, copy: bool = True) -> GeoVectorDatasetType:
+         """Convert data from :attr:`attrs["crs"]` to Pseudo-Mercator (EPSG:3857).
+
+         Parameters
+         ----------
+         copy : bool, optional
+             Copy data on transformation.
+             Defaults to True.
+
+         Returns
+         -------
+         GeoVectorDatasetType
+             Converted dataset in the Pseudo-Mercator CRS.
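+
+         Examples
+         --------
+         A short sketch; this is equivalent to ``transform_crs("EPSG:3857")``:
+
+         >>> merc = vector.to_pseudo_mercator()  # doctest: +SKIP
+         >>> merc.attrs["crs"]  # doctest: +SKIP
+         'EPSG:3857'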
+         """
+         return self.transform_crs("EPSG:3857", copy=copy)
+
+     # ------------
+     # Vector to grid
+     # ------------
+     def to_lon_lat_grid(
+         self,
+         agg: dict[str, str],
+         *,
+         spatial_bbox: tuple[float, float, float, float] = (-180.0, -90.0, 180.0, 90.0),
+         spatial_grid_res: float = 0.5,
+     ) -> xr.Dataset:
+         """
+         Convert vectors to a longitude-latitude grid.
+
+         See Also
+         --------
+         vector_to_lon_lat_grid
+         """
+         return vector_to_lon_lat_grid(
+             self, agg=agg, spatial_bbox=spatial_bbox, spatial_grid_res=spatial_grid_res
+         )
+
+
+ def vector_to_lon_lat_grid(
+     vector: GeoVectorDataset,
+     agg: dict[str, str],
+     *,
+     spatial_bbox: tuple[float, float, float, float] = (-180.0, -90.0, 180.0, 90.0),
+     spatial_grid_res: float = 0.5,
+ ) -> xr.Dataset:
+     r"""
+     Convert vectors to a longitude-latitude grid.
+
+     Parameters
+     ----------
+     vector : GeoVectorDataset
+         Contains the longitude, latitude, and variables for aggregation.
+     agg : dict[str, str]
+         Variable name and the function selected for aggregation,
+         e.g. ``{"segment_length": "sum"}``.
+     spatial_bbox : tuple[float, float, float, float]
+         Spatial bounding box, ``(lon_min, lat_min, lon_max, lat_max)``, [:math:`\deg`].
+         By default, the entire globe is used.
+     spatial_grid_res : float
+         Spatial grid resolution, [:math:`\deg`]
+
+     Returns
+     -------
+     xr.Dataset
+         Aggregated variables in a longitude-latitude grid.
+
+     Examples
+     --------
+     >>> rng = np.random.default_rng(234)
+     >>> vector = GeoVectorDataset(
+     ...     longitude=rng.uniform(-10, 10, 10000),
+     ...     latitude=rng.uniform(-10, 10, 10000),
+     ...     altitude=np.zeros(10000),
+     ...     time=np.zeros(10000).astype("datetime64[ns]"),
+     ... )
+     >>> vector["foo"] = rng.uniform(0, 1, 10000)
+     >>> ds = vector.to_lon_lat_grid({"foo": "sum"}, spatial_bbox=(-10, -10, 9.5, 9.5))
+     >>> da = ds["foo"]
+     >>> da.coords
+     Coordinates:
+       * longitude  (longitude) float64 320B -10.0 -9.5 -9.0 -8.5 ... 8.0 8.5 9.0 9.5
+       * latitude   (latitude) float64 320B -10.0 -9.5 -9.0 -8.5 ... 8.0 8.5 9.0 9.5
+
+     >>> da.values.round(2)
+     array([[2.23, 0.67, 1.29, ..., 4.66, 3.91, 1.93],
+            [4.1 , 3.84, 1.34, ..., 3.24, 1.71, 4.55],
+            [0.78, 3.25, 2.33, ..., 3.78, 2.93, 2.33],
+            ...,
+            [1.97, 3.02, 1.84, ..., 2.37, 3.87, 2.09],
+            [3.74, 1.6 , 4.01, ..., 4.6 , 4.27, 3.4 ],
+            [2.97, 0.12, 1.33, ..., 3.54, 0.74, 2.59]])
+
+     >>> da.sum().item() == vector["foo"].sum()
+     np.True_
+
+     """
+     df = vector.select(("longitude", "latitude", *agg), copy=False).dataframe
+
+     # Create longitude and latitude coordinates
+     assert spatial_grid_res > 0.01, "spatial_grid_res must be greater than 0.01"
+     west, south, east, north = spatial_bbox
+     lon_coords = np.arange(west, east + 0.01, spatial_grid_res)
+     lat_coords = np.arange(south, north + 0.01, spatial_grid_res)
+     shape = lon_coords.size, lat_coords.size
+
+     # Convert vector to lon-lat grid
+     idx_lon = np.searchsorted(lon_coords, df["longitude"]) - 1
+     idx_lat = np.searchsorted(lat_coords, df["latitude"]) - 1
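+     # Note: ``np.searchsorted`` with the default ``side="left"`` assigns a point
+     # lying exactly on an interior grid line to the cell below it; a point exactly
+     # at the west or south edge of ``spatial_bbox`` yields index -1, which wraps
+     # to the last cell. Interior points are binned as expected.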
+
+     df_agg = df.groupby([idx_lon, idx_lat]).agg(agg)
+     index = df_agg.index.get_level_values(0), df_agg.index.get_level_values(1)
+
+     out = xr.Dataset(coords={"longitude": lon_coords, "latitude": lat_coords})
+     for name, col in df_agg.items():
+         arr = np.zeros(shape, dtype=col.dtype)
+         arr[index] = col
+         out[name] = (("longitude", "latitude"), arr)
+
+     return out
+
+
+ def _handle_time_column(time: pd.Series) -> pd.Series:
+     """Ensure that pd.Series has compatible Timestamps.
+
+     Parameters
+     ----------
+     time : pd.Series
+         Pandas dataframe column labeled "time".
+
+     Returns
+     -------
+     pd.Series
+         Parsed, timezone-naive pandas time series.
+
+     Raises
+     ------
+     ValueError
+         When the time series cannot be parsed.
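+
+     Examples
+     --------
+     A short sketch; timezone-aware values are converted to UTC and made naive:
+
+     >>> s = pd.Series(pd.to_datetime(["2022-03-01T00:00:00+01:00"]))
+     >>> _handle_time_column(s)[0]
+     Timestamp('2022-02-28 23:00:00')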
+     """
+     if not hasattr(time, "dt"):
+         time = _parse_pandas_time(time)
+
+     # Translate all times to UTC and then remove the timezone.
+     # If the time column contains a timezone, the call to `to_numpy`
+     # will convert it to an array of object.
+     # Note `.tz_convert(None)` automatically converts to UTC first.
+     if time.dt.tz is not None:
+         time = time.dt.tz_convert(None)
+
+     return time
+
+
+ def _parse_pandas_time(time: pd.Series) -> pd.Series:
+     """Parse pandas dataframe column labeled "time".
+
+     Parameters
+     ----------
+     time : pd.Series
+         Time series
+
+     Returns
+     -------
+     pd.Series
+         Parsed time series
+
+     Raises
+     ------
+     ValueError
+         When series values can't be inferred.
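+
+     Examples
+     --------
+     A short sketch; string values are parsed with :func:`pandas.to_datetime`:
+
+     >>> _parse_pandas_time(pd.Series(["2022-03-01 00:00:00"]))[0]
+     Timestamp('2022-03-01 00:00:00')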
+     """
+     try:
+         # If the time series is a string, try to convert it to a datetime
+         if time.dtype == "O":
+             return pd.to_datetime(time)
+
+         # If the time is an int, try to parse it as unix time
+         if np.issubdtype(time.dtype, np.integer):
+             return _parse_unix_time(time)
+
+     except ValueError as exc:
+         msg = (
+             "The 'time' field must hold datetime-like values. "
+             'Try data["time"] = pd.to_datetime(data["time"], unit=...) '
+             "with the appropriate unit."
+         )
+         raise ValueError(msg) from exc
+
+     raise ValueError("Unsupported time format")
+
+
+ def _parse_unix_time(time: list[int] | npt.NDArray[np.int_] | pd.Series) -> pd.Series:
+     """Parse array of int times as unix epoch timestamps.
+
+     Attempts to parse the time with each of the units "s", "ms", "us", and "ns",
+     returning the first parse that falls in a plausible date range.
+
+     Parameters
+     ----------
+     time : list[int] | npt.NDArray[np.int_] | pd.Series
+         Sequence of unix timestamps
+
+     Returns
+     -------
+     pd.Series
+         Series of timezone-naive pandas Timestamps
+
+     Raises
+     ------
+     ValueError
+         When unable to parse time as a unix epoch timestamp between
+         1980-01-01 and 2030-01-01
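+
+     Examples
+     --------
+     A short sketch; 1646092800 seconds after the epoch is 2022-03-01T00:00:00 UTC:
+
+     >>> _parse_unix_time(pd.Series([1646092800]))[0]
+     Timestamp('2022-03-01 00:00:00')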
+     """
+     units = "s", "ms", "us", "ns"
+     for unit in units:
+         try:
+             out = pd.to_datetime(time, unit=unit, utc=True)
+         except ValueError:
+             continue
+
+         # make timezone naive
+         out = out.dt.tz_convert(None)
+
+         # make sure time is reasonable
+         if (pd.Timestamp("1980-01-01") <= out).all() and (out <= pd.Timestamp("2030-01-01")).all():
+             return out
+
+     raise ValueError(
+         f"Unable to parse time parameter '{time}' as unix epoch timestamp between "
+         "1980-01-01 and 2030-01-01"
+     )