pycontrails 0.54.6__cp312-cp312-win_amd64.whl → 0.54.7__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycontrails might be problematic. Click here for more details.

@@ -0,0 +1,606 @@
1
+ """Support for `Spire Aviation <https://spire.com/aviation/>`_ data validation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import ClassVar
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import pandas.api.types as pdtypes
10
+
11
+ from pycontrails.core import airports
12
+ from pycontrails.datalib.spire.exceptions import (
13
+ BadTrajectoryException,
14
+ BaseSpireError,
15
+ DestinationAirportError,
16
+ FlightAltitudeProfileError,
17
+ FlightDuplicateTimestamps,
18
+ FlightInvariantFieldViolation,
19
+ FlightTooFastError,
20
+ FlightTooLongError,
21
+ FlightTooShortError,
22
+ FlightTooSlowError,
23
+ OrderingError,
24
+ OriginAirportError,
25
+ ROCDError,
26
+ SchemaError,
27
+ )
28
+ from pycontrails.physics import geo, units
29
+
30
+
31
def _segment_haversine_3d(
    longitude: np.ndarray,
    latitude: np.ndarray,
    altitude_ft: np.ndarray,
) -> np.ndarray:
    """Compute the 3D length of each segment between consecutive waypoints.

    The horizontal component comes from :func:`geo.segment_haversine` and the
    vertical component from the altitude change (converted from feet to meters).
    The two are combined as the hypotenuse of a right triangle.

    Returns
    -------
    np.ndarray
        Distance between each waypoint in meters. The result is shifted by one
        position so that entry ``i`` describes the segment arriving at waypoint
        ``i`` (the usual pandas ``diff`` convention).
    """
    altitude_m = units.ft_to_m(altitude_ft)

    # Altitude change from each waypoint to the next one.
    # The final waypoint has no successor, so its entry is nan.
    dz = np.empty_like(altitude_m)
    dz[:-1] = np.diff(altitude_m)
    dz[-1] = np.nan

    dxy = geo.segment_haversine(longitude, latitude)
    segment_length = (dxy**2 + dz**2) ** 0.5

    # Rotate by one so the value stored at the last index moves to index 0.
    return np.roll(segment_length, 1)
54
+
55
+
56
def _pointed_haversine_3d(
    longitude: np.ndarray,
    latitude: np.ndarray,
    altitude_ft: np.ndarray,
    lon0: float,
    lat0: float,
    alt0_ft: float,
) -> np.ndarray:
    """Compute the 3D distance from every waypoint to one fixed reference point.

    The horizontal component comes from :func:`geo.haversine`; the vertical
    component is the altitude difference after converting feet to meters.

    Parameters
    ----------
    longitude, latitude, altitude_ft
        Waypoint coordinates, with altitude in feet.
    lon0, lat0, alt0_ft
        Coordinates of the reference point, with altitude in feet.

    Returns
    -------
    np.ndarray
        Distance from each waypoint to the reference point
        (presumably meters, matching the ``*_dist_m`` columns callers store it in).
    """
    dz_m = units.ft_to_m(altitude_ft) - units.ft_to_m(alt0_ft)
    dxy_m = geo.haversine(longitude, latitude, lon0, lat0)  # type: ignore[type-var]
    return (dxy_m**2 + dz_m**2) ** 0.5  # type: ignore[operator]
69
+
70
+
71
class ValidateTrajectoryHandler:
    """Evaluate a single flight trajectory and report any validation rule it violates.

    Typical usage is to call :meth:`set` with a trajectory dataframe and then
    :meth:`evaluate` to collect the violations. The :attr:`validation_df` property
    returns the trajectory augmented with the derived columns used by the rules.
    """

    CRUISE_ROCD_THRESHOLD_FPS = 4.2  # 4.2 ft/sec ~= 250 ft/min
    CRUISE_LOW_ALTITUDE_THRESHOLD_FT = 15000.0  # lowest expected cruise altitude
    INSTANTANEOUS_HIGH_GROUND_SPEED_THRESHOLD_MPS = 350.0  # 350m/sec ~= 780mph ~= 1260kph
    INSTANTANEOUS_LOW_GROUND_SPEED_THRESHOLD_MPS = 45.0  # 45m/sec ~= 100mph ~= 160kph
    AVG_LOW_GROUND_SPEED_THRESHOLD_MPS = 100.0  # 100m/sec ~= 223mph ~= 360kph
    AVG_LOW_GROUND_SPEED_ROLLING_WINDOW_PERIOD_MIN = 30.0  # rolling period for avg speed comparison
    AIRPORT_DISTANCE_THRESHOLD_KM = 200.0
    MIN_FLIGHT_LENGTH_HR = 0.4
    MAX_FLIGHT_LENGTH_HR = 19.0

    # expected schema of pandas dataframe passed on initialization:
    # column name -> dtype predicate that the column's dtype must satisfy
    SCHEMA: ClassVar = {
        "icao_address": pdtypes.is_string_dtype,
        "flight_id": pdtypes.is_string_dtype,
        "callsign": pdtypes.is_string_dtype,
        "tail_number": pdtypes.is_string_dtype,
        "flight_number": pdtypes.is_string_dtype,
        "aircraft_type_icao": pdtypes.is_string_dtype,
        "airline_iata": pdtypes.is_string_dtype,
        "departure_airport_icao": pdtypes.is_string_dtype,
        "departure_scheduled_time": pdtypes.is_datetime64_any_dtype,
        "arrival_airport_icao": pdtypes.is_string_dtype,
        "arrival_scheduled_time": pdtypes.is_datetime64_any_dtype,
        "ingestion_time": pdtypes.is_datetime64_any_dtype,
        "timestamp": pdtypes.is_datetime64_any_dtype,
        "latitude": pdtypes.is_numeric_dtype,
        "longitude": pdtypes.is_numeric_dtype,
        "collection_type": pdtypes.is_string_dtype,
        "altitude_baro": pdtypes.is_numeric_dtype,
    }

    # Class-level cache of the airport database, loaded on first lookup.
    airports_db: pd.DataFrame | None = None

    def __init__(self) -> None:
        self._df: pd.DataFrame | None = None

    def _require_df(self) -> pd.DataFrame:
        """Return the trajectory held in handler state.

        Raises
        ------
        ValueError
            If no trajectory has been provided via :meth:`set`.
        """
        if self._df is None:
            msg = "No trajectory DataFrame has been set. Call set() before calling this method."
            raise ValueError(msg)
        return self._df

    def set(self, trajectory: pd.DataFrame) -> None:
        """
        Set a single flight trajectory into handler state.

        A copy of the dataframe is stored, so the caller's object is not modified.

        Parameters
        ----------
        trajectory
            A dataframe representing a single flight trajectory.
            Must include those columns itemized in :attr:`SCHEMA`.

        Raises
        ------
        BadTrajectoryException
            If the dataframe is empty, has no ``flight_id`` column,
            or contains more than one distinct ``flight_id``.
        """
        if trajectory.empty:
            msg = "The trajectory DataFrame is empty."
            raise BadTrajectoryException(msg)

        if "flight_id" not in trajectory:
            msg = "The trajectory DataFrame must have a 'flight_id' column."
            raise BadTrajectoryException(msg)

        n_unique = trajectory["flight_id"].nunique()
        if n_unique > 1:
            msg = f"The trajectory DataFrame must have a unique flight_id. Found {n_unique}."
            raise BadTrajectoryException(msg)

        self._df = trajectory.copy()

    def unset(self) -> None:
        """Pop _df from handler state."""
        self._df = None

    @classmethod
    def _find_airport_coords(cls, airport_icao: str | None) -> tuple[float, float, float]:
        """
        Find the longitude, latitude and elevation for a given airport.

        The airport database is loaded once and cached on the class.

        Parameters
        ----------
        airport_icao : str | None
            string representation of the airport's icao code

        Returns
        -------
        tuple[float, float, float]
            ``(longitude, latitude, alt_ft)`` of the airport.
            Returns ``(np.nan, np.nan, np.nan)`` if it cannot be found.

        Raises
        ------
        ValueError
            If more than one row of the airport database matches the code.
        """
        if airport_icao is None:
            return np.nan, np.nan, np.nan

        if cls.airports_db is None:
            cls.airports_db = airports.global_airport_database()

        matches = cls.airports_db[cls.airports_db["icao_code"] == airport_icao]
        if len(matches) == 0:
            return np.nan, np.nan, np.nan
        if len(matches) > 1:
            msg = f"Found multiple matches for aiport icao {airport_icao} in airports database."
            raise ValueError(msg)

        lon = matches["longitude"].iloc[0].item()
        lat = matches["latitude"].iloc[0].item()
        alt_ft = matches["elevation_ft"].iloc[0].item()

        return lon, lat, alt_ft

    def _calculate_additional_fields(self) -> None:
        """
        Add additional columns to the provided dataframe.

        These additional fields are needed to apply the validation ruleset.

        The following fields are added:

        - elapsed_seconds: time elapsed between two consecutive waypoints
        - elapsed_distance_m: distance travelled between two consecutive waypoints
        - ground_speed_m_s: ground speed in meters per second
        - rocd_fps: rate of climb/descent in feet per second
        - departure_airport_lat: latitude of the departure airport
        - departure_airport_lon: longitude of the departure airport
        - departure_airport_alt_ft: altitude of the departure airport
        - arrival_airport_lat: latitude of the arrival airport
        - arrival_airport_lon: longitude of the arrival airport
        - arrival_airport_alt_ft: altitude of the arrival airport
        - departure_airport_dist_m: distance to the departure airport
        - arrival_airport_dist_m: distance to the arrival airport

        Raises
        ------
        ValueError
            If no trajectory has been set, or if the trajectory names more than one
            departure or arrival airport.
        """
        df = self._require_df()

        df["elapsed_seconds"] = df["timestamp"].diff().dt.total_seconds()

        df["elapsed_distance_m"] = _segment_haversine_3d(
            df["longitude"].to_numpy(),
            df["latitude"].to_numpy(),
            df["altitude_baro"].to_numpy(),
        )

        # A zero elapsed time yields an infinite speed; treat it as missing instead.
        ground_speed_m_s = df["elapsed_distance_m"] / df["elapsed_seconds"]
        df["ground_speed_m_s"] = ground_speed_m_s.replace(np.inf, np.nan)

        df["rocd_fps"] = df["altitude_baro"].diff() / df["elapsed_seconds"]

        if df["departure_airport_icao"].nunique() > 1:  # This has already been checked
            raise ValueError("expected only one airport icao for flight departure airport.")
        departure_airport_icao = df["departure_airport_icao"].iloc[0]

        if df["arrival_airport_icao"].nunique() > 1:  # This has already been checked
            raise ValueError("expected only one airport icao for flight arrival airport.")
        arrival_airport_icao = df["arrival_airport_icao"].iloc[0]

        dep_lon, dep_lat, dep_alt_ft = self._find_airport_coords(departure_airport_icao)
        arr_lon, arr_lat, arr_alt_ft = self._find_airport_coords(arrival_airport_icao)

        df["departure_airport_lon"] = dep_lon
        df["departure_airport_lat"] = dep_lat
        df["departure_airport_alt_ft"] = dep_alt_ft
        df["arrival_airport_lon"] = arr_lon
        df["arrival_airport_lat"] = arr_lat
        df["arrival_airport_alt_ft"] = arr_alt_ft

        df["departure_airport_dist_m"] = _pointed_haversine_3d(
            df["longitude"].to_numpy(),
            df["latitude"].to_numpy(),
            df["altitude_baro"].to_numpy(),
            dep_lon,
            dep_lat,
            dep_alt_ft,
        )

        df["arrival_airport_dist_m"] = _pointed_haversine_3d(
            df["longitude"].to_numpy(),
            df["latitude"].to_numpy(),
            df["altitude_baro"].to_numpy(),
            arr_lon,
            arr_lat,
            arr_alt_ft,
        )

    def _is_valid_schema(self) -> SchemaError | None:
        """Verify that a pandas dataframe has required cols, and that they are of required type.

        Returns
        -------
        SchemaError | None
            A :class:`SchemaError` describing the missing columns or the columns whose
            dtype fails its :attr:`SCHEMA` predicate, or None if the schema is valid.
            Missing columns are reported before dtypes are examined.
        """
        df = self._require_df()

        missing_cols = set(self.SCHEMA).difference(df)
        if missing_cols:
            msg = f"Trajectory DataFrame is missing expected fields: {sorted(missing_cols)}"
            return SchemaError(msg)

        col_types = df.dtypes
        col_w_bad_dtypes = [
            f"{col} failed check {check_fn.__name__}"
            for col, check_fn in self.SCHEMA.items()
            if not check_fn(col_types[col])
        ]

        if col_w_bad_dtypes:
            msg = f"Trajectory DataFrame has columns with invalid data types: {col_w_bad_dtypes}"
            return SchemaError(msg)

        return None

    def _is_timestamp_sorted_and_unique(self) -> list[OrderingError | FlightDuplicateTimestamps]:
        """Verify that the data is sorted by waypoint timestamp in ascending order, without repeats.

        Returns
        -------
        list[OrderingError | FlightDuplicateTimestamps]
            One entry per failed check; empty if timestamps are sorted and unique.
        """
        df = self._require_df()

        violations: list[OrderingError | FlightDuplicateTimestamps] = []

        ts_index = pd.Index(df["timestamp"])
        if not ts_index.is_monotonic_increasing:
            msg = "Trajectory DataFrame must be sorted by timestamp in ascending order."
            violations.append(OrderingError(msg))

        if ts_index.has_duplicates:
            n_duplicates = ts_index.duplicated().sum()
            msg = f"Trajectory DataFrame has {n_duplicates} duplicate timestamps."
            violations.append(FlightDuplicateTimestamps(msg))

        return violations

    def _is_valid_invariant_fields(self) -> FlightInvariantFieldViolation | None:
        """
        Verify that fields expected to be invariant are indeed invariant.

        Presence of null values does not constitute an invariance violation.

        Returns
        -------
        FlightInvariantFieldViolation | None
            A violation listing every field with more than one distinct non-null value,
            or None if all invariant fields hold a single value.
        """
        df = self._require_df()

        invariant_fields = (
            "icao_address",
            "flight_id",
            "callsign",
            "tail_number",
            "aircraft_type_icao",
            "airline_iata",
            "departure_airport_icao",
            "departure_scheduled_time",
            "arrival_airport_icao",
            "arrival_scheduled_time",
        )

        fields = [k for k in invariant_fields if df[k].nunique(dropna=True) > 1]

        if fields:
            msg = f"The following fields have multiple values for this trajectory: {fields}"
            return FlightInvariantFieldViolation(msg)

        return None

    def _is_valid_flight_length(self) -> FlightTooShortError | FlightTooLongError | None:
        """Verify that the flight is of a reasonable length.

        The duration is the span between the earliest and latest timestamp.

        Returns
        -------
        FlightTooShortError | FlightTooLongError | None
            A violation if the duration falls outside
            ``[MIN_FLIGHT_LENGTH_HR, MAX_FLIGHT_LENGTH_HR]``, otherwise None.
        """
        df = self._require_df()

        # Use total_seconds(): Timedelta.seconds only holds the sub-day remainder,
        # so a span of a day or more would otherwise look like a short flight.
        flight_duration = df["timestamp"].max() - df["timestamp"].min()
        flight_duration_hours = flight_duration.total_seconds() / 60.0 / 60.0

        if flight_duration_hours > self.MAX_FLIGHT_LENGTH_HR:
            return FlightTooLongError(
                f"flight exceeds max duration of {self.MAX_FLIGHT_LENGTH_HR} hours. "
                f"this trajectory spans {flight_duration_hours:.2f} hours."
            )

        if flight_duration_hours < self.MIN_FLIGHT_LENGTH_HR:
            return FlightTooShortError(
                f"flight less than min duration of {self.MIN_FLIGHT_LENGTH_HR} hours. "
                f"this trajectory spans {flight_duration_hours:.2f} hours."
            )

        return None

    def _is_from_origin_airport(self) -> OriginAirportError | None:
        """Verify the trajectory origin is a reasonable distance from the origin airport.

        Requires the ``departure_airport_dist_m`` column added by
        :meth:`_calculate_additional_fields`.
        """
        df = self._require_df()

        first_waypoint = df.iloc[0]
        first_waypoint_dist_km = first_waypoint["departure_airport_dist_m"] / 1000.0
        if first_waypoint_dist_km > self.AIRPORT_DISTANCE_THRESHOLD_KM:
            return OriginAirportError(
                "First waypoint in trajectory too far from departure airport icao: "
                f"{first_waypoint['departure_airport_icao']}. "
                f"Distance {first_waypoint_dist_km:.3f}km is greater than "
                f"threshold of {self.AIRPORT_DISTANCE_THRESHOLD_KM}km."
            )

        return None

    def _is_to_destination_airport(self) -> DestinationAirportError | None:
        """Verify the trajectory destination is reasonable distance from the destination airport.

        Requires the ``arrival_airport_dist_m`` column added by
        :meth:`_calculate_additional_fields`.
        """
        df = self._require_df()

        last_waypoint = df.iloc[-1]
        last_waypoint_dist_km = last_waypoint["arrival_airport_dist_m"] / 1000.0
        if last_waypoint_dist_km > self.AIRPORT_DISTANCE_THRESHOLD_KM:
            return DestinationAirportError(
                "Last waypoint in trajectory too far from arrival airport icao: "
                f"{last_waypoint['arrival_airport_icao']}. "
                f"Distance {last_waypoint_dist_km:.3f}km is greater than "
                f"threshold of {self.AIRPORT_DISTANCE_THRESHOLD_KM:.3f}km."
            )

        return None

    def _is_too_slow(self) -> list[FlightTooSlowError]:
        """
        Evaluate the flight trajectory for unreasonably slow speed.

        This is evaluated both for instantaneous discrete steps in the trajectory
        (between consecutive waypoints), and on a rolling average basis.

        For instantaneous speed, we don't consider the first or last 10 minutes of the flight.

        Returns
        -------
        list[FlightTooSlowError]
            Up to two violations (instantaneous and rolling average); empty if none.
        """
        df = self._require_df()

        violations: list[FlightTooSlowError] = []

        # NOTE: When we get here, we have already checked that the timestamps are sorted and unique.
        gs = df.set_index("timestamp")["ground_speed_m_s"]

        t0 = df["timestamp"].iloc[0]
        t1 = df["timestamp"].iloc[-1]
        cropped_gs = gs[t0 + pd.Timedelta(minutes=10) : t1 - pd.Timedelta(minutes=10)]

        cond = cropped_gs <= self.INSTANTANEOUS_LOW_GROUND_SPEED_THRESHOLD_MPS
        if cond.any():
            below_inst_thresh = cropped_gs[cond]
            violations.append(
                FlightTooSlowError(
                    f"Found {len(below_inst_thresh)} instances where speed between waypoints is "
                    "below threshold of "
                    f"{self.INSTANTANEOUS_LOW_GROUND_SPEED_THRESHOLD_MPS:.2f} m/s. "
                    f"max value: {below_inst_thresh.max():.2f}, "
                    f"min value: {below_inst_thresh.min():.2f},"
                )
            )

        # Consider averages occurring at least window minutes after the flight origination
        window = pd.Timedelta(minutes=self.AVG_LOW_GROUND_SPEED_ROLLING_WINDOW_PERIOD_MIN)
        rolling_gs = gs.rolling(window).mean().loc[t0 + window :]

        cond = rolling_gs <= self.AVG_LOW_GROUND_SPEED_THRESHOLD_MPS
        if cond.any():
            below_avg_thresh = rolling_gs[cond]
            violations.append(
                FlightTooSlowError(
                    f"Found {len(below_avg_thresh)} instances where rolling average speed is "
                    f"below threshold of {self.AVG_LOW_GROUND_SPEED_THRESHOLD_MPS} m/s "
                    f"(rolling window of "
                    f"{self.AVG_LOW_GROUND_SPEED_ROLLING_WINDOW_PERIOD_MIN} minutes). "
                    f"max value: {below_avg_thresh.max()}, "
                    f"min value: {below_avg_thresh.min()},"
                )
            )

        return violations

    def _is_too_fast(self) -> FlightTooFastError | None:
        """
        Evaluate the flight trajectory for unreasonably high speed.

        This is evaluated on discrete steps between consecutive waypoints.
        """
        df = self._require_df()

        cond = df["ground_speed_m_s"] >= self.INSTANTANEOUS_HIGH_GROUND_SPEED_THRESHOLD_MPS
        if cond.any():
            above_inst_thresh = df[cond]
            return FlightTooFastError(
                f"Found {len(above_inst_thresh)} instances where speed between waypoints is "
                f"above threshold of {self.INSTANTANEOUS_HIGH_GROUND_SPEED_THRESHOLD_MPS:.2f} m/s. "
                f"max value: {above_inst_thresh['ground_speed_m_s'].max():.2f}, "
                f"min value: {above_inst_thresh['ground_speed_m_s'].min():.2f}"
            )

        return None

    def _is_expected_altitude_profile(self) -> list[FlightAltitudeProfileError | ROCDError]:
        """
        Evaluate flight altitude profile.

        Failure modes include:
        ROCDError
        1) rate of instantaneous (between consecutive waypoint) climb or descent is above threshold,
        while aircraft is above the cruise altitude.

        FlightAltitudeProfileError
        2) flight climbs above alt threshold,
        then descends below that threshold one or more times,
        before making final descent to land.
        """
        df = self._require_df()

        violations: list[FlightAltitudeProfileError | ROCDError] = []

        # only evaluate rocd errors when at cruising altitude
        rocd_above_thres = (df["rocd_fps"].abs() >= self.CRUISE_ROCD_THRESHOLD_FPS) & (
            df["altitude_baro"] >= self.CRUISE_LOW_ALTITUDE_THRESHOLD_FT
        )
        if rocd_above_thres.any():
            # NOTE(review): the reported max is taken over all waypoints,
            # not only those at or above the cruise altitude threshold.
            msg = (
                "Flight trajectory has rate of climb/descent values "
                "between consecutive waypoints that exceed threshold "
                f"of {self.CRUISE_ROCD_THRESHOLD_FPS:.3f}ft/sec. "
                f"Max value found: {df['rocd_fps'].abs().max():.3f}ft/sec"
            )
            violations.append(ROCDError(msg))

        # A rolling sum of exactly 1 over two consecutive waypoints means one is at or below
        # the threshold and the other is above it, i.e. the threshold was crossed.
        # More than two crossings is reported as a violation.
        alt_below_thresh = df["altitude_baro"] <= self.CRUISE_LOW_ALTITUDE_THRESHOLD_FT
        alt_thresh_transitions = alt_below_thresh.rolling(window=2).sum()
        cond = alt_thresh_transitions == 1
        if cond.sum() > 2:
            msg = (
                "Flight trajectory dropped below altitude threshold "
                f"of {self.CRUISE_LOW_ALTITUDE_THRESHOLD_FT}ft while in-flight."
            )
            violations.append(FlightAltitudeProfileError(msg))

        return violations

    @property
    def validation_df(self) -> pd.DataFrame:
        """
        Return an augmented trajectory dataframe.

        Returns
        -------
        dataframe mirroring that provided to the handler,
        but including the additional computed columns that are used in verification.
        e.g. elapsed_seconds, ground_speed_m_s, etc.

        Raises
        ------
        ValueError
            If no trajectory has been set.
        BadTrajectoryException
            If :meth:`evaluate` reports a schema, ordering, duplicate timestamp
            or invariant field violation.
        """
        df = self._require_df()

        violations = self.evaluate()

        fatal_types = (
            SchemaError,
            OrderingError,
            FlightDuplicateTimestamps,
            FlightInvariantFieldViolation,
        )
        if any(isinstance(v, fatal_types) for v in violations):
            msg = f"Validation DataFrame has fatal violation(s): {violations}"
            raise BadTrajectoryException(msg)

        # safeguard to ensure this call follows the addition of the columns
        # assumes calculate_additional_fields is idempotent
        self._calculate_additional_fields()
        return df

    def evaluate(self) -> list[BaseSpireError]:
        """Evaluate the flight trajectory for one or more violations.

        This method performs 3 rounds of checks:

        1. Schema checks
        2. Timestamp ordering and invariant field checks
        3. Flight profile and motion checks

        If any violations are found at the end of a round, the method returns the
        current list of violations and does not proceed to the next round.

        Returns
        -------
        list[BaseSpireError]
            All violations found; empty if the trajectory passes every check.
        """
        self._require_df()

        all_violations: list[BaseSpireError] = []

        # Round 1 checks
        schema_check = self._is_valid_schema()
        if schema_check:
            all_violations.append(schema_check)
            return all_violations

        # Round 2 checks: We're assuming the schema is valid
        all_violations.extend(self._is_timestamp_sorted_and_unique())

        invariant_fields_check = self._is_valid_invariant_fields()
        if invariant_fields_check:
            all_violations.append(invariant_fields_check)

        if all_violations:
            return all_violations

        # Round 3 checks: We're assuming the schema and timestamps are valid
        # and no invariant field violations
        self._calculate_additional_fields()

        flight_length_check = self._is_valid_flight_length()
        if flight_length_check:
            all_violations.append(flight_length_check)

        origin_airport_check = self._is_from_origin_airport()
        if origin_airport_check:
            all_violations.append(origin_airport_check)

        destination_airport_check = self._is_to_destination_airport()
        if destination_airport_check:
            all_violations.append(destination_airport_check)

        all_violations.extend(self._is_too_slow())

        fast_speed_check = self._is_too_fast()
        if fast_speed_check:
            all_violations.append(fast_speed_check)

        all_violations.extend(self._is_expected_altitude_profile())

        return all_violations
@@ -10,7 +10,7 @@ import xarray as xr
10
10
 
11
11
  import pycontrails
12
12
  from pycontrails.core.flight import Flight
13
- from pycontrails.core.met import MetDataset, standardize_variables
13
+ from pycontrails.core.met import MetDataset
14
14
  from pycontrails.core.met_var import (
15
15
  AirTemperature,
16
16
  EastwardWind,
@@ -170,8 +170,8 @@ class ACCF(Model):
170
170
  **params_kwargs: Any,
171
171
  ) -> None:
172
172
  # Normalize ECMWF variables
173
- variables = (v[0] if isinstance(v, tuple) else v for v in self.met_variables)
174
- met = standardize_variables(met, variables)
173
+ variables = self.ecmwf_met_variables()
174
+ met = met.standardize_variables(variables)
175
175
 
176
176
  # If relative humidity is in percentage, convert to a proportion
177
177
  if met["relative_humidity"].attrs.get("units") == "%":
@@ -185,7 +185,7 @@ class ACCF(Model):
185
185
 
186
186
  if surface:
187
187
  surface = surface.copy()
188
- surface = standardize_variables(surface, self.sur_variables)
188
+ surface = surface.standardize_variables(self.sur_variables)
189
189
  surface.data = _rad_instantaneous_to_accumulated(surface.data)
190
190
  self.surface = surface
191
191
 
@@ -18,11 +18,12 @@ import numpy.typing as npt
18
18
  import pandas as pd
19
19
  import xarray as xr
20
20
 
21
- from pycontrails.core import met_var
21
+ from pycontrails.core import met_var, models
22
22
  from pycontrails.core.aircraft_performance import AircraftPerformance
23
23
  from pycontrails.core.fleet import Fleet
24
24
  from pycontrails.core.flight import Flight
25
25
  from pycontrails.core.met import MetDataset
26
+ from pycontrails.core.met_var import MetVariable
26
27
  from pycontrails.core.models import Model, interpolate_met
27
28
  from pycontrails.core.vector import GeoVectorDataset, VectorDataDict
28
29
  from pycontrails.datalib import ecmwf, gfs
@@ -482,6 +483,42 @@ class Cocip(Model):
482
483
 
483
484
  return self.source
484
485
 
486
@classmethod
def generic_rad_variables(cls) -> tuple[MetVariable, ...]:
    """Return the model-agnostic variants of the required radiation variables.

    Each entry of ``cls.rad_variables`` is resolved against ``met_var.MET_VARIABLES``.

    Returns
    -------
    tuple[MetVariable]
        Model-agnostic variants of required variables, in ``rad_variables`` order
    """
    candidates = set(met_var.MET_VARIABLES)
    matched = [models._find_match(variable, candidates) for variable in cls.rad_variables]
    return tuple(matched)
497
+
498
@classmethod
def ecmwf_rad_variables(cls) -> tuple[MetVariable, ...]:
    """Return the ECMWF-specific variants of the required radiation variables.

    Each entry of ``cls.rad_variables`` is resolved against ``ecmwf.ECMWF_VARIABLES``.

    Returns
    -------
    tuple[MetVariable]
        ECMWF-specific variants of required variables, in ``rad_variables`` order
    """
    candidates = set(ecmwf.ECMWF_VARIABLES)
    matched = [models._find_match(variable, candidates) for variable in cls.rad_variables]
    return tuple(matched)
509
+
510
@classmethod
def gfs_rad_variables(cls) -> tuple[MetVariable, ...]:
    """Return the GFS-specific variants of the required radiation variables.

    Each entry of ``cls.rad_variables`` is resolved against ``gfs.GFS_VARIABLES``.

    Returns
    -------
    tuple[MetVariable]
        GFS-specific variants of required variables, in ``rad_variables`` order
    """
    candidates = set(gfs.GFS_VARIABLES)
    matched = [models._find_match(variable, candidates) for variable in cls.rad_variables]
    return tuple(matched)
521
+
485
522
  def _set_timesteps(self) -> None:
486
523
  """Set the :attr:`timesteps` based on the ``source`` time range.
487
524