arize 8.0.0b1__py3-none-any.whl → 8.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. arize/__init__.py +9 -2
  2. arize/_client_factory.py +50 -0
  3. arize/_exporter/client.py +18 -17
  4. arize/_exporter/parsers/tracing_data_parser.py +9 -4
  5. arize/_exporter/validation.py +1 -1
  6. arize/_flight/client.py +37 -17
  7. arize/_generated/api_client/api/datasets_api.py +6 -6
  8. arize/_generated/api_client/api/experiments_api.py +6 -6
  9. arize/_generated/api_client/api/projects_api.py +3 -3
  10. arize/_lazy.py +61 -10
  11. arize/client.py +66 -50
  12. arize/config.py +175 -48
  13. arize/constants/config.py +1 -0
  14. arize/constants/ml.py +9 -16
  15. arize/constants/spans.py +5 -10
  16. arize/datasets/client.py +45 -28
  17. arize/datasets/errors.py +1 -1
  18. arize/datasets/validation.py +2 -2
  19. arize/embeddings/auto_generator.py +16 -9
  20. arize/embeddings/base_generators.py +15 -9
  21. arize/embeddings/cv_generators.py +2 -2
  22. arize/embeddings/errors.py +2 -2
  23. arize/embeddings/nlp_generators.py +8 -8
  24. arize/embeddings/tabular_generators.py +6 -6
  25. arize/exceptions/base.py +0 -52
  26. arize/exceptions/config.py +22 -0
  27. arize/exceptions/parameters.py +1 -330
  28. arize/exceptions/values.py +8 -5
  29. arize/experiments/__init__.py +4 -0
  30. arize/experiments/client.py +31 -18
  31. arize/experiments/evaluators/base.py +12 -9
  32. arize/experiments/evaluators/executors.py +16 -7
  33. arize/experiments/evaluators/rate_limiters.py +3 -1
  34. arize/experiments/evaluators/types.py +9 -7
  35. arize/experiments/evaluators/utils.py +7 -5
  36. arize/experiments/functions.py +128 -58
  37. arize/experiments/tracing.py +4 -1
  38. arize/experiments/types.py +34 -31
  39. arize/logging.py +54 -33
  40. arize/ml/batch_validation/errors.py +10 -1004
  41. arize/ml/batch_validation/validator.py +351 -291
  42. arize/ml/bounded_executor.py +25 -6
  43. arize/ml/casting.py +51 -33
  44. arize/ml/client.py +43 -35
  45. arize/ml/proto.py +21 -22
  46. arize/ml/stream_validation.py +64 -27
  47. arize/ml/surrogate_explainer/mimic.py +18 -10
  48. arize/ml/types.py +27 -67
  49. arize/pre_releases.py +10 -6
  50. arize/projects/client.py +9 -4
  51. arize/py.typed +0 -0
  52. arize/regions.py +11 -11
  53. arize/spans/client.py +125 -31
  54. arize/spans/columns.py +32 -36
  55. arize/spans/conversion.py +12 -11
  56. arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
  57. arize/spans/validation/annotations/value_validation.py +11 -14
  58. arize/spans/validation/common/argument_validation.py +3 -3
  59. arize/spans/validation/common/dataframe_form_validation.py +7 -7
  60. arize/spans/validation/common/value_validation.py +11 -14
  61. arize/spans/validation/evals/dataframe_form_validation.py +4 -4
  62. arize/spans/validation/evals/evals_validation.py +6 -6
  63. arize/spans/validation/evals/value_validation.py +1 -1
  64. arize/spans/validation/metadata/argument_validation.py +1 -1
  65. arize/spans/validation/metadata/dataframe_form_validation.py +2 -2
  66. arize/spans/validation/metadata/value_validation.py +23 -1
  67. arize/spans/validation/spans/dataframe_form_validation.py +2 -2
  68. arize/spans/validation/spans/spans_validation.py +6 -6
  69. arize/utils/arrow.py +38 -2
  70. arize/utils/cache.py +2 -2
  71. arize/utils/dataframe.py +4 -4
  72. arize/utils/online_tasks/dataframe_preprocessor.py +15 -11
  73. arize/utils/openinference_conversion.py +10 -10
  74. arize/utils/proto.py +0 -1
  75. arize/utils/types.py +6 -6
  76. arize/version.py +1 -1
  77. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/METADATA +32 -7
  78. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/RECORD +81 -78
  79. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/WHEEL +0 -0
  80. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/LICENSE +0 -0
  81. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/NOTICE +0 -0
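The diff body below comes from arize/ml/batch_validation/validator.py (entry 41 above, +351 -291), the largest single change between b1 and b4. Two refactors run through every hunk: the module-alias import `from arize.ml.batch_validation import errors as err` is replaced by explicit imports of each exception class (so `err.MissingColumns` becomes `MissingColumns`, with the exception hierarchy split across arize.exceptions.base, arize.exceptions.types, and arize.exceptions.values), and iteration over schema column-name fields is routed through a new `_normalize_column_names` helper imported from arize.ml.types. The helper's body is not part of this diff; the sketch below is inferred from its call sites alone, and the actual signature and behavior in the package may differ.

# Hypothetical sketch of arize.ml.types._normalize_column_names, inferred only
# from call sites such as _normalize_column_names(schema.tag_column_names) in
# the hunks below. Assumption: schema fields may hold a list, tuple, or pandas
# Index of names, or None, and callers always want a plain list of strings.
from collections.abc import Iterable


def _normalize_column_names(column_names: Iterable[str] | None) -> list[str]:
    """Coerce an optional list-like of column names into a list of str."""
    if column_names is None:
        return []
    return [str(col) for col in column_names]

If that reading is right, the change lets Schema fields such as feature_column_names and tag_column_names accept a pandas Index (or any iterable) without each check handling the variants itself.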
@@ -6,7 +6,10 @@ import logging
6
6
  import math
7
7
  from datetime import datetime, timedelta, timezone
8
8
  from itertools import chain
9
- from typing import Any
9
+ from typing import TYPE_CHECKING, Any, cast
10
+
11
+ if TYPE_CHECKING:
12
+ from collections.abc import Sequence
10
13
 
11
14
  import numpy as np
12
15
  import pandas as pd
@@ -39,8 +42,70 @@ from arize.constants.ml import (
39
42
  MIN_PREDICTION_ID_LEN,
40
43
  MODEL_MAPPING_CONFIG,
41
44
  )
45
+ from arize.exceptions.base import (
46
+ InvalidDataFrameIndex,
47
+ InvalidFieldTypeConversion,
48
+ ValidationError,
49
+ )
50
+ from arize.exceptions.types import (
51
+ InvalidFieldTypeLlmConfig,
52
+ InvalidFieldTypePromptTemplates,
53
+ InvalidType,
54
+ InvalidTypeColumns,
55
+ InvalidTypeFeatures,
56
+ InvalidTypeShapValues,
57
+ InvalidTypeTags,
58
+ InvalidValueEmbeddingRawDataTooLong,
59
+ InvalidValueEmbeddingVectorDimensionality,
60
+ )
61
+ from arize.exceptions.values import (
62
+ InvalidBoundingBoxesCategories,
63
+ InvalidBoundingBoxesCoordinates,
64
+ InvalidBoundingBoxesScores,
65
+ InvalidMultiClassActScoreValue,
66
+ InvalidMultiClassClassNameLength,
67
+ InvalidMultiClassPredScoreValue,
68
+ InvalidMultiClassThresholdClasses,
69
+ InvalidNumClassesMultiClassMap,
70
+ InvalidPolygonCategories,
71
+ InvalidPolygonCoordinates,
72
+ InvalidPolygonScores,
73
+ InvalidRankingCategoryValue,
74
+ InvalidRankValue,
75
+ InvalidRecord,
76
+ InvalidStringLengthInColumn,
77
+ InvalidTagLength,
78
+ InvalidValueMissingValue,
79
+ InvalidValueTimestamp,
80
+ )
42
81
  from arize.logging import get_truncation_warning_message
43
- from arize.ml.batch_validation import errors as err
82
+ from arize.ml.batch_validation.errors import (
83
+ DuplicateColumnsInDataframe,
84
+ InvalidBatchId,
85
+ InvalidColumnNameEmptyString,
86
+ InvalidEnvironment,
87
+ InvalidFieldTypeEmbeddingFeatures,
88
+ InvalidFieldTypePromptResponse,
89
+ InvalidModelId,
90
+ InvalidModelType,
91
+ InvalidModelTypeAndMetricsCombination,
92
+ InvalidModelVersion,
93
+ InvalidNumberOfEmbeddings,
94
+ InvalidPredActColumnNamesForModelType,
95
+ InvalidPredActCVColumnNamesForModelType,
96
+ InvalidSchemaType,
97
+ InvalidShapSuffix,
98
+ MissingColumns,
99
+ MissingCVPredAct,
100
+ MissingPredictionIdColumnForDelayedRecords,
101
+ MissingPreprodAct,
102
+ MissingPreprodPredActNumericAndCategorical,
103
+ MissingReqPredActColumnNamesForMultiClass,
104
+ MissingRequiredColumnsForRankingModel,
105
+ MissingRequiredColumnsMetricsValidation,
106
+ MultipleCVPredAct,
107
+ ReservedColumns,
108
+ )
44
109
  from arize.ml.types import (
45
110
  CATEGORICAL_MODEL_TYPES,
46
111
  NUMERIC_MODEL_TYPES,
@@ -53,6 +118,7 @@ from arize.ml.types import (
53
118
  ModelTypes,
54
119
  PromptTemplateColumnNames,
55
120
  Schema,
121
+ _normalize_column_names,
56
122
  segments_intersect,
57
123
  )
58
124
  from arize.utils.types import (
@@ -74,8 +140,8 @@ class Validator:
74
140
  schema: BaseSchema,
75
141
  model_version: str | None = None,
76
142
  batch_id: str | None = None,
77
- ) -> list[err.ValidationError]:
78
- """Validate required checks for schema, environment, and DataFrame structure."""
143
+ ) -> list[ValidationError]:
144
+ """Validate required checks for schema, environment, and :class:`pandas.DataFrame` structure."""
79
145
  general_checks = chain(
80
146
  Validator._check_valid_schema_type(schema, environment),
81
147
  Validator._check_field_convertible_to_str(
@@ -115,7 +181,7 @@ class Validator:
115
181
  metric_families: list[Metrics] | None = None,
116
182
  model_version: str | None = None,
117
183
  batch_id: str | None = None,
118
- ) -> list[err.ValidationError]:
184
+ ) -> list[ValidationError]:
119
185
  """Validate parameters including model type, environment, and schema consistency."""
120
186
  # general checks
121
187
  general_checks = chain(
@@ -223,7 +289,7 @@ class Validator:
223
289
  model_type: ModelTypes,
224
290
  schema: BaseSchema,
225
291
  pyarrow_schema: pa.Schema,
226
- ) -> list[err.ValidationError]:
292
+ ) -> list[ValidationError]:
227
293
  """Validate column data types against expected types for the schema."""
228
294
  column_types = dict(
229
295
  zip(pyarrow_schema.names, pyarrow_schema.types, strict=True)
@@ -323,7 +389,7 @@ class Validator:
323
389
  environment: Environments,
324
390
  schema: BaseSchema,
325
391
  model_type: ModelTypes,
326
- ) -> list[err.ValidationError]:
392
+ ) -> list[ValidationError]:
327
393
  """Validate data values including ranges, formats, and consistency checks."""
328
394
  # ASSUMPTION: at this point the param and type checks should have passed.
329
395
  # This function may crash if that is not true, e.g. if columns are missing
@@ -350,25 +416,25 @@ class Validator:
350
416
  if isinstance(schema, Schema):
351
417
  general_checks = chain(
352
418
  general_checks,
353
- Validator._check_value_timestamp(dataframe, schema),
354
- Validator._check_id_field_str_length(
419
+ Validator._check_value_timestamp(dataframe, schema), # type: ignore[arg-type]
420
+ Validator._check_id_field_str_length( # type: ignore[arg-type]
355
421
  dataframe,
356
422
  "prediction_id_column_name",
357
423
  schema.prediction_id_column_name,
358
424
  ),
359
- Validator._check_embedding_vectors_dimensionality(
425
+ Validator._check_embedding_vectors_dimensionality( # type: ignore[arg-type]
360
426
  dataframe, schema
361
427
  ),
362
- Validator._check_embedding_raw_data_characters(
428
+ Validator._check_embedding_raw_data_characters( # type: ignore[arg-type]
363
429
  dataframe, schema
364
430
  ),
365
- Validator._check_invalid_record_prod(
431
+ Validator._check_invalid_record_prod( # type: ignore[arg-type]
366
432
  dataframe, environment, schema, model_type
367
433
  ),
368
- Validator._check_invalid_record_preprod(
434
+ Validator._check_invalid_record_preprod( # type: ignore[arg-type]
369
435
  dataframe, environment, schema, model_type
370
436
  ),
371
- Validator._check_value_tag(dataframe, schema),
437
+ Validator._check_value_tag(dataframe, schema), # type: ignore[arg-type]
372
438
  )
373
439
  if model_type == ModelTypes.RANKING:
374
440
  r_checks = chain(
@@ -444,15 +510,15 @@ class Validator:
444
510
  @staticmethod
445
511
  def _check_column_names_for_empty_strings(
446
512
  schema: BaseSchema,
447
- ) -> list[err.InvalidColumnNameEmptyString]:
513
+ ) -> list[InvalidColumnNameEmptyString]:
448
514
  if "" in schema.get_used_columns():
449
- return [err.InvalidColumnNameEmptyString()]
515
+ return [InvalidColumnNameEmptyString()]
450
516
  return []
451
517
 
452
518
  @staticmethod
453
519
  def _check_field_convertible_to_str(
454
520
  model_id: object, model_version: object, batch_id: object
455
- ) -> list[err.InvalidFieldTypeConversion]:
521
+ ) -> list[InvalidFieldTypeConversion]:
456
522
  # converting to a set first makes the checks run a lot faster
457
523
  wrong_fields = []
458
524
  if model_id is not None and not isinstance(model_id, str):
@@ -472,61 +538,59 @@ class Validator:
472
538
  wrong_fields.append("batch_id")
473
539
 
474
540
  if wrong_fields:
475
- return [err.InvalidFieldTypeConversion(wrong_fields, "string")]
541
+ return [InvalidFieldTypeConversion(wrong_fields, "string")]
476
542
  return []
477
543
 
478
544
  @staticmethod
479
545
  def _check_field_type_embedding_features_column_names(
480
546
  schema: Schema,
481
- ) -> list[err.InvalidFieldTypeEmbeddingFeatures]:
547
+ ) -> list[InvalidFieldTypeEmbeddingFeatures]:
482
548
  if schema.embedding_feature_column_names is not None:
483
549
  if not isinstance(schema.embedding_feature_column_names, dict):
484
- return [err.InvalidFieldTypeEmbeddingFeatures()]
550
+ return [InvalidFieldTypeEmbeddingFeatures()]
485
551
  for k, v in schema.embedding_feature_column_names.items():
486
552
  if not isinstance(k, str) or not isinstance(
487
553
  v, EmbeddingColumnNames
488
554
  ):
489
- return [err.InvalidFieldTypeEmbeddingFeatures()]
555
+ return [InvalidFieldTypeEmbeddingFeatures()]
490
556
  return []
491
557
 
492
558
  @staticmethod
493
559
  def _check_field_type_prompt_response(
494
560
  schema: Schema,
495
- ) -> list[err.InvalidFieldTypePromptResponse]:
496
- errors = []
561
+ ) -> list[InvalidFieldTypePromptResponse]:
562
+ errors: list[InvalidFieldTypePromptResponse] = []
497
563
  if schema.prompt_column_names is not None and not isinstance(
498
564
  schema.prompt_column_names, (str, EmbeddingColumnNames)
499
565
  ):
500
- errors.append(
501
- err.InvalidFieldTypePromptResponse("prompt_column_names")
502
- )
566
+ errors.append(InvalidFieldTypePromptResponse("prompt_column_names"))
503
567
  if schema.response_column_names is not None and not isinstance(
504
568
  schema.response_column_names, (str, EmbeddingColumnNames)
505
569
  ):
506
570
  errors.append(
507
- err.InvalidFieldTypePromptResponse("response_column_names")
571
+ InvalidFieldTypePromptResponse("response_column_names")
508
572
  )
509
573
  return errors
510
574
 
511
575
  @staticmethod
512
576
  def _check_field_type_prompt_templates(
513
577
  schema: Schema,
514
- ) -> list[err.InvalidFieldTypePromptTemplates]:
578
+ ) -> list[InvalidFieldTypePromptTemplates]:
515
579
  if schema.prompt_template_column_names is not None and not isinstance(
516
580
  schema.prompt_template_column_names, PromptTemplateColumnNames
517
581
  ):
518
- return [err.InvalidFieldTypePromptTemplates()]
582
+ return [InvalidFieldTypePromptTemplates()]
519
583
  return []
520
584
 
521
585
  @staticmethod
522
586
  def _check_field_type_llm_config(
523
587
  dataframe: pd.DataFrame,
524
588
  schema: Schema,
525
- ) -> list[err.InvalidFieldTypeLlmConfig | err.InvalidTypeColumns]:
589
+ ) -> list[InvalidFieldTypeLlmConfig | InvalidTypeColumns]:
526
590
  if schema.llm_config_column_names is None:
527
591
  return []
528
592
  if not isinstance(schema.llm_config_column_names, LLMConfigColumnNames):
529
- return [err.InvalidFieldTypeLlmConfig()]
593
+ return [InvalidFieldTypeLlmConfig()]
530
594
  col = schema.llm_config_column_names.params_column_name
531
595
  # We check the types if the columns are in the dataframe.
532
596
  # If the columns are reflected in the schema but not present
@@ -545,7 +609,7 @@ class Validator:
545
609
  )
546
610
  ):
547
611
  return [
548
- err.InvalidTypeColumns(
612
+ InvalidTypeColumns(
549
613
  wrong_type_columns=[col],
550
614
  expected_types=[
551
615
  "Dict[str, (bool, int, float, string or list[str])]"
@@ -557,9 +621,9 @@ class Validator:
557
621
  @staticmethod
558
622
  def _check_invalid_index(
559
623
  dataframe: pd.DataFrame,
560
- ) -> list[err.InvalidDataFrameIndex]:
624
+ ) -> list[InvalidDataFrameIndex]:
561
625
  if (dataframe.index != dataframe.reset_index(drop=True).index).any():
562
- return [err.InvalidDataFrameIndex()]
626
+ return [InvalidDataFrameIndex()]
563
627
  return []
564
628
 
565
629
  # ----------------
@@ -571,7 +635,7 @@ class Validator:
571
635
  model_type: ModelTypes,
572
636
  metric_families: list[Metrics] | None,
573
637
  schema: Schema,
574
- ) -> list[err.ValidationError]:
638
+ ) -> list[ValidationError]:
575
639
  if metric_families is None:
576
640
  return []
577
641
 
@@ -597,7 +661,7 @@ class Validator:
597
661
  if not valid_combination:
598
662
  # Model type + metrics combination is not valid.
599
663
  return [
600
- err.InvalidModelTypeAndMetricsCombination(
664
+ InvalidModelTypeAndMetricsCombination(
601
665
  model_type,
602
666
  metric_families,
603
667
  suggested_model_metric_combinations,
@@ -606,7 +670,7 @@ class Validator:
606
670
  if missing_columns:
607
671
  # For this model type, the schema is missing columns required for the requested metrics.
608
672
  return [
609
- err.MissingRequiredColumnsMetricsValidation(
673
+ MissingRequiredColumnsMetricsValidation(
610
674
  model_type, metric_families, missing_columns
611
675
  )
612
676
  ]
@@ -619,7 +683,7 @@ class Validator:
619
683
  schema: Schema,
620
684
  required_columns_map: list[dict[str, Any]],
621
685
  ) -> tuple[bool, list[str], list[list[str]]]:
622
- missing_columns = []
686
+ missing_columns: list[str] = []
623
687
  for item in required_columns_map:
624
688
  if model_type.name.lower() == item.get("external_model_type"):
625
689
  is_valid_combination = False
@@ -674,7 +738,7 @@ class Validator:
674
738
  @staticmethod
675
739
  def _check_existence_prediction_id_column_delayed_schema(
676
740
  schema: Schema, model_type: ModelTypes
677
- ) -> list[err.MissingPredictionIdColumnForDelayedRecords]:
741
+ ) -> list[MissingPredictionIdColumnForDelayedRecords]:
678
742
  if schema.prediction_id_column_name is not None:
679
743
  return []
680
744
  # TODO: Revise logic once prediction_label column addition (for generative models)
@@ -683,7 +747,7 @@ class Validator:
683
747
  # We skip GENERATIVE model types since they are assigned a default
684
748
  # prediction label column with values equal 1
685
749
  return [
686
- err.MissingPredictionIdColumnForDelayedRecords(
750
+ MissingPredictionIdColumnForDelayedRecords(
687
751
  schema.has_actual_columns(),
688
752
  schema.has_feature_importance_columns(),
689
753
  )
@@ -705,7 +769,7 @@ class Validator:
705
769
  def _check_missing_columns(
706
770
  dataframe: pd.DataFrame,
707
771
  schema: BaseSchema,
708
- ) -> list[err.MissingColumns]:
772
+ ) -> list[MissingColumns]:
709
773
  if isinstance(schema, CorpusSchema):
710
774
  return Validator._check_missing_columns_corpus_schema(
711
775
  dataframe, schema
@@ -718,7 +782,7 @@ class Validator:
718
782
  def _check_missing_columns_schema(
719
783
  dataframe: pd.DataFrame,
720
784
  schema: Schema,
721
- ) -> list[err.MissingColumns]:
785
+ ) -> list[MissingColumns]:
722
786
  # converting to a set first makes the checks run a lot faster
723
787
  existing_columns = set(dataframe.columns)
724
788
  missing_columns = []
@@ -733,7 +797,9 @@ class Validator:
733
797
  missing_columns.extend(
734
798
  [
735
799
  col
736
- for col in schema.feature_column_names
800
+ for col in _normalize_column_names(
801
+ schema.feature_column_names
802
+ )
737
803
  if col not in existing_columns
738
804
  ]
739
805
  )
@@ -768,7 +834,7 @@ class Validator:
768
834
  missing_columns.extend(
769
835
  [
770
836
  col
771
- for col in schema.tag_column_names
837
+ for col in _normalize_column_names(schema.tag_column_names)
772
838
  if col not in existing_columns
773
839
  ]
774
840
  )
@@ -901,14 +967,14 @@ class Validator:
901
967
  )
902
968
 
903
969
  if missing_columns:
904
- return [err.MissingColumns(missing_columns)]
970
+ return [MissingColumns(missing_columns)]
905
971
  return []
906
972
 
907
973
  @staticmethod
908
974
  def _check_missing_columns_corpus_schema(
909
975
  dataframe: pd.DataFrame,
910
976
  schema: CorpusSchema,
911
- ) -> list[err.MissingColumns]:
977
+ ) -> list[MissingColumns]:
912
978
  # converting to a set first makes the checks run a lot faster
913
979
  existing_columns = set(dataframe.columns)
914
980
  missing_columns = []
@@ -958,19 +1024,19 @@ class Validator:
958
1024
  schema.document_text_embedding_column_names.link_to_data_column_name
959
1025
  )
960
1026
  if missing_columns:
961
- return [err.MissingColumns(missing_columns)]
1027
+ return [MissingColumns(missing_columns)]
962
1028
  return []
963
1029
 
964
1030
  @staticmethod
965
1031
  def _check_valid_schema_type(
966
1032
  schema: BaseSchema,
967
1033
  environment: Environments,
968
- ) -> list[err.InvalidSchemaType]:
1034
+ ) -> list[InvalidSchemaType]:
969
1035
  if environment == Environments.CORPUS and not (
970
1036
  isinstance(schema, CorpusSchema)
971
1037
  ):
972
1038
  return [
973
- err.InvalidSchemaType(
1039
+ InvalidSchemaType(
974
1040
  schema_type=str(type(schema)), environment=environment
975
1041
  )
976
1042
  ]
@@ -978,7 +1044,7 @@ class Validator:
978
1044
  schema, CorpusSchema
979
1045
  ):
980
1046
  return [
981
- err.InvalidSchemaType(
1047
+ InvalidSchemaType(
982
1048
  schema_type=str(type(schema)), environment=environment
983
1049
  )
984
1050
  ]
@@ -987,26 +1053,23 @@ class Validator:
987
1053
  @staticmethod
988
1054
  def _check_invalid_shap_suffix(
989
1055
  schema: Schema,
990
- ) -> list[err.InvalidShapSuffix]:
1056
+ ) -> list[InvalidShapSuffix]:
991
1057
  invalid_column_names = set()
992
1058
 
993
1059
  if schema.feature_column_names is not None:
994
- for col in schema.feature_column_names:
1060
+ for col in _normalize_column_names(schema.feature_column_names):
995
1061
  if isinstance(col, str) and col.endswith("_shap"):
996
1062
  invalid_column_names.add(col)
997
1063
 
998
1064
  if schema.embedding_feature_column_names is not None:
999
1065
  for emb_col_names in schema.embedding_feature_column_names.values():
1000
- for col in emb_col_names:
1001
- if (
1002
- col is not None
1003
- and isinstance(col, str)
1004
- and col.endswith("_shap")
1005
- ):
1066
+ cols_list = [c for c in emb_col_names if c is not None]
1067
+ for col in cols_list:
1068
+ if col.endswith("_shap"):
1006
1069
  invalid_column_names.add(col)
1007
1070
 
1008
1071
  if schema.tag_column_names is not None:
1009
- for col in schema.tag_column_names:
1072
+ for col in _normalize_column_names(schema.tag_column_names):
1010
1073
  if isinstance(col, str) and col.endswith("_shap"):
1011
1074
  invalid_column_names.add(col)
1012
1075
 
@@ -1016,14 +1079,14 @@ class Validator:
1016
1079
  invalid_column_names.add(col)
1017
1080
 
1018
1081
  if invalid_column_names:
1019
- return [err.InvalidShapSuffix(invalid_column_names)]
1082
+ return [InvalidShapSuffix(invalid_column_names)]
1020
1083
  return []
1021
1084
 
1022
1085
  @staticmethod
1023
1086
  def _check_reserved_columns(
1024
1087
  schema: BaseSchema,
1025
1088
  model_type: ModelTypes,
1026
- ) -> list[err.ReservedColumns]:
1089
+ ) -> list[ReservedColumns]:
1027
1090
  if isinstance(schema, CorpusSchema):
1028
1091
  return []
1029
1092
  if isinstance(schema, Schema):
@@ -1127,29 +1190,29 @@ class Validator:
1127
1190
  )
1128
1191
 
1129
1192
  if reserved_columns:
1130
- return [err.ReservedColumns(reserved_columns)]
1193
+ return [ReservedColumns(reserved_columns)]
1131
1194
  return []
1132
1195
 
1133
1196
  @staticmethod
1134
1197
  def _check_invalid_model_id(
1135
1198
  model_id: str | None,
1136
- ) -> list[err.InvalidModelId]:
1199
+ ) -> list[InvalidModelId]:
1137
1200
  # assume it's been coerced to string beforehand
1138
1201
  if (not isinstance(model_id, str)) or len(model_id.strip()) == 0:
1139
- return [err.InvalidModelId()]
1202
+ return [InvalidModelId()]
1140
1203
  return []
1141
1204
 
1142
1205
  @staticmethod
1143
1206
  def _check_invalid_model_version(
1144
1207
  model_version: str | None = None,
1145
- ) -> list[err.InvalidModelVersion]:
1208
+ ) -> list[InvalidModelVersion]:
1146
1209
  if model_version is None:
1147
1210
  return []
1148
1211
  if (
1149
1212
  not isinstance(model_version, str)
1150
1213
  or len(model_version.strip()) == 0
1151
1214
  ):
1152
- return [err.InvalidModelVersion()]
1215
+ return [InvalidModelVersion()]
1153
1216
 
1154
1217
  return []
1155
1218
 
@@ -1157,35 +1220,35 @@ class Validator:
1157
1220
  def _check_invalid_batch_id(
1158
1221
  batch_id: str | None,
1159
1222
  environment: Environments,
1160
- ) -> list[err.InvalidBatchId]:
1223
+ ) -> list[InvalidBatchId]:
1161
1224
  # assume it's been coerced to string beforehand
1162
1225
  if environment in (Environments.VALIDATION,) and (
1163
1226
  (not isinstance(batch_id, str)) or len(batch_id.strip()) == 0
1164
1227
  ):
1165
- return [err.InvalidBatchId()]
1228
+ return [InvalidBatchId()]
1166
1229
  return []
1167
1230
 
1168
1231
  @staticmethod
1169
1232
  def _check_invalid_model_type(
1170
1233
  model_type: ModelTypes,
1171
- ) -> list[err.InvalidModelType]:
1234
+ ) -> list[InvalidModelType]:
1172
1235
  if model_type in (mt for mt in ModelTypes):
1173
1236
  return []
1174
- return [err.InvalidModelType()]
1237
+ return [InvalidModelType()]
1175
1238
 
1176
1239
  @staticmethod
1177
1240
  def _check_invalid_environment(
1178
1241
  environment: Environments,
1179
- ) -> list[err.InvalidEnvironment]:
1242
+ ) -> list[InvalidEnvironment]:
1180
1243
  if environment in (env for env in Environments):
1181
1244
  return []
1182
- return [err.InvalidEnvironment()]
1245
+ return [InvalidEnvironment()]
1183
1246
 
1184
1247
  @staticmethod
1185
1248
  def _check_existence_preprod_pred_act_score_or_label(
1186
1249
  schema: Schema,
1187
1250
  environment: Environments,
1188
- ) -> list[err.MissingPreprodPredActNumericAndCategorical]:
1251
+ ) -> list[MissingPreprodPredActNumericAndCategorical]:
1189
1252
  if environment in (Environments.VALIDATION, Environments.TRAINING) and (
1190
1253
  (
1191
1254
  schema.prediction_label_column_name is None
@@ -1196,13 +1259,13 @@ class Validator:
1196
1259
  and schema.actual_score_column_name is None
1197
1260
  )
1198
1261
  ):
1199
- return [err.MissingPreprodPredActNumericAndCategorical()]
1262
+ return [MissingPreprodPredActNumericAndCategorical()]
1200
1263
  return []
1201
1264
 
1202
1265
  @staticmethod
1203
1266
  def _check_exactly_one_cv_column_type(
1204
1267
  schema: Schema, environment: Environments
1205
- ) -> list[err.MultipleCVPredAct | err.MissingCVPredAct]:
1268
+ ) -> list[MultipleCVPredAct | MissingCVPredAct]:
1206
1269
  # Checks that the required prediction/actual columns are given in the schema depending on
1207
1270
  # the environment, for object detection models. There should be exactly one of
1208
1271
  # object detection, semantic segmentation, or instance segmentation columns.
@@ -1232,9 +1295,9 @@ class Validator:
1232
1295
  )
1233
1296
 
1234
1297
  if cv_types_count == 0:
1235
- return [err.MissingCVPredAct(environment)]
1298
+ return [MissingCVPredAct(environment)]
1236
1299
  if cv_types_count > 1:
1237
- return [err.MultipleCVPredAct(environment)]
1300
+ return [MultipleCVPredAct(environment)]
1238
1301
 
1239
1302
  elif environment in (
1240
1303
  Environments.TRAINING,
@@ -1265,16 +1328,16 @@ class Validator:
1265
1328
  )
1266
1329
 
1267
1330
  if cv_types_count == 0:
1268
- return [err.MissingCVPredAct(environment)]
1331
+ return [MissingCVPredAct(environment)]
1269
1332
  if cv_types_count > 1:
1270
- return [err.MultipleCVPredAct(environment)]
1333
+ return [MultipleCVPredAct(environment)]
1271
1334
 
1272
1335
  return []
1273
1336
 
1274
1337
  @staticmethod
1275
1338
  def _check_missing_object_detection_columns(
1276
1339
  schema: Schema, model_type: ModelTypes
1277
- ) -> list[err.InvalidPredActCVColumnNamesForModelType]:
1340
+ ) -> list[InvalidPredActCVColumnNamesForModelType]:
1278
1341
  # Checks that models that are not Object Detection models don't have, in the schema, the
1279
1342
  # object detection, semantic segmentation, or instance segmentation dedicated prediction/actual
1280
1343
  # column names
@@ -1286,13 +1349,13 @@ class Validator:
1286
1349
  or schema.instance_segmentation_prediction_column_names is not None
1287
1350
  or schema.instance_segmentation_actual_column_names is not None
1288
1351
  ):
1289
- return [err.InvalidPredActCVColumnNamesForModelType(model_type)]
1352
+ return [InvalidPredActCVColumnNamesForModelType(model_type)]
1290
1353
  return []
1291
1354
 
1292
1355
  @staticmethod
1293
1356
  def _check_missing_non_object_detection_columns(
1294
1357
  schema: Schema, model_type: ModelTypes
1295
- ) -> list[err.InvalidPredActColumnNamesForModelType]:
1358
+ ) -> list[InvalidPredActColumnNamesForModelType]:
1296
1359
  # Checks that object detection models don't have, in the schema, the columns reserved for
1297
1360
  # other model types
1298
1361
  columns_to_check = (
@@ -1317,7 +1380,7 @@ class Validator:
1317
1380
  "instance_segmentation_actual_column_names",
1318
1381
  ]
1319
1382
  return [
1320
- err.InvalidPredActColumnNamesForModelType(
1383
+ InvalidPredActColumnNamesForModelType(
1321
1384
  model_type, allowed_cols, wrong_cols
1322
1385
  )
1323
1386
  ]
@@ -1326,7 +1389,7 @@ class Validator:
1326
1389
  @staticmethod
1327
1390
  def _check_missing_multi_class_columns(
1328
1391
  schema: Schema, model_type: ModelTypes
1329
- ) -> list[err.InvalidPredActColumnNamesForModelType]:
1392
+ ) -> list[InvalidPredActColumnNamesForModelType]:
1330
1393
  # Checks that models that are not Multi Class models don't have, in the schema, the
1331
1394
  # multi class dedicated threshold column
1332
1395
  if (
@@ -1334,9 +1397,9 @@ class Validator:
1334
1397
  and schema.multi_class_threshold_scores_column_name is not None
1335
1398
  ):
1336
1399
  return [
1337
- err.InvalidPredActColumnNamesForModelType(
1400
+ InvalidPredActColumnNamesForModelType(
1338
1401
  model_type,
1339
- None,
1402
+ None, # type: ignore[arg-type]
1340
1403
  [schema.multi_class_threshold_scores_column_name],
1341
1404
  )
1342
1405
  ]
@@ -1345,7 +1408,7 @@ class Validator:
1345
1408
  @staticmethod
1346
1409
  def _check_existing_multi_class_columns(
1347
1410
  schema: Schema,
1348
- ) -> list[err.MissingReqPredActColumnNamesForMultiClass]:
1411
+ ) -> list[MissingReqPredActColumnNamesForMultiClass]:
1349
1412
  # Checks that models that are Multi Class models have, in the schema, the
1350
1413
  # required prediction score or actual score columns
1351
1414
  if (
@@ -1355,13 +1418,13 @@ class Validator:
1355
1418
  schema.multi_class_threshold_scores_column_name is not None
1356
1419
  and schema.prediction_score_column_name is None
1357
1420
  ):
1358
- return [err.MissingReqPredActColumnNamesForMultiClass()]
1421
+ return [MissingReqPredActColumnNamesForMultiClass()]
1359
1422
  return []
1360
1423
 
1361
1424
  @staticmethod
1362
1425
  def _check_missing_non_multi_class_columns(
1363
1426
  schema: Schema, model_type: ModelTypes
1364
- ) -> list[err.InvalidPredActColumnNamesForModelType]:
1427
+ ) -> list[InvalidPredActColumnNamesForModelType]:
1365
1428
  # Checks that multi class models don't have, in the schema, the columns reserved for
1366
1429
  # other model types
1367
1430
  columns_to_check = (
@@ -1387,8 +1450,10 @@ class Validator:
1387
1450
  "actual_score_column_name",
1388
1451
  ]
1389
1452
  return [
1390
- err.InvalidPredActColumnNamesForModelType(
1391
- model_type, allowed_cols, wrong_cols
1453
+ InvalidPredActColumnNamesForModelType(
1454
+ model_type,
1455
+ allowed_cols,
1456
+ wrong_cols, # type: ignore[arg-type]
1392
1457
  )
1393
1458
  ]
1394
1459
  return []
@@ -1397,17 +1462,17 @@ class Validator:
1397
1462
  def _check_existence_preprod_act(
1398
1463
  schema: Schema,
1399
1464
  environment: Environments,
1400
- ) -> list[err.MissingPreprodAct]:
1465
+ ) -> list[MissingPreprodAct]:
1401
1466
  if environment in (Environments.VALIDATION, Environments.TRAINING) and (
1402
1467
  schema.actual_label_column_name is None
1403
1468
  ):
1404
- return [err.MissingPreprodAct()]
1469
+ return [MissingPreprodAct()]
1405
1470
  return []
1406
1471
 
1407
1472
  @staticmethod
1408
1473
  def _check_existence_group_id_rank_category_relevance(
1409
1474
  schema: Schema,
1410
- ) -> list[err.MissingRequiredColumnsForRankingModel]:
1475
+ ) -> list[MissingRequiredColumnsForRankingModel]:
1411
1476
  # prediction_group_id and rank columns are required as ranking prediction columns.
1412
1477
  ranking_prediction_cols = (
1413
1478
  schema.prediction_label_column_name,
@@ -1425,13 +1490,13 @@ class Validator:
1425
1490
  # If there is prediction information (not delayed actuals),
1426
1491
  # there must exist a rank and prediction group id columns
1427
1492
  if has_prediction_info and any(col is None for col in required):
1428
- return [err.MissingRequiredColumnsForRankingModel()]
1493
+ return [MissingRequiredColumnsForRankingModel()]
1429
1494
  return []
1430
1495
 
1431
1496
  @staticmethod
1432
1497
  def _check_dataframe_for_duplicate_columns(
1433
1498
  schema: BaseSchema, dataframe: pd.DataFrame
1434
- ) -> list[err.DuplicateColumnsInDataframe]:
1499
+ ) -> list[DuplicateColumnsInDataframe]:
1435
1500
  # Get the columns used in the schema
1436
1501
  schema_col_used = schema.get_used_columns()
1437
1502
  # Get the duplicated column names from the dataframe
@@ -1441,17 +1506,17 @@ class Validator:
1441
1506
  col for col in duplicate_columns if col in schema_col_used
1442
1507
  ]
1443
1508
  if schema_duplicate_cols:
1444
- return [err.DuplicateColumnsInDataframe(schema_duplicate_cols)]
1509
+ return [DuplicateColumnsInDataframe(schema_duplicate_cols)]
1445
1510
  return []
1446
1511
 
1447
1512
  @staticmethod
1448
1513
  def _check_invalid_number_of_embeddings(
1449
1514
  schema: Schema,
1450
- ) -> list[err.InvalidNumberOfEmbeddings]:
1515
+ ) -> list[InvalidNumberOfEmbeddings]:
1451
1516
  if schema.embedding_feature_column_names is not None:
1452
1517
  number_of_embeddings = len(schema.embedding_feature_column_names)
1453
1518
  if number_of_embeddings > MAX_NUMBER_OF_EMBEDDINGS:
1454
- return [err.InvalidNumberOfEmbeddings(number_of_embeddings)]
1519
+ return [InvalidNumberOfEmbeddings(number_of_embeddings)]
1455
1520
  return []
1456
1521
 
1457
1522
  # -----------
@@ -1461,7 +1526,7 @@ class Validator:
1461
1526
  @staticmethod
1462
1527
  def _check_type_prediction_id(
1463
1528
  schema: Schema, column_types: dict[str, Any]
1464
- ) -> list[err.InvalidType]:
1529
+ ) -> list[InvalidType]:
1465
1530
  col = schema.prediction_id_column_name
1466
1531
  if col in column_types:
1467
1532
  # should mirror server side
@@ -1474,7 +1539,7 @@ class Validator:
1474
1539
  )
1475
1540
  if column_types[col] not in allowed_datatypes:
1476
1541
  return [
1477
- err.InvalidType(
1542
+ InvalidType(
1478
1543
  "Prediction IDs",
1479
1544
  expected_types=["str", "int"],
1480
1545
  found_data_type=column_types[col],
@@ -1485,7 +1550,7 @@ class Validator:
1485
1550
  @staticmethod
1486
1551
  def _check_type_timestamp(
1487
1552
  schema: Schema, column_types: dict[str, Any]
1488
- ) -> list[err.InvalidType]:
1553
+ ) -> list[InvalidType]:
1489
1554
  col = schema.timestamp_column_name
1490
1555
  if col in column_types:
1491
1556
  # should mirror server side
@@ -1501,7 +1566,7 @@ class Validator:
1501
1566
  and t not in allowed_datatypes
1502
1567
  ):
1503
1568
  return [
1504
- err.InvalidType(
1569
+ InvalidType(
1505
1570
  "Prediction timestamp",
1506
1571
  expected_types=["Date", "Timestamp", "int", "float"],
1507
1572
  found_data_type=t,
@@ -1512,7 +1577,7 @@ class Validator:
1512
1577
  @staticmethod
1513
1578
  def _check_type_features(
1514
1579
  schema: Schema, column_types: dict[str, Any]
1515
- ) -> list[err.InvalidTypeFeatures]:
1580
+ ) -> list[InvalidTypeFeatures]:
1516
1581
  if schema.feature_column_names is not None:
1517
1582
  # should mirror server side
1518
1583
  allowed_datatypes = (
@@ -1529,13 +1594,13 @@ class Validator:
1529
1594
  )
1530
1595
  wrong_type_cols = [
1531
1596
  col
1532
- for col in schema.feature_column_names
1597
+ for col in _normalize_column_names(schema.feature_column_names)
1533
1598
  if col in column_types
1534
1599
  and column_types[col] not in allowed_datatypes
1535
1600
  ]
1536
1601
  if wrong_type_cols:
1537
1602
  return [
1538
- err.InvalidTypeFeatures(
1603
+ InvalidTypeFeatures(
1539
1604
  wrong_type_cols,
1540
1605
  expected_types=[
1541
1606
  "float",
@@ -1551,7 +1616,7 @@ class Validator:
1551
1616
  @staticmethod
1552
1617
  def _check_type_embedding_features(
1553
1618
  schema: Schema, column_types: dict[str, Any]
1554
- ) -> list[err.InvalidTypeFeatures]:
1619
+ ) -> list[InvalidTypeFeatures]:
1555
1620
  if schema.embedding_feature_column_names is not None:
1556
1621
  # should mirror server side
1557
1622
  allowed_vector_datatypes = (
@@ -1599,20 +1664,20 @@ class Validator:
1599
1664
  wrong_type_embedding_errors = []
1600
1665
  if wrong_type_vector_columns:
1601
1666
  wrong_type_embedding_errors.append(
1602
- err.InvalidTypeFeatures(
1667
+ InvalidTypeFeatures(
1603
1668
  wrong_type_vector_columns,
1604
1669
  expected_types=["list[float], np.array[float]"],
1605
1670
  )
1606
1671
  )
1607
1672
  if wrong_type_data_columns:
1608
1673
  wrong_type_embedding_errors.append(
1609
- err.InvalidTypeFeatures(
1674
+ InvalidTypeFeatures(
1610
1675
  wrong_type_data_columns, expected_types=["list[string]"]
1611
1676
  )
1612
1677
  )
1613
1678
  if wrong_type_link_to_data_columns:
1614
1679
  wrong_type_embedding_errors.append(
1615
- err.InvalidTypeFeatures(
1680
+ InvalidTypeFeatures(
1616
1681
  wrong_type_link_to_data_columns,
1617
1682
  expected_types=["string"],
1618
1683
  )
@@ -1627,7 +1692,7 @@ class Validator:
1627
1692
  @staticmethod
1628
1693
  def _check_type_tags(
1629
1694
  schema: Schema, column_types: dict[str, Any]
1630
- ) -> list[err.InvalidTypeTags]:
1695
+ ) -> list[InvalidTypeTags]:
1631
1696
  if schema.tag_column_names is not None:
1632
1697
  # should mirror server side
1633
1698
  allowed_datatypes = (
@@ -1643,13 +1708,13 @@ class Validator:
1643
1708
  )
1644
1709
  wrong_type_cols = [
1645
1710
  col
1646
- for col in schema.tag_column_names
1711
+ for col in _normalize_column_names(schema.tag_column_names)
1647
1712
  if col in column_types
1648
1713
  and column_types[col] not in allowed_datatypes
1649
1714
  ]
1650
1715
  if wrong_type_cols:
1651
1716
  return [
1652
- err.InvalidTypeTags(
1717
+ InvalidTypeTags(
1653
1718
  wrong_type_cols, ["float", "int", "bool", "str"]
1654
1719
  )
1655
1720
  ]
@@ -1658,7 +1723,7 @@ class Validator:
1658
1723
  @staticmethod
1659
1724
  def _check_type_shap_values(
1660
1725
  schema: Schema, column_types: dict[str, Any]
1661
- ) -> list[err.InvalidTypeShapValues]:
1726
+ ) -> list[InvalidTypeShapValues]:
1662
1727
  if schema.shap_values_column_names is not None:
1663
1728
  # should mirror server side
1664
1729
  allowed_datatypes = (
@@ -1675,7 +1740,7 @@ class Validator:
1675
1740
  ]
1676
1741
  if wrong_type_cols:
1677
1742
  return [
1678
- err.InvalidTypeShapValues(
1743
+ InvalidTypeShapValues(
1679
1744
  wrong_type_cols, expected_types=["float", "int"]
1680
1745
  )
1681
1746
  ]
@@ -1684,12 +1749,13 @@ class Validator:
1684
1749
  @staticmethod
1685
1750
  def _check_type_pred_act_labels(
1686
1751
  model_type: ModelTypes, schema: Schema, column_types: dict[str, Any]
1687
- ) -> list[err.InvalidType]:
1752
+ ) -> list[InvalidType]:
1688
1753
  errors = []
1689
1754
  columns = (
1690
1755
  ("Prediction labels", schema.prediction_label_column_name),
1691
1756
  ("Actual labels", schema.actual_label_column_name),
1692
1757
  )
1758
+ allowed_datatypes: tuple[Any, ...]
1693
1759
  if (
1694
1760
  model_type in CATEGORICAL_MODEL_TYPES
1695
1761
  or model_type == ModelTypes.GENERATIVE_LLM
@@ -1713,7 +1779,7 @@ class Validator:
1713
1779
  and column_types[col] not in allowed_datatypes
1714
1780
  ):
1715
1781
  errors.append(
1716
- err.InvalidType(
1782
+ InvalidType(
1717
1783
  name,
1718
1784
  expected_types=["float", "int", "bool", "str"],
1719
1785
  found_data_type=column_types[col],
@@ -1737,7 +1803,7 @@ class Validator:
1737
1803
  and column_types[col] not in allowed_datatypes
1738
1804
  ):
1739
1805
  errors.append(
1740
- err.InvalidType(
1806
+ InvalidType(
1741
1807
  name,
1742
1808
  expected_types=["float", "int"],
1743
1809
  found_data_type=column_types[col],
@@ -1748,7 +1814,7 @@ class Validator:
1748
1814
  @staticmethod
1749
1815
  def _check_type_pred_act_scores(
1750
1816
  model_type: ModelTypes, schema: Schema, column_types: dict[str, Any]
1751
- ) -> list[err.InvalidType]:
1817
+ ) -> list[InvalidType]:
1752
1818
  errors = []
1753
1819
  columns = (
1754
1820
  ("Prediction scores", schema.prediction_score_column_name),
@@ -1777,7 +1843,7 @@ class Validator:
1777
1843
  and column_types[col] not in allowed_datatypes
1778
1844
  ):
1779
1845
  errors.append(
1780
- err.InvalidType(
1846
+ InvalidType(
1781
1847
  name,
1782
1848
  expected_types=["float", "int"],
1783
1849
  found_data_type=column_types[col],
@@ -1788,7 +1854,7 @@ class Validator:
1788
1854
  @staticmethod
1789
1855
  def _check_type_multi_class_pred_threshold_act_scores(
1790
1856
  schema: Schema, column_types: dict[str, Any]
1791
- ) -> list[err.InvalidType]:
1857
+ ) -> list[InvalidType]:
1792
1858
  """Check type for prediction / threshold / actual scores for multiclass model.
1793
1859
 
1794
1860
  Expect the scores to be a list of pyarrow structs that contains field
@@ -1834,7 +1900,7 @@ class Validator:
1834
1900
  and column_types[col] not in allowed_class_score_map_datatypes
1835
1901
  ):
1836
1902
  errors.append(
1837
- err.InvalidType(
1903
+ InvalidType(
1838
1904
  name,
1839
1905
  expected_types=[
1840
1906
  "List[Dict{class_name: str, score: int}]",
@@ -1848,7 +1914,7 @@ class Validator:
1848
1914
  @staticmethod
1849
1915
  def _check_type_prompt_response(
1850
1916
  schema: Schema, column_types: dict[str, Any]
1851
- ) -> list[err.InvalidTypeColumns]:
1917
+ ) -> list[InvalidTypeColumns]:
1852
1918
  fields_to_check = []
1853
1919
  if schema.prompt_column_names is not None:
1854
1920
  fields_to_check.append(schema.prompt_column_names)
@@ -1895,20 +1961,20 @@ class Validator:
1895
1961
  wrong_type_col_errors = []
1896
1962
  if wrong_type_vector_columns:
1897
1963
  wrong_type_col_errors.append(
1898
- err.InvalidTypeColumns(
1964
+ InvalidTypeColumns(
1899
1965
  wrong_type_vector_columns,
1900
1966
  expected_types=["list[float], np.array[float]"],
1901
1967
  )
1902
1968
  )
1903
1969
  if wrong_type_data_columns:
1904
1970
  wrong_type_col_errors.append(
1905
- err.InvalidTypeColumns(
1971
+ InvalidTypeColumns(
1906
1972
  wrong_type_data_columns, expected_types=["str, list[str]"]
1907
1973
  )
1908
1974
  )
1909
1975
  if wrong_type_str_columns:
1910
1976
  wrong_type_col_errors.append(
1911
- err.InvalidTypeColumns(
1977
+ InvalidTypeColumns(
1912
1978
  wrong_type_str_columns, expected_types=["str"]
1913
1979
  )
1914
1980
  )
@@ -1918,7 +1984,7 @@ class Validator:
1918
1984
  @staticmethod
1919
1985
  def _check_type_llm_prompt_templates(
1920
1986
  schema: Schema, column_types: dict[str, Any]
1921
- ) -> list[err.InvalidTypeColumns]:
1987
+ ) -> list[InvalidTypeColumns]:
1922
1988
  if schema.prompt_template_column_names is None:
1923
1989
  return []
1924
1990
 
@@ -1949,7 +2015,7 @@ class Validator:
1949
2015
  # Return errors if any
1950
2016
  if wrong_type_cols:
1951
2017
  return [
1952
- err.InvalidTypeColumns(
2018
+ InvalidTypeColumns(
1953
2019
  wrong_type_columns=wrong_type_cols,
1954
2020
  expected_types=["string"],
1955
2021
  )
@@ -1959,7 +2025,7 @@ class Validator:
1959
2025
  @staticmethod
1960
2026
  def _check_type_llm_config(
1961
2027
  schema: Schema, column_types: dict[str, Any]
1962
- ) -> list[err.InvalidTypeColumns]:
2028
+ ) -> list[InvalidTypeColumns]:
1963
2029
  if schema.llm_config_column_names is None:
1964
2030
  return []
1965
2031
 
@@ -1986,7 +2052,7 @@ class Validator:
1986
2052
  # Return errors if any
1987
2053
  if wrong_type_cols:
1988
2054
  return [
1989
- err.InvalidTypeColumns(
2055
+ InvalidTypeColumns(
1990
2056
  wrong_type_columns=wrong_type_cols,
1991
2057
  expected_types=["string"],
1992
2058
  )
@@ -1996,7 +2062,7 @@ class Validator:
1996
2062
  @staticmethod
1997
2063
  def _check_type_llm_run_metadata(
1998
2064
  schema: Schema, column_types: dict[str, Any]
1999
- ) -> list[err.InvalidTypeColumns]:
2065
+ ) -> list[InvalidTypeColumns]:
2000
2066
  if schema.llm_run_metadata_column_names is None:
2001
2067
  return []
2002
2068
 
@@ -2011,10 +2077,8 @@ class Validator:
2011
2077
  )
2012
2078
  wrong_type_cols = []
2013
2079
  if schema.tag_column_names:
2014
- if (
2015
- LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME
2016
- in schema.tag_column_names
2017
- ) and (
2080
+ tag_cols = _normalize_column_names(schema.tag_column_names)
2081
+ if (LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME in tag_cols) and (
2018
2082
  LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME in column_types
2019
2083
  and column_types[LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME]
2020
2084
  not in allowed_datatypes
@@ -2022,10 +2086,7 @@ class Validator:
2022
2086
  wrong_type_cols.append(
2023
2087
  schema.llm_run_metadata_column_names.total_token_count_column_name
2024
2088
  )
2025
- if (
2026
- LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME
2027
- in schema.tag_column_names
2028
- ) and (
2089
+ if (LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME in tag_cols) and (
2029
2090
  LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME in column_types
2030
2091
  and column_types[LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME]
2031
2092
  not in allowed_datatypes
@@ -2034,8 +2095,7 @@ class Validator:
2034
2095
  schema.llm_run_metadata_column_names.prompt_token_count_column_name
2035
2096
  )
2036
2097
  if (
2037
- LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME
2038
- in schema.tag_column_names
2098
+ LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME in tag_cols
2039
2099
  ) and (
2040
2100
  LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME in column_types
2041
2101
  and column_types[LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME]
@@ -2044,10 +2104,7 @@ class Validator:
2044
2104
  wrong_type_cols.append(
2045
2105
  schema.llm_run_metadata_column_names.response_token_count_column_name
2046
2106
  )
2047
- if (
2048
- LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME
2049
- in schema.tag_column_names
2050
- ) and (
2107
+ if (LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME in tag_cols) and (
2051
2108
  LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME in column_types
2052
2109
  and column_types[LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME]
2053
2110
  not in allowed_datatypes
@@ -2059,8 +2116,8 @@ class Validator:
2059
2116
  # Return errors if there are any
2060
2117
  if wrong_type_cols:
2061
2118
  return [
2062
- err.InvalidTypeColumns(
2063
- wrong_type_columns=wrong_type_cols,
2119
+ InvalidTypeColumns(
2120
+ wrong_type_columns=wrong_type_cols, # type: ignore[arg-type]
2064
2121
  expected_types=["int", "float"],
2065
2122
  )
2066
2123
  ]
@@ -2069,7 +2126,7 @@ class Validator:
2069
2126
  @staticmethod
2070
2127
  def _check_type_retrieved_document_ids(
2071
2128
  schema: Schema, column_types: dict[str, Any]
2072
- ) -> list[err.InvalidType]:
2129
+ ) -> list[InvalidType]:
2073
2130
  col = schema.retrieved_document_ids_column_name
2074
2131
  if col in column_types:
2075
2132
  # should mirror server side
@@ -2079,7 +2136,7 @@ class Validator:
2079
2136
  )
2080
2137
  if column_types[col] not in allowed_datatypes:
2081
2138
  return [
2082
- err.InvalidType(
2139
+ InvalidType(
2083
2140
  "Retrieved Document IDs",
2084
2141
  expected_types=["List[str]"],
2085
2142
  found_data_type=column_types[col],
@@ -2090,7 +2147,7 @@ class Validator:
2090
2147
  @staticmethod
2091
2148
  def _check_type_image_segment_coordinates(
2092
2149
  schema: Schema, column_types: dict[str, Any]
2093
- ) -> list[err.InvalidTypeColumns]:
2150
+ ) -> list[InvalidTypeColumns]:
2094
2151
  # should mirror server side
2095
2152
  allowed_coordinate_types = (
2096
2153
  pa.list_(pa.list_(pa.float64())),
@@ -2173,7 +2230,7 @@ class Validator:
2173
2230
 
2174
2231
  return (
2175
2232
  [
2176
- err.InvalidTypeColumns(
2233
+ InvalidTypeColumns(
2177
2234
  wrong_type_columns=wrong_type_cols,
2178
2235
  expected_types=["List[List[float]]"],
2179
2236
  )
@@ -2185,7 +2242,7 @@ class Validator:
2185
2242
  @staticmethod
2186
2243
  def _check_type_image_segment_categories(
2187
2244
  schema: Schema, column_types: dict[str, Any]
2188
- ) -> list[err.InvalidTypeColumns]:
2245
+ ) -> list[InvalidTypeColumns]:
2189
2246
  # should mirror server side
2190
2247
  allowed_category_datatypes = (
2191
2248
  pa.list_(pa.string()),
@@ -2242,7 +2299,7 @@ class Validator:
2242
2299
 
2243
2300
  return (
2244
2301
  [
2245
- err.InvalidTypeColumns(
2302
+ InvalidTypeColumns(
2246
2303
  wrong_type_columns=wrong_type_cols,
2247
2304
  expected_types=["List[str]"],
2248
2305
  )
@@ -2254,7 +2311,7 @@ class Validator:
2254
2311
  @staticmethod
2255
2312
  def _check_type_image_segment_scores(
2256
2313
  schema: Schema, column_types: dict[str, Any]
2257
- ) -> list[err.InvalidTypeColumns]:
2314
+ ) -> list[InvalidTypeColumns]:
2258
2315
  # should mirror server side
2259
2316
  allowed_score_datatypes = (
2260
2317
  pa.list_(pa.float64()),
@@ -2297,7 +2354,7 @@ class Validator:
2297
2354
 
2298
2355
  return (
2299
2356
  [
2300
- err.InvalidTypeColumns(
2357
+ InvalidTypeColumns(
2301
2358
  wrong_type_columns=wrong_type_cols,
2302
2359
  expected_types=["List[float]"],
2303
2360
  )
@@ -2313,7 +2370,7 @@ class Validator:
2313
2370
  @staticmethod
2314
2371
  def _check_embedding_vectors_dimensionality(
2315
2372
  dataframe: pd.DataFrame, schema: Schema
2316
- ) -> list[err.ValidationError]:
2373
+ ) -> list[ValidationError]:
2317
2374
  if schema.embedding_feature_column_names is None:
2318
2375
  return []
2319
2376
 
@@ -2331,7 +2388,7 @@ class Validator:
2331
2388
 
2332
2389
  return (
2333
2390
  [
2334
- err.InvalidValueEmbeddingVectorDimensionality(
2391
+ InvalidValueEmbeddingVectorDimensionality(
2335
2392
  invalid_low_dim_vector_cols,
2336
2393
  invalid_high_dim_vector_cols,
2337
2394
  ),
@@ -2343,7 +2400,7 @@ class Validator:
2343
2400
  @staticmethod
2344
2401
  def _check_embedding_raw_data_characters(
2345
2402
  dataframe: pd.DataFrame, schema: Schema
2346
- ) -> list[err.ValidationError]:
2403
+ ) -> list[ValidationError]:
2347
2404
  if schema.embedding_feature_column_names is None:
2348
2405
  return []
2349
2406
 
@@ -2361,7 +2418,7 @@ class Validator:
2361
2418
 
2362
2419
  if invalid_long_string_data_cols:
2363
2420
  return [
2364
- err.InvalidValueEmbeddingRawDataTooLong(
2421
+ InvalidValueEmbeddingRawDataTooLong(
2365
2422
  invalid_long_string_data_cols
2366
2423
  )
2367
2424
  ]
@@ -2377,20 +2434,20 @@ class Validator:
2377
2434
  @staticmethod
2378
2435
  def _check_value_rank(
2379
2436
  dataframe: pd.DataFrame, schema: Schema
2380
- ) -> list[err.InvalidRankValue]:
2437
+ ) -> list[InvalidRankValue]:
2381
2438
  col = schema.rank_column_name
2382
2439
  lbound, ubound = (1, 100)
2383
2440
 
2384
2441
  if col is not None and col in dataframe.columns:
2385
2442
  rank_min_max = dataframe[col].agg(["min", "max"])
2386
2443
  if rank_min_max["min"] < lbound or rank_min_max["max"] > ubound:
2387
- return [err.InvalidRankValue(col, "1-100")]
2444
+ return [InvalidRankValue(col, "1-100")]
2388
2445
  return []
2389
2446
 
2390
2447
  @staticmethod
2391
2448
  def _check_id_field_str_length(
2392
2449
  dataframe: pd.DataFrame, schema_name: str, id_col_name: str | None
2393
- ) -> list[err.ValidationError]:
2450
+ ) -> list[ValidationError]:
2394
2451
  """Require prediction_id to be a string of length between MIN and MAX.
2395
2452
 
2396
2453
  Between MIN_PREDICTION_ID_LEN and MAX_PREDICTION_ID_LEN.
@@ -2412,7 +2469,7 @@ class Validator:
2412
2469
  .all()
2413
2470
  ):
2414
2471
  return [
2415
- err.InvalidStringLengthInColumn(
2472
+ InvalidStringLengthInColumn(
2416
2473
  schema_name=schema_name,
2417
2474
  col_name=id_col_name,
2418
2475
  min_length=MIN_PREDICTION_ID_LEN,
@@ -2424,7 +2481,7 @@ class Validator:
2424
2481
  @staticmethod
2425
2482
  def _check_document_id_field_str_length(
2426
2483
  dataframe: pd.DataFrame, schema_name: str, id_col_name: str | None
2427
- ) -> list[err.ValidationError]:
2484
+ ) -> list[ValidationError]:
2428
2485
  """Require document id to be a string of length between MIN and MAX.
2429
2486
 
2430
2487
  Between MIN_DOCUMENT_ID_LEN and MAX_DOCUMENT_ID_LEN.
@@ -2446,7 +2503,7 @@ class Validator:
2446
2503
  .all()
2447
2504
  ):
2448
2505
  return [
2449
- err.InvalidStringLengthInColumn(
2506
+ InvalidStringLengthInColumn(
2450
2507
  schema_name=schema_name,
2451
2508
  col_name=id_col_name,
2452
2509
  min_length=MIN_DOCUMENT_ID_LEN,
@@ -2465,7 +2522,7 @@ class Validator:
2465
2522
  and len(dataframe)
2466
2523
  ):
2467
2524
  return True
2468
- return (
2525
+ return bool(
2469
2526
  dataframe[col_name]
2470
2527
  .astype(str)
2471
2528
  .str.len()
@@ -2476,21 +2533,21 @@ class Validator:
2476
2533
  @staticmethod
2477
2534
  def _check_value_tag(
2478
2535
  dataframe: pd.DataFrame, schema: Schema
2479
- ) -> list[err.InvalidTagLength]:
2536
+ ) -> list[InvalidTagLength]:
2480
2537
  if schema.tag_column_names is None:
2481
2538
  return []
2482
2539
 
2483
2540
  wrong_tag_cols = []
2484
2541
  truncated_tag_cols = []
2485
- for col in schema.tag_column_names:
2542
+ for col in _normalize_column_names(schema.tag_column_names):
2486
2543
  # This is to be defensive, validate_params should guarantee that this column is in
2487
2544
  # the dataframe, via _check_missing_columns, and return an error before reaching this
2488
2545
  # block if not
2489
2546
  # Checks max tag length when any values in a column are strings
2490
2547
  if (
2491
2548
  col in dataframe.columns
2492
- and dataframe[col].map(type).eq(str).any()
2493
- ): # type:ignore
2549
+ and dataframe[col].map(type).eq(str).any() # type: ignore[arg-type]
2550
+ ):
2494
2551
  max_tag_len = (
2495
2552
  dataframe[col]
2496
2553
  .apply(_check_value_string_length_helper)
@@ -2501,7 +2558,7 @@ class Validator:
2501
2558
  elif max_tag_len > MAX_TAG_LENGTH_TRUNCATION:
2502
2559
  truncated_tag_cols.append(col)
2503
2560
  if wrong_tag_cols:
2504
- return [err.InvalidTagLength(wrong_tag_cols)]
2561
+ return [InvalidTagLength(wrong_tag_cols)]
2505
2562
  if truncated_tag_cols:
2506
2563
  logger.warning(
2507
2564
  get_truncation_warning_message(
@@ -2513,7 +2570,8 @@ class Validator:
2513
2570
  @staticmethod
2514
2571
  def _check_value_ranking_category(
2515
2572
  dataframe: pd.DataFrame, schema: Schema
2516
- ) -> list[err.InvalidValueMissingValue | err.InvalidRankingCategoryValue]:
2573
+ ) -> list[InvalidValueMissingValue | InvalidRankingCategoryValue]:
2574
+ col: str | None
2517
2575
  if schema.relevance_labels_column_name is not None:
2518
2576
  col = schema.relevance_labels_column_name
2519
2577
  elif schema.attributions_column_name is not None:
@@ -2521,16 +2579,16 @@ class Validator:
2521
2579
  else:
2522
2580
  col = schema.actual_label_column_name
2523
2581
  if col is not None and col in dataframe.columns:
2524
- if dataframe[col].isnull().values.any(): # type: ignore
2582
+ if dataframe[col].isnull().any():
2525
2583
  # do not attach duplicated missing value error
2526
2584
  # which would be caught by _check_value_missing
2527
2585
  return []
2528
2586
  if dataframe[col].astype(str).str.len().min() == 0:
2529
- return [err.InvalidRankingCategoryValue(col)]
2587
+ return [InvalidRankingCategoryValue(col)]
2530
2588
  # empty list
2531
2589
  not_null_filter = dataframe[col].notnull()
2532
2590
  if dataframe[not_null_filter][col].map(len).min() == 0:
2533
- return [err.InvalidValueMissingValue(col, "empty list")]
2591
+ return [InvalidValueMissingValue(col, "empty list")]
2534
2592
  # no empty string in list
2535
2593
  if (
2536
2594
  dataframe[not_null_filter][col]
@@ -2538,13 +2596,13 @@ class Validator:
2538
2596
  .min()
2539
2597
  == 0
2540
2598
  ):
2541
- return [err.InvalidRankingCategoryValue(col)]
2599
+ return [InvalidRankingCategoryValue(col)]
2542
2600
  return []

     @staticmethod
     def _check_length_multi_class_maps(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidNumClassesMultiClassMap]:
+    ) -> list[InvalidNumClassesMultiClassMap]:
         # each entry in column is a list of dictionaries mapping class names and scores
         # validate length of list of dictionaries for each column
         invalid_cols = {}
@@ -2575,16 +2633,16 @@ class Validator:
             if invalid_num_classes:
                 invalid_cols[col] = invalid_num_classes
         if invalid_cols:
-            return [err.InvalidNumClassesMultiClassMap(invalid_cols)]
+            return [InvalidNumClassesMultiClassMap(invalid_cols)]
         return []

     @staticmethod
     def _check_classes_and_scores_values_in_multi_class_maps(
         dataframe: pd.DataFrame, schema: Schema
     ) -> list[
-        err.InvalidMultiClassClassNameLength
-        | err.InvalidMultiClassActScoreValue
-        | err.InvalidMultiClassPredScoreValue
+        InvalidMultiClassClassNameLength
+        | InvalidMultiClassActScoreValue
+        | InvalidMultiClassPredScoreValue
     ]:
         """Validate the class names and score values of dictionaries.

@@ -2601,7 +2659,11 @@ class Validator:
         invalid_pred_scores = {}
         lbound, ubound = (0, 1)
         invalid_actual_scores = False
-        errors = []
+        errors: list[
+            InvalidMultiClassClassNameLength
+            | InvalidMultiClassActScoreValue
+            | InvalidMultiClassPredScoreValue
+        ] = []
         for col in cols:
             if (
                 col is None
@@ -2649,21 +2711,17 @@ class Validator:
             if invalid_scores_for_col:
                 invalid_pred_scores[col] = invalid_scores_for_col
         if invalid_class_names:
-            errors.append(
-                err.InvalidMultiClassClassNameLength(invalid_class_names)
-            )
+            errors.append(InvalidMultiClassClassNameLength(invalid_class_names))
         if invalid_pred_scores:
-            errors.append(
-                err.InvalidMultiClassPredScoreValue(invalid_pred_scores)
-            )
+            errors.append(InvalidMultiClassPredScoreValue(invalid_pred_scores))  # type: ignore[arg-type]
         if invalid_actual_scores:
-            errors.append(err.InvalidMultiClassActScoreValue(col))
+            errors.append(InvalidMultiClassActScoreValue(col))  # type: ignore[arg-type, arg-type]
         return errors
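The `errors = []` to `errors: list[...] = []` change is part of this release's typing pass: an empty list literal gives the type checker no element type to infer, so the annotation is what lets the later `errors.append(...)` calls with a union of error classes type-check. A toy illustration of the same idiom, with hypothetical error classes:

```python
class NameLengthError(Exception): ...
class ScoreBoundsError(Exception): ...

# Without the annotation, mypy cannot infer an element type for the
# empty literal and would ask for one before allowing these appends.
errors: list[NameLengthError | ScoreBoundsError] = []
errors.append(NameLengthError("class name too long"))
errors.append(ScoreBoundsError("score outside [0, 1]"))
```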

     @staticmethod
     def _check_each_multi_class_pred_has_threshold(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidMultiClassThresholdClasses]:
+    ) -> list[InvalidMultiClassThresholdClasses]:
         """Validate threshold scores for Multi Class models.

         If threshold scores column is included in schema and dataframe, validate that
@@ -2687,7 +2745,7 @@ class Validator:
             pred_class_set = set(pred_classes)
             if pred_class_set != thresh_class_set:
                 return [
-                    err.InvalidMultiClassThresholdClasses(
+                    InvalidMultiClassThresholdClasses(
                         threshold_col, pred_class_set, thresh_class_set
                     )
                 ]
@@ -2697,7 +2755,7 @@ class Validator:
     def _check_value_timestamp(
         dataframe: pd.DataFrame,
         schema: Schema,
-    ) -> list[err.InvalidValueMissingValue | err.InvalidValueTimestamp]:
+    ) -> list[InvalidValueMissingValue | InvalidValueTimestamp]:
         # Due to the timing difference between checking this here and the data finally
         # hitting the same check on server side, there's a some chance for a false
         # result, i.e. the check here succeeds but the same check on server side fails.
@@ -2706,11 +2764,9 @@ class Validator:
         # When a timestamp column has Date and NaN, pyarrow will be fine, but
         # pandas min/max will fail due to type incompatibility. So we check for
         # missing value first.
-        if dataframe[col].isnull().values.any():  # type: ignore
+        if dataframe[col].isnull().any():
             return [
-                err.InvalidValueMissingValue(
-                    "Prediction timestamp", "missing"
-                )
+                InvalidValueMissingValue("Prediction timestamp", "missing")
             ]

         now_t = datetime.now(tz=timezone.utc)
@@ -2794,7 +2850,7 @@ class Validator:
                 )
             )
         ):
-            return [err.InvalidValueTimestamp(timestamp_col_name=col)]
+            return [InvalidValueTimestamp(timestamp_col_name=col)]

         return []
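Several hunks in this file swap `dataframe[col].isnull().values.any()  # type: ignore` for `dataframe[col].isnull().any()`. Both detect missing values, but the `.values` round-trip through a raw NumPy array defeated the type checker, while `Series.any()` is fully typed. A quick sketch of the two spellings:

```python
import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 3.0])

# Old spelling: drops to the underlying ndarray, needed a type: ignore.
assert s.isnull().values.any()

# New spelling: stays in pandas and type-checks cleanly.
assert s.isnull().any()
```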

@@ -2803,9 +2859,9 @@ class Validator:
     @staticmethod
     def _check_invalid_missing_values(
         dataframe: pd.DataFrame, schema: BaseSchema, model_type: ModelTypes
-    ) -> list[err.InvalidValueMissingValue]:
+    ) -> list[InvalidValueMissingValue]:
         errors = []
-        columns = ()
+        columns: tuple[tuple[str, str | None], ...] = ()
         if isinstance(schema, CorpusSchema):
             columns = (("Document ID", schema.document_id_column_name),)
         elif isinstance(schema, Schema):
@@ -2824,7 +2880,7 @@ class Validator:
             if col is not None and col in dataframe.columns:
                 if dataframe[col].isnull().any():
                     errors.append(
-                        err.InvalidValueMissingValue(
+                        InvalidValueMissingValue(
                             name, wrong_values="missing", column=col
                         )
                     )
@@ -2834,7 +2890,7 @@ class Validator:
                     and np.isinf(dataframe[col]).any()
                 ):
                     errors.append(
-                        err.InvalidValueMissingValue(
+                        InvalidValueMissingValue(
                             name, wrong_values="infinite", column=col
                         )
                     )
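The missing-value check covers two failure modes per column: nulls, and, for float columns, infinities. A compact standalone version of the logic (names hypothetical):

```python
import numpy as np
import pandas as pd


def missing_or_infinite(df: pd.DataFrame, col: str) -> list[str]:
    problems = []
    if df[col].isnull().any():
        problems.append("missing")
    # np.isinf only applies to float data; guard on dtype first.
    if pd.api.types.is_float_dtype(df[col]) and np.isinf(df[col]).any():
        problems.append("infinite")
    return problems


df = pd.DataFrame({"score": [0.1, np.inf, None]})
print(missing_or_infinite(df, "score"))  # ['missing', 'infinite']
```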
@@ -2850,7 +2906,7 @@ class Validator:
         environment: Environments,
         schema: Schema,
         model_type: ModelTypes,
-    ) -> list[err.InvalidRecord]:
+    ) -> list[InvalidRecord]:
         if environment in (Environments.VALIDATION, Environments.TRAINING):
             return []

@@ -2894,7 +2950,7 @@ class Validator:
         environment: Environments,
         schema: Schema,
         model_type: ModelTypes,
-    ) -> list[err.InvalidRecord]:
+    ) -> list[InvalidRecord]:
         """Validates there's not a single row in the dataframe with all nulls.

         Returns errors if any row has all of pred_label and pred_score evaluating to
@@ -2942,7 +2998,7 @@ class Validator:
     @staticmethod
     def _check_invalid_record_helper(
         dataframe: pd.DataFrame, column_names: list[str | None]
-    ) -> list[err.InvalidRecord]:
+    ) -> list[InvalidRecord]:
         """Check that there are no null values in a subset of columns.

         The column subset is computed from the input list of columns `column_names`
@@ -2950,7 +3006,7 @@ class Validator:
         null values are found.

         Returns:
-            List[err.InvalidRecord]: An error expressing the rows that are problematic
+            List[InvalidRecord]: An error expressing the rows that are problematic

         """
         columns_subset = [
@@ -2964,12 +3020,12 @@ class Validator:
         null_index = null_filter[null_filter].index.values
         if len(null_index) == 0:
             return []
-        return [err.InvalidRecord(columns_subset, null_index)]  # type: ignore
+        return [InvalidRecord(columns_subset, null_index)]  # type: ignore[arg-type]
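`_check_invalid_record_helper` flags rows where every column in a subset is null; `null_filter[null_filter].index.values` is simply how the offending row labels get pulled out. A minimal reproduction of the idea:

```python
import pandas as pd

df = pd.DataFrame({
    "pred_label": ["a", None, None],
    "pred_score": [0.9, None, 0.1],
})

subset = ["pred_label", "pred_score"]
null_filter = df[subset].isnull().all(axis=1)  # True where the whole row is null
null_index = null_filter[null_filter].index.values
print(null_index)  # [1] -> row 1 carries no usable prediction at all
```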

     @staticmethod
     def _check_type_prediction_group_id(
         schema: Schema, column_types: dict[str, Any]
-    ) -> list[err.InvalidType]:
+    ) -> list[InvalidType]:
         col = schema.prediction_group_id_column_name
         if col in column_types:
             # should mirror server side
@@ -2982,7 +3038,7 @@ class Validator:
             )
             if column_types[col] not in allowed_datatypes:
                 return [
-                    err.InvalidType(
+                    InvalidType(
                         "prediction_group_ids",
                         expected_types=["str", "int"],
                         found_data_type=column_types[col],
@@ -2993,7 +3049,7 @@ class Validator:
     @staticmethod
     def _check_type_rank(
         schema: Schema, column_types: dict[str, Any]
-    ) -> list[err.InvalidType]:
+    ) -> list[InvalidType]:
         col = schema.rank_column_name
         if col in column_types:
             allowed_datatypes = (
@@ -3004,7 +3060,7 @@ class Validator:
             )
             if column_types[col] not in allowed_datatypes:
                 return [
-                    err.InvalidType(
+                    InvalidType(
                         "rank",
                         expected_types=["int"],
                         found_data_type=column_types[col],
@@ -3015,7 +3071,8 @@ class Validator:
     @staticmethod
     def _check_type_ranking_category(
         schema: Schema, column_types: dict[str, Any]
-    ) -> list[err.InvalidType]:
+    ) -> list[InvalidType]:
+        col: str | None
         if schema.relevance_labels_column_name is not None:
             col = schema.relevance_labels_column_name
         elif schema.attributions_column_name is not None:
@@ -3026,7 +3083,7 @@ class Validator:
             allowed_datatypes = (pa.list_(pa.string()), pa.string(), pa.null())
             if column_types[col] not in allowed_datatypes:
                 return [
-                    err.InvalidType(
+                    InvalidType(
                         "relevance labels column for ranking models",
                         expected_types=["list of string", "string"],
                         found_data_type=column_types[col],
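These type checks compare each column's inferred Arrow type against a small allowlist, which works because pyarrow `DataType` instances compare by equality. A sketch of the same membership test with a hypothetical allowlist for an integer rank column:

```python
import pyarrow as pa

allowed_datatypes = (pa.int64(), pa.int32(), pa.int16(), pa.int8())

column_type = pa.int32()
print(column_type in allowed_datatypes)  # True

column_type = pa.string()
print(column_type in allowed_datatypes)  # False -> would produce an InvalidType error
```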
@@ -3037,7 +3094,7 @@ class Validator:
     @staticmethod
     def _check_value_bounding_boxes_coordinates(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidBoundingBoxesCoordinates]:
+    ) -> list[InvalidBoundingBoxesCoordinates]:
         errors = []
         if schema.object_detection_prediction_column_names is not None:
             coords_col_name = schema.object_detection_prediction_column_names.bounding_boxes_coordinates_column_name  # noqa: E501
@@ -3058,7 +3115,7 @@ class Validator:
     @staticmethod
     def _check_value_bounding_boxes_categories(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidBoundingBoxesCategories]:
+    ) -> list[InvalidBoundingBoxesCategories]:
         errors = []
         if schema.object_detection_prediction_column_names is not None:
             cat_col_name = schema.object_detection_prediction_column_names.categories_column_name
@@ -3079,7 +3136,7 @@ class Validator:
     @staticmethod
     def _check_value_bounding_boxes_scores(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidBoundingBoxesScores]:
+    ) -> list[InvalidBoundingBoxesScores]:
         errors = []
         if schema.object_detection_prediction_column_names is not None:
             sc_col_name = schema.object_detection_prediction_column_names.scores_column_name
@@ -3104,7 +3161,7 @@ class Validator:
     @staticmethod
     def _check_value_semantic_segmentation_polygon_coordinates(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidPolygonCoordinates]:
+    ) -> list[InvalidPolygonCoordinates]:
         errors = []
         if schema.semantic_segmentation_prediction_column_names is not None:
             coords_col_name = schema.semantic_segmentation_prediction_column_names.polygon_coordinates_column_name  # noqa: E501
@@ -3125,7 +3182,7 @@ class Validator:
     @staticmethod
     def _check_value_semantic_segmentation_polygon_categories(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidPolygonCategories]:
+    ) -> list[InvalidPolygonCategories]:
         errors = []
         if schema.semantic_segmentation_prediction_column_names is not None:
             cat_col_name = schema.semantic_segmentation_prediction_column_names.categories_column_name
@@ -3146,7 +3203,7 @@ class Validator:
     @staticmethod
     def _check_value_instance_segmentation_polygon_coordinates(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidPolygonCoordinates]:
+    ) -> list[InvalidPolygonCoordinates]:
         errors = []
         if schema.instance_segmentation_prediction_column_names is not None:
             coords_col_name = schema.instance_segmentation_prediction_column_names.polygon_coordinates_column_name  # noqa: E501
@@ -3167,7 +3224,7 @@ class Validator:
     @staticmethod
     def _check_value_instance_segmentation_polygon_categories(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidPolygonCategories]:
+    ) -> list[InvalidPolygonCategories]:
         errors = []
         if schema.instance_segmentation_prediction_column_names is not None:
             cat_col_name = schema.instance_segmentation_prediction_column_names.categories_column_name
@@ -3188,7 +3245,7 @@ class Validator:
     @staticmethod
     def _check_value_instance_segmentation_polygon_scores(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidPolygonScores]:
+    ) -> list[InvalidPolygonScores]:
         errors = []
         if schema.instance_segmentation_prediction_column_names is not None:
             sc_col_name = schema.instance_segmentation_prediction_column_names.scores_column_name
@@ -3203,7 +3260,7 @@ class Validator:
     @staticmethod
     def _check_value_instance_segmentation_bbox_coordinates(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidBoundingBoxesCoordinates]:
+    ) -> list[InvalidBoundingBoxesCoordinates]:
         errors = []
         if schema.instance_segmentation_prediction_column_names is not None:
             coords_col_name = schema.instance_segmentation_prediction_column_names.bounding_boxes_coordinates_column_name  # noqa: E501
@@ -3226,7 +3283,7 @@ class Validator:
     @staticmethod
     def _check_value_prompt_response(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.ValidationError]:
+    ) -> list[ValidationError]:
         vector_cols_to_check = []
         text_cols_to_check = []
         if isinstance(schema.prompt_column_names, str):
@@ -3262,16 +3319,16 @@ class Validator:
             dataframe, vector_cols_to_check
         )

-        errors = []
+        errors: list[ValidationError] = []
         if invalid_long_string_data_cols:
             errors.append(
-                err.InvalidValueEmbeddingRawDataTooLong(
+                InvalidValueEmbeddingRawDataTooLong(
                     invalid_long_string_data_cols
                 )
             )
         if invalid_low_dim_vector_cols or invalid_high_dim_vector_cols:
             errors.append(
-                err.InvalidValueEmbeddingVectorDimensionality(
+                InvalidValueEmbeddingVectorDimensionality(  # type: ignore[arg-type]
                     invalid_low_dim_vector_cols,
                     invalid_high_dim_vector_cols,
                 )
@@ -3291,7 +3348,7 @@ class Validator:
     @staticmethod
     def _check_value_llm_model_name(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidStringLengthInColumn]:
+    ) -> list[InvalidStringLengthInColumn]:
         if schema.llm_config_column_names is None:
             return []
         col = schema.llm_config_column_names.model_column_name
@@ -3301,7 +3358,7 @@ class Validator:
         )
         if max_len > MAX_LLM_MODEL_NAME_LENGTH:
             return [
-                err.InvalidStringLengthInColumn(
+                InvalidStringLengthInColumn(
                     schema_name="llm_config_column_names.model_column_name",
                     col_name=col,
                     min_length=0,
@@ -3319,7 +3376,7 @@ class Validator:
     @staticmethod
     def _check_value_llm_prompt_template(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidStringLengthInColumn]:
+    ) -> list[InvalidStringLengthInColumn]:
         if schema.prompt_template_column_names is None:
             return []
         col = schema.prompt_template_column_names.template_column_name
@@ -3329,7 +3386,7 @@ class Validator:
         )
         if max_len > MAX_PROMPT_TEMPLATE_LENGTH:
             return [
-                err.InvalidStringLengthInColumn(
+                InvalidStringLengthInColumn(
                     schema_name="prompt_template_column_names.template_column_name",
                     col_name=col,
                     min_length=0,
@@ -3348,7 +3405,7 @@ class Validator:
     @staticmethod
     def _check_value_llm_prompt_template_version(
         dataframe: pd.DataFrame, schema: Schema
-    ) -> list[err.InvalidStringLengthInColumn]:
+    ) -> list[InvalidStringLengthInColumn]:
         if schema.prompt_template_column_names is None:
             return []
         col = schema.prompt_template_column_names.template_version_column_name
@@ -3358,7 +3415,7 @@ class Validator:
         )
         if max_len > MAX_PROMPT_TEMPLATE_VERSION_LENGTH:
             return [
-                err.InvalidStringLengthInColumn(
+                InvalidStringLengthInColumn(
                     schema_name="prompt_template_column_names.template_version_column_name",
                     col_name=col,
                     min_length=0,
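All three LLM-column checks share one shape: take the column's maximum string length and compare it to a cap. Roughly, with an illustrative cap value in place of the package constant:

```python
import pandas as pd

MAX_LLM_MODEL_NAME_LENGTH = 256  # illustrative; the real cap is a package constant


def exceeds_cap(s: pd.Series, cap: int) -> bool:
    # astype(str) so that non-string cells still get measured consistently
    return bool(s.astype(str).str.len().max() > cap)


print(exceeds_cap(pd.Series(["gpt-x", "y" * 300]), MAX_LLM_MODEL_NAME_LENGTH))  # True
```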
@@ -3377,8 +3434,9 @@ class Validator:
     @staticmethod
     def _check_type_document_columns(
         schema: CorpusSchema, column_types: dict[str, Any]
-    ) -> list[err.InvalidTypeColumns]:
+    ) -> list[InvalidTypeColumns]:
         invalid_types = []
+        allowed_datatypes: tuple[Any, ...]
         # Check document id
         col = schema.document_id_column_name
         if col in column_types:
@@ -3391,7 +3449,7 @@ class Validator:
             )
             if column_types[col] not in allowed_datatypes:
                 invalid_types += [
-                    err.InvalidTypeColumns(
+                    InvalidTypeColumns(
                         wrong_type_columns=[col],
                         expected_types=["str", "int"],
                     )
@@ -3403,7 +3461,7 @@ class Validator:
             allowed_datatype = pa.string()
             if column_types[col] != allowed_datatype:
                 invalid_types += [
-                    err.InvalidTypeColumns(
+                    InvalidTypeColumns(
                         wrong_type_columns=[col],
                         expected_types=["str"],
                     )
@@ -3421,7 +3479,7 @@ class Validator:
             )
             if column_types[col] not in allowed_datatypes:
                 invalid_types += [
-                    err.InvalidTypeColumns(
+                    InvalidTypeColumns(
                         wrong_type_columns=[col],
                         expected_types=["list[float], np.array[float]"],
                     )
@@ -3436,7 +3494,7 @@ class Validator:
             )
             if column_types[col] not in allowed_datatypes:
                 invalid_types += [
-                    err.InvalidTypeColumns(
+                    InvalidTypeColumns(
                         wrong_type_columns=[col],
                         expected_types=["list[str]"],
                     )
@@ -3450,7 +3508,7 @@ class Validator:
             allowed_datatypes = (pa.string(),)
             if column_types[col] not in allowed_datatypes:
                 invalid_types += [
-                    err.InvalidTypeColumns(
+                    InvalidTypeColumns(
                         wrong_type_columns=[col],
                         expected_types=["str"],
                     )
@@ -3517,15 +3575,16 @@ def _check_value_raw_data_length_helper(

 def _check_value_bounding_boxes_coordinates_helper(
     coordinates_col: pd.Series,
-) -> err.InvalidBoundingBoxesCoordinates | None:
+) -> InvalidBoundingBoxesCoordinates | None:
     def check(boxes: object) -> None:
         # We allow for zero boxes. None coordinates list is not allowed (will break following tests:
         # 'NoneType is not iterable')
         if boxes is None:
-            raise err.InvalidBoundingBoxesCoordinates(reason="none_boxes")
-        for box in boxes:
+            raise InvalidBoundingBoxesCoordinates(reason="none_boxes")
+        # Type ignore: boxes comes from pandas Series, validated at runtime to be iterable
+        for box in boxes:  # type: ignore[attr-defined]
             if box is None or len(box) == 0:
-                raise err.InvalidBoundingBoxesCoordinates(
+                raise InvalidBoundingBoxesCoordinates(
                     reason="none_or_empty_box"
                 )
             error = _box_coordinates_wrong_format(box)
@@ -3534,25 +3593,26 @@ def _check_value_bounding_boxes_coordinates_helper(

     try:
         coordinates_col.apply(check)
-    except err.InvalidBoundingBoxesCoordinates as e:
+    except InvalidBoundingBoxesCoordinates as e:
         return e
     return None


 def _box_coordinates_wrong_format(
     box_coords: object,
-) -> err.InvalidBoundingBoxesCoordinates | None:
+) -> InvalidBoundingBoxesCoordinates | None:
     if (
         # Coordinates should be a collection of 4 floats
-        len(box_coords) != 4
+        len(box_coords) != 4  # type: ignore[arg-type]
         # Coordinates should be positive
-        or any(k < 0 for k in box_coords)
+        # Type ignore: box_coords validated at runtime to be iterable/indexable
+        or any(k < 0 for k in box_coords)  # type: ignore[attr-defined]
         # Coordinates represent the top-left & bottom-right corners of a box: x1 < x2
-        or box_coords[0] >= box_coords[2]
+        or box_coords[0] >= box_coords[2]  # type: ignore[index]
         # Coordinates represent the top-left & bottom-right corners of a box: y1 < y2
-        or box_coords[1] >= box_coords[3]
+        or box_coords[1] >= box_coords[3]  # type: ignore[index]
     ):
-        return err.InvalidBoundingBoxesCoordinates(
+        return InvalidBoundingBoxesCoordinates(
             reason="boxes_coordinates_wrong_format"
         )
     return None
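The box-format predicate encodes the `[x1, y1, x2, y2]` convention: four non-negative floats with the top-left corner strictly above and to the left of the bottom-right one. A standalone version, returning a bool instead of an error object:

```python
from collections.abc import Sequence


def box_is_valid(box: Sequence[float]) -> bool:
    return (
        len(box) == 4
        and all(k >= 0 for k in box)
        and box[0] < box[2]  # x1 < x2
        and box[1] < box[3]  # y1 < y2
    )


print(box_is_valid([10, 20, 30, 40]))  # True
print(box_is_valid([30, 20, 10, 40]))  # False: x1 >= x2
```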
@@ -3560,51 +3620,49 @@ def _box_coordinates_wrong_format(

 def _check_value_bounding_boxes_categories_helper(
     categories_col: pd.Series,
-) -> err.InvalidBoundingBoxesCategories | None:
+) -> InvalidBoundingBoxesCategories | None:
     def check(categories: object) -> None:
         # We allow for zero boxes. None category list is not allowed (will break following tests:
         # 'NoneType is not iterable')
         if categories is None:
-            raise err.InvalidBoundingBoxesCategories(
-                reason="none_category_list"
-            )
-        for category in categories:
+            raise InvalidBoundingBoxesCategories(reason="none_category_list")
+        # Type ignore: categories validated at runtime to be iterable
+        for category in categories:  # type: ignore[attr-defined]
             # Allow for empty string category, no None values
             if category is None:
-                raise err.InvalidBoundingBoxesCategories(reason="none_category")
+                raise InvalidBoundingBoxesCategories(reason="none_category")

     try:
         categories_col.apply(check)
-    except err.InvalidBoundingBoxesCategories as e:
+    except InvalidBoundingBoxesCategories as e:
         return e
     return None


 def _check_value_bounding_boxes_scores_helper(
     scores_col: pd.Series,
-) -> err.InvalidBoundingBoxesScores | None:
+) -> InvalidBoundingBoxesScores | None:
     def check(scores: object) -> None:
         # We allow for zero boxes. None confidence score list is not allowed (will break following tests:
         # 'NoneType is not iterable')
         if scores is None:
-            raise err.InvalidBoundingBoxesScores(reason="none_score_list")
-        for score in scores:
+            raise InvalidBoundingBoxesScores(reason="none_score_list")
+        # Type ignore: scores validated at runtime to be iterable
+        for score in scores:  # type: ignore[attr-defined]
             # Confidence scores are between 0 and 1
             if score < 0 or score > 1:
-                raise err.InvalidBoundingBoxesScores(
-                    reason="scores_out_of_bounds"
-                )
+                raise InvalidBoundingBoxesScores(reason="scores_out_of_bounds")

     try:
         scores_col.apply(check)
-    except err.InvalidBoundingBoxesScores as e:
+    except InvalidBoundingBoxesScores as e:
         return e
     return None


 def _polygon_coordinates_wrong_format(
     polygon_coords: object,
-) -> err.InvalidPolygonCoordinates | None:
+) -> InvalidPolygonCoordinates | None:
     """Check if polygon coordinates are valid.

     Validates:
@@ -3623,30 +3681,31 @@ def _polygon_coordinates_wrong_format(
     # Basic validations
     if (
         # Coordinates should be a collection of more than 6 floats (3 pairs of x,y coordinates)
-        len(polygon_coords) < 6
+        len(polygon_coords) < 6  # type: ignore[arg-type]
         # Coordinates should be positive
-        or any(k < 0 for k in polygon_coords)
+        # Type ignore: polygon_coords validated at runtime to be iterable
+        or any(k < 0 for k in polygon_coords)  # type: ignore[arg-type, attr-defined]
         # Coordinates should be a collection of pairs of floats
-        or len(polygon_coords) % 2 != 0
+        or len(polygon_coords) % 2 != 0  # type: ignore[arg-type]
     ):
-        return err.InvalidPolygonCoordinates(
+        return InvalidPolygonCoordinates(
             reason="polygon_coordinates_wrong_format",
-            coordinates=polygon_coords,
+            coordinates=polygon_coords,  # type: ignore[arg-type]
         )

     # Convert flat list to list of points [(x1,y1), (x2,y2), ...]
+    coords_seq = cast("Sequence[float]", polygon_coords)
     points = [
-        (polygon_coords[i], polygon_coords[i + 1])
-        for i in range(0, len(polygon_coords), 2)
+        (coords_seq[i], coords_seq[i + 1]) for i in range(0, len(coords_seq), 2)
     ]

     # Check for repeated vertices
     for i in range(len(points)):
         for j in range(i + 1, len(points)):
             if points[i] == points[j]:
-                return err.InvalidPolygonCoordinates(
+                return InvalidPolygonCoordinates(
                     reason="polygon_coordinates_repeated_vertices",
-                    coordinates=polygon_coords,
+                    coordinates=polygon_coords,  # type: ignore[arg-type]
                 )

     # Check for self-intersections
@@ -3665,9 +3724,9 @@ def _polygon_coordinates_wrong_format(
             if segments_intersect(
                 edges[i][0], edges[i][1], edges[j][0], edges[j][1]
             ):
-                return err.InvalidPolygonCoordinates(
+                return InvalidPolygonCoordinates(
                     reason="polygon_coordinates_self_intersecting_vertices",
-                    coordinates=polygon_coords,
+                    coordinates=polygon_coords,  # type: ignore[arg-type]
                 )

     return None
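The polygon validator works on a flat `[x1, y1, x2, y2, ...]` list: it requires at least three points (six floats), an even count, and non-negative values, then pairs the floats into vertices and scans for duplicates; self-intersection is checked afterwards, pairwise over edges. A condensed sketch of the first two stages (the duplicate scan here uses a set rather than the package's pairwise loop, which detects the same condition):

```python
from collections.abc import Sequence


def polygon_problem(coords: Sequence[float]) -> str | None:
    if len(coords) < 6 or len(coords) % 2 != 0 or any(k < 0 for k in coords):
        return "wrong_format"
    points = [(coords[i], coords[i + 1]) for i in range(0, len(coords), 2)]
    if len(points) != len(set(points)):
        return "repeated_vertices"
    return None


print(polygon_problem([0, 0, 4, 0, 4, 3]))  # None: a valid triangle
print(polygon_problem([0, 0, 4, 0, 0, 0]))  # "repeated_vertices"
```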
@@ -3675,64 +3734,65 @@ def _check_value_polygon_coordinates_helper(

 def _check_value_polygon_coordinates_helper(
     coordinates_col: pd.Series,
-) -> err.InvalidPolygonCoordinates | None:
+) -> InvalidPolygonCoordinates | None:
     def check(polygons: object) -> None:
         # We allow for zero polygons. None coordinates list is not allowed (will break following tests:
         # 'NoneType is not iterable')
         if polygons is None:
-            raise err.InvalidPolygonCoordinates(reason="none_polygons")
-        for polygon in polygons:
+            raise InvalidPolygonCoordinates(reason="none_polygons")
+        # Type ignore: polygons validated at runtime to be iterable
+        for polygon in polygons:  # type: ignore[attr-defined]
             if polygon is None or len(polygon) == 0:
-                raise err.InvalidPolygonCoordinates(
-                    reason="none_or_empty_polygon"
-                )
+                raise InvalidPolygonCoordinates(reason="none_or_empty_polygon")
             error = _polygon_coordinates_wrong_format(polygon)
             if error is not None:
                 raise error

     try:
         coordinates_col.apply(check)
-    except err.InvalidPolygonCoordinates as e:
+    except InvalidPolygonCoordinates as e:
         return e
     return None


 def _check_value_polygon_categories_helper(
     categories_col: pd.Series,
-) -> err.InvalidPolygonCategories | None:
+) -> InvalidPolygonCategories | None:
     def check(categories: object) -> None:
         # We allow for zero boxes. None category list is not allowed (will break following tests:
         # 'NoneType is not iterable')
         if categories is None:
-            raise err.InvalidPolygonCategories(reason="none_category_list")
-        for category in categories:
+            raise InvalidPolygonCategories(reason="none_category_list")
+        # Type ignore: categories validated at runtime to be iterable
+        for category in categories:  # type: ignore[attr-defined]
             # Allow for empty string category, no None values
             if category is None:
-                raise err.InvalidPolygonCategories(reason="none_category")
+                raise InvalidPolygonCategories(reason="none_category")

     try:
         categories_col.apply(check)
-    except err.InvalidPolygonCategories as e:
+    except InvalidPolygonCategories as e:
         return e
     return None


 def _check_value_polygon_scores_helper(
     scores_col: pd.Series,
-) -> err.InvalidPolygonScores | None:
+) -> InvalidPolygonScores | None:
     def check(scores: object) -> None:
         # We allow for zero boxes. None confidence score list is not allowed (will break following tests:
         # 'NoneType is not iterable')
         if scores is None:
-            raise err.InvalidPolygonScores(reason="none_score_list")
-        for score in scores:
+            raise InvalidPolygonScores(reason="none_score_list")
+        # Type ignore: scores validated at runtime to be iterable
+        for score in scores:  # type: ignore[attr-defined]
             # Confidence scores are between 0 and 1
             if score < 0 or score > 1:
-                raise err.InvalidPolygonScores(reason="scores_out_of_bounds")
+                raise InvalidPolygonScores(reason="scores_out_of_bounds")

     try:
         scores_col.apply(check)
-    except err.InvalidPolygonScores as e:
+    except InvalidPolygonScores as e:
         return e
     return None
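All of these `_check_value_*_helper` functions share one control-flow trick: the per-cell validator raises the error class, `Series.apply` propagates the first raise, and the wrapper catches the exception and returns it as a value so the caller can aggregate errors rather than handle control flow. A minimal sketch of the pattern with a hypothetical error class:

```python
import pandas as pd


class InvalidScore(Exception):
    def __init__(self, reason: str) -> None:
        self.reason = reason
        super().__init__(reason)


def check_scores_column(scores_col: pd.Series) -> InvalidScore | None:
    def check(scores: object) -> None:
        if scores is None:
            raise InvalidScore("none_score_list")
        for score in scores:  # validated at runtime to be iterable
            if score < 0 or score > 1:
                raise InvalidScore("scores_out_of_bounds")

    try:
        scores_col.apply(check)
    except InvalidScore as e:
        return e  # first failure wins; caller appends it to an error list
    return None


print(check_scores_column(pd.Series([[0.2, 0.9], [1.5]])).reason)  # scores_out_of_bounds
```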