arize 8.0.0b0__py3-none-any.whl → 8.0.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. arize/__init__.py +1 -1
  2. arize/_client_factory.py +50 -0
  3. arize/_flight/client.py +4 -4
  4. arize/_generated/api_client/__init__.py +0 -2
  5. arize/_generated/api_client/api/datasets_api.py +6 -6
  6. arize/_generated/api_client/api/experiments_api.py +6 -6
  7. arize/_generated/api_client/api/projects_api.py +3 -3
  8. arize/_generated/api_client/models/__init__.py +0 -1
  9. arize/_generated/api_client/models/datasets_create_request.py +2 -10
  10. arize/_generated/api_client/models/datasets_examples_insert_request.py +2 -10
  11. arize/_generated/api_client/test/test_datasets_create_request.py +2 -6
  12. arize/_generated/api_client/test/test_datasets_examples_insert_request.py +2 -6
  13. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +2 -6
  14. arize/_generated/api_client/test/test_datasets_examples_update_request.py +2 -6
  15. arize/_generated/api_client/test/test_experiments_create_request.py +2 -6
  16. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +2 -6
  17. arize/_generated/api_client_README.md +0 -1
  18. arize/_lazy.py +25 -9
  19. arize/client.py +16 -52
  20. arize/config.py +9 -36
  21. arize/constants/ml.py +9 -16
  22. arize/constants/spans.py +5 -10
  23. arize/datasets/client.py +13 -9
  24. arize/datasets/errors.py +1 -1
  25. arize/datasets/validation.py +2 -2
  26. arize/embeddings/auto_generator.py +2 -2
  27. arize/embeddings/errors.py +2 -2
  28. arize/embeddings/tabular_generators.py +1 -1
  29. arize/exceptions/base.py +0 -52
  30. arize/exceptions/parameters.py +0 -329
  31. arize/experiments/__init__.py +2 -2
  32. arize/experiments/client.py +16 -10
  33. arize/experiments/evaluators/base.py +6 -6
  34. arize/experiments/evaluators/executors.py +10 -3
  35. arize/experiments/evaluators/types.py +2 -2
  36. arize/experiments/functions.py +24 -17
  37. arize/experiments/types.py +6 -8
  38. arize/logging.py +1 -1
  39. arize/ml/batch_validation/errors.py +10 -1004
  40. arize/ml/batch_validation/validator.py +273 -225
  41. arize/ml/casting.py +7 -7
  42. arize/ml/client.py +12 -11
  43. arize/ml/proto.py +6 -6
  44. arize/ml/stream_validation.py +2 -3
  45. arize/ml/surrogate_explainer/mimic.py +3 -3
  46. arize/ml/types.py +1 -55
  47. arize/pre_releases.py +6 -3
  48. arize/projects/client.py +9 -4
  49. arize/regions.py +2 -2
  50. arize/spans/client.py +14 -12
  51. arize/spans/columns.py +32 -36
  52. arize/spans/conversion.py +5 -6
  53. arize/spans/validation/common/argument_validation.py +3 -3
  54. arize/spans/validation/common/dataframe_form_validation.py +6 -6
  55. arize/spans/validation/common/value_validation.py +1 -1
  56. arize/spans/validation/evals/dataframe_form_validation.py +4 -4
  57. arize/spans/validation/evals/evals_validation.py +6 -6
  58. arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
  59. arize/spans/validation/spans/dataframe_form_validation.py +2 -2
  60. arize/spans/validation/spans/spans_validation.py +6 -6
  61. arize/utils/arrow.py +2 -2
  62. arize/utils/cache.py +2 -2
  63. arize/utils/dataframe.py +4 -4
  64. arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
  65. arize/utils/openinference_conversion.py +10 -10
  66. arize/utils/proto.py +1 -1
  67. arize/version.py +1 -1
  68. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/METADATA +71 -63
  69. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/RECORD +72 -73
  70. arize/_generated/api_client/models/primitive_value.py +0 -172
  71. arize/_generated/api_client/test/test_primitive_value.py +0 -50
  72. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
  73. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
  74. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0
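Of the 74 files, the largest functional change is in arize/ml/batch_validation/validator.py (+273 −225), whose diff follows. The module-alias import `from arize.ml.batch_validation import errors as err` is replaced with explicit imports from arize.exceptions.base, arize.exceptions.types, arize.exceptions.values, and arize.ml.batch_validation.errors, and every `err.X` reference becomes a bare `X`. A minimal sketch of the before/after pattern, assuming the arize beta wheel is installed (`check_model_id` is a hypothetical helper condensing the validator's `_check_invalid_model_id`; it is not part of the package API):

# Before (8.0.0b0): module alias, attribute access at each call site.
from arize.ml.batch_validation import errors as err

def check_model_id(model_id):
    # Mirrors the validator's rule: model_id must be a non-empty string.
    if not isinstance(model_id, str) or len(model_id.strip()) == 0:
        return [err.InvalidModelId()]
    return []

# After (8.0.0b2): names are imported directly; call sites drop the err. prefix.
from arize.ml.batch_validation.errors import InvalidModelId

def check_model_id(model_id):
    if not isinstance(model_id, str) or len(model_id.strip()) == 0:
        return [InvalidModelId()]
    return []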
@@ -39,8 +39,70 @@ from arize.constants.ml import (
39
39
  MIN_PREDICTION_ID_LEN,
40
40
  MODEL_MAPPING_CONFIG,
41
41
  )
42
+ from arize.exceptions.base import (
43
+ InvalidDataFrameIndex,
44
+ InvalidFieldTypeConversion,
45
+ ValidationError,
46
+ )
47
+ from arize.exceptions.types import (
48
+ InvalidFieldTypeLlmConfig,
49
+ InvalidFieldTypePromptTemplates,
50
+ InvalidType,
51
+ InvalidTypeColumns,
52
+ InvalidTypeFeatures,
53
+ InvalidTypeShapValues,
54
+ InvalidTypeTags,
55
+ InvalidValueEmbeddingRawDataTooLong,
56
+ InvalidValueEmbeddingVectorDimensionality,
57
+ )
58
+ from arize.exceptions.values import (
59
+ InvalidBoundingBoxesCategories,
60
+ InvalidBoundingBoxesCoordinates,
61
+ InvalidBoundingBoxesScores,
62
+ InvalidMultiClassActScoreValue,
63
+ InvalidMultiClassClassNameLength,
64
+ InvalidMultiClassPredScoreValue,
65
+ InvalidMultiClassThresholdClasses,
66
+ InvalidNumClassesMultiClassMap,
67
+ InvalidPolygonCategories,
68
+ InvalidPolygonCoordinates,
69
+ InvalidPolygonScores,
70
+ InvalidRankingCategoryValue,
71
+ InvalidRankValue,
72
+ InvalidRecord,
73
+ InvalidStringLengthInColumn,
74
+ InvalidTagLength,
75
+ InvalidValueMissingValue,
76
+ InvalidValueTimestamp,
77
+ )
42
78
  from arize.logging import get_truncation_warning_message
43
- from arize.ml.batch_validation import errors as err
79
+ from arize.ml.batch_validation.errors import (
80
+ DuplicateColumnsInDataframe,
81
+ InvalidBatchId,
82
+ InvalidColumnNameEmptyString,
83
+ InvalidEnvironment,
84
+ InvalidFieldTypeEmbeddingFeatures,
85
+ InvalidFieldTypePromptResponse,
86
+ InvalidModelId,
87
+ InvalidModelType,
88
+ InvalidModelTypeAndMetricsCombination,
89
+ InvalidModelVersion,
90
+ InvalidNumberOfEmbeddings,
91
+ InvalidPredActColumnNamesForModelType,
92
+ InvalidPredActCVColumnNamesForModelType,
93
+ InvalidSchemaType,
94
+ InvalidShapSuffix,
95
+ MissingColumns,
96
+ MissingCVPredAct,
97
+ MissingPredictionIdColumnForDelayedRecords,
98
+ MissingPreprodAct,
99
+ MissingPreprodPredActNumericAndCategorical,
100
+ MissingReqPredActColumnNamesForMultiClass,
101
+ MissingRequiredColumnsForRankingModel,
102
+ MissingRequiredColumnsMetricsValidation,
103
+ MultipleCVPredAct,
104
+ ReservedColumns,
105
+ )
44
106
  from arize.ml.types import (
45
107
  CATEGORICAL_MODEL_TYPES,
46
108
  NUMERIC_MODEL_TYPES,
@@ -74,8 +136,8 @@ class Validator:
74
136
  schema: BaseSchema,
75
137
  model_version: str | None = None,
76
138
  batch_id: str | None = None,
77
- ) -> list[err.ValidationError]:
78
- """Validate required checks for schema, environment, and DataFrame structure."""
139
+ ) -> list[ValidationError]:
140
+ """Validate required checks for schema, environment, and :class:`pandas.DataFrame` structure."""
79
141
  general_checks = chain(
80
142
  Validator._check_valid_schema_type(schema, environment),
81
143
  Validator._check_field_convertible_to_str(
@@ -115,7 +177,7 @@ class Validator:
115
177
  metric_families: list[Metrics] | None = None,
116
178
  model_version: str | None = None,
117
179
  batch_id: str | None = None,
118
- ) -> list[err.ValidationError]:
180
+ ) -> list[ValidationError]:
119
181
  """Validate parameters including model type, environment, and schema consistency."""
120
182
  # general checks
121
183
  general_checks = chain(
@@ -223,7 +285,7 @@ class Validator:
223
285
  model_type: ModelTypes,
224
286
  schema: BaseSchema,
225
287
  pyarrow_schema: pa.Schema,
226
- ) -> list[err.ValidationError]:
288
+ ) -> list[ValidationError]:
227
289
  """Validate column data types against expected types for the schema."""
228
290
  column_types = dict(
229
291
  zip(pyarrow_schema.names, pyarrow_schema.types, strict=True)
@@ -323,7 +385,7 @@ class Validator:
323
385
  environment: Environments,
324
386
  schema: BaseSchema,
325
387
  model_type: ModelTypes,
326
- ) -> list[err.ValidationError]:
388
+ ) -> list[ValidationError]:
327
389
  """Validate data values including ranges, formats, and consistency checks."""
328
390
  # ASSUMPTION: at this point the param and type checks should have passed.
329
391
  # This function may crash if that is not true, e.g. if columns are missing
@@ -444,15 +506,15 @@ class Validator:
444
506
  @staticmethod
445
507
  def _check_column_names_for_empty_strings(
446
508
  schema: BaseSchema,
447
- ) -> list[err.InvalidColumnNameEmptyString]:
509
+ ) -> list[InvalidColumnNameEmptyString]:
448
510
  if "" in schema.get_used_columns():
449
- return [err.InvalidColumnNameEmptyString()]
511
+ return [InvalidColumnNameEmptyString()]
450
512
  return []
451
513
 
452
514
  @staticmethod
453
515
  def _check_field_convertible_to_str(
454
516
  model_id: object, model_version: object, batch_id: object
455
- ) -> list[err.InvalidFieldTypeConversion]:
517
+ ) -> list[InvalidFieldTypeConversion]:
456
518
  # converting to a set first makes the checks run a lot faster
457
519
  wrong_fields = []
458
520
  if model_id is not None and not isinstance(model_id, str):
@@ -472,61 +534,59 @@ class Validator:
472
534
  wrong_fields.append("batch_id")
473
535
 
474
536
  if wrong_fields:
475
- return [err.InvalidFieldTypeConversion(wrong_fields, "string")]
537
+ return [InvalidFieldTypeConversion(wrong_fields, "string")]
476
538
  return []
477
539
 
478
540
  @staticmethod
479
541
  def _check_field_type_embedding_features_column_names(
480
542
  schema: Schema,
481
- ) -> list[err.InvalidFieldTypeEmbeddingFeatures]:
543
+ ) -> list[InvalidFieldTypeEmbeddingFeatures]:
482
544
  if schema.embedding_feature_column_names is not None:
483
545
  if not isinstance(schema.embedding_feature_column_names, dict):
484
- return [err.InvalidFieldTypeEmbeddingFeatures()]
546
+ return [InvalidFieldTypeEmbeddingFeatures()]
485
547
  for k, v in schema.embedding_feature_column_names.items():
486
548
  if not isinstance(k, str) or not isinstance(
487
549
  v, EmbeddingColumnNames
488
550
  ):
489
- return [err.InvalidFieldTypeEmbeddingFeatures()]
551
+ return [InvalidFieldTypeEmbeddingFeatures()]
490
552
  return []
491
553
 
492
554
  @staticmethod
493
555
  def _check_field_type_prompt_response(
494
556
  schema: Schema,
495
- ) -> list[err.InvalidFieldTypePromptResponse]:
557
+ ) -> list[InvalidFieldTypePromptResponse]:
496
558
  errors = []
497
559
  if schema.prompt_column_names is not None and not isinstance(
498
560
  schema.prompt_column_names, (str, EmbeddingColumnNames)
499
561
  ):
500
- errors.append(
501
- err.InvalidFieldTypePromptResponse("prompt_column_names")
502
- )
562
+ errors.append(InvalidFieldTypePromptResponse("prompt_column_names"))
503
563
  if schema.response_column_names is not None and not isinstance(
504
564
  schema.response_column_names, (str, EmbeddingColumnNames)
505
565
  ):
506
566
  errors.append(
507
- err.InvalidFieldTypePromptResponse("response_column_names")
567
+ InvalidFieldTypePromptResponse("response_column_names")
508
568
  )
509
569
  return errors
510
570
 
511
571
  @staticmethod
512
572
  def _check_field_type_prompt_templates(
513
573
  schema: Schema,
514
- ) -> list[err.InvalidFieldTypePromptTemplates]:
574
+ ) -> list[InvalidFieldTypePromptTemplates]:
515
575
  if schema.prompt_template_column_names is not None and not isinstance(
516
576
  schema.prompt_template_column_names, PromptTemplateColumnNames
517
577
  ):
518
- return [err.InvalidFieldTypePromptTemplates()]
578
+ return [InvalidFieldTypePromptTemplates()]
519
579
  return []
520
580
 
521
581
  @staticmethod
522
582
  def _check_field_type_llm_config(
523
583
  dataframe: pd.DataFrame,
524
584
  schema: Schema,
525
- ) -> list[err.InvalidFieldTypeLlmConfig | err.InvalidTypeColumns]:
585
+ ) -> list[InvalidFieldTypeLlmConfig | InvalidTypeColumns]:
526
586
  if schema.llm_config_column_names is None:
527
587
  return []
528
588
  if not isinstance(schema.llm_config_column_names, LLMConfigColumnNames):
529
- return [err.InvalidFieldTypeLlmConfig()]
589
+ return [InvalidFieldTypeLlmConfig()]
530
590
  col = schema.llm_config_column_names.params_column_name
531
591
  # We check the types if the columns are in the dataframe.
532
592
  # If the columns are reflected in the schema but not present
@@ -545,7 +605,7 @@ class Validator:
545
605
  )
546
606
  ):
547
607
  return [
548
- err.InvalidTypeColumns(
608
+ InvalidTypeColumns(
549
609
  wrong_type_columns=[col],
550
610
  expected_types=[
551
611
  "Dict[str, (bool, int, float, string or list[str])]"
@@ -557,9 +617,9 @@ class Validator:
557
617
  @staticmethod
558
618
  def _check_invalid_index(
559
619
  dataframe: pd.DataFrame,
560
- ) -> list[err.InvalidDataFrameIndex]:
620
+ ) -> list[InvalidDataFrameIndex]:
561
621
  if (dataframe.index != dataframe.reset_index(drop=True).index).any():
562
- return [err.InvalidDataFrameIndex()]
622
+ return [InvalidDataFrameIndex()]
563
623
  return []
564
624
 
565
625
  # ----------------
@@ -571,7 +631,7 @@ class Validator:
571
631
  model_type: ModelTypes,
572
632
  metric_families: list[Metrics] | None,
573
633
  schema: Schema,
574
- ) -> list[err.ValidationError]:
634
+ ) -> list[ValidationError]:
575
635
  if metric_families is None:
576
636
  return []
577
637
 
@@ -597,7 +657,7 @@ class Validator:
597
657
  if not valid_combination:
598
658
  # Model type + metrics combination is not valid.
599
659
  return [
600
- err.InvalidModelTypeAndMetricsCombination(
660
+ InvalidModelTypeAndMetricsCombination(
601
661
  model_type,
602
662
  metric_families,
603
663
  suggested_model_metric_combinations,
@@ -606,7 +666,7 @@ class Validator:
606
666
  if missing_columns:
607
667
  # For this model type, the schema is missing columns required for the requested metrics.
608
668
  return [
609
- err.MissingRequiredColumnsMetricsValidation(
669
+ MissingRequiredColumnsMetricsValidation(
610
670
  model_type, metric_families, missing_columns
611
671
  )
612
672
  ]
@@ -674,7 +734,7 @@ class Validator:
674
734
  @staticmethod
675
735
  def _check_existence_prediction_id_column_delayed_schema(
676
736
  schema: Schema, model_type: ModelTypes
677
- ) -> list[err.MissingPredictionIdColumnForDelayedRecords]:
737
+ ) -> list[MissingPredictionIdColumnForDelayedRecords]:
678
738
  if schema.prediction_id_column_name is not None:
679
739
  return []
680
740
  # TODO: Revise logic once prediction_label column addition (for generative models)
@@ -683,7 +743,7 @@ class Validator:
683
743
  # We skip GENERATIVE model types since they are assigned a default
684
744
  # prediction label column with values equal 1
685
745
  return [
686
- err.MissingPredictionIdColumnForDelayedRecords(
746
+ MissingPredictionIdColumnForDelayedRecords(
687
747
  schema.has_actual_columns(),
688
748
  schema.has_feature_importance_columns(),
689
749
  )
@@ -705,7 +765,7 @@ class Validator:
705
765
  def _check_missing_columns(
706
766
  dataframe: pd.DataFrame,
707
767
  schema: BaseSchema,
708
- ) -> list[err.MissingColumns]:
768
+ ) -> list[MissingColumns]:
709
769
  if isinstance(schema, CorpusSchema):
710
770
  return Validator._check_missing_columns_corpus_schema(
711
771
  dataframe, schema
@@ -718,7 +778,7 @@ class Validator:
718
778
  def _check_missing_columns_schema(
719
779
  dataframe: pd.DataFrame,
720
780
  schema: Schema,
721
- ) -> list[err.MissingColumns]:
781
+ ) -> list[MissingColumns]:
722
782
  # converting to a set first makes the checks run a lot faster
723
783
  existing_columns = set(dataframe.columns)
724
784
  missing_columns = []
@@ -901,14 +961,14 @@ class Validator:
901
961
  )
902
962
 
903
963
  if missing_columns:
904
- return [err.MissingColumns(missing_columns)]
964
+ return [MissingColumns(missing_columns)]
905
965
  return []
906
966
 
907
967
  @staticmethod
908
968
  def _check_missing_columns_corpus_schema(
909
969
  dataframe: pd.DataFrame,
910
970
  schema: CorpusSchema,
911
- ) -> list[err.MissingColumns]:
971
+ ) -> list[MissingColumns]:
912
972
  # converting to a set first makes the checks run a lot faster
913
973
  existing_columns = set(dataframe.columns)
914
974
  missing_columns = []
@@ -958,19 +1018,19 @@ class Validator:
958
1018
  schema.document_text_embedding_column_names.link_to_data_column_name
959
1019
  )
960
1020
  if missing_columns:
961
- return [err.MissingColumns(missing_columns)]
1021
+ return [MissingColumns(missing_columns)]
962
1022
  return []
963
1023
 
964
1024
  @staticmethod
965
1025
  def _check_valid_schema_type(
966
1026
  schema: BaseSchema,
967
1027
  environment: Environments,
968
- ) -> list[err.InvalidSchemaType]:
1028
+ ) -> list[InvalidSchemaType]:
969
1029
  if environment == Environments.CORPUS and not (
970
1030
  isinstance(schema, CorpusSchema)
971
1031
  ):
972
1032
  return [
973
- err.InvalidSchemaType(
1033
+ InvalidSchemaType(
974
1034
  schema_type=str(type(schema)), environment=environment
975
1035
  )
976
1036
  ]
@@ -978,7 +1038,7 @@ class Validator:
978
1038
  schema, CorpusSchema
979
1039
  ):
980
1040
  return [
981
- err.InvalidSchemaType(
1041
+ InvalidSchemaType(
982
1042
  schema_type=str(type(schema)), environment=environment
983
1043
  )
984
1044
  ]
@@ -987,7 +1047,7 @@ class Validator:
987
1047
  @staticmethod
988
1048
  def _check_invalid_shap_suffix(
989
1049
  schema: Schema,
990
- ) -> list[err.InvalidShapSuffix]:
1050
+ ) -> list[InvalidShapSuffix]:
991
1051
  invalid_column_names = set()
992
1052
 
993
1053
  if schema.feature_column_names is not None:
@@ -1016,14 +1076,14 @@ class Validator:
1016
1076
  invalid_column_names.add(col)
1017
1077
 
1018
1078
  if invalid_column_names:
1019
- return [err.InvalidShapSuffix(invalid_column_names)]
1079
+ return [InvalidShapSuffix(invalid_column_names)]
1020
1080
  return []
1021
1081
 
1022
1082
  @staticmethod
1023
1083
  def _check_reserved_columns(
1024
1084
  schema: BaseSchema,
1025
1085
  model_type: ModelTypes,
1026
- ) -> list[err.ReservedColumns]:
1086
+ ) -> list[ReservedColumns]:
1027
1087
  if isinstance(schema, CorpusSchema):
1028
1088
  return []
1029
1089
  if isinstance(schema, Schema):
@@ -1127,29 +1187,29 @@ class Validator:
1127
1187
  )
1128
1188
 
1129
1189
  if reserved_columns:
1130
- return [err.ReservedColumns(reserved_columns)]
1190
+ return [ReservedColumns(reserved_columns)]
1131
1191
  return []
1132
1192
 
1133
1193
  @staticmethod
1134
1194
  def _check_invalid_model_id(
1135
1195
  model_id: str | None,
1136
- ) -> list[err.InvalidModelId]:
1196
+ ) -> list[InvalidModelId]:
1137
1197
  # assume it's been coerced to string beforehand
1138
1198
  if (not isinstance(model_id, str)) or len(model_id.strip()) == 0:
1139
- return [err.InvalidModelId()]
1199
+ return [InvalidModelId()]
1140
1200
  return []
1141
1201
 
1142
1202
  @staticmethod
1143
1203
  def _check_invalid_model_version(
1144
1204
  model_version: str | None = None,
1145
- ) -> list[err.InvalidModelVersion]:
1205
+ ) -> list[InvalidModelVersion]:
1146
1206
  if model_version is None:
1147
1207
  return []
1148
1208
  if (
1149
1209
  not isinstance(model_version, str)
1150
1210
  or len(model_version.strip()) == 0
1151
1211
  ):
1152
- return [err.InvalidModelVersion()]
1212
+ return [InvalidModelVersion()]
1153
1213
 
1154
1214
  return []
1155
1215
 
@@ -1157,35 +1217,35 @@ class Validator:
1157
1217
  def _check_invalid_batch_id(
1158
1218
  batch_id: str | None,
1159
1219
  environment: Environments,
1160
- ) -> list[err.InvalidBatchId]:
1220
+ ) -> list[InvalidBatchId]:
1161
1221
  # assume it's been coerced to string beforehand
1162
1222
  if environment in (Environments.VALIDATION,) and (
1163
1223
  (not isinstance(batch_id, str)) or len(batch_id.strip()) == 0
1164
1224
  ):
1165
- return [err.InvalidBatchId()]
1225
+ return [InvalidBatchId()]
1166
1226
  return []
1167
1227
 
1168
1228
  @staticmethod
1169
1229
  def _check_invalid_model_type(
1170
1230
  model_type: ModelTypes,
1171
- ) -> list[err.InvalidModelType]:
1231
+ ) -> list[InvalidModelType]:
1172
1232
  if model_type in (mt for mt in ModelTypes):
1173
1233
  return []
1174
- return [err.InvalidModelType()]
1234
+ return [InvalidModelType()]
1175
1235
 
1176
1236
  @staticmethod
1177
1237
  def _check_invalid_environment(
1178
1238
  environment: Environments,
1179
- ) -> list[err.InvalidEnvironment]:
1239
+ ) -> list[InvalidEnvironment]:
1180
1240
  if environment in (env for env in Environments):
1181
1241
  return []
1182
- return [err.InvalidEnvironment()]
1242
+ return [InvalidEnvironment()]
1183
1243
 
1184
1244
  @staticmethod
1185
1245
  def _check_existence_preprod_pred_act_score_or_label(
1186
1246
  schema: Schema,
1187
1247
  environment: Environments,
1188
- ) -> list[err.MissingPreprodPredActNumericAndCategorical]:
1248
+ ) -> list[MissingPreprodPredActNumericAndCategorical]:
1189
1249
  if environment in (Environments.VALIDATION, Environments.TRAINING) and (
1190
1250
  (
1191
1251
  schema.prediction_label_column_name is None
@@ -1196,13 +1256,13 @@ class Validator:
1196
1256
  and schema.actual_score_column_name is None
1197
1257
  )
1198
1258
  ):
1199
- return [err.MissingPreprodPredActNumericAndCategorical()]
1259
+ return [MissingPreprodPredActNumericAndCategorical()]
1200
1260
  return []
1201
1261
 
1202
1262
  @staticmethod
1203
1263
  def _check_exactly_one_cv_column_type(
1204
1264
  schema: Schema, environment: Environments
1205
- ) -> list[err.MultipleCVPredAct | err.MissingCVPredAct]:
1265
+ ) -> list[MultipleCVPredAct | MissingCVPredAct]:
1206
1266
  # Checks that the required prediction/actual columns are given in the schema depending on
1207
1267
  # the environment, for object detection models. There should be exactly one of
1208
1268
  # object detection, semantic segmentation, or instance segmentation columns.
@@ -1232,9 +1292,9 @@ class Validator:
1232
1292
  )
1233
1293
 
1234
1294
  if cv_types_count == 0:
1235
- return [err.MissingCVPredAct(environment)]
1295
+ return [MissingCVPredAct(environment)]
1236
1296
  if cv_types_count > 1:
1237
- return [err.MultipleCVPredAct(environment)]
1297
+ return [MultipleCVPredAct(environment)]
1238
1298
 
1239
1299
  elif environment in (
1240
1300
  Environments.TRAINING,
@@ -1265,16 +1325,16 @@ class Validator:
1265
1325
  )
1266
1326
 
1267
1327
  if cv_types_count == 0:
1268
- return [err.MissingCVPredAct(environment)]
1328
+ return [MissingCVPredAct(environment)]
1269
1329
  if cv_types_count > 1:
1270
- return [err.MultipleCVPredAct(environment)]
1330
+ return [MultipleCVPredAct(environment)]
1271
1331
 
1272
1332
  return []
1273
1333
 
1274
1334
  @staticmethod
1275
1335
  def _check_missing_object_detection_columns(
1276
1336
  schema: Schema, model_type: ModelTypes
1277
- ) -> list[err.InvalidPredActCVColumnNamesForModelType]:
1337
+ ) -> list[InvalidPredActCVColumnNamesForModelType]:
1278
1338
  # Checks that models that are not Object Detection models don't have, in the schema, the
1279
1339
  # object detection, semantic segmentation, or instance segmentation dedicated prediction/actual
1280
1340
  # column names
@@ -1286,13 +1346,13 @@ class Validator:
1286
1346
  or schema.instance_segmentation_prediction_column_names is not None
1287
1347
  or schema.instance_segmentation_actual_column_names is not None
1288
1348
  ):
1289
- return [err.InvalidPredActCVColumnNamesForModelType(model_type)]
1349
+ return [InvalidPredActCVColumnNamesForModelType(model_type)]
1290
1350
  return []
1291
1351
 
1292
1352
  @staticmethod
1293
1353
  def _check_missing_non_object_detection_columns(
1294
1354
  schema: Schema, model_type: ModelTypes
1295
- ) -> list[err.InvalidPredActColumnNamesForModelType]:
1355
+ ) -> list[InvalidPredActColumnNamesForModelType]:
1296
1356
  # Checks that object detection models don't have, in the schema, the columns reserved for
1297
1357
  # other model types
1298
1358
  columns_to_check = (
@@ -1317,7 +1377,7 @@ class Validator:
1317
1377
  "instance_segmentation_actual_column_names",
1318
1378
  ]
1319
1379
  return [
1320
- err.InvalidPredActColumnNamesForModelType(
1380
+ InvalidPredActColumnNamesForModelType(
1321
1381
  model_type, allowed_cols, wrong_cols
1322
1382
  )
1323
1383
  ]
@@ -1326,7 +1386,7 @@ class Validator:
1326
1386
  @staticmethod
1327
1387
  def _check_missing_multi_class_columns(
1328
1388
  schema: Schema, model_type: ModelTypes
1329
- ) -> list[err.InvalidPredActColumnNamesForModelType]:
1389
+ ) -> list[InvalidPredActColumnNamesForModelType]:
1330
1390
  # Checks that models that are not Multi Class models don't have, in the schema, the
1331
1391
  # multi class dedicated threshold column
1332
1392
  if (
@@ -1334,7 +1394,7 @@ class Validator:
1334
1394
  and schema.multi_class_threshold_scores_column_name is not None
1335
1395
  ):
1336
1396
  return [
1337
- err.InvalidPredActColumnNamesForModelType(
1397
+ InvalidPredActColumnNamesForModelType(
1338
1398
  model_type,
1339
1399
  None,
1340
1400
  [schema.multi_class_threshold_scores_column_name],
@@ -1345,7 +1405,7 @@ class Validator:
1345
1405
  @staticmethod
1346
1406
  def _check_existing_multi_class_columns(
1347
1407
  schema: Schema,
1348
- ) -> list[err.MissingReqPredActColumnNamesForMultiClass]:
1408
+ ) -> list[MissingReqPredActColumnNamesForMultiClass]:
1349
1409
  # Checks that models that are Multi Class models have, in the schema, the
1350
1410
  # required prediction score or actual score columns
1351
1411
  if (
@@ -1355,13 +1415,13 @@ class Validator:
1355
1415
  schema.multi_class_threshold_scores_column_name is not None
1356
1416
  and schema.prediction_score_column_name is None
1357
1417
  ):
1358
- return [err.MissingReqPredActColumnNamesForMultiClass()]
1418
+ return [MissingReqPredActColumnNamesForMultiClass()]
1359
1419
  return []
1360
1420
 
1361
1421
  @staticmethod
1362
1422
  def _check_missing_non_multi_class_columns(
1363
1423
  schema: Schema, model_type: ModelTypes
1364
- ) -> list[err.InvalidPredActColumnNamesForModelType]:
1424
+ ) -> list[InvalidPredActColumnNamesForModelType]:
1365
1425
  # Checks that multi class models don't have, in the schema, the columns reserved for
1366
1426
  # other model types
1367
1427
  columns_to_check = (
@@ -1387,7 +1447,7 @@ class Validator:
1387
1447
  "actual_score_column_name",
1388
1448
  ]
1389
1449
  return [
1390
- err.InvalidPredActColumnNamesForModelType(
1450
+ InvalidPredActColumnNamesForModelType(
1391
1451
  model_type, allowed_cols, wrong_cols
1392
1452
  )
1393
1453
  ]
@@ -1397,17 +1457,17 @@ class Validator:
1397
1457
  def _check_existence_preprod_act(
1398
1458
  schema: Schema,
1399
1459
  environment: Environments,
1400
- ) -> list[err.MissingPreprodAct]:
1460
+ ) -> list[MissingPreprodAct]:
1401
1461
  if environment in (Environments.VALIDATION, Environments.TRAINING) and (
1402
1462
  schema.actual_label_column_name is None
1403
1463
  ):
1404
- return [err.MissingPreprodAct()]
1464
+ return [MissingPreprodAct()]
1405
1465
  return []
1406
1466
 
1407
1467
  @staticmethod
1408
1468
  def _check_existence_group_id_rank_category_relevance(
1409
1469
  schema: Schema,
1410
- ) -> list[err.MissingRequiredColumnsForRankingModel]:
1470
+ ) -> list[MissingRequiredColumnsForRankingModel]:
1411
1471
  # prediction_group_id and rank columns are required as ranking prediction columns.
1412
1472
  ranking_prediction_cols = (
1413
1473
  schema.prediction_label_column_name,
@@ -1425,13 +1485,13 @@ class Validator:
1425
1485
  # If there is prediction information (not delayed actuals),
1426
1486
  # there must exist a rank and prediction group id columns
1427
1487
  if has_prediction_info and any(col is None for col in required):
1428
- return [err.MissingRequiredColumnsForRankingModel()]
1488
+ return [MissingRequiredColumnsForRankingModel()]
1429
1489
  return []
1430
1490
 
1431
1491
  @staticmethod
1432
1492
  def _check_dataframe_for_duplicate_columns(
1433
1493
  schema: BaseSchema, dataframe: pd.DataFrame
1434
- ) -> list[err.DuplicateColumnsInDataframe]:
1494
+ ) -> list[DuplicateColumnsInDataframe]:
1435
1495
  # Get the columns used in the schema
1436
1496
  schema_col_used = schema.get_used_columns()
1437
1497
  # Get the duplicated column names from the dataframe
@@ -1441,17 +1501,17 @@ class Validator:
1441
1501
  col for col in duplicate_columns if col in schema_col_used
1442
1502
  ]
1443
1503
  if schema_duplicate_cols:
1444
- return [err.DuplicateColumnsInDataframe(schema_duplicate_cols)]
1504
+ return [DuplicateColumnsInDataframe(schema_duplicate_cols)]
1445
1505
  return []
1446
1506
 
1447
1507
  @staticmethod
1448
1508
  def _check_invalid_number_of_embeddings(
1449
1509
  schema: Schema,
1450
- ) -> list[err.InvalidNumberOfEmbeddings]:
1510
+ ) -> list[InvalidNumberOfEmbeddings]:
1451
1511
  if schema.embedding_feature_column_names is not None:
1452
1512
  number_of_embeddings = len(schema.embedding_feature_column_names)
1453
1513
  if number_of_embeddings > MAX_NUMBER_OF_EMBEDDINGS:
1454
- return [err.InvalidNumberOfEmbeddings(number_of_embeddings)]
1514
+ return [InvalidNumberOfEmbeddings(number_of_embeddings)]
1455
1515
  return []
1456
1516
 
1457
1517
  # -----------
@@ -1461,7 +1521,7 @@ class Validator:
1461
1521
  @staticmethod
1462
1522
  def _check_type_prediction_id(
1463
1523
  schema: Schema, column_types: dict[str, Any]
1464
- ) -> list[err.InvalidType]:
1524
+ ) -> list[InvalidType]:
1465
1525
  col = schema.prediction_id_column_name
1466
1526
  if col in column_types:
1467
1527
  # should mirror server side
@@ -1474,7 +1534,7 @@ class Validator:
1474
1534
  )
1475
1535
  if column_types[col] not in allowed_datatypes:
1476
1536
  return [
1477
- err.InvalidType(
1537
+ InvalidType(
1478
1538
  "Prediction IDs",
1479
1539
  expected_types=["str", "int"],
1480
1540
  found_data_type=column_types[col],
@@ -1485,7 +1545,7 @@ class Validator:
1485
1545
  @staticmethod
1486
1546
  def _check_type_timestamp(
1487
1547
  schema: Schema, column_types: dict[str, Any]
1488
- ) -> list[err.InvalidType]:
1548
+ ) -> list[InvalidType]:
1489
1549
  col = schema.timestamp_column_name
1490
1550
  if col in column_types:
1491
1551
  # should mirror server side
@@ -1501,7 +1561,7 @@ class Validator:
1501
1561
  and t not in allowed_datatypes
1502
1562
  ):
1503
1563
  return [
1504
- err.InvalidType(
1564
+ InvalidType(
1505
1565
  "Prediction timestamp",
1506
1566
  expected_types=["Date", "Timestamp", "int", "float"],
1507
1567
  found_data_type=t,
@@ -1512,7 +1572,7 @@ class Validator:
1512
1572
  @staticmethod
1513
1573
  def _check_type_features(
1514
1574
  schema: Schema, column_types: dict[str, Any]
1515
- ) -> list[err.InvalidTypeFeatures]:
1575
+ ) -> list[InvalidTypeFeatures]:
1516
1576
  if schema.feature_column_names is not None:
1517
1577
  # should mirror server side
1518
1578
  allowed_datatypes = (
@@ -1535,7 +1595,7 @@ class Validator:
1535
1595
  ]
1536
1596
  if wrong_type_cols:
1537
1597
  return [
1538
- err.InvalidTypeFeatures(
1598
+ InvalidTypeFeatures(
1539
1599
  wrong_type_cols,
1540
1600
  expected_types=[
1541
1601
  "float",
@@ -1551,7 +1611,7 @@ class Validator:
1551
1611
  @staticmethod
1552
1612
  def _check_type_embedding_features(
1553
1613
  schema: Schema, column_types: dict[str, Any]
1554
- ) -> list[err.InvalidTypeFeatures]:
1614
+ ) -> list[InvalidTypeFeatures]:
1555
1615
  if schema.embedding_feature_column_names is not None:
1556
1616
  # should mirror server side
1557
1617
  allowed_vector_datatypes = (
@@ -1599,20 +1659,20 @@ class Validator:
1599
1659
  wrong_type_embedding_errors = []
1600
1660
  if wrong_type_vector_columns:
1601
1661
  wrong_type_embedding_errors.append(
1602
- err.InvalidTypeFeatures(
1662
+ InvalidTypeFeatures(
1603
1663
  wrong_type_vector_columns,
1604
1664
  expected_types=["list[float], np.array[float]"],
1605
1665
  )
1606
1666
  )
1607
1667
  if wrong_type_data_columns:
1608
1668
  wrong_type_embedding_errors.append(
1609
- err.InvalidTypeFeatures(
1669
+ InvalidTypeFeatures(
1610
1670
  wrong_type_data_columns, expected_types=["list[string]"]
1611
1671
  )
1612
1672
  )
1613
1673
  if wrong_type_link_to_data_columns:
1614
1674
  wrong_type_embedding_errors.append(
1615
- err.InvalidTypeFeatures(
1675
+ InvalidTypeFeatures(
1616
1676
  wrong_type_link_to_data_columns,
1617
1677
  expected_types=["string"],
1618
1678
  )
@@ -1627,7 +1687,7 @@ class Validator:
1627
1687
  @staticmethod
1628
1688
  def _check_type_tags(
1629
1689
  schema: Schema, column_types: dict[str, Any]
1630
- ) -> list[err.InvalidTypeTags]:
1690
+ ) -> list[InvalidTypeTags]:
1631
1691
  if schema.tag_column_names is not None:
1632
1692
  # should mirror server side
1633
1693
  allowed_datatypes = (
@@ -1649,7 +1709,7 @@ class Validator:
1649
1709
  ]
1650
1710
  if wrong_type_cols:
1651
1711
  return [
1652
- err.InvalidTypeTags(
1712
+ InvalidTypeTags(
1653
1713
  wrong_type_cols, ["float", "int", "bool", "str"]
1654
1714
  )
1655
1715
  ]
@@ -1658,7 +1718,7 @@ class Validator:
1658
1718
  @staticmethod
1659
1719
  def _check_type_shap_values(
1660
1720
  schema: Schema, column_types: dict[str, Any]
1661
- ) -> list[err.InvalidTypeShapValues]:
1721
+ ) -> list[InvalidTypeShapValues]:
1662
1722
  if schema.shap_values_column_names is not None:
1663
1723
  # should mirror server side
1664
1724
  allowed_datatypes = (
@@ -1675,7 +1735,7 @@ class Validator:
1675
1735
  ]
1676
1736
  if wrong_type_cols:
1677
1737
  return [
1678
- err.InvalidTypeShapValues(
1738
+ InvalidTypeShapValues(
1679
1739
  wrong_type_cols, expected_types=["float", "int"]
1680
1740
  )
1681
1741
  ]
@@ -1684,7 +1744,7 @@ class Validator:
1684
1744
  @staticmethod
1685
1745
  def _check_type_pred_act_labels(
1686
1746
  model_type: ModelTypes, schema: Schema, column_types: dict[str, Any]
1687
- ) -> list[err.InvalidType]:
1747
+ ) -> list[InvalidType]:
1688
1748
  errors = []
1689
1749
  columns = (
1690
1750
  ("Prediction labels", schema.prediction_label_column_name),
@@ -1713,7 +1773,7 @@ class Validator:
1713
1773
  and column_types[col] not in allowed_datatypes
1714
1774
  ):
1715
1775
  errors.append(
1716
- err.InvalidType(
1776
+ InvalidType(
1717
1777
  name,
1718
1778
  expected_types=["float", "int", "bool", "str"],
1719
1779
  found_data_type=column_types[col],
@@ -1737,7 +1797,7 @@ class Validator:
1737
1797
  and column_types[col] not in allowed_datatypes
1738
1798
  ):
1739
1799
  errors.append(
1740
- err.InvalidType(
1800
+ InvalidType(
1741
1801
  name,
1742
1802
  expected_types=["float", "int"],
1743
1803
  found_data_type=column_types[col],
@@ -1748,7 +1808,7 @@ class Validator:
1748
1808
  @staticmethod
1749
1809
  def _check_type_pred_act_scores(
1750
1810
  model_type: ModelTypes, schema: Schema, column_types: dict[str, Any]
1751
- ) -> list[err.InvalidType]:
1811
+ ) -> list[InvalidType]:
1752
1812
  errors = []
1753
1813
  columns = (
1754
1814
  ("Prediction scores", schema.prediction_score_column_name),
@@ -1777,7 +1837,7 @@ class Validator:
1777
1837
  and column_types[col] not in allowed_datatypes
1778
1838
  ):
1779
1839
  errors.append(
1780
- err.InvalidType(
1840
+ InvalidType(
1781
1841
  name,
1782
1842
  expected_types=["float", "int"],
1783
1843
  found_data_type=column_types[col],
@@ -1788,7 +1848,7 @@ class Validator:
1788
1848
  @staticmethod
1789
1849
  def _check_type_multi_class_pred_threshold_act_scores(
1790
1850
  schema: Schema, column_types: dict[str, Any]
1791
- ) -> list[err.InvalidType]:
1851
+ ) -> list[InvalidType]:
1792
1852
  """Check type for prediction / threshold / actual scores for multiclass model.
1793
1853
 
1794
1854
  Expect the scores to be a list of pyarrow structs that contains field
@@ -1834,7 +1894,7 @@ class Validator:
1834
1894
  and column_types[col] not in allowed_class_score_map_datatypes
1835
1895
  ):
1836
1896
  errors.append(
1837
- err.InvalidType(
1897
+ InvalidType(
1838
1898
  name,
1839
1899
  expected_types=[
1840
1900
  "List[Dict{class_name: str, score: int}]",
@@ -1848,7 +1908,7 @@ class Validator:
1848
1908
  @staticmethod
1849
1909
  def _check_type_prompt_response(
1850
1910
  schema: Schema, column_types: dict[str, Any]
1851
- ) -> list[err.InvalidTypeColumns]:
1911
+ ) -> list[InvalidTypeColumns]:
1852
1912
  fields_to_check = []
1853
1913
  if schema.prompt_column_names is not None:
1854
1914
  fields_to_check.append(schema.prompt_column_names)
@@ -1895,20 +1955,20 @@ class Validator:
1895
1955
  wrong_type_col_errors = []
1896
1956
  if wrong_type_vector_columns:
1897
1957
  wrong_type_col_errors.append(
1898
- err.InvalidTypeColumns(
1958
+ InvalidTypeColumns(
1899
1959
  wrong_type_vector_columns,
1900
1960
  expected_types=["list[float], np.array[float]"],
1901
1961
  )
1902
1962
  )
1903
1963
  if wrong_type_data_columns:
1904
1964
  wrong_type_col_errors.append(
1905
- err.InvalidTypeColumns(
1965
+ InvalidTypeColumns(
1906
1966
  wrong_type_data_columns, expected_types=["str, list[str]"]
1907
1967
  )
1908
1968
  )
1909
1969
  if wrong_type_str_columns:
1910
1970
  wrong_type_col_errors.append(
1911
- err.InvalidTypeColumns(
1971
+ InvalidTypeColumns(
1912
1972
  wrong_type_str_columns, expected_types=["str"]
1913
1973
  )
1914
1974
  )
@@ -1918,7 +1978,7 @@ class Validator:
1918
1978
  @staticmethod
1919
1979
  def _check_type_llm_prompt_templates(
1920
1980
  schema: Schema, column_types: dict[str, Any]
1921
- ) -> list[err.InvalidTypeColumns]:
1981
+ ) -> list[InvalidTypeColumns]:
1922
1982
  if schema.prompt_template_column_names is None:
1923
1983
  return []
1924
1984
 
@@ -1949,7 +2009,7 @@ class Validator:
1949
2009
  # Return errors if any
1950
2010
  if wrong_type_cols:
1951
2011
  return [
1952
- err.InvalidTypeColumns(
2012
+ InvalidTypeColumns(
1953
2013
  wrong_type_columns=wrong_type_cols,
1954
2014
  expected_types=["string"],
1955
2015
  )
@@ -1959,7 +2019,7 @@ class Validator:
1959
2019
  @staticmethod
1960
2020
  def _check_type_llm_config(
1961
2021
  schema: Schema, column_types: dict[str, Any]
1962
- ) -> list[err.InvalidTypeColumns]:
2022
+ ) -> list[InvalidTypeColumns]:
1963
2023
  if schema.llm_config_column_names is None:
1964
2024
  return []
1965
2025
 
@@ -1986,7 +2046,7 @@ class Validator:
1986
2046
  # Return errors if any
1987
2047
  if wrong_type_cols:
1988
2048
  return [
1989
- err.InvalidTypeColumns(
2049
+ InvalidTypeColumns(
1990
2050
  wrong_type_columns=wrong_type_cols,
1991
2051
  expected_types=["string"],
1992
2052
  )
@@ -1996,7 +2056,7 @@ class Validator:
1996
2056
  @staticmethod
1997
2057
  def _check_type_llm_run_metadata(
1998
2058
  schema: Schema, column_types: dict[str, Any]
1999
- ) -> list[err.InvalidTypeColumns]:
2059
+ ) -> list[InvalidTypeColumns]:
2000
2060
  if schema.llm_run_metadata_column_names is None:
2001
2061
  return []
2002
2062
 
@@ -2059,7 +2119,7 @@ class Validator:
2059
2119
  # Return errors if there are any
2060
2120
  if wrong_type_cols:
2061
2121
  return [
2062
- err.InvalidTypeColumns(
2122
+ InvalidTypeColumns(
2063
2123
  wrong_type_columns=wrong_type_cols,
2064
2124
  expected_types=["int", "float"],
2065
2125
  )
@@ -2069,7 +2129,7 @@ class Validator:
2069
2129
  @staticmethod
2070
2130
  def _check_type_retrieved_document_ids(
2071
2131
  schema: Schema, column_types: dict[str, Any]
2072
- ) -> list[err.InvalidType]:
2132
+ ) -> list[InvalidType]:
2073
2133
  col = schema.retrieved_document_ids_column_name
2074
2134
  if col in column_types:
2075
2135
  # should mirror server side
@@ -2079,7 +2139,7 @@ class Validator:
2079
2139
  )
2080
2140
  if column_types[col] not in allowed_datatypes:
2081
2141
  return [
2082
- err.InvalidType(
2142
+ InvalidType(
2083
2143
  "Retrieved Document IDs",
2084
2144
  expected_types=["List[str]"],
2085
2145
  found_data_type=column_types[col],
@@ -2090,7 +2150,7 @@ class Validator:
2090
2150
  @staticmethod
2091
2151
  def _check_type_image_segment_coordinates(
2092
2152
  schema: Schema, column_types: dict[str, Any]
2093
- ) -> list[err.InvalidTypeColumns]:
2153
+ ) -> list[InvalidTypeColumns]:
2094
2154
  # should mirror server side
2095
2155
  allowed_coordinate_types = (
2096
2156
  pa.list_(pa.list_(pa.float64())),
@@ -2173,7 +2233,7 @@ class Validator:
2173
2233
 
2174
2234
  return (
2175
2235
  [
2176
- err.InvalidTypeColumns(
2236
+ InvalidTypeColumns(
2177
2237
  wrong_type_columns=wrong_type_cols,
2178
2238
  expected_types=["List[List[float]]"],
2179
2239
  )
@@ -2185,7 +2245,7 @@ class Validator:
2185
2245
  @staticmethod
2186
2246
  def _check_type_image_segment_categories(
2187
2247
  schema: Schema, column_types: dict[str, Any]
2188
- ) -> list[err.InvalidTypeColumns]:
2248
+ ) -> list[InvalidTypeColumns]:
2189
2249
  # should mirror server side
2190
2250
  allowed_category_datatypes = (
2191
2251
  pa.list_(pa.string()),
@@ -2242,7 +2302,7 @@ class Validator:
2242
2302
 
2243
2303
  return (
2244
2304
  [
2245
- err.InvalidTypeColumns(
2305
+ InvalidTypeColumns(
2246
2306
  wrong_type_columns=wrong_type_cols,
2247
2307
  expected_types=["List[str]"],
2248
2308
  )
@@ -2254,7 +2314,7 @@ class Validator:
2254
2314
  @staticmethod
2255
2315
  def _check_type_image_segment_scores(
2256
2316
  schema: Schema, column_types: dict[str, Any]
2257
- ) -> list[err.InvalidTypeColumns]:
2317
+ ) -> list[InvalidTypeColumns]:
2258
2318
  # should mirror server side
2259
2319
  allowed_score_datatypes = (
2260
2320
  pa.list_(pa.float64()),
@@ -2297,7 +2357,7 @@ class Validator:
2297
2357
 
2298
2358
  return (
2299
2359
  [
2300
- err.InvalidTypeColumns(
2360
+ InvalidTypeColumns(
2301
2361
  wrong_type_columns=wrong_type_cols,
2302
2362
  expected_types=["List[float]"],
2303
2363
  )
@@ -2313,7 +2373,7 @@ class Validator:
2313
2373
  @staticmethod
2314
2374
  def _check_embedding_vectors_dimensionality(
2315
2375
  dataframe: pd.DataFrame, schema: Schema
2316
- ) -> list[err.ValidationError]:
2376
+ ) -> list[ValidationError]:
2317
2377
  if schema.embedding_feature_column_names is None:
2318
2378
  return []
2319
2379
 
@@ -2331,7 +2391,7 @@ class Validator:
2331
2391
 
2332
2392
  return (
2333
2393
  [
2334
- err.InvalidValueEmbeddingVectorDimensionality(
2394
+ InvalidValueEmbeddingVectorDimensionality(
2335
2395
  invalid_low_dim_vector_cols,
2336
2396
  invalid_high_dim_vector_cols,
2337
2397
  ),
@@ -2343,7 +2403,7 @@ class Validator:
2343
2403
  @staticmethod
2344
2404
  def _check_embedding_raw_data_characters(
2345
2405
  dataframe: pd.DataFrame, schema: Schema
2346
- ) -> list[err.ValidationError]:
2406
+ ) -> list[ValidationError]:
2347
2407
  if schema.embedding_feature_column_names is None:
2348
2408
  return []
2349
2409
 
@@ -2361,7 +2421,7 @@ class Validator:
2361
2421
 
2362
2422
  if invalid_long_string_data_cols:
2363
2423
  return [
2364
- err.InvalidValueEmbeddingRawDataTooLong(
2424
+ InvalidValueEmbeddingRawDataTooLong(
2365
2425
  invalid_long_string_data_cols
2366
2426
  )
2367
2427
  ]
@@ -2377,20 +2437,20 @@ class Validator:
2377
2437
  @staticmethod
2378
2438
  def _check_value_rank(
2379
2439
  dataframe: pd.DataFrame, schema: Schema
2380
- ) -> list[err.InvalidRankValue]:
2440
+ ) -> list[InvalidRankValue]:
2381
2441
  col = schema.rank_column_name
2382
2442
  lbound, ubound = (1, 100)
2383
2443
 
2384
2444
  if col is not None and col in dataframe.columns:
2385
2445
  rank_min_max = dataframe[col].agg(["min", "max"])
2386
2446
  if rank_min_max["min"] < lbound or rank_min_max["max"] > ubound:
2387
- return [err.InvalidRankValue(col, "1-100")]
2447
+ return [InvalidRankValue(col, "1-100")]
2388
2448
  return []
2389
2449
 
2390
2450
  @staticmethod
2391
2451
  def _check_id_field_str_length(
2392
2452
  dataframe: pd.DataFrame, schema_name: str, id_col_name: str | None
2393
- ) -> list[err.ValidationError]:
2453
+ ) -> list[ValidationError]:
2394
2454
  """Require prediction_id to be a string of length between MIN and MAX.
2395
2455
 
2396
2456
  Between MIN_PREDICTION_ID_LEN and MAX_PREDICTION_ID_LEN.
@@ -2412,7 +2472,7 @@ class Validator:
2412
2472
  .all()
2413
2473
  ):
2414
2474
  return [
2415
- err.InvalidStringLengthInColumn(
2475
+ InvalidStringLengthInColumn(
2416
2476
  schema_name=schema_name,
2417
2477
  col_name=id_col_name,
2418
2478
  min_length=MIN_PREDICTION_ID_LEN,
@@ -2424,7 +2484,7 @@ class Validator:
2424
2484
  @staticmethod
2425
2485
  def _check_document_id_field_str_length(
2426
2486
  dataframe: pd.DataFrame, schema_name: str, id_col_name: str | None
2427
- ) -> list[err.ValidationError]:
2487
+ ) -> list[ValidationError]:
2428
2488
  """Require document id to be a string of length between MIN and MAX.
2429
2489
 
2430
2490
  Between MIN_DOCUMENT_ID_LEN and MAX_DOCUMENT_ID_LEN.
@@ -2446,7 +2506,7 @@ class Validator:
2446
2506
  .all()
2447
2507
  ):
2448
2508
  return [
2449
- err.InvalidStringLengthInColumn(
2509
+ InvalidStringLengthInColumn(
2450
2510
  schema_name=schema_name,
2451
2511
  col_name=id_col_name,
2452
2512
  min_length=MIN_DOCUMENT_ID_LEN,
@@ -2476,7 +2536,7 @@ class Validator:
2476
2536
  @staticmethod
2477
2537
  def _check_value_tag(
2478
2538
  dataframe: pd.DataFrame, schema: Schema
2479
- ) -> list[err.InvalidTagLength]:
2539
+ ) -> list[InvalidTagLength]:
2480
2540
  if schema.tag_column_names is None:
2481
2541
  return []
2482
2542
 
@@ -2501,7 +2561,7 @@ class Validator:
2501
2561
  elif max_tag_len > MAX_TAG_LENGTH_TRUNCATION:
2502
2562
  truncated_tag_cols.append(col)
2503
2563
  if wrong_tag_cols:
2504
- return [err.InvalidTagLength(wrong_tag_cols)]
2564
+ return [InvalidTagLength(wrong_tag_cols)]
2505
2565
  if truncated_tag_cols:
2506
2566
  logger.warning(
2507
2567
  get_truncation_warning_message(
@@ -2513,7 +2573,7 @@ class Validator:
2513
2573
  @staticmethod
2514
2574
  def _check_value_ranking_category(
2515
2575
  dataframe: pd.DataFrame, schema: Schema
2516
- ) -> list[err.InvalidValueMissingValue | err.InvalidRankingCategoryValue]:
2576
+ ) -> list[InvalidValueMissingValue | InvalidRankingCategoryValue]:
2517
2577
  if schema.relevance_labels_column_name is not None:
2518
2578
  col = schema.relevance_labels_column_name
2519
2579
  elif schema.attributions_column_name is not None:
@@ -2526,11 +2586,11 @@ class Validator:
2526
2586
  # which would be caught by _check_value_missing
2527
2587
  return []
2528
2588
  if dataframe[col].astype(str).str.len().min() == 0:
2529
- return [err.InvalidRankingCategoryValue(col)]
2589
+ return [InvalidRankingCategoryValue(col)]
2530
2590
  # empty list
2531
2591
  not_null_filter = dataframe[col].notnull()
2532
2592
  if dataframe[not_null_filter][col].map(len).min() == 0:
2533
- return [err.InvalidValueMissingValue(col, "empty list")]
2593
+ return [InvalidValueMissingValue(col, "empty list")]
2534
2594
  # no empty string in list
2535
2595
  if (
2536
2596
  dataframe[not_null_filter][col]
@@ -2538,13 +2598,13 @@ class Validator:
2538
2598
  .min()
2539
2599
  == 0
2540
2600
  ):
2541
- return [err.InvalidRankingCategoryValue(col)]
2601
+ return [InvalidRankingCategoryValue(col)]
2542
2602
  return []
2543
2603
 
2544
2604
  @staticmethod
2545
2605
  def _check_length_multi_class_maps(
2546
2606
  dataframe: pd.DataFrame, schema: Schema
2547
- ) -> list[err.InvalidNumClassesMultiClassMap]:
2607
+ ) -> list[InvalidNumClassesMultiClassMap]:
2548
2608
  # each entry in column is a list of dictionaries mapping class names and scores
2549
2609
  # validate length of list of dictionaries for each column
2550
2610
  invalid_cols = {}
@@ -2575,16 +2635,16 @@ class Validator:
2575
2635
  if invalid_num_classes:
2576
2636
  invalid_cols[col] = invalid_num_classes
2577
2637
  if invalid_cols:
2578
- return [err.InvalidNumClassesMultiClassMap(invalid_cols)]
2638
+ return [InvalidNumClassesMultiClassMap(invalid_cols)]
2579
2639
  return []
2580
2640
 
2581
2641
  @staticmethod
2582
2642
  def _check_classes_and_scores_values_in_multi_class_maps(
2583
2643
  dataframe: pd.DataFrame, schema: Schema
2584
2644
  ) -> list[
2585
- err.InvalidMultiClassClassNameLength
2586
- | err.InvalidMultiClassActScoreValue
2587
- | err.InvalidMultiClassPredScoreValue
2645
+ InvalidMultiClassClassNameLength
2646
+ | InvalidMultiClassActScoreValue
2647
+ | InvalidMultiClassPredScoreValue
2588
2648
  ]:
2589
2649
  """Validate the class names and score values of dictionaries.
2590
2650
 
@@ -2649,21 +2709,17 @@ class Validator:
2649
2709
  if invalid_scores_for_col:
2650
2710
  invalid_pred_scores[col] = invalid_scores_for_col
2651
2711
  if invalid_class_names:
2652
- errors.append(
2653
- err.InvalidMultiClassClassNameLength(invalid_class_names)
2654
- )
2712
+ errors.append(InvalidMultiClassClassNameLength(invalid_class_names))
2655
2713
  if invalid_pred_scores:
2656
- errors.append(
2657
- err.InvalidMultiClassPredScoreValue(invalid_pred_scores)
2658
- )
2714
+ errors.append(InvalidMultiClassPredScoreValue(invalid_pred_scores))
2659
2715
  if invalid_actual_scores:
2660
- errors.append(err.InvalidMultiClassActScoreValue(col))
2716
+ errors.append(InvalidMultiClassActScoreValue(col))
2661
2717
  return errors
2662
2718
 
2663
2719
  @staticmethod
2664
2720
  def _check_each_multi_class_pred_has_threshold(
2665
2721
  dataframe: pd.DataFrame, schema: Schema
2666
- ) -> list[err.InvalidMultiClassThresholdClasses]:
2722
+ ) -> list[InvalidMultiClassThresholdClasses]:
2667
2723
  """Validate threshold scores for Multi Class models.
2668
2724
 
2669
2725
  If threshold scores column is included in schema and dataframe, validate that
@@ -2687,7 +2743,7 @@ class Validator:
2687
2743
  pred_class_set = set(pred_classes)
2688
2744
  if pred_class_set != thresh_class_set:
2689
2745
  return [
2690
- err.InvalidMultiClassThresholdClasses(
2746
+ InvalidMultiClassThresholdClasses(
2691
2747
  threshold_col, pred_class_set, thresh_class_set
2692
2748
  )
2693
2749
  ]
@@ -2697,7 +2753,7 @@ class Validator:
2697
2753
  def _check_value_timestamp(
2698
2754
  dataframe: pd.DataFrame,
2699
2755
  schema: Schema,
2700
- ) -> list[err.InvalidValueMissingValue | err.InvalidValueTimestamp]:
2756
+ ) -> list[InvalidValueMissingValue | InvalidValueTimestamp]:
2701
2757
  # Due to the timing difference between checking this here and the data finally
2702
2758
  # hitting the same check on server side, there's a some chance for a false
2703
2759
  # result, i.e. the check here succeeds but the same check on server side fails.
@@ -2708,9 +2764,7 @@ class Validator:
2708
2764
  # missing value first.
2709
2765
  if dataframe[col].isnull().values.any(): # type: ignore
2710
2766
  return [
2711
- err.InvalidValueMissingValue(
2712
- "Prediction timestamp", "missing"
2713
- )
2767
+ InvalidValueMissingValue("Prediction timestamp", "missing")
2714
2768
  ]
2715
2769
 
2716
2770
  now_t = datetime.now(tz=timezone.utc)
@@ -2794,7 +2848,7 @@ class Validator:
2794
2848
  )
2795
2849
  )
2796
2850
  ):
2797
- return [err.InvalidValueTimestamp(timestamp_col_name=col)]
2851
+ return [InvalidValueTimestamp(timestamp_col_name=col)]
2798
2852
 
2799
2853
  return []
2800
2854
 
@@ -2803,7 +2857,7 @@ class Validator:
2803
2857
  @staticmethod
2804
2858
  def _check_invalid_missing_values(
2805
2859
  dataframe: pd.DataFrame, schema: BaseSchema, model_type: ModelTypes
2806
- ) -> list[err.InvalidValueMissingValue]:
2860
+ ) -> list[InvalidValueMissingValue]:
2807
2861
  errors = []
2808
2862
  columns = ()
2809
2863
  if isinstance(schema, CorpusSchema):
@@ -2824,7 +2878,7 @@ class Validator:
2824
2878
  if col is not None and col in dataframe.columns:
2825
2879
  if dataframe[col].isnull().any():
2826
2880
  errors.append(
2827
- err.InvalidValueMissingValue(
2881
+ InvalidValueMissingValue(
2828
2882
  name, wrong_values="missing", column=col
2829
2883
  )
2830
2884
  )
@@ -2834,7 +2888,7 @@ class Validator:
2834
2888
  and np.isinf(dataframe[col]).any()
2835
2889
  ):
2836
2890
  errors.append(
2837
- err.InvalidValueMissingValue(
2891
+ InvalidValueMissingValue(
2838
2892
  name, wrong_values="infinite", column=col
2839
2893
  )
2840
2894
  )
@@ -2850,7 +2904,7 @@ class Validator:
2850
2904
  environment: Environments,
2851
2905
  schema: Schema,
2852
2906
  model_type: ModelTypes,
2853
- ) -> list[err.InvalidRecord]:
2907
+ ) -> list[InvalidRecord]:
2854
2908
  if environment in (Environments.VALIDATION, Environments.TRAINING):
2855
2909
  return []
2856
2910
 
@@ -2894,7 +2948,7 @@ class Validator:
2894
2948
  environment: Environments,
2895
2949
  schema: Schema,
2896
2950
  model_type: ModelTypes,
2897
- ) -> list[err.InvalidRecord]:
2951
+ ) -> list[InvalidRecord]:
2898
2952
  """Validates there's not a single row in the dataframe with all nulls.
2899
2953
 
2900
2954
  Returns errors if any row has all of pred_label and pred_score evaluating to
@@ -2942,7 +2996,7 @@ class Validator:
2942
2996
  @staticmethod
2943
2997
  def _check_invalid_record_helper(
2944
2998
  dataframe: pd.DataFrame, column_names: list[str | None]
2945
- ) -> list[err.InvalidRecord]:
2999
+ ) -> list[InvalidRecord]:
2946
3000
  """Check that there are no null values in a subset of columns.
2947
3001
 
2948
3002
  The column subset is computed from the input list of columns `column_names`
@@ -2950,7 +3004,7 @@ class Validator:
2950
3004
  null values are found.
2951
3005
 
2952
3006
  Returns:
2953
- List[err.InvalidRecord]: An error expressing the rows that are problematic
3007
+ List[InvalidRecord]: An error expressing the rows that are problematic
2954
3008
 
2955
3009
  """
2956
3010
  columns_subset = [
@@ -2964,12 +3018,12 @@ class Validator:
2964
3018
  null_index = null_filter[null_filter].index.values
2965
3019
  if len(null_index) == 0:
2966
3020
  return []
2967
- return [err.InvalidRecord(columns_subset, null_index)] # type: ignore
3021
+ return [InvalidRecord(columns_subset, null_index)] # type: ignore
2968
3022
 
2969
3023
  @staticmethod
2970
3024
  def _check_type_prediction_group_id(
2971
3025
  schema: Schema, column_types: dict[str, Any]
2972
- ) -> list[err.InvalidType]:
3026
+ ) -> list[InvalidType]:
2973
3027
  col = schema.prediction_group_id_column_name
2974
3028
  if col in column_types:
2975
3029
  # should mirror server side
@@ -2982,7 +3036,7 @@ class Validator:
2982
3036
  )
2983
3037
  if column_types[col] not in allowed_datatypes:
2984
3038
  return [
2985
- err.InvalidType(
3039
+ InvalidType(
2986
3040
  "prediction_group_ids",
2987
3041
  expected_types=["str", "int"],
2988
3042
  found_data_type=column_types[col],
@@ -2993,7 +3047,7 @@ class Validator:
2993
3047
  @staticmethod
2994
3048
  def _check_type_rank(
2995
3049
  schema: Schema, column_types: dict[str, Any]
2996
- ) -> list[err.InvalidType]:
3050
+ ) -> list[InvalidType]:
2997
3051
  col = schema.rank_column_name
2998
3052
  if col in column_types:
2999
3053
  allowed_datatypes = (
@@ -3004,7 +3058,7 @@ class Validator:
3004
3058
  )
3005
3059
  if column_types[col] not in allowed_datatypes:
3006
3060
  return [
3007
- err.InvalidType(
3061
+ InvalidType(
3008
3062
  "rank",
3009
3063
  expected_types=["int"],
3010
3064
  found_data_type=column_types[col],
@@ -3015,7 +3069,7 @@ class Validator:
3015
3069
  @staticmethod
3016
3070
  def _check_type_ranking_category(
3017
3071
  schema: Schema, column_types: dict[str, Any]
3018
- ) -> list[err.InvalidType]:
3072
+ ) -> list[InvalidType]:
3019
3073
  if schema.relevance_labels_column_name is not None:
3020
3074
  col = schema.relevance_labels_column_name
3021
3075
  elif schema.attributions_column_name is not None:
@@ -3026,7 +3080,7 @@ class Validator:
3026
3080
  allowed_datatypes = (pa.list_(pa.string()), pa.string(), pa.null())
3027
3081
  if column_types[col] not in allowed_datatypes:
3028
3082
  return [
3029
- err.InvalidType(
3083
+ InvalidType(
3030
3084
  "relevance labels column for ranking models",
3031
3085
  expected_types=["list of string", "string"],
3032
3086
  found_data_type=column_types[col],
@@ -3037,7 +3091,7 @@ class Validator:
  @staticmethod
  def _check_value_bounding_boxes_coordinates(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidBoundingBoxesCoordinates]:
+ ) -> list[InvalidBoundingBoxesCoordinates]:
  errors = []
  if schema.object_detection_prediction_column_names is not None:
  coords_col_name = schema.object_detection_prediction_column_names.bounding_boxes_coordinates_column_name # noqa: E501
@@ -3058,7 +3112,7 @@ class Validator:
  @staticmethod
  def _check_value_bounding_boxes_categories(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidBoundingBoxesCategories]:
+ ) -> list[InvalidBoundingBoxesCategories]:
  errors = []
  if schema.object_detection_prediction_column_names is not None:
  cat_col_name = schema.object_detection_prediction_column_names.categories_column_name
@@ -3079,7 +3133,7 @@ class Validator:
  @staticmethod
  def _check_value_bounding_boxes_scores(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidBoundingBoxesScores]:
+ ) -> list[InvalidBoundingBoxesScores]:
  errors = []
  if schema.object_detection_prediction_column_names is not None:
  sc_col_name = schema.object_detection_prediction_column_names.scores_column_name
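
Each of these object-detection checks validates one column of per-row lists. The coordinate rule enforced further below (in _box_coordinates_wrong_format) boils down to the following predicate, shown here as a hedged sketch:

    def box_is_well_formed(box) -> bool:
        # A box is [x1, y1, x2, y2]: four numbers where (x1, y1) is the
        # top-left corner and (x2, y2) the bottom-right corner.
        return (
            len(box) == 4
            and all(isinstance(v, (int, float)) for v in box)
            and box[0] < box[2]  # x1 < x2
            and box[1] < box[3]  # y1 < y2
        )

    assert box_is_well_formed([10.0, 20.0, 30.0, 40.0])
    assert not box_is_well_formed([30.0, 20.0, 10.0, 40.0])  # x1 >= x2
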
@@ -3104,7 +3158,7 @@ class Validator:
  @staticmethod
  def _check_value_semantic_segmentation_polygon_coordinates(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidPolygonCoordinates]:
+ ) -> list[InvalidPolygonCoordinates]:
  errors = []
  if schema.semantic_segmentation_prediction_column_names is not None:
  coords_col_name = schema.semantic_segmentation_prediction_column_names.polygon_coordinates_column_name # noqa: E501
@@ -3125,7 +3179,7 @@ class Validator:
  @staticmethod
  def _check_value_semantic_segmentation_polygon_categories(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidPolygonCategories]:
+ ) -> list[InvalidPolygonCategories]:
  errors = []
  if schema.semantic_segmentation_prediction_column_names is not None:
  cat_col_name = schema.semantic_segmentation_prediction_column_names.categories_column_name
@@ -3146,7 +3200,7 @@ class Validator:
  @staticmethod
  def _check_value_instance_segmentation_polygon_coordinates(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidPolygonCoordinates]:
+ ) -> list[InvalidPolygonCoordinates]:
  errors = []
  if schema.instance_segmentation_prediction_column_names is not None:
  coords_col_name = schema.instance_segmentation_prediction_column_names.polygon_coordinates_column_name # noqa: E501
@@ -3167,7 +3221,7 @@ class Validator:
  @staticmethod
  def _check_value_instance_segmentation_polygon_categories(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidPolygonCategories]:
+ ) -> list[InvalidPolygonCategories]:
  errors = []
  if schema.instance_segmentation_prediction_column_names is not None:
  cat_col_name = schema.instance_segmentation_prediction_column_names.categories_column_name
@@ -3188,7 +3242,7 @@ class Validator:
  @staticmethod
  def _check_value_instance_segmentation_polygon_scores(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidPolygonScores]:
+ ) -> list[InvalidPolygonScores]:
  errors = []
  if schema.instance_segmentation_prediction_column_names is not None:
  sc_col_name = schema.instance_segmentation_prediction_column_names.scores_column_name
@@ -3203,7 +3257,7 @@ class Validator:
  @staticmethod
  def _check_value_instance_segmentation_bbox_coordinates(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidBoundingBoxesCoordinates]:
+ ) -> list[InvalidBoundingBoxesCoordinates]:
  errors = []
  if schema.instance_segmentation_prediction_column_names is not None:
  coords_col_name = schema.instance_segmentation_prediction_column_names.bounding_boxes_coordinates_column_name # noqa: E501
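
All of the segmentation checks above share one dispatch pattern: when the schema does not declare the corresponding column-name group, the check returns an empty error list; otherwise it hands the column to a module-level helper (defined later in this diff). A stripped-down sketch of that pattern, with the schema attribute illustrative:

    import pandas as pd

    def check_polygon_coordinates(dataframe: pd.DataFrame, column_names) -> list:
        errors = []
        if column_names is None:
            # Feature not declared in the schema: nothing to validate
            return errors
        coords_col = dataframe[column_names.polygon_coordinates_column_name]
        error = _check_value_polygon_coordinates_helper(coords_col)
        if error is not None:
            errors.append(error)
        return errors
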
@@ -3226,7 +3280,7 @@ class Validator:
  @staticmethod
  def _check_value_prompt_response(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.ValidationError]:
+ ) -> list[ValidationError]:
  vector_cols_to_check = []
  text_cols_to_check = []
  if isinstance(schema.prompt_column_names, str):
@@ -3265,13 +3319,13 @@ class Validator:
  errors = []
  if invalid_long_string_data_cols:
  errors.append(
- err.InvalidValueEmbeddingRawDataTooLong(
+ InvalidValueEmbeddingRawDataTooLong(
  invalid_long_string_data_cols
  )
  )
  if invalid_low_dim_vector_cols or invalid_high_dim_vector_cols:
  errors.append(
- err.InvalidValueEmbeddingVectorDimensionality(
+ InvalidValueEmbeddingVectorDimensionality(
  invalid_low_dim_vector_cols,
  invalid_high_dim_vector_cols,
  )
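
The prompt/response check buckets columns into failure groups before building errors. A sketch of how those groups might be computed — the length and dimensionality limits here are assumptions, not the library's actual constants:

    import pandas as pd

    MAX_RAW_DATA_CHARS = 5_000    # assumed cap on raw-text length
    MIN_DIM, MAX_DIM = 2, 20_000  # assumed bounds on vector dimensionality

    def classify_embedding_columns(df, text_cols, vector_cols):
        invalid_long_string_data_cols = [
            c for c in text_cols
            if df[c].dropna().astype(str).str.len().max() > MAX_RAW_DATA_CHARS
        ]
        dims = {c: df[c].dropna().map(len) for c in vector_cols}
        invalid_low_dim_vector_cols = [c for c, d in dims.items() if d.min() < MIN_DIM]
        invalid_high_dim_vector_cols = [c for c, d in dims.items() if d.max() > MAX_DIM]
        return (
            invalid_long_string_data_cols,
            invalid_low_dim_vector_cols,
            invalid_high_dim_vector_cols,
        )
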
@@ -3291,7 +3345,7 @@ class Validator:
  @staticmethod
  def _check_value_llm_model_name(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidStringLengthInColumn]:
+ ) -> list[InvalidStringLengthInColumn]:
  if schema.llm_config_column_names is None:
  return []
  col = schema.llm_config_column_names.model_column_name
@@ -3301,7 +3355,7 @@ class Validator:
  )
  if max_len > MAX_LLM_MODEL_NAME_LENGTH:
  return [
- err.InvalidStringLengthInColumn(
+ InvalidStringLengthInColumn(
  schema_name="llm_config_column_names.model_column_name",
  col_name=col,
  min_length=0,
@@ -3319,7 +3373,7 @@ class Validator:
  @staticmethod
  def _check_value_llm_prompt_template(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidStringLengthInColumn]:
+ ) -> list[InvalidStringLengthInColumn]:
  if schema.prompt_template_column_names is None:
  return []
  col = schema.prompt_template_column_names.template_column_name
@@ -3329,7 +3383,7 @@ class Validator:
  )
  if max_len > MAX_PROMPT_TEMPLATE_LENGTH:
  return [
- err.InvalidStringLengthInColumn(
+ InvalidStringLengthInColumn(
  schema_name="prompt_template_column_names.template_column_name",
  col_name=col,
  min_length=0,
@@ -3348,7 +3402,7 @@ class Validator:
  @staticmethod
  def _check_value_llm_prompt_template_version(
  dataframe: pd.DataFrame, schema: Schema
- ) -> list[err.InvalidStringLengthInColumn]:
+ ) -> list[InvalidStringLengthInColumn]:
  if schema.prompt_template_column_names is None:
  return []
  col = schema.prompt_template_column_names.template_version_column_name
@@ -3358,7 +3412,7 @@ class Validator:
  )
  if max_len > MAX_PROMPT_TEMPLATE_VERSION_LENGTH:
  return [
- err.InvalidStringLengthInColumn(
+ InvalidStringLengthInColumn(
  schema_name="prompt_template_column_names.template_version_column_name",
  col_name=col,
  min_length=0,
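
All three LLM string-length checks follow the same recipe: compute the longest string in the column and compare it against a module constant (MAX_LLM_MODEL_NAME_LENGTH, MAX_PROMPT_TEMPLATE_LENGTH, MAX_PROMPT_TEMPLATE_VERSION_LENGTH). A generic sketch of that recipe:

    import pandas as pd

    def max_string_length(series: pd.Series) -> int:
        lengths = series.dropna().astype(str).str.len()
        return int(lengths.max()) if not lengths.empty else 0

    # e.g. if max_string_length(df[col]) > MAX_PROMPT_TEMPLATE_LENGTH, the
    # check returns an InvalidStringLengthInColumn with min_length=0 and
    # max_length set to the corresponding constant.
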
@@ -3377,7 +3431,7 @@ class Validator:
  @staticmethod
  def _check_type_document_columns(
  schema: CorpusSchema, column_types: dict[str, Any]
- ) -> list[err.InvalidTypeColumns]:
+ ) -> list[InvalidTypeColumns]:
  invalid_types = []
  # Check document id
  col = schema.document_id_column_name
@@ -3391,7 +3445,7 @@ class Validator:
  )
  if column_types[col] not in allowed_datatypes:
  invalid_types += [
- err.InvalidTypeColumns(
+ InvalidTypeColumns(
  wrong_type_columns=[col],
  expected_types=["str", "int"],
  )
@@ -3403,7 +3457,7 @@ class Validator:
  allowed_datatype = pa.string()
  if column_types[col] != allowed_datatype:
  invalid_types += [
- err.InvalidTypeColumns(
+ InvalidTypeColumns(
  wrong_type_columns=[col],
  expected_types=["str"],
  )
@@ -3421,7 +3475,7 @@ class Validator:
  )
  if column_types[col] not in allowed_datatypes:
  invalid_types += [
- err.InvalidTypeColumns(
+ InvalidTypeColumns(
  wrong_type_columns=[col],
  expected_types=["list[float], np.array[float]"],
  )
@@ -3436,7 +3490,7 @@ class Validator:
  )
  if column_types[col] not in allowed_datatypes:
  invalid_types += [
- err.InvalidTypeColumns(
+ InvalidTypeColumns(
  wrong_type_columns=[col],
  expected_types=["list[str]"],
  )
@@ -3450,7 +3504,7 @@ class Validator:
  allowed_datatypes = (pa.string(),)
  if column_types[col] not in allowed_datatypes:
  invalid_types += [
- err.InvalidTypeColumns(
+ InvalidTypeColumns(
  wrong_type_columns=[col],
  expected_types=["str"],
  )
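
Unlike the earlier single-purpose checks, _check_type_document_columns accumulates every mismatch into invalid_types before returning, so a caller sees all offending corpus columns at once. The accumulation pattern, sketched with illustrative column rules:

    import pyarrow as pa

    def check_document_columns(column_types: dict) -> list:
        invalid_types = []
        rules = {
            "document_id": ((pa.string(), pa.int64()), ["str", "int"]),
            "document_text": ((pa.string(),), ["str"]),
        }
        for col, (allowed, expected) in rules.items():
            if col in column_types and column_types[col] not in allowed:
                # Stand-in for InvalidTypeColumns(wrong_type_columns=[col], ...)
                invalid_types.append((col, expected))
        return invalid_types
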
@@ -3517,15 +3571,15 @@ def _check_value_raw_data_length_helper(
 
  def _check_value_bounding_boxes_coordinates_helper(
  coordinates_col: pd.Series,
- ) -> err.InvalidBoundingBoxesCoordinates | None:
+ ) -> InvalidBoundingBoxesCoordinates | None:
  def check(boxes: object) -> None:
  # We allow for zero boxes. None coordinates list is not allowed (will break following tests:
  # 'NoneType is not iterable')
  if boxes is None:
- raise err.InvalidBoundingBoxesCoordinates(reason="none_boxes")
+ raise InvalidBoundingBoxesCoordinates(reason="none_boxes")
  for box in boxes:
  if box is None or len(box) == 0:
- raise err.InvalidBoundingBoxesCoordinates(
+ raise InvalidBoundingBoxesCoordinates(
  reason="none_or_empty_box"
  )
  error = _box_coordinates_wrong_format(box)
@@ -3534,14 +3588,14 @@ def _check_value_bounding_boxes_coordinates_helper(
 
  try:
  coordinates_col.apply(check)
- except err.InvalidBoundingBoxesCoordinates as e:
+ except InvalidBoundingBoxesCoordinates as e:
  return e
  return None
 
 
  def _box_coordinates_wrong_format(
  box_coords: object,
- ) -> err.InvalidBoundingBoxesCoordinates | None:
+ ) -> InvalidBoundingBoxesCoordinates | None:
  if (
  # Coordinates should be a collection of 4 floats
  len(box_coords) != 4
@@ -3552,7 +3606,7 @@ def _box_coordinates_wrong_format(
  # Coordinates represent the top-left & bottom-right corners of a box: y1 < y2
  or box_coords[1] >= box_coords[3]
  ):
- return err.InvalidBoundingBoxesCoordinates(
+ return InvalidBoundingBoxesCoordinates(
  reason="boxes_coordinates_wrong_format"
  )
  return None
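
The module-level helpers here all reuse one idiom: a nested check function raises the error class, Series.apply runs it row by row, and the except clause converts the first failure into a return value. A self-contained sketch of the idiom (InvalidCell is an illustrative stand-in for the real error classes):

    import pandas as pd

    class InvalidCell(Exception):
        def __init__(self, reason: str) -> None:
            super().__init__(reason)
            self.reason = reason

    def first_error(col: pd.Series) -> InvalidCell | None:
        def check(value: object) -> None:
            if value is None:
                raise InvalidCell(reason="none_value")

        try:
            col.apply(check)
        except InvalidCell as e:
            return e  # short-circuits on the first offending row
        return None
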
@@ -3560,51 +3614,47 @@ def _box_coordinates_wrong_format(
 
  def _check_value_bounding_boxes_categories_helper(
  categories_col: pd.Series,
- ) -> err.InvalidBoundingBoxesCategories | None:
+ ) -> InvalidBoundingBoxesCategories | None:
  def check(categories: object) -> None:
  # We allow for zero boxes. None category list is not allowed (will break following tests:
  # 'NoneType is not iterable')
  if categories is None:
- raise err.InvalidBoundingBoxesCategories(
- reason="none_category_list"
- )
+ raise InvalidBoundingBoxesCategories(reason="none_category_list")
  for category in categories:
  # Allow for empty string category, no None values
  if category is None:
- raise err.InvalidBoundingBoxesCategories(reason="none_category")
+ raise InvalidBoundingBoxesCategories(reason="none_category")
 
  try:
  categories_col.apply(check)
- except err.InvalidBoundingBoxesCategories as e:
+ except InvalidBoundingBoxesCategories as e:
  return e
  return None
 
 
  def _check_value_bounding_boxes_scores_helper(
  scores_col: pd.Series,
- ) -> err.InvalidBoundingBoxesScores | None:
+ ) -> InvalidBoundingBoxesScores | None:
  def check(scores: object) -> None:
  # We allow for zero boxes. None confidence score list is not allowed (will break following tests:
  # 'NoneType is not iterable')
  if scores is None:
- raise err.InvalidBoundingBoxesScores(reason="none_score_list")
+ raise InvalidBoundingBoxesScores(reason="none_score_list")
  for score in scores:
  # Confidence scores are between 0 and 1
  if score < 0 or score > 1:
- raise err.InvalidBoundingBoxesScores(
- reason="scores_out_of_bounds"
- )
+ raise InvalidBoundingBoxesScores(reason="scores_out_of_bounds")
 
  try:
  scores_col.apply(check)
- except err.InvalidBoundingBoxesScores as e:
+ except InvalidBoundingBoxesScores as e:
  return e
  return None
 
 
  def _polygon_coordinates_wrong_format(
  polygon_coords: object,
- ) -> err.InvalidPolygonCoordinates | None:
+ ) -> InvalidPolygonCoordinates | None:
  """Check if polygon coordinates are valid.
 
  Validates:
@@ -3629,7 +3679,7 @@ def _polygon_coordinates_wrong_format(
  # Coordinates should be a collection of pairs of floats
  or len(polygon_coords) % 2 != 0
  ):
- return err.InvalidPolygonCoordinates(
+ return InvalidPolygonCoordinates(
  reason="polygon_coordinates_wrong_format",
  coordinates=polygon_coords,
  )
@@ -3644,7 +3694,7 @@ def _polygon_coordinates_wrong_format(
  for i in range(len(points)):
  for j in range(i + 1, len(points)):
  if points[i] == points[j]:
- return err.InvalidPolygonCoordinates(
+ return InvalidPolygonCoordinates(
  reason="polygon_coordinates_repeated_vertices",
  coordinates=polygon_coords,
  )
@@ -3665,7 +3715,7 @@ def _polygon_coordinates_wrong_format(
  if segments_intersect(
  edges[i][0], edges[i][1], edges[j][0], edges[j][1]
  ):
- return err.InvalidPolygonCoordinates(
+ return InvalidPolygonCoordinates(
  reason="polygon_coordinates_self_intersecting_vertices",
  coordinates=polygon_coords,
  )
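
segments_intersect is defined elsewhere in the module and is not part of this hunk; one standard way to implement such a test is the counter-clockwise orientation check below, a sketch that ignores collinear edge cases and is not necessarily the library's exact code:

    def ccw(a, b, c) -> bool:
        # True if the triple (a, b, c) turns counter-clockwise
        return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])

    def segments_intersect(p1, p2, p3, p4) -> bool:
        # Proper intersection: each segment's endpoints straddle the other
        return ccw(p1, p3, p4) != ccw(p2, p3, p4) and ccw(p1, p2, p3) != ccw(p1, p2, p4)
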
@@ -3675,64 +3725,62 @@ def _polygon_coordinates_wrong_format(
 
  def _check_value_polygon_coordinates_helper(
  coordinates_col: pd.Series,
- ) -> err.InvalidPolygonCoordinates | None:
+ ) -> InvalidPolygonCoordinates | None:
  def check(polygons: object) -> None:
  # We allow for zero polygons. None coordinates list is not allowed (will break following tests:
  # 'NoneType is not iterable')
  if polygons is None:
- raise err.InvalidPolygonCoordinates(reason="none_polygons")
+ raise InvalidPolygonCoordinates(reason="none_polygons")
  for polygon in polygons:
  if polygon is None or len(polygon) == 0:
- raise err.InvalidPolygonCoordinates(
- reason="none_or_empty_polygon"
- )
+ raise InvalidPolygonCoordinates(reason="none_or_empty_polygon")
  error = _polygon_coordinates_wrong_format(polygon)
  if error is not None:
  raise error
 
  try:
  coordinates_col.apply(check)
- except err.InvalidPolygonCoordinates as e:
+ except InvalidPolygonCoordinates as e:
  return e
  return None
 
 
  def _check_value_polygon_categories_helper(
  categories_col: pd.Series,
- ) -> err.InvalidPolygonCategories | None:
+ ) -> InvalidPolygonCategories | None:
  def check(categories: object) -> None:
  # We allow for zero boxes. None category list is not allowed (will break following tests:
  # 'NoneType is not iterable')
  if categories is None:
- raise err.InvalidPolygonCategories(reason="none_category_list")
+ raise InvalidPolygonCategories(reason="none_category_list")
  for category in categories:
  # Allow for empty string category, no None values
  if category is None:
- raise err.InvalidPolygonCategories(reason="none_category")
+ raise InvalidPolygonCategories(reason="none_category")
 
  try:
  categories_col.apply(check)
- except err.InvalidPolygonCategories as e:
+ except InvalidPolygonCategories as e:
  return e
  return None
 
 
  def _check_value_polygon_scores_helper(
  scores_col: pd.Series,
- ) -> err.InvalidPolygonScores | None:
+ ) -> InvalidPolygonScores | None:
  def check(scores: object) -> None:
  # We allow for zero boxes. None confidence score list is not allowed (will break following tests:
  # 'NoneType is not iterable')
  if scores is None:
- raise err.InvalidPolygonScores(reason="none_score_list")
+ raise InvalidPolygonScores(reason="none_score_list")
  for score in scores:
  # Confidence scores are between 0 and 1
  if score < 0 or score > 1:
- raise err.InvalidPolygonScores(reason="scores_out_of_bounds")
+ raise InvalidPolygonScores(reason="scores_out_of_bounds")
 
  try:
  scores_col.apply(check)
- except err.InvalidPolygonScores as e:
+ except InvalidPolygonScores as e:
  return e
  return None
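
Taken together, a caller — presumably the Validator's value-check pass, which is not shown in this hunk — runs each helper over its column and collects any returned error. A hedged sketch of that wiring, using the helper names from the diff:

    import pandas as pd

    def collect_polygon_errors(
        dataframe: pd.DataFrame, coords_col: str, scores_col: str
    ) -> list:
        errors = []
        for col, helper in [
            (coords_col, _check_value_polygon_coordinates_helper),
            (scores_col, _check_value_polygon_scores_helper),
        ]:
            error = helper(dataframe[col])
            if error is not None:
                errors.append(error)
        return errors
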