arize 8.0.0b1__py3-none-any.whl → 8.0.0b4__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (81)
  1. arize/__init__.py +9 -2
  2. arize/_client_factory.py +50 -0
  3. arize/_exporter/client.py +18 -17
  4. arize/_exporter/parsers/tracing_data_parser.py +9 -4
  5. arize/_exporter/validation.py +1 -1
  6. arize/_flight/client.py +37 -17
  7. arize/_generated/api_client/api/datasets_api.py +6 -6
  8. arize/_generated/api_client/api/experiments_api.py +6 -6
  9. arize/_generated/api_client/api/projects_api.py +3 -3
  10. arize/_lazy.py +61 -10
  11. arize/client.py +66 -50
  12. arize/config.py +175 -48
  13. arize/constants/config.py +1 -0
  14. arize/constants/ml.py +9 -16
  15. arize/constants/spans.py +5 -10
  16. arize/datasets/client.py +45 -28
  17. arize/datasets/errors.py +1 -1
  18. arize/datasets/validation.py +2 -2
  19. arize/embeddings/auto_generator.py +16 -9
  20. arize/embeddings/base_generators.py +15 -9
  21. arize/embeddings/cv_generators.py +2 -2
  22. arize/embeddings/errors.py +2 -2
  23. arize/embeddings/nlp_generators.py +8 -8
  24. arize/embeddings/tabular_generators.py +6 -6
  25. arize/exceptions/base.py +0 -52
  26. arize/exceptions/config.py +22 -0
  27. arize/exceptions/parameters.py +1 -330
  28. arize/exceptions/values.py +8 -5
  29. arize/experiments/__init__.py +4 -0
  30. arize/experiments/client.py +31 -18
  31. arize/experiments/evaluators/base.py +12 -9
  32. arize/experiments/evaluators/executors.py +16 -7
  33. arize/experiments/evaluators/rate_limiters.py +3 -1
  34. arize/experiments/evaluators/types.py +9 -7
  35. arize/experiments/evaluators/utils.py +7 -5
  36. arize/experiments/functions.py +128 -58
  37. arize/experiments/tracing.py +4 -1
  38. arize/experiments/types.py +34 -31
  39. arize/logging.py +54 -33
  40. arize/ml/batch_validation/errors.py +10 -1004
  41. arize/ml/batch_validation/validator.py +351 -291
  42. arize/ml/bounded_executor.py +25 -6
  43. arize/ml/casting.py +51 -33
  44. arize/ml/client.py +43 -35
  45. arize/ml/proto.py +21 -22
  46. arize/ml/stream_validation.py +64 -27
  47. arize/ml/surrogate_explainer/mimic.py +18 -10
  48. arize/ml/types.py +27 -67
  49. arize/pre_releases.py +10 -6
  50. arize/projects/client.py +9 -4
  51. arize/py.typed +0 -0
  52. arize/regions.py +11 -11
  53. arize/spans/client.py +125 -31
  54. arize/spans/columns.py +32 -36
  55. arize/spans/conversion.py +12 -11
  56. arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
  57. arize/spans/validation/annotations/value_validation.py +11 -14
  58. arize/spans/validation/common/argument_validation.py +3 -3
  59. arize/spans/validation/common/dataframe_form_validation.py +7 -7
  60. arize/spans/validation/common/value_validation.py +11 -14
  61. arize/spans/validation/evals/dataframe_form_validation.py +4 -4
  62. arize/spans/validation/evals/evals_validation.py +6 -6
  63. arize/spans/validation/evals/value_validation.py +1 -1
  64. arize/spans/validation/metadata/argument_validation.py +1 -1
  65. arize/spans/validation/metadata/dataframe_form_validation.py +2 -2
  66. arize/spans/validation/metadata/value_validation.py +23 -1
  67. arize/spans/validation/spans/dataframe_form_validation.py +2 -2
  68. arize/spans/validation/spans/spans_validation.py +6 -6
  69. arize/utils/arrow.py +38 -2
  70. arize/utils/cache.py +2 -2
  71. arize/utils/dataframe.py +4 -4
  72. arize/utils/online_tasks/dataframe_preprocessor.py +15 -11
  73. arize/utils/openinference_conversion.py +10 -10
  74. arize/utils/proto.py +0 -1
  75. arize/utils/types.py +6 -6
  76. arize/version.py +1 -1
  77. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/METADATA +32 -7
  78. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/RECORD +81 -78
  79. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/WHEEL +0 -0
  80. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/LICENSE +0 -0
  81. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/NOTICE +0 -0
arize/datasets/client.py CHANGED
@@ -5,7 +5,7 @@ from __future__ import annotations
  import logging
  import time
  import uuid
- from typing import TYPE_CHECKING
+ from typing import TYPE_CHECKING, Any, cast

  import pandas as pd
  import pyarrow as pa
@@ -24,6 +24,11 @@ from arize.utils.openinference_conversion import (
  from arize.utils.size import get_payload_size_mb

  if TYPE_CHECKING:
+     # builtins is needed to use builtins.list in type annotations because
+     # the class has a list() method that shadows the built-in list type
+     import builtins
+
+     from arize._generated.api_client.api_client import ApiClient
      from arize.config import SDKConfiguration

  logger = logging.getLogger(__name__)
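
A note on the `builtins` import above: within `DatasetsClient`, the method named `list` shadows the built-in `list` type, so a bare annotation like `list[dict[str, object]]` inside the class body would resolve to the method rather than the type. A minimal sketch of the problem and the fix (class and method names here are illustrative, not from the SDK):

    import builtins


    class Catalog:
        def list(self) -> builtins.list[str]:
            """A method named `list` shadows the built-in within the class body."""
            return ["a", "b"]

        # Annotating `items: list[...]` here would reference the method above;
        # `builtins.list` unambiguously names the built-in type.
        def add_all(self, items: builtins.list[dict[str, object]]) -> int:
            return len(items)
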
@@ -41,18 +46,21 @@ class DatasetsClient:
      :class:`arize.config.SDKConfiguration`.
      """

-     def __init__(self, *, sdk_config: SDKConfiguration) -> None:
+     def __init__(
+         self, *, sdk_config: SDKConfiguration, generated_client: ApiClient
+     ) -> None:
          """
          Args:
              sdk_config: Resolved SDK configuration.
+             generated_client: Shared generated API client instance.
          """  # noqa: D205, D212
          self._sdk_config = sdk_config

          # Import at runtime so it's still lazy and extras-gated by the parent
          from arize._generated import api_client as gen

-         # Use the shared generated client from the config
-         self._api = gen.DatasetsApi(self._sdk_config.get_generated_client())
+         # Use the provided client directly
+         self._api = gen.DatasetsApi(generated_client)

      @prerelease_endpoint(key="datasets.list", stage=ReleaseStage.BETA)
      def list(
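
The constructor change switches `DatasetsClient` to explicit dependency injection: rather than each sub-client pulling a generated client off the config, the parent hands one shared `ApiClient` to every sub-client (plausibly via the new arize/_client_factory.py in the file list). A rough, hypothetical wiring sketch; the `SDKConfiguration()` and `ApiClient()` constructor calls are assumptions, not the SDK's documented API:

    from arize._generated.api_client.api_client import ApiClient
    from arize.config import SDKConfiguration
    from arize.datasets.client import DatasetsClient

    sdk_config = SDKConfiguration()  # assumed: resolves configuration with defaults
    shared_client = ApiClient()      # assumed: one generated client for all sub-clients
    datasets = DatasetsClient(sdk_config=sdk_config, generated_client=shared_client)

Sharing one `ApiClient` keeps connection pooling and auth state in a single place instead of one per sub-client.
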
@@ -93,7 +101,7 @@
          *,
          name: str,
          space_id: str,
-         examples: list[dict[str, object]] | pd.DataFrame,
+         examples: builtins.list[dict[str, object]] | pd.DataFrame,
          force_http: bool = False,
      ) -> models.Dataset:
          """Create a dataset with JSON examples.
@@ -117,7 +125,7 @@
              space_id: Space ID to create the dataset in.
              examples: Dataset examples either as:
                  - a list of JSON-like dicts, or
-                 - a pandas DataFrame (will be converted to records for REST).
+                 - a :class:`pandas.DataFrame` (will be converted to records for REST).
              force_http: If True, force REST upload even if the payload exceeds the
                  configured REST payload threshold.

@@ -125,7 +133,7 @@
              The created dataset object as returned by the API.

          Raises:
-             TypeError: If `examples` is not a list of dicts or a pandas DataFrame.
+             TypeError: If `examples` is not a list of dicts or a :class:`pandas.DataFrame`.
              RuntimeError: If the Flight upload path is selected and the Flight request
                  fails.
              arize._generated.api_client.exceptions.ApiException: If the REST API
@@ -146,7 +154,7 @@
              from arize._generated import api_client as gen

              data = (
-                 examples.to_dict(orient="records")  # type: ignore
+                 examples.to_dict(orient="records")
                  if isinstance(examples, pd.DataFrame)
                  else examples
              )
@@ -154,7 +162,8 @@
              body = gen.DatasetsCreateRequest(
                  name=name,
                  space_id=space_id,
-                 examples=data,
+                 # Cast: pandas to_dict returns dict[Hashable, Any] but API requires dict[str, Any]
+                 examples=cast("list[dict[str, Any]]", data),
              )
              return self._api.datasets_create(datasets_create_request=body)

@@ -165,15 +174,12 @@
              "Trying to convert to DataFrame for more efficient upload via "
              "gRPC + Flight."
          )
-         data = (
-             examples
-             if isinstance(examples, pd.DataFrame)
-             else pd.DataFrame(examples)
-         )
+         if not isinstance(examples, pd.DataFrame):
+             examples = pd.DataFrame(examples)
          return self._create_dataset_via_flight(
              name=name,
              space_id=space_id,
-             examples=data,
+             examples=examples,
          )

      @prerelease_endpoint(key="datasets.get", stage=ReleaseStage.BETA)
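
Rebinding `examples` in place, instead of introducing a parallel `data` name, also lets type checkers narrow the union: after the `isinstance` guard plus assignment, `examples` is a `pd.DataFrame` on every path. Roughly:

    import pandas as pd


    def normalize(examples: list[dict[str, object]] | pd.DataFrame) -> pd.DataFrame:
        # After this guard, a checker knows `examples` is a DataFrame on
        # every path that reaches the return statement.
        if not isinstance(examples, pd.DataFrame):
            examples = pd.DataFrame(examples)
        return examples
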
@@ -205,7 +211,8 @@
          Args:
              dataset_id: Dataset ID to delete.

-         Returns: This method returns None on success (common empty 204 response)
+         Returns:
+             This method returns None on success (common empty 204 response).

          Raises:
              arize._generated.api_client.exceptions.ApiException: If the REST API
@@ -275,7 +282,11 @@
              )
              if dataset_df is not None:
                  return models.DatasetsExamplesList200Response(
-                     examples=dataset_df.to_dict(orient="records"),  # type: ignore
+                     # Cast: Pydantic validates and converts dicts to DatasetExample at runtime
+                     examples=cast(
+                         "list[models.DatasetExample]",
+                         dataset_df.to_dict(orient="records"),
+                     ),
                      pagination=models.PaginationMetadata(
                          has_more=False,  # Note that all=True
                      ),
@@ -316,7 +327,11 @@
          )

          return models.DatasetsExamplesList200Response(
-             examples=dataset_df.to_dict(orient="records"),  # type: ignore
+             # Cast: Pydantic validates and converts dicts to DatasetExample at runtime
+             examples=cast(
+                 "list[models.DatasetExample]",
+                 dataset_df.to_dict(orient="records"),
+             ),
              pagination=models.PaginationMetadata(
                  has_more=False,  # Note that all=True
              ),
@@ -331,7 +346,7 @@
          *,
          dataset_id: str,
          dataset_version_id: str = "",
-         examples: list[dict[str, object]] | pd.DataFrame,
+         examples: builtins.list[dict[str, object]] | pd.DataFrame,
      ) -> models.Dataset:
          """Append new examples to an existing dataset.

@@ -354,14 +369,13 @@
                  the latest dataset version is selected.
              examples: Examples to append, provided as either:
                  - a list of JSON-like dicts, or
-                 - a pandas DataFrame (converted to records before upload).
+                 - a :class:`pandas.DataFrame` (converted to records before upload).

          Returns:
              The updated dataset object. To see the examples, use `list_examples()`.

          Raises:
-             AssertionError: If `examples` is not a list of dicts or a pandas
-                 DataFrame.
+             AssertionError: If `examples` is not a list of dicts or a :class:`pandas.DataFrame`.
              arize._generated.api_client.exceptions.ApiException: If the REST API
                  returns an error response (e.g. 400/401/403/404/429).
          """
@@ -373,11 +387,14 @@
          )

          data = (
-             examples.to_dict(orient="records")  # type: ignore
+             examples.to_dict(orient="records")
              if isinstance(examples, pd.DataFrame)
              else examples
          )
-         body = gen.DatasetsExamplesInsertRequest(examples=data)
+         # Cast: pandas to_dict returns dict[Hashable, Any] but API requires dict[str, Any]
+         body = gen.DatasetsExamplesInsertRequest(
+             examples=cast("list[dict[str, Any]]", data)
+         )

          return self._api.datasets_examples_insert(
              dataset_id=dataset_id,
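
Both REST payload builders now replace a blanket `# type: ignore` with a targeted `cast`: `DataFrame.to_dict(orient="records")` is typed as returning record dicts keyed by `Hashable`, while the generated request models require `dict[str, Any]`. String column names yield string keys at runtime, so the narrowing is safe and has no runtime cost. A minimal illustration:

    from typing import Any, cast

    import pandas as pd

    df = pd.DataFrame([{"question": "hi", "answer": "hello"}])
    records = df.to_dict(orient="records")  # static type: keys are Hashable
    # Column names are strings, so keys are strings at runtime; cast() only
    # narrows the static type and changes nothing at runtime.
    examples = cast("list[dict[str, Any]]", records)
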
@@ -390,7 +407,7 @@
          name: str,
          space_id: str,
          examples: pd.DataFrame,
-     ) -> object:
+     ) -> models.Dataset:
          """Internal method to create a dataset using Flight protocol for large example sets."""
          data = examples.copy()
          # Convert datetime columns to int64 (ms since epoch)
@@ -450,19 +467,19 @@ def _set_default_columns_for_dataset(df: pd.DataFrame) -> pd.DataFrame:
      """Set default values for created_at and updated_at columns if missing or null."""
      current_time = int(time.time() * 1000)
      if "created_at" in df.columns:
-         if df["created_at"].isnull().values.any():  # type: ignore
+         if df["created_at"].isnull().any():
              df["created_at"].fillna(current_time, inplace=True)
      else:
          df["created_at"] = current_time

      if "updated_at" in df.columns:
-         if df["updated_at"].isnull().values.any():  # type: ignore
+         if df["updated_at"].isnull().any():
              df["updated_at"].fillna(current_time, inplace=True)
      else:
          df["updated_at"] = current_time

      if "id" in df.columns:
-         if df["id"].isnull().values.any():  # type: ignore
+         if df["id"].isnull().any():
              df["id"] = df["id"].apply(
                  lambda x: str(uuid.uuid4()) if pd.isnull(x) else x
              )
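
The `.isnull().values.any()` → `.isnull().any()` change is behavior-preserving for a Series: `.any()` reduces over the values directly, without detouring through the untyped `.values` NumPy array that forced the old `# type: ignore` comments. A quick check:

    import pandas as pd

    s = pd.Series([1.0, None, 3.0])
    # Both spellings answer "is any value null?"; the Series method is the
    # idiomatic, type-checker-friendly one.
    assert bool(s.isnull().values.any()) == bool(s.isnull().any())
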
arize/datasets/errors.py CHANGED
@@ -80,7 +80,7 @@ class RequiredColumnsError(DatasetError):


  class EmptyDatasetError(DatasetError):
-     """Raised when dataset DataFrame has no rows."""
+     """Raised when dataset :class:`pandas.DataFrame` has no rows."""

      def error_message(self) -> str:
          """Return the error message for this exception."""
arize/datasets/validation.py CHANGED
@@ -8,12 +8,12 @@ from arize.datasets import errors as err
  def validate_dataset_df(
      df: pd.DataFrame,
  ) -> list[err.DatasetError]:
-     """Validate a dataset DataFrame for structural and content errors.
+     """Validate a dataset :class:`pandas.DataFrame` for structural and content errors.

      Checks for required columns, unique ID values, and non-empty data.

      Args:
-         df: The pandas DataFrame to validate.
+         df: The :class:`pandas.DataFrame` to validate.

      Returns:
          A list of DatasetError objects found during validation. Empty list if valid.
arize/embeddings/auto_generator.py CHANGED
@@ -1,5 +1,7 @@
  """Automatic embedding generation factory for various ML use cases."""

+ from typing import TypeAlias
+
  import pandas as pd

  from arize.embeddings import constants
@@ -24,9 +26,14 @@ from arize.embeddings.nlp_generators import (
  from arize.embeddings.tabular_generators import (
      EmbeddingGeneratorForTabularFeatures,
  )
- from arize.embeddings.usecases import UseCases
+ from arize.embeddings.usecases import (
+     CVUseCases,
+     NLPUseCases,
+     TabularUseCases,
+     UseCases,
+ )

- UseCaseLike = str | UseCases.NLP | UseCases.CV | UseCases.STRUCTURED
+ UseCaseLike: TypeAlias = str | NLPUseCases | CVUseCases | TabularUseCases


  class EmbeddingGenerator:
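
The alias rewrite fixes two typing problems at once: it names the use-case enum classes directly instead of reaching them through attribute access (`UseCases.NLP`), which type checkers often refuse to treat as a type inside a union, and the explicit `TypeAlias` marker distinguishes the alias from an ordinary variable assignment. A condensed sketch of the corrected pattern (these enums are stand-ins, not the SDK's real definitions):

    from enum import Enum
    from typing import TypeAlias


    class NLPUseCases(Enum):
        SEQUENCE_CLASSIFICATION = "sequence_classification"


    class CVUseCases(Enum):
        IMAGE_CLASSIFICATION = "image_classification"


    # The TypeAlias marker tells checkers this is a type alias, not a value.
    UseCaseLike: TypeAlias = str | NLPUseCases | CVUseCases
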
@@ -49,20 +56,20 @@ class EmbeddingGenerator:
      ) -> BaseEmbeddingGenerator:
          """Create an embedding generator for the specified use case."""
          if use_case == UseCases.NLP.SEQUENCE_CLASSIFICATION:
-             return EmbeddingGeneratorForNLPSequenceClassification(**kwargs)
+             return EmbeddingGeneratorForNLPSequenceClassification(**kwargs)  # type: ignore[arg-type]
          if use_case == UseCases.NLP.SUMMARIZATION:
-             return EmbeddingGeneratorForNLPSummarization(**kwargs)
+             return EmbeddingGeneratorForNLPSummarization(**kwargs)  # type: ignore[arg-type]
          if use_case == UseCases.CV.IMAGE_CLASSIFICATION:
-             return EmbeddingGeneratorForCVImageClassification(**kwargs)
+             return EmbeddingGeneratorForCVImageClassification(**kwargs)  # type: ignore[arg-type]
          if use_case == UseCases.CV.OBJECT_DETECTION:
-             return EmbeddingGeneratorForCVObjectDetection(**kwargs)
+             return EmbeddingGeneratorForCVObjectDetection(**kwargs)  # type: ignore[arg-type]
          if use_case == UseCases.STRUCTURED.TABULAR_EMBEDDINGS:
-             return EmbeddingGeneratorForTabularFeatures(**kwargs)
+             return EmbeddingGeneratorForTabularFeatures(**kwargs)  # type: ignore[arg-type]
          raise ValueError(f"Invalid use case {use_case}")

      @classmethod
      def list_default_models(cls) -> pd.DataFrame:
-         """Return a DataFrame of default models for each use case."""
+         """Return a :class:`pandas.DataFrame` of default models for each use case."""
          df = pd.DataFrame(
              {
                  "Area": ["NLP", "NLP", "CV", "CV", "STRUCTURED"],
@@ -87,7 +94,7 @@ class EmbeddingGenerator:

      @classmethod
      def list_pretrained_models(cls) -> pd.DataFrame:
-         """Return a DataFrame of all available pretrained models."""
+         """Return a :class:`pandas.DataFrame` of all available pretrained models."""
          data = {
              "Task": ["NLP" for _ in NLP_PRETRAINED_MODELS]
              + ["CV" for _ in CV_PRETRAINED_MODELS],
arize/embeddings/base_generators.py CHANGED
@@ -14,11 +14,15 @@ try:
      import torch
      from datasets import Dataset
      from PIL import Image
-     from transformers import (  # type: ignore
+     from transformers import (
          AutoImageProcessor,
          AutoModel,
          AutoTokenizer,
+         BaseImageProcessor,
          BatchEncoding,
+         BatchFeature,
+         PreTrainedModel,
+         PreTrainedTokenizerBase,
      )
      from transformers.utils import logging as transformer_logging
  except ImportError as e:
@@ -67,7 +71,9 @@ class BaseEmbeddingGenerator(ABC):
              raise

      @abstractmethod
-     def generate_embeddings(self, **kwargs: object) -> pd.Series:
+     def generate_embeddings(
+         self, **kwargs: object
+     ) -> pd.Series | tuple[pd.Series, pd.Series]:
          """Generate embeddings for the input data."""
          ...

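The widened abstract return type accommodates the tabular generator, which can return both the embeddings and the prompts it built (the `return_prompt_col` branch later in this diff). The matching `# type: ignore[override]` comments on the concrete `generate_embeddings` methods are the flip side of this design: each subclass narrows `**kwargs: object` into specific parameters, which is not an override-compatible signature. A condensed sketch of that trade-off:

    from abc import ABC, abstractmethod

    import pandas as pd


    class Base(ABC):
        @abstractmethod
        def generate_embeddings(
            self, **kwargs: object
        ) -> pd.Series | tuple[pd.Series, pd.Series]: ...


    class TextGenerator(Base):
        # Narrowing **kwargs to concrete parameters is friendlier for callers,
        # but it is not a compatible override, hence the targeted ignore.
        def generate_embeddings(self, text_col: pd.Series) -> pd.Series:  # type: ignore[override]
            return text_col.map(lambda t: [float(len(str(t)))])
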
@@ -95,7 +101,7 @@
          return self.__model_name

      @property
-     def model(self) -> object:
+     def model(self) -> PreTrainedModel:
          """Return the underlying model instance."""
          return self.__model

@@ -183,7 +189,7 @@ class NLPEmbeddingGenerator(BaseEmbeddingGenerator):
              tokenizer_max_length: Maximum sequence length for the tokenizer.
              **kwargs: Additional arguments for model initialization.
          """
-         super().__init__(use_case=use_case, model_name=model_name, **kwargs)
+         super().__init__(use_case=use_case, model_name=model_name, **kwargs)  # type: ignore[arg-type]
          self.__tokenizer_max_length = tokenizer_max_length
          # We don't check for the tokenizer's existence since it is coupled with the corresponding model
          # We check the model's existence in `BaseEmbeddingGenerator`
@@ -193,7 +199,7 @@ class NLPEmbeddingGenerator(BaseEmbeddingGenerator):
          )

      @property
-     def tokenizer(self) -> object:
+     def tokenizer(self) -> PreTrainedTokenizerBase:
          """Return the tokenizer instance for text processing."""
          return self.__tokenizer

@@ -240,7 +246,7 @@ class CVEmbeddingGenerator(BaseEmbeddingGenerator):
              model_name: Name of the pre-trained vision model.
              **kwargs: Additional arguments for model initialization.
          """
-         super().__init__(use_case=use_case, model_name=model_name, **kwargs)
+         super().__init__(use_case=use_case, model_name=model_name, **kwargs)  # type: ignore[arg-type]
          logger.info("Downloading image processor")
          # We don't check for the image processor's existence since it is coupled with the corresponding model
          # We check the model's existence in `BaseEmbeddingGenerator`
@@ -249,7 +255,7 @@
          )

      @property
-     def image_processor(self) -> object:
+     def image_processor(self) -> BaseImageProcessor:
          """Return the image processor instance for image preprocessing."""
          return self.__image_processor

@@ -262,7 +268,7 @@ class CVEmbeddingGenerator(BaseEmbeddingGenerator):

      def preprocess_image(
          self, batch: dict[str, list[str]], local_image_feat_name: str
-     ) -> object:
+     ) -> BatchFeature:
          """Preprocess a batch of images for model input."""
          return self.image_processor(
              [
@@ -272,7 +278,7 @@
              return_tensors="pt",
          ).to(self.device)

-     def generate_embeddings(self, local_image_path_col: pd.Series) -> pd.Series:
+     def generate_embeddings(self, local_image_path_col: pd.Series) -> pd.Series:  # type: ignore[override]
          """Obtain embedding vectors from your image data using pre-trained image models.

          :param local_image_path_col: a pandas Series containing the local path to the images to
arize/embeddings/cv_generators.py CHANGED
@@ -25,7 +25,7 @@ class EmbeddingGeneratorForCVImageClassification(CVEmbeddingGenerator):
          super().__init__(
              use_case=UseCases.CV.IMAGE_CLASSIFICATION,
              model_name=model_name,
-             **kwargs,
+             **kwargs,  # type: ignore[arg-type]
          )


@@ -46,5 +46,5 @@ class EmbeddingGeneratorForCVObjectDetection(CVEmbeddingGenerator):
          super().__init__(
              use_case=UseCases.CV.OBJECT_DETECTION,
              model_name=model_name,
-             **kwargs,
+             **kwargs,  # type: ignore[arg-type]
          )
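
The recurring `# type: ignore[arg-type]` additions in this diff all stem from the same situation: forwarding `**kwargs: object` into a callable whose parameters are typed more precisely. The checker cannot prove each `object` value matches its target parameter, even when the call is correct at runtime. In miniature:

    class Parent:
        def __init__(self, *, batch_size: int = 100) -> None:
            self.batch_size = batch_size


    class Child(Parent):
        def __init__(self, **kwargs: object) -> None:
            # Every value in kwargs is typed `object`, so passing one where an
            # `int` is expected fails static checking despite being fine at runtime.
            super().__init__(**kwargs)  # type: ignore[arg-type]
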
arize/embeddings/errors.py CHANGED
@@ -2,7 +2,7 @@


  class InvalidIndexError(Exception):
-     """Raised when DataFrame or Series has an invalid index."""
+     """Raised when :class:`pandas.DataFrame` or Series has an invalid index."""

      def __repr__(self) -> str:
          """Return a string representation for debugging and logging."""
@@ -16,7 +16,7 @@ class InvalidIndexError(Exception):
          """Initialize the exception with field name context.

          Args:
-             field_name: Name of the DataFrame or Series field with invalid index.
+             field_name: Name of the :class:`pandas.DataFrame` or Series field with invalid index.
          """
          self.field_name = field_name

arize/embeddings/nlp_generators.py CHANGED
@@ -39,10 +39,10 @@ class EmbeddingGeneratorForNLPSequenceClassification(NLPEmbeddingGenerator):
          super().__init__(
              use_case=UseCases.NLP.SEQUENCE_CLASSIFICATION,
              model_name=model_name,
-             **kwargs,
+             **kwargs,  # type: ignore[arg-type]
          )

-     def generate_embeddings(
+     def generate_embeddings(  # type: ignore[override]
          self,
          text_col: pd.Series,
          class_label_col: pd.Series | None = None,
@@ -65,10 +65,10 @@
          if class_label_col is not None:
              if not isinstance(class_label_col, pd.Series):
                  raise TypeError("class_label_col must be a pandas Series")
-             df = pd.concat(
+             temp_df = pd.concat(
                  {"text": text_col, "class_label": class_label_col}, axis=1
              )
-             prepared_text_col = df.apply(
+             prepared_text_col = temp_df.apply(
                  lambda row: f" The classification label is {row['class_label']}. {row['text']}",
                  axis=1,
              )
@@ -83,8 +83,8 @@
              batched=True,
              batch_size=self.batch_size,
          )
-         df: pd.DataFrame = ds.to_pandas()
-         return df["embedding_vector"]
+         result_df: pd.DataFrame = ds.to_pandas()
+         return result_df["embedding_vector"]


  class EmbeddingGeneratorForNLPSummarization(NLPEmbeddingGenerator):
@@ -104,10 +104,10 @@ class EmbeddingGeneratorForNLPSummarization(NLPEmbeddingGenerator):
          super().__init__(
              use_case=UseCases.NLP.SUMMARIZATION,
              model_name=model_name,
-             **kwargs,
+             **kwargs,  # type: ignore[arg-type]
          )

-     def generate_embeddings(
+     def generate_embeddings(  # type: ignore[override]
          self,
          text_col: pd.Series,
      ) -> pd.Series:
arize/embeddings/tabular_generators.py CHANGED
@@ -64,10 +64,10 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
          super().__init__(
              use_case=UseCases.STRUCTURED.TABULAR_EMBEDDINGS,
              model_name=model_name,
-             **kwargs,
+             **kwargs,  # type: ignore[arg-type]
          )

-     def generate_embeddings(
+     def generate_embeddings(  # type: ignore[override]
          self,
          df: pd.DataFrame,
          selected_columns: list[str],
@@ -145,11 +145,11 @@
              batch_size=self.batch_size,
          )

-         df: pd.DataFrame = ds.to_pandas()
+         result_df: pd.DataFrame = ds.to_pandas()
          if return_prompt_col:
-             return df["embedding_vector"], prompts
+             return result_df["embedding_vector"], prompts

-         return df["embedding_vector"]
+         return result_df["embedding_vector"]

      @staticmethod
      def __prompt_fn(row: pd.DataFrame, columns: list[str]) -> str:
@@ -172,5 +172,5 @@

      @staticmethod
      def list_pretrained_models() -> pd.DataFrame:
-         """Return a DataFrame of available pretrained tabular models."""
+         """Return a :class:`pandas.DataFrame` of available pretrained tabular models."""
          return pd.DataFrame({"Model Name": sorted(TABULAR_PRETRAINED_MODELS)})
arize/exceptions/base.py CHANGED
@@ -39,21 +39,6 @@ class ValidationFailure(Exception):
          self.errors = errors


- # ----------------------
- # Minimum required checks
- # ----------------------
- # class InvalidColumnNameEmptyString(ValidationError):
- #     def __repr__(self) -> str:
- #         return "Invalid_Column_Name_Empty_String"
- #
- #     def error_message(self) -> str:
- #         return (
- #             "Empty column name found: ''. The schema cannot point to columns in the "
- #             "dataframe denoted by an empty string. You can see the columns used in the "
- #             "schema by running schema.get_used_columns()"
- #         )
-
-
  class InvalidFieldTypeConversion(ValidationError):
      """Raised when fields cannot be converted to required type."""

@@ -79,31 +64,6 @@ class InvalidFieldTypeConversion(ValidationError):
          )


- # class InvalidFieldTypeEmbeddingFeatures(ValidationError):
- #     def __repr__(self) -> str:
- #         return "Invalid_Input_Type_Embedding_Features"
- #
- #     def __init__(self) -> None:
- #         pass
- #
- #     def error_message(self) -> str:
- #         return (
- #             "schema.embedding_feature_column_names should be a dictionary mapping strings "
- #             "to EmbeddingColumnNames objects"
- #         )
-
-
- # class InvalidFieldTypePromptResponse(ValidationError):
- #     def __repr__(self) -> str:
- #         return "Invalid_Input_Type_Prompt_Response"
- #
- #     def __init__(self, name: str) -> None:
- #         self.name = name
- #
- #     def error_message(self) -> str:
- #         return f"'{self.name}' must be of type str or EmbeddingColumnNames"
-
-
  class InvalidDataFrameIndex(ValidationError):
      """Raised when DataFrame has an invalid index that needs to be reset."""

@@ -117,15 +77,3 @@
              "The index of the dataframe is invalid; "
              "reset the index by using df.reset_index(drop=True, inplace=True)"
          )
-
-
- # class InvalidSchemaType(ValidationError):
- #     def __repr__(self) -> str:
- #         return "Invalid_Schema_Type"
- #
- #     def __init__(self, schema_type: str, environment: Environments) -> None:
- #         self.schema_type = schema_type
- #         self.environment = environment
- #
- #     def error_message(self) -> str:
- #         return f"Cannot use a {self.schema_type} for a model with environment: {self.environment}"
arize/exceptions/config.py ADDED
@@ -0,0 +1,22 @@
+ """Configuration validation exceptions."""
+
+ from __future__ import annotations
+
+
+ class MultipleEndpointOverridesError(Exception):
+     """Raised when multiple endpoint override options are provided.
+
+     Only one of the following can be specified: region, single_host/single_port, or base_domain.
+     """
+
+     def __init__(self, message: str) -> None:
+         """Initialize the exception with an optional custom message.
+
+         Args:
+             message: Custom error message, or empty string.
+         """
+         self.message = message
+
+     def __str__(self) -> str:
+         """Return the error message."""
+         return self.message
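
This new module gives `arize.config` a dedicated error for conflicting endpoint settings. The diff does not show where it is raised, but the shape of the check follows from the docstring; a hedged sketch (the guard function and its parameters are hypothetical):

    from arize.exceptions.config import MultipleEndpointOverridesError


    def check_endpoint_overrides(
        region: str | None, single_host: str | None, base_domain: str | None
    ) -> None:
        # Hypothetical guard: at most one endpoint override may be specified.
        provided = [o for o in (region, single_host, base_domain) if o is not None]
        if len(provided) > 1:
            raise MultipleEndpointOverridesError(
                "Specify only one of: region, single_host/single_port, or base_domain."
            )
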