arize 8.0.0b0__py3-none-any.whl → 8.0.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. arize/__init__.py +1 -1
  2. arize/_client_factory.py +50 -0
  3. arize/_flight/client.py +4 -4
  4. arize/_generated/api_client/__init__.py +0 -2
  5. arize/_generated/api_client/api/datasets_api.py +6 -6
  6. arize/_generated/api_client/api/experiments_api.py +6 -6
  7. arize/_generated/api_client/api/projects_api.py +3 -3
  8. arize/_generated/api_client/models/__init__.py +0 -1
  9. arize/_generated/api_client/models/datasets_create_request.py +2 -10
  10. arize/_generated/api_client/models/datasets_examples_insert_request.py +2 -10
  11. arize/_generated/api_client/test/test_datasets_create_request.py +2 -6
  12. arize/_generated/api_client/test/test_datasets_examples_insert_request.py +2 -6
  13. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +2 -6
  14. arize/_generated/api_client/test/test_datasets_examples_update_request.py +2 -6
  15. arize/_generated/api_client/test/test_experiments_create_request.py +2 -6
  16. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +2 -6
  17. arize/_generated/api_client_README.md +0 -1
  18. arize/_lazy.py +25 -9
  19. arize/client.py +16 -52
  20. arize/config.py +9 -36
  21. arize/constants/ml.py +9 -16
  22. arize/constants/spans.py +5 -10
  23. arize/datasets/client.py +13 -9
  24. arize/datasets/errors.py +1 -1
  25. arize/datasets/validation.py +2 -2
  26. arize/embeddings/auto_generator.py +2 -2
  27. arize/embeddings/errors.py +2 -2
  28. arize/embeddings/tabular_generators.py +1 -1
  29. arize/exceptions/base.py +0 -52
  30. arize/exceptions/parameters.py +0 -329
  31. arize/experiments/__init__.py +2 -2
  32. arize/experiments/client.py +16 -10
  33. arize/experiments/evaluators/base.py +6 -6
  34. arize/experiments/evaluators/executors.py +10 -3
  35. arize/experiments/evaluators/types.py +2 -2
  36. arize/experiments/functions.py +24 -17
  37. arize/experiments/types.py +6 -8
  38. arize/logging.py +1 -1
  39. arize/ml/batch_validation/errors.py +10 -1004
  40. arize/ml/batch_validation/validator.py +273 -225
  41. arize/ml/casting.py +7 -7
  42. arize/ml/client.py +12 -11
  43. arize/ml/proto.py +6 -6
  44. arize/ml/stream_validation.py +2 -3
  45. arize/ml/surrogate_explainer/mimic.py +3 -3
  46. arize/ml/types.py +1 -55
  47. arize/pre_releases.py +6 -3
  48. arize/projects/client.py +9 -4
  49. arize/regions.py +2 -2
  50. arize/spans/client.py +14 -12
  51. arize/spans/columns.py +32 -36
  52. arize/spans/conversion.py +5 -6
  53. arize/spans/validation/common/argument_validation.py +3 -3
  54. arize/spans/validation/common/dataframe_form_validation.py +6 -6
  55. arize/spans/validation/common/value_validation.py +1 -1
  56. arize/spans/validation/evals/dataframe_form_validation.py +4 -4
  57. arize/spans/validation/evals/evals_validation.py +6 -6
  58. arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
  59. arize/spans/validation/spans/dataframe_form_validation.py +2 -2
  60. arize/spans/validation/spans/spans_validation.py +6 -6
  61. arize/utils/arrow.py +2 -2
  62. arize/utils/cache.py +2 -2
  63. arize/utils/dataframe.py +4 -4
  64. arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
  65. arize/utils/openinference_conversion.py +10 -10
  66. arize/utils/proto.py +1 -1
  67. arize/version.py +1 -1
  68. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/METADATA +71 -63
  69. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/RECORD +72 -73
  70. arize/_generated/api_client/models/primitive_value.py +0 -172
  71. arize/_generated/api_client/test/test_primitive_value.py +0 -50
  72. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
  73. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
  74. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0
arize/client.py CHANGED
@@ -20,9 +20,6 @@ if TYPE_CHECKING:
20
20
 
21
21
  logger = logging.getLogger(__name__)
22
22
 
23
- # TODO(Kiko): Clean commented lines over the SDK
24
- # TODO(Kiko): Implement https://github.com/Arize-ai/arize/pull/59917
25
-
26
23
  # TODO(Kiko): Go private connect. Need a `base_domain`, such that we get:
27
24
  # - api.<base_domain>
28
25
  # - app.<base_domain>
@@ -31,29 +28,23 @@ logger = logging.getLogger(__name__)
31
28
 
32
29
  # TODO(Kiko): Enforce type checking, remove all type ignores
33
30
 
34
- # TODO(Kiko): Go over docstrings
35
- # TODO(Kiko): Missing parameter descriptions in some docstrings
36
- # TODO(Kiko): Missing return descriptions in some docstrings
37
-
38
31
  # TODO(Kiko): Go over headers on each logging call
32
+ # TODO(Kiko): InvalidAdditionalHeadersError is unused. Have we handled extra headers?
39
33
 
40
34
  # TODO(Kiko): Need to implement 'Update existing examples in a dataset'
41
35
 
42
- # TODO(Kiko): why logs don't show on scripts, only on jupyter notebooks
43
36
  # TODO(Kiko): test caching in colab environment
44
37
  # TODO(Kiko): Protobuf versioning is too old
45
38
  # TODO(Kiko): Go through main APIs and add CtxAdapter where missing
46
39
  # TODO(Kiko): Search and handle other TODOs
47
- # TODO(Kiko): Go over **every file** and do not import anything at runtime, use `if TYPE_CHECKING`
48
- # with `from __future__ import annotations` (must include for Python < 3.11)
49
40
 
50
41
 
51
42
  class ArizeClient(LazySubclientsMixin):
52
43
  """Root client for the Arize SDK.
53
44
 
54
45
  The ArizeClient provides access to all Arize platform services including datasets,
55
- experiments, ML models, projects, and spans. It uses SDKConfiguration internally to
56
- manage configuration settings.
46
+ experiments, ML models, projects, and spans. It uses :class:`arize.config.SDKConfiguration`
47
+ internally to manage configuration settings.
57
48
 
58
49
  All parameters are optional (except api_key which must be provided via argument
59
50
  or environment variable). For each parameter, values are resolved in this order:
@@ -105,44 +96,16 @@ class ArizeClient(LazySubclientsMixin):
105
96
  "SpansClient",
106
97
  ),
107
98
  }
108
- _EXTRAS: ClassVar[dict[str, tuple[str | None, tuple[str, ...]]]] = {
109
- # Gate only the generated-backed ones
110
- "datasets": (
111
- "datasets-experiments",
112
- (
113
- "pydantic",
114
- "openinference.semconv",
115
- ),
116
- ),
117
- "experiments": (
118
- "datasets-experiments",
119
- (
120
- "pydantic",
121
- "wrapt",
122
- # "numpy",
123
- # "openinference.semconv",
124
- # "opentelemetry.sdk",
125
- # "opentelemetry.exporter.otlp.proto.grpc.trace_exporter",
126
- ),
127
- ),
128
- "spans": (
129
- "spans",
130
- (
131
- "google.protobuf",
132
- "numpy",
133
- "openinference.semconv",
134
- "opentelemetry",
135
- "pandas",
136
- "pyarrow",
137
- "requests",
138
- "tqdm",
139
- ),
140
- ),
141
- # Imports are gated in each method of the models client
142
- # This is to allow for very lean package install if people only
143
- # want to stream ML records
144
- "ml": (None, ()),
145
- }
99
+ # DISABLED: Optional dependency gating system
100
+ # This dict would map subclients to their optional dependencies and extra names.
101
+ # When enabled, it prevents loading subclients if dependencies aren't installed,
102
+ # showing: "Install via: pip install arize[extra-name]"
103
+ #
104
+ # To re-enable, populate with entries like:
105
+ # "subclient_name": ("extra-name", ("package1", "package2", "package3")),
106
+ # "another_subclient": (None, ()), # No requirements
107
+ #
108
+ _EXTRAS: ClassVar[dict[str, tuple[str | None, tuple[str, ...]]]] = {}
146
109
 
147
110
  def __init__(
148
111
  self,
@@ -175,7 +138,7 @@ class ArizeClient(LazySubclientsMixin):
175
138
  api_key: Arize API key for authentication. Required - must be provided here
176
139
  or via ARIZE_API_KEY environment variable. Raises MissingAPIKeyError if not set.
177
140
  region: Arize region (e.g., Region.US_CENTRAL, Region.EU_WEST). When specified,
178
- overrides individual host/port settings. ENV: ARIZE_REGION. Default: Region.UNSPECIFIED.
141
+ overrides individual host/port settings. ENV: ARIZE_REGION. Default: Region.UNSET.
179
142
  api_host: Custom API endpoint host. ENV: ARIZE_API_HOST. Default: "api.arize.com".
180
143
  api_scheme: API endpoint scheme (http/https). ENV: ARIZE_API_SCHEME. Default: "https".
181
144
  otlp_host: OTLP endpoint host. ENV: ARIZE_OTLP_HOST. Default: "otlp.arize.com".
@@ -210,7 +173,8 @@ class ArizeClient(LazySubclientsMixin):
210
173
 
211
174
  Notes:
212
175
  Values provided to this class override environment variables, which in turn
213
- override default values. See SDKConfiguration for detailed parameter documentation.
176
+ override default values. See :class:`arize.config.SDKConfiguration`
177
+ for detailed parameter documentation.
214
178
  """
215
179
  cfg_kwargs: dict = {}
216
180
  if api_key is not None:
arize/config.py CHANGED
@@ -3,10 +3,8 @@
3
3
  import logging
4
4
  import os
5
5
  import sys
6
- import threading
7
6
  from dataclasses import dataclass, field, fields
8
7
  from pathlib import Path
9
- from typing import Any
10
8
 
11
9
  from arize.constants.config import (
12
10
  DEFAULT_API_HOST,
@@ -167,6 +165,9 @@ def _parse_bool(val: bool | str | None) -> bool:
167
165
  class SDKConfiguration:
168
166
  """Configuration for the Arize SDK with endpoint and authentication settings.
169
167
 
168
+ This class holds pure configuration data and does not manage client lifecycle.
169
+ Client creation and caching is handled by :class:`arize.ArizeClient`.
170
+
170
171
  This class is used internally by ArizeClient to manage SDK configuration. It is not
171
172
  recommended to use this class directly; users should interact with ArizeClient
172
173
  instead.
@@ -225,13 +226,16 @@ class SDKConfiguration:
225
226
  region: Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
226
227
  individual host/port settings.
227
228
  Environment variable: ARIZE_REGION.
228
- Default: Region.UNSPECIFIED.
229
+ Default: :attr:`Region.UNSET`.
229
230
  single_host: Single host to use for all endpoints. Overrides individual host settings.
230
231
  Environment variable: ARIZE_SINGLE_HOST.
231
232
  Default: "" (not set).
232
233
  single_port: Single port to use for all endpoints. Overrides individual port settings (0-65535).
233
234
  Environment variable: ARIZE_SINGLE_PORT.
234
235
  Default: 0 (not set).
236
+
237
+ Raises:
238
+ MissingAPIKeyError: If api_key is not provided via argument or environment variable.
235
239
  """
236
240
 
237
241
  api_key: str = field(
@@ -323,17 +327,11 @@ class SDKConfiguration:
323
327
  )
324
328
  )
325
329
 
326
- # Private, excluded from comparisons & repr
327
- _gen_client: Any = field(default=None, repr=False, compare=False)
328
- _gen_lock: threading.Lock = field(
329
- default_factory=threading.Lock, repr=False, compare=False
330
- )
331
-
332
330
  def __post_init__(self) -> None:
333
331
  """Validate and configure SDK endpoints after initialization.
334
332
 
335
333
  Raises:
336
- MissingAPIKeyError: If API key is not provided.
334
+ MissingAPIKeyError: If api_key is not provided via argument or environment variable.
337
335
  """
338
336
  # Validate Configuration
339
337
  if not self.api_key:
@@ -341,7 +339,7 @@ class SDKConfiguration:
341
339
 
342
340
  has_single_host = bool(self.single_host)
343
341
  has_single_port = self.single_port != 0
344
- has_region = self.region is not Region.UNSPECIFIED
342
+ has_region = self.region is not Region.UNSET
345
343
  if (has_single_host or has_single_port) and has_region:
346
344
  logger.info(
347
345
  "Multiple endpoint override options provided. Preference order is: "
@@ -444,28 +442,3 @@ class SDKConfiguration:
444
442
  lines.append(f" {f.name}={val!r},")
445
443
  lines.append(")")
446
444
  return "\n".join(lines)
447
-
448
- # TODO(Kiko): This may not be well placed in this class
449
- def get_generated_client(self) -> object:
450
- """Get or create the generated OpenAPI client instance."""
451
- # If already cached, return immediately
452
- if self._gen_client is not None:
453
- return self._gen_client
454
-
455
- # Thread-safe initialization
456
- with self._gen_lock:
457
- if self._gen_client is not None:
458
- return self._gen_client
459
-
460
- # Import lazily so extra dependencies can be
461
- # enforced outside the configuration class
462
- from arize._generated import api_client as gen
463
-
464
- cfg = gen.Configuration(host=self.api_url)
465
- if self.api_key:
466
- cfg.access_token = self.api_key
467
- client = gen.ApiClient(cfg)
468
-
469
- # Bypass frozen to set the cache once
470
- object.__setattr__(self, "_gen_client", client)
471
- return client
arize/constants/ml.py CHANGED
@@ -3,50 +3,43 @@
3
3
  import json
4
4
  from pathlib import Path
5
5
 
6
- # MAX_BYTES_PER_BULK_RECORD = 100000
7
- # MAX_DAYS_WITHIN_RANGE = 365
8
6
  MIN_PREDICTION_ID_LEN = 1
9
7
  MAX_PREDICTION_ID_LEN = 512
10
8
  MIN_DOCUMENT_ID_LEN = 1
11
9
  MAX_DOCUMENT_ID_LEN = 128
12
- # # The maximum number of character for tag values
10
+ # The maximum number of characters for tag values
13
11
  MAX_TAG_LENGTH = 20_000
14
12
  MAX_TAG_LENGTH_TRUNCATION = 1_000
15
- # # The maximum number of character for embedding raw data
13
+ # The maximum number of characters for embedding raw data
16
14
  MAX_RAW_DATA_CHARACTERS = 2_000_000
17
15
  MAX_RAW_DATA_CHARACTERS_TRUNCATION = 5_000
18
16
  # The maximum number of acceptable years in the past from current time for prediction_timestamps
19
17
  MAX_PAST_YEARS_FROM_CURRENT_TIME = 5
20
18
  # The maximum number of acceptable years in the future from current time for prediction_timestamps
21
19
  MAX_FUTURE_YEARS_FROM_CURRENT_TIME = 1
22
- # # The maximum number of character for llm model name
20
+ # The maximum number of characters for llm model name
23
21
  MAX_LLM_MODEL_NAME_LENGTH = 20_000
24
22
  MAX_LLM_MODEL_NAME_LENGTH_TRUNCATION = 50
25
- # # The maximum number of character for prompt template
23
+ # The maximum number of characters for prompt template
26
24
  MAX_PROMPT_TEMPLATE_LENGTH = 50_000
27
25
  MAX_PROMPT_TEMPLATE_LENGTH_TRUNCATION = 5_000
28
- # # The maximum number of character for prompt template version
26
+ # The maximum number of characters for prompt template version
29
27
  MAX_PROMPT_TEMPLATE_VERSION_LENGTH = 20_000
30
28
  MAX_PROMPT_TEMPLATE_VERSION_LENGTH_TRUNCATION = 50
31
- # # The maximum number of embeddings
29
+ # The maximum number of embeddings
32
30
  MAX_NUMBER_OF_EMBEDDINGS = 30
33
31
  MAX_EMBEDDING_DIMENSIONALITY = 20_000
34
- # # The maximum number of classes for multi class
32
+ # The maximum number of classes for multi class
35
33
  MAX_NUMBER_OF_MULTI_CLASS_CLASSES = 500
36
34
  MAX_MULTI_CLASS_NAME_LENGTH = 100
37
35
  # The maximum number of references in embedding similarity search params
38
36
  MAX_NUMBER_OF_SIMILARITY_REFERENCES = 10
39
- #
40
- # # Arize generated columns
41
- # GENERATED_PREDICTION_LABEL_COL = "arize_generated_prediction_label"
42
- # GENERATED_LLM_PARAMS_JSON_COL = "arize_generated_llm_params_json"
43
- #
44
- # # reserved columns for LLM run metadata
37
+ # reserved columns for LLM run metadata
45
38
  LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME = "total_token_count" # noqa: S105
46
39
  LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME = "prompt_token_count" # noqa: S105
47
40
  LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME = "response_token_count" # noqa: S105
48
41
  LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME = "response_latency_ms"
49
- #
42
+
50
43
  # all reserved tags
51
44
  RESERVED_TAG_COLS = [
52
45
  LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME,
arize/constants/spans.py CHANGED
@@ -5,19 +5,15 @@ DEFAULT_DATETIME_FMT = "%Y-%m-%dT%H:%M:%S.%f+00:00"
5
5
  # Minimum/Maximum number of characters for span/trace/parent ids in spans
6
6
  SPAN_ID_MIN_STR_LENGTH = 12
7
7
  SPAN_ID_MAX_STR_LENGTH = 128
8
- # # Minumum/Maximum number of characters for span name
8
+ # Minimum/Maximum number of characters for span name
9
9
  SPAN_NAME_MIN_STR_LENGTH = 0
10
10
  SPAN_NAME_MAX_STR_LENGTH = 50
11
- # # Minumum/Maximum number of characters for span status message
11
+ # Minimum/Maximum number of characters for span status message
12
12
  SPAN_STATUS_MSG_MIN_STR_LENGTH = 0
13
13
  SPAN_STATUS_MSG_MAX_STR_LENGTH = 10_000
14
- # # Minumum/Maximum number of characters for span event name
14
+ # Maximum number of characters for span event name
15
15
  SPAN_EVENT_NAME_MAX_STR_LENGTH = 100
16
- # # Minumum/Maximum number of characters for span event attributes
17
- # SPAN_EVENT_ATTRS_MAX_STR_LENGTH = 10_000
18
- # # Maximum number of characters for span kind
19
- # SPAN_KIND_MAX_STR_LENGTH = 100
20
- # SPAN_EXCEPTION_TYPE_MAX_STR_LENGTH = 100
16
+ # Maximum number of characters for span exception message and stack trace
21
17
  SPAN_EXCEPTION_MESSAGE_MAX_STR_LENGTH = 100
22
18
  SPAN_EXCEPTION_STACK_TRACE_MAX_STR_LENGTH = 10_000
23
19
  SPAN_IO_VALUE_MAX_STR_LENGTH = 4_000_000
@@ -29,7 +25,6 @@ SPAN_LLM_MESSAGE_ROLE_MAX_STR_LENGTH = 100
29
25
  SPAN_LLM_MESSAGE_CONTENT_MAX_STR_LENGTH = 4_000_000
30
26
  SPAN_LLM_TOOL_CALL_FUNCTION_NAME_MAX_STR_LENGTH = 500
31
27
  SPAN_LLM_PROMPT_TEMPLATE_MAX_STR_LENGTH = 4_000_000
32
- # SPAN_LLM_PROMPT_TEMPLATE_VARIABLES_MAX_STR_LENGTH = 10_000
33
28
  SPAN_LLM_PROMPT_TEMPLATE_VERSION_MAX_STR_LENGTH = 100
34
29
  SPAN_TOOL_NAME_MAX_STR_LENGTH = 100
35
30
  SPAN_TOOL_DESCRIPTION_MAX_STR_LENGTH = 1_000
@@ -43,7 +38,7 @@ JSON_STRING_MAX_STR_LENGTH = 4_000_000
43
38
  EVAL_LABEL_MIN_STR_LENGTH = 1 # we do not accept empty strings
44
39
  EVAL_LABEL_MAX_STR_LENGTH = 100
45
40
  EVAL_EXPLANATION_MAX_STR_LENGTH = 10_000
46
- #
41
+
47
42
  # Annotation related constants
48
43
  ANNOTATION_LABEL_MIN_STR_LENGTH = 1
49
44
  ANNOTATION_LABEL_MAX_STR_LENGTH = 100 # Max length for annotation label string
arize/datasets/client.py CHANGED
@@ -24,6 +24,7 @@ from arize.utils.openinference_conversion import (
24
24
  from arize.utils.size import get_payload_size_mb
25
25
 
26
26
  if TYPE_CHECKING:
27
+ from arize._generated.api_client.api_client import ApiClient
27
28
  from arize.config import SDKConfiguration
28
29
 
29
30
  logger = logging.getLogger(__name__)
@@ -41,18 +42,21 @@ class DatasetsClient:
41
42
  :class:`arize.config.SDKConfiguration`.
42
43
  """
43
44
 
44
- def __init__(self, *, sdk_config: SDKConfiguration) -> None:
45
+ def __init__(
46
+ self, *, sdk_config: SDKConfiguration, generated_client: ApiClient
47
+ ) -> None:
45
48
  """
46
49
  Args:
47
50
  sdk_config: Resolved SDK configuration.
51
+ generated_client: Shared generated API client instance.
48
52
  """ # noqa: D205, D212
49
53
  self._sdk_config = sdk_config
50
54
 
51
55
  # Import at runtime so it's still lazy and extras-gated by the parent
52
56
  from arize._generated import api_client as gen
53
57
 
54
- # Use the shared generated client from the config
55
- self._api = gen.DatasetsApi(self._sdk_config.get_generated_client())
58
+ # Use the provided client directly
59
+ self._api = gen.DatasetsApi(generated_client)
56
60
 
57
61
  @prerelease_endpoint(key="datasets.list", stage=ReleaseStage.BETA)
58
62
  def list(
@@ -117,7 +121,7 @@ class DatasetsClient:
117
121
  space_id: Space ID to create the dataset in.
118
122
  examples: Dataset examples either as:
119
123
  - a list of JSON-like dicts, or
120
- - a pandas DataFrame (will be converted to records for REST).
124
+ - a :class:`pandas.DataFrame` (will be converted to records for REST).
121
125
  force_http: If True, force REST upload even if the payload exceeds the
122
126
  configured REST payload threshold.
123
127
 
@@ -125,7 +129,7 @@ class DatasetsClient:
125
129
  The created dataset object as returned by the API.
126
130
 
127
131
  Raises:
128
- TypeError: If `examples` is not a list of dicts or a pandas DataFrame.
132
+ TypeError: If `examples` is not a list of dicts or a :class:`pandas.DataFrame`.
129
133
  RuntimeError: If the Flight upload path is selected and the Flight request
130
134
  fails.
131
135
  arize._generated.api_client.exceptions.ApiException: If the REST API
@@ -205,7 +209,8 @@ class DatasetsClient:
205
209
  Args:
206
210
  dataset_id: Dataset ID to delete.
207
211
 
208
- Returns: This method returns None on success (common empty 204 response)
212
+ Returns:
213
+ This method returns None on success (common empty 204 response).
209
214
 
210
215
  Raises:
211
216
  arize._generated.api_client.exceptions.ApiException: If the REST API
@@ -354,14 +359,13 @@ class DatasetsClient:
354
359
  the latest dataset version is selected.
355
360
  examples: Examples to append, provided as either:
356
361
  - a list of JSON-like dicts, or
357
- - a pandas DataFrame (converted to records before upload).
362
+ - a :class:`pandas.DataFrame` (converted to records before upload).
358
363
 
359
364
  Returns:
360
365
  The updated dataset object. To see the examples, use `list_examples()`.
361
366
 
362
367
  Raises:
363
- AssertionError: If `examples` is not a list of dicts or a pandas
364
- DataFrame.
368
+ AssertionError: If `examples` is not a list of dicts or a :class:`pandas.DataFrame`.
365
369
  arize._generated.api_client.exceptions.ApiException: If the REST API
366
370
  returns an error response (e.g. 400/401/403/404/429).
367
371
  """
arize/datasets/errors.py CHANGED
@@ -80,7 +80,7 @@ class RequiredColumnsError(DatasetError):
80
80
 
81
81
 
82
82
  class EmptyDatasetError(DatasetError):
83
- """Raised when dataset DataFrame has no rows."""
83
+ """Raised when dataset :class:`pandas.DataFrame` has no rows."""
84
84
 
85
85
  def error_message(self) -> str:
86
86
  """Return the error message for this exception."""
@@ -8,12 +8,12 @@ from arize.datasets import errors as err
8
8
  def validate_dataset_df(
9
9
  df: pd.DataFrame,
10
10
  ) -> list[err.DatasetError]:
11
- """Validate a dataset DataFrame for structural and content errors.
11
+ """Validate a dataset :class:`pandas.DataFrame` for structural and content errors.
12
12
 
13
13
  Checks for required columns, unique ID values, and non-empty data.
14
14
 
15
15
  Args:
16
- df: The pandas DataFrame to validate.
16
+ df: The :class:`pandas.DataFrame` to validate.
17
17
 
18
18
  Returns:
19
19
  A list of DatasetError objects found during validation. Empty list if valid.
@@ -62,7 +62,7 @@ class EmbeddingGenerator:
62
62
 
63
63
  @classmethod
64
64
  def list_default_models(cls) -> pd.DataFrame:
65
- """Return a DataFrame of default models for each use case."""
65
+ """Return a :class:`pandas.DataFrame` of default models for each use case."""
66
66
  df = pd.DataFrame(
67
67
  {
68
68
  "Area": ["NLP", "NLP", "CV", "CV", "STRUCTURED"],
@@ -87,7 +87,7 @@ class EmbeddingGenerator:
87
87
 
88
88
  @classmethod
89
89
  def list_pretrained_models(cls) -> pd.DataFrame:
90
- """Return a DataFrame of all available pretrained models."""
90
+ """Return a :class:`pandas.DataFrame` of all available pretrained models."""
91
91
  data = {
92
92
  "Task": ["NLP" for _ in NLP_PRETRAINED_MODELS]
93
93
  + ["CV" for _ in CV_PRETRAINED_MODELS],
@@ -2,7 +2,7 @@
2
2
 
3
3
 
4
4
  class InvalidIndexError(Exception):
5
- """Raised when DataFrame or Series has an invalid index."""
5
+ """Raised when :class:`pandas.DataFrame` or :class:`pandas.Series` has an invalid index."""
6
6
 
7
7
  def __repr__(self) -> str:
8
8
  """Return a string representation for debugging and logging."""
@@ -16,7 +16,7 @@ class InvalidIndexError(Exception):
16
16
  """Initialize the exception with field name context.
17
17
 
18
18
  Args:
19
- field_name: Name of the DataFrame or Series field with invalid index.
19
+ field_name: Name of the :class:`pandas.DataFrame` or :class:`pandas.Series` field with invalid index.
20
20
  """
21
21
  self.field_name = field_name
22
22
 
@@ -172,5 +172,5 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
172
172
 
173
173
  @staticmethod
174
174
  def list_pretrained_models() -> pd.DataFrame:
175
- """Return a DataFrame of available pretrained tabular models."""
175
+ """Return a :class:`pandas.DataFrame` of available pretrained tabular models."""
176
176
  return pd.DataFrame({"Model Name": sorted(TABULAR_PRETRAINED_MODELS)})
arize/exceptions/base.py CHANGED
@@ -39,21 +39,6 @@ class ValidationFailure(Exception):
39
39
  self.errors = errors
40
40
 
41
41
 
42
- # ----------------------
43
- # Minimum required checks
44
- # ----------------------
45
- # class InvalidColumnNameEmptyString(ValidationError):
46
- # def __repr__(self) -> str:
47
- # return "Invalid_Column_Name_Empty_String"
48
- #
49
- # def error_message(self) -> str:
50
- # return (
51
- # "Empty column name found: ''. The schema cannot point to columns in the "
52
- # "dataframe denoted by an empty string. You can see the columns used in the "
53
- # "schema by running schema.get_used_columns()"
54
- # )
55
-
56
-
57
42
  class InvalidFieldTypeConversion(ValidationError):
58
43
  """Raised when fields cannot be converted to required type."""
59
44
 
@@ -79,31 +64,6 @@ class InvalidFieldTypeConversion(ValidationError):
79
64
  )
80
65
 
81
66
 
82
- # class InvalidFieldTypeEmbeddingFeatures(ValidationError):
83
- # def __repr__(self) -> str:
84
- # return "Invalid_Input_Type_Embedding_Features"
85
- #
86
- # def __init__(self) -> None:
87
- # pass
88
- #
89
- # def error_message(self) -> str:
90
- # return (
91
- # "schema.embedding_feature_column_names should be a dictionary mapping strings "
92
- # "to EmbeddingColumnNames objects"
93
- # )
94
-
95
-
96
- # class InvalidFieldTypePromptResponse(ValidationError):
97
- # def __repr__(self) -> str:
98
- # return "Invalid_Input_Type_Prompt_Response"
99
- #
100
- # def __init__(self, name: str) -> None:
101
- # self.name = name
102
- #
103
- # def error_message(self) -> str:
104
- # return f"'{self.name}' must be of type str or EmbeddingColumnNames"
105
-
106
-
107
67
  class InvalidDataFrameIndex(ValidationError):
108
68
  """Raised when DataFrame has an invalid index that needs to be reset."""
109
69
 
@@ -117,15 +77,3 @@ class InvalidDataFrameIndex(ValidationError):
117
77
  "The index of the dataframe is invalid; "
118
78
  "reset the index by using df.reset_index(drop=True, inplace=True)"
119
79
  )
120
-
121
-
122
- # class InvalidSchemaType(ValidationError):
123
- # def __repr__(self) -> str:
124
- # return "Invalid_Schema_Type"
125
- #
126
- # def __init__(self, schema_type: str, environment: Environments) -> None:
127
- # self.schema_type = schema_type
128
- # self.environment = environment
129
- #
130
- # def error_message(self) -> str:
131
- # return f"Cannot use a {self.schema_type} for a model with environment: {self.environment}"