arize 8.0.0b1__py3-none-any.whl → 8.0.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. arize/__init__.py +1 -1
  2. arize/_client_factory.py +50 -0
  3. arize/_flight/client.py +4 -4
  4. arize/_generated/api_client/api/datasets_api.py +6 -6
  5. arize/_generated/api_client/api/experiments_api.py +6 -6
  6. arize/_generated/api_client/api/projects_api.py +3 -3
  7. arize/_lazy.py +25 -9
  8. arize/client.py +6 -16
  9. arize/config.py +9 -36
  10. arize/constants/ml.py +9 -16
  11. arize/constants/spans.py +5 -10
  12. arize/datasets/client.py +13 -9
  13. arize/datasets/errors.py +1 -1
  14. arize/datasets/validation.py +2 -2
  15. arize/embeddings/auto_generator.py +2 -2
  16. arize/embeddings/errors.py +2 -2
  17. arize/embeddings/tabular_generators.py +1 -1
  18. arize/exceptions/base.py +0 -52
  19. arize/exceptions/parameters.py +0 -329
  20. arize/experiments/client.py +14 -7
  21. arize/experiments/evaluators/base.py +6 -6
  22. arize/experiments/evaluators/executors.py +10 -3
  23. arize/experiments/evaluators/types.py +2 -2
  24. arize/experiments/functions.py +18 -11
  25. arize/experiments/types.py +3 -5
  26. arize/logging.py +1 -1
  27. arize/ml/batch_validation/errors.py +10 -1004
  28. arize/ml/batch_validation/validator.py +273 -225
  29. arize/ml/casting.py +7 -7
  30. arize/ml/client.py +12 -11
  31. arize/ml/proto.py +6 -6
  32. arize/ml/stream_validation.py +2 -3
  33. arize/ml/surrogate_explainer/mimic.py +3 -3
  34. arize/ml/types.py +1 -55
  35. arize/pre_releases.py +6 -3
  36. arize/projects/client.py +9 -4
  37. arize/regions.py +2 -2
  38. arize/spans/client.py +13 -11
  39. arize/spans/columns.py +32 -36
  40. arize/spans/conversion.py +5 -6
  41. arize/spans/validation/common/argument_validation.py +3 -3
  42. arize/spans/validation/common/dataframe_form_validation.py +6 -6
  43. arize/spans/validation/common/value_validation.py +1 -1
  44. arize/spans/validation/evals/dataframe_form_validation.py +4 -4
  45. arize/spans/validation/evals/evals_validation.py +6 -6
  46. arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
  47. arize/spans/validation/spans/dataframe_form_validation.py +2 -2
  48. arize/spans/validation/spans/spans_validation.py +6 -6
  49. arize/utils/arrow.py +2 -2
  50. arize/utils/cache.py +2 -2
  51. arize/utils/dataframe.py +4 -4
  52. arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
  53. arize/utils/openinference_conversion.py +10 -10
  54. arize/utils/proto.py +1 -1
  55. arize/version.py +1 -1
  56. {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/METADATA +23 -6
  57. {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/RECORD +60 -59
  58. {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
  59. {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
  60. {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0
arize/ml/casting.py CHANGED
@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Type casting utilities for ML model data conversion."""
 
-# type: ignore[pb2]
 from __future__ import annotations
 
 import math
@@ -132,12 +132,12 @@ def cast_typed_columns(
     a column across many SDK uploads.
 
     Args:
-        dataframe (pd.DataFrame): A deepcopy of the user's dataframe.
+        dataframe (:class:`pandas.DataFrame`): A deepcopy of the user's dataframe.
         schema (Schema): The schema, which may include feature and tag column names
            in a TypedColumns object or a List[string].
 
     Returns:
-        tuple[pd.DataFrame, Schema]: A tuple containing:
+        tuple[:class:`pandas.DataFrame`, Schema]: A tuple containing:
        - dataframe: The dataframe, with columns cast to the specified types.
        - schema: A new Schema object, with feature and tag column names converted
            to the List[string] format expected in downstream validation.
@@ -290,12 +290,12 @@ def _cast_columns(
     (feature_column_names or tag_column_names)
 
     Args:
-        dataframe (pd.DataFrame): A deepcopy of the user's dataframe.
+        dataframe (:class:`pandas.DataFrame`): A deepcopy of the user's dataframe.
         columns (TypedColumns): The TypedColumns object, which specifies the columns
            to cast (and/or to not cast) and their target types.
 
     Returns:
-        pd.DataFrame: The dataframe with columns cast to the specified types.
+        :class:`pandas.DataFrame`: The dataframe with columns cast to the specified types.
 
     Raises:
         ColumnCastingError: If casting fails.
@@ -350,12 +350,12 @@ def _cast_df(
     """Cast columns in a dataframe to the specified type.
 
     Args:
-        df (pd.DataFrame): A deepcopy of the user's dataframe.
+        df (:class:`pandas.DataFrame`): A deepcopy of the user's dataframe.
         cols (list[str]): The list of column names to cast.
         target_type_str (str): The target type to cast to.
 
     Returns:
-        pd.DataFrame: The dataframe with columns cast to the specified types.
+        :class:`pandas.DataFrame`: The dataframe with columns cast to the specified types.
 
     Raises:
         Exception: If casting fails. Common exceptions raised by astype() are
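The `_cast_df` docstring notes that failures surface the exceptions raised by astype(). A minimal, runnable pandas sketch of the wrapped operation (column and dtype names here are illustrative, not from the SDK):

import pandas as pd

df = pd.DataFrame({"age": ["31", "42"], "city": ["SF", "NYC"]})
cols, target_type_str = ["age"], "int64"  # mirrors the documented parameters

# astype() raises on unconvertible values (e.g. ValueError), which the
# SDK reports as a ColumnCastingError per the Raises section above.
df[cols] = df[cols].astype(target_type_str)
print(df.dtypes)  # age: int64, city: object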
arize/ml/client.py CHANGED
@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Client implementation for managing ML models in the Arize platform."""
 
-# type: ignore[pb2]
 from __future__ import annotations
 
 import copy
@@ -542,7 +542,7 @@ class MLModelsClient:
         timeout: float | None = None,
         tmp_dir: str = "",
     ) -> requests.Response:
-        """Log a batch of model predictions and actuals to Arize from a pandas DataFrame.
+        """Log a batch of model predictions and actuals to Arize from a :class:`pandas.DataFrame`.
 
         This method uploads multiple records to Arize in a single batch operation using
         Apache Arrow format for efficient transfer. The dataframe structure is defined
@@ -554,8 +554,8 @@ class MLModelsClient:
             model_type: The type of model. Supported types: BINARY, MULTI_CLASS, REGRESSION,
                 RANKING, OBJECT_DETECTION. Note: GENERATIVE_LLM is not supported; use the
                 spans module instead.
-            dataframe: Pandas DataFrame containing the data to upload. Columns should
-                correspond to the schema field mappings.
+            dataframe (:class:`pandas.DataFrame`): Pandas DataFrame containing the data to
+                upload. Columns should correspond to the schema field mappings.
             schema: Schema object (Schema or CorpusSchema) that defines the mapping between
                 dataframe columns and Arize data fields (e.g., prediction_label_column_name,
                 feature_column_names, etc.).
@@ -811,10 +811,10 @@ class MLModelsClient:
         similarity_search_params: SimilaritySearchParams | None = None,
         stream_chunk_size: int | None = None,
     ) -> pd.DataFrame:
-        """Export model data from Arize to a pandas DataFrame.
+        """Export model data from Arize to a :class:`pandas.DataFrame`.
 
         Retrieves prediction and optional actual data for a model within a specified time
-        range and returns it as a pandas DataFrame for analysis.
+        range and returns it as a :class:`pandas.DataFrame` for analysis.
 
         Args:
             space_id: The space ID where the model resides.
@@ -835,8 +835,9 @@ class MLModelsClient:
             stream_chunk_size: Optional chunk size for streaming large result sets.
 
         Returns:
-            A pandas DataFrame containing the exported data with columns for predictions,
-            actuals (if requested), features, tags, timestamps, and other model metadata.
+            :class:`pandas.DataFrame`: A pandas DataFrame containing the exported data
+            with columns for predictions, actuals (if requested), features, tags,
+            timestamps, and other model metadata.
 
         Raises:
             RuntimeError: If the Flight client request fails or returns no response.
@@ -895,7 +896,7 @@ class MLModelsClient:
         """Export model data from Arize to a Parquet file and return as DataFrame.
 
         Retrieves prediction and optional actual data for a model within a specified time
-        range, saves it as a Parquet file, and returns it as a pandas DataFrame.
+        range, saves it as a Parquet file, and returns it as a :class:`pandas.DataFrame`.
 
         Args:
             space_id: The space ID where the model resides.
@@ -916,8 +917,8 @@ class MLModelsClient:
             stream_chunk_size: Optional chunk size for streaming large result sets.
 
         Returns:
-            A pandas DataFrame containing the exported data. The data is also saved to a
-            Parquet file by the underlying export client.
+            :class:`pandas.DataFrame`: A pandas DataFrame containing the exported data.
+            The data is also saved to a Parquet file by the underlying export client.
 
         Raises:
            RuntimeError: If the Flight client request fails or returns no response.
arize/ml/proto.py CHANGED
@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Protocol buffer utilities for ML model data serialization."""
 
-# type: ignore[pb2]
 from __future__ import annotations
 
 from google.protobuf.timestamp_pb2 import Timestamp
@@ -30,10 +30,10 @@ def get_pb_dictionary(d: dict[object, object] | None) -> dict[str, object]:
     """Convert a dictionary to protobuf format with string keys and pb2.Value values.
 
     Args:
-        d: Dictionary to convert, or None.
+        d: Dictionary to convert, or :obj:`None`.
 
     Returns:
-        Dictionary with string keys and protobuf Value objects, or empty dict if input is None.
+        Dictionary with string keys and protobuf Value objects, or empty dict if input is :obj:`None`.
     """
     if d is None:
         return {}
@@ -56,7 +56,7 @@ def get_pb_value(name: str | int | float, value: pb2.Value) -> pb2.Value:
         value: The value to convert to protobuf format.
 
     Returns:
-        A pb2.Value protobuf object, or None if value cannot be converted.
+        A pb2.Value protobuf object, or :obj:`None` if value cannot be converted.
 
     Raises:
         TypeError: If value type is not supported.
@@ -139,10 +139,10 @@ def get_pb_timestamp(time_overwrite: int | None) -> object | None:
     """Convert a Unix timestamp to a protobuf Timestamp object.
 
     Args:
-        time_overwrite: Unix epoch time in seconds, or None.
+        time_overwrite: Unix epoch time in seconds, or :obj:`None`.
 
     Returns:
-        A protobuf Timestamp object, or None if input is None.
+        A protobuf Timestamp object, or :obj:`None` if input is :obj:`None`.
 
     Raises:
        TypeError: If time_overwrite is not an integer.
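The `get_pb_timestamp` contract documented above (None passes through; a non-integer raises TypeError) can be sketched standalone with the protobuf well-known Timestamp type. This is a hedged analogue of the documented behavior, not the SDK's exact code:

from google.protobuf.timestamp_pb2 import Timestamp

def to_pb_timestamp(time_overwrite: int | None) -> Timestamp | None:
    # None in, None out -- mirrors the documented contract.
    if time_overwrite is None:
        return None
    if not isinstance(time_overwrite, int):
        raise TypeError("time_overwrite must be Unix epoch seconds as int")
    ts = Timestamp()
    ts.FromSeconds(time_overwrite)  # well-known-type helper
    return ts

print(to_pb_timestamp(1704067200))  # seconds: 1704067200
print(to_pb_timestamp(None))        # None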
arize/ml/stream_validation.py CHANGED
@@ -1,6 +1,5 @@
-"""Stream validation logic for ML model predictions."""
-
 # type: ignore[pb2]
+"""Stream validation logic for ML model predictions."""
 
 from arize.constants.ml import MAX_PREDICTION_ID_LEN, MIN_PREDICTION_ID_LEN
 from arize.exceptions.parameters import (
@@ -185,7 +184,7 @@ def validate_and_convert_prediction_id(
     """Validate and convert a prediction ID to string format, or generate one if absent.
 
     Args:
-        prediction_id: The prediction ID to validate/convert, or None.
+        prediction_id: The prediction ID to validate/convert, or :obj:`None`.
         environment: The environment context (training, validation, production).
         prediction_label: Optional prediction label for delayed record detection.
        actual_label: Optional actual label for delayed record detection.
arize/ml/surrogate_explainer/mimic.py CHANGED
@@ -36,7 +36,7 @@ class Mimic:
         """Initialize the Mimic explainer with training data and model.
 
         Args:
-            X: Training data DataFrame for the surrogate model.
+            X: Training data :class:`pandas.DataFrame` for the surrogate model.
             model_func: Model function to explain.
         """
         self.explainer = MimicExplainer(
@@ -48,7 +48,7 @@ class Mimic:
         )
 
     def explain(self, X: pd.DataFrame) -> pd.DataFrame:
-        """Explain feature importance for the given input DataFrame."""
+        """Explain feature importance for the given input :class:`pandas.DataFrame`."""
         return pd.DataFrame(
             self.explainer.explain_local(X).local_importance_values,
             columns=X.columns,
@@ -59,7 +59,7 @@ class Mimic:
     def augment(
         df: pd.DataFrame, schema: Schema, model_type: ModelTypes
     ) -> tuple[pd.DataFrame, Schema]:
-        """Augment the DataFrame and schema with SHAP values for explainability."""
+        """Augment the :class:`pandas.DataFrame` and schema with SHAP values for explainability."""
         features = schema.feature_column_names
         X = df[features]
 
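`Mimic.explain` wraps the surrogate's per-row importance values in a DataFrame keyed by the input's columns. A runnable stand-in for that wrap (the importance matrix below is fabricated; MimicExplainer itself is not invoked):

import numpy as np
import pandas as pd

X = pd.DataFrame({"f1": [0.2, 0.9], "f2": [1.0, 3.5]})
# Stand-in for explainer.explain_local(X).local_importance_values:
local_importance_values = np.array([[0.7, 0.3], [0.1, 0.9]])

# Same column-preserving wrap as in explain() above.
shap_df = pd.DataFrame(local_importance_values, columns=X.columns)
print(shap_df)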
arize/ml/types.py CHANGED
@@ -16,27 +16,13 @@ from typing import (
 import numpy as np
 
 from arize.constants.ml import (
-    # MAX_MULTI_CLASS_NAME_LENGTH,
-    # MAX_NUMBER_OF_MULTI_CLASS_CLASSES,
     MAX_MULTI_CLASS_NAME_LENGTH,
     MAX_NUMBER_OF_MULTI_CLASS_CLASSES,
     MAX_NUMBER_OF_SIMILARITY_REFERENCES,
     MAX_RAW_DATA_CHARACTERS,
     MAX_RAW_DATA_CHARACTERS_TRUNCATION,
-    # MAX_RAW_DATA_CHARACTERS,
-    # MAX_RAW_DATA_CHARACTERS_TRUNCATION,
 )
 from arize.exceptions.parameters import InvalidValueType
-
-#
-# from arize.utils.constants import (
-#     MAX_MULTI_CLASS_NAME_LENGTH,
-#     MAX_NUMBER_OF_MULTI_CLASS_CLASSES,
-#     MAX_NUMBER_OF_SIMILARITY_REFERENCES,
-#     MAX_RAW_DATA_CHARACTERS,
-#     MAX_RAW_DATA_CHARACTERS_TRUNCATION,
-# )
-# from arize.utils.errors import InvalidValueType
 from arize.logging import get_truncation_warning_message
 from arize.utils.types import is_dict_of, is_iterable_of, is_list_of
 
@@ -299,30 +285,6 @@ class Embedding(NamedTuple):
         return any(isinstance(data, t) for t in (list, np.ndarray))
 
 
-# @dataclass
-# class _PromptOrResponseText:
-#     data: str
-#
-#     def validate(self, name: str) -> None:
-#         # Validate that data is a string
-#         if not isinstance(self.data, str):
-#             raise TypeError(f"'{name}' must be a str")
-#
-#         character_count = len(self.data)
-#         if character_count > MAX_RAW_DATA_CHARACTERS:
-#             raise ValueError(
-#                 f"'{name}' field must not contain more than {MAX_RAW_DATA_CHARACTERS} characters. "
-#                 f"Found {character_count}."
-#             )
-#         elif character_count > MAX_RAW_DATA_CHARACTERS_TRUNCATION:
-#             logger.warning(
-#                 get_truncation_warning_message(
-#                     f"'{name}'", MAX_RAW_DATA_CHARACTERS_TRUNCATION
-#                 )
-#             )
-#         return None
-
-
 class LLMRunMetadata(NamedTuple):
     """Metadata for LLM execution including token counts and latency."""
 
@@ -1021,22 +983,6 @@ class LLMRunMetadataColumnNames:
     )
 
 
-# @dataclass
-# class DocumentColumnNames:
-#     id_column_name: Optional[str] = None
-#     version_column_name: Optional[str] = None
-#     text_embedding_column_names: Optional[EmbeddingColumnNames] = None
-#
-#     def __iter__(self):
-#         return iter(
-#             (
-#                 self.id_column_name,
-#                 self.version_column_name,
-#                 self.text_embedding_column_names,
-#             )
-#         )
-#
-#
 @dataclass
 class SimilarityReference:
     """Reference to a prediction for similarity search operations."""
@@ -1531,7 +1477,7 @@ def add_to_column_count_dictionary(
 
     Args:
         column_dictionary: Dictionary mapping column names to counts.
-        col: The column name to increment, or None to skip.
+        col: The column name to increment, or :obj:`None` to skip.
     """
     if col:
        if col in column_dictionary:
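`add_to_column_count_dictionary` documents a skip-on-None increment. A runnable analogue of that contract (the function name here is illustrative, not the SDK's):

def add_count(column_dictionary: dict[str, int], col: str | None) -> None:
    # Falsy col (None or "") is skipped, per the docstring above.
    if col:
        column_dictionary[col] = column_dictionary.get(col, 0) + 1

counts: dict[str, int] = {}
add_count(counts, "prediction_id")
add_count(counts, "prediction_id")
add_count(counts, None)
assert counts == {"prediction_id": 2}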
arize/pre_releases.py CHANGED
@@ -4,6 +4,7 @@ import functools
 import logging
 from collections.abc import Callable
 from enum import StrEnum
+from typing import TypeVar
 
 from arize.version import __version__
 
@@ -19,6 +20,8 @@ class ReleaseStage(StrEnum):
 
 _WARNED: set[str] = set()
 
+_F = TypeVar("_F", bound=Callable)
+
 
 def _format_prerelease_message(*, key: str, stage: ReleaseStage) -> str:
     article = "an" if stage is ReleaseStage.ALPHA else "a"
@@ -28,10 +31,10 @@ def _format_prerelease_message(*, key: str, stage: ReleaseStage) -> str:
     )
 
 
-def prerelease_endpoint(*, stage: ReleaseStage, key: str) -> object:
+def prerelease_endpoint(*, stage: ReleaseStage, key: str) -> Callable[[_F], _F]:
     """Decorate a method to emit a prerelease warning via logging once per process."""
 
-    def deco(fn: Callable[..., object]) -> object:
+    def deco(fn: _F) -> _F:
         @functools.wraps(fn)
         def wrapper(*args: object, **kwargs: object) -> object:
             if key not in _WARNED:
@@ -39,6 +42,6 @@ def prerelease_endpoint(*, stage: ReleaseStage, key: str) -> object:
                 logger.warning(_format_prerelease_message(key=key, stage=stage))
             return fn(*args, **kwargs)
 
-        return wrapper
+        return wrapper  # type: ignore[return-value]
 
     return deco
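The switch from `-> object` to `Callable[[_F], _F]` is what lets type checkers keep a decorated method's signature at call sites. A self-contained analogue of the pattern as it appears in the diff (decorator name and message are illustrative):

import functools
import logging
from collections.abc import Callable
from typing import TypeVar

logger = logging.getLogger(__name__)
_WARNED: set[str] = set()
_F = TypeVar("_F", bound=Callable)

def warn_once(*, key: str, message: str) -> Callable[[_F], _F]:
    def deco(fn: _F) -> _F:
        @functools.wraps(fn)
        def wrapper(*args: object, **kwargs: object) -> object:
            if key not in _WARNED:
                _WARNED.add(key)
                logger.warning(message)
            return fn(*args, **kwargs)
        # wrapper is not literally _F, hence the same ignore as in the diff.
        return wrapper  # type: ignore[return-value]
    return deco

@warn_once(key="demo", message="demo() is in beta")
def add(x: int, y: int) -> int:
    return x + y

print(add(1, 2))  # checkers still see (int, int) -> int; prints 3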
arize/projects/client.py CHANGED
@@ -9,6 +9,7 @@ from arize.pre_releases import ReleaseStage, prerelease_endpoint
 
 if TYPE_CHECKING:
     from arize._generated.api_client import models
+    from arize._generated.api_client.api_client import ApiClient
     from arize.config import SDKConfiguration
 
 logger = logging.getLogger(__name__)
@@ -26,18 +27,21 @@ class ProjectsClient:
        :class:`arize.config.SDKConfiguration`.
    """
 
-    def __init__(self, *, sdk_config: SDKConfiguration) -> None:
+    def __init__(
+        self, *, sdk_config: SDKConfiguration, generated_client: ApiClient
+    ) -> None:
         """
         Args:
             sdk_config: Resolved SDK configuration.
+            generated_client: Shared generated API client instance.
         """  # noqa: D205, D212
         self._sdk_config = sdk_config
 
         # Import at runtime so it's still lazy and extras-gated by the parent
         from arize._generated import api_client as gen
 
-        # Use the shared generated client from the config
-        self._api = gen.ProjectsApi(self._sdk_config.get_generated_client())
+        # Use the provided client directly
+        self._api = gen.ProjectsApi(generated_client)
 
     @prerelease_endpoint(key="projects.list", stage=ReleaseStage.BETA)
     def list(
@@ -125,7 +129,8 @@ class ProjectsClient:
         Args:
             project_id: Project ID.
 
-        Returns: This method returns None on success (common empty 204 response)
+        Returns:
+            This method returns None on success (common empty 204 response).
 
         Raises:
            arize._generated.api_client.exceptions.ApiException: If the API request fails
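The constructor change is plain dependency injection: ProjectsClient now receives the shared generated ApiClient instead of fetching one from the config. This plausibly pairs with the new `arize/_client_factory.py` in the file list, which would own construction of the shared client. A minimal sketch of the pattern with stand-in classes (all names below are illustrative, not the SDK's):

class FakeApiClient:
    def request(self, path: str) -> str:
        return f"GET {path}"

class ProjectsWrapper:
    def __init__(self, *, sdk_config: dict, generated_client: FakeApiClient) -> None:
        self._sdk_config = sdk_config
        self._api = generated_client  # use the provided client directly

shared = FakeApiClient()  # built once by whoever owns the client lifecycle
projects = ProjectsWrapper(sdk_config={"host": "app.arize.com"}, generated_client=shared)
print(projects._api.request("/v1/projects"))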
arize/regions.py CHANGED
@@ -13,7 +13,7 @@ class Region(StrEnum):
     EU_WEST_1 = "eu-west-1a"
     CA_CENTRAL_1 = "ca-central-1a"
     US_EAST_1 = "us-east-1b"
-    UNSPECIFIED = ""
+    UNSET = ""
 
 
 @dataclass(frozen=True)
@@ -36,5 +36,5 @@ def _get_region_endpoints(region: Region) -> RegionEndpoints:
 
 
 REGION_ENDPOINTS: dict[Region, RegionEndpoints] = {
-    r: _get_region_endpoints(r) for r in Region if r != Region.UNSPECIFIED
+    r: _get_region_endpoints(r) for r in Region if r != Region.UNSET
 }
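Renaming the sentinel member is a breaking change for any code that referenced `Region.UNSPECIFIED`, since enum member access raises AttributeError for removed names. A runnable sketch (only the sentinel and one member from the diff shown):

from enum import StrEnum

class Region(StrEnum):
    US_EAST_1 = "us-east-1b"
    UNSET = ""  # was UNSPECIFIED in 8.0.0b1

# The sentinel is excluded from the endpoint map, as in the diff.
print([r for r in Region if r != Region.UNSET])  # [<Region.US_EAST_1: 'us-east-1b'>]

try:
    Region.UNSPECIFIED
except AttributeError as e:
    print(e)  # UNSPECIFIED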
arize/spans/client.py CHANGED
@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Client implementation for managing spans and traces in the Arize platform."""
 
-# type: ignore[pb2]
 from __future__ import annotations
 
 import json
@@ -78,10 +78,11 @@ class SpansClient:
         Args:
             space_id: The space ID where the project resides.
             project_name: A unique name to identify your project in the Arize platform.
-            dataframe: The dataframe containing the LLM traces.
-            evals_dataframe: A dataframe containing LLM evaluations data.
-                The evaluations are joined to their corresponding spans via a left outer join, i.e.,
-                using only `context.span_id` from the spans dataframe. Defaults to None.
+            dataframe (:class:`pandas.DataFrame`): The dataframe containing the LLM traces.
+            evals_dataframe (:class:`pandas.DataFrame` | :obj:`None`): A dataframe containing
+                LLM evaluations data. The evaluations are joined to their corresponding spans
+                via a left outer join, i.e., using only `context.span_id` from the spans
+                dataframe. Defaults to None.
             datetime_format: format for the timestamp captured in the LLM traces.
                 Defaults to "%Y-%m-%dT%H:%M:%S.%f+00:00".
             validate: When set to True, validation is run before sending data.
@@ -280,7 +281,7 @@ class SpansClient:
         Args:
             space_id: The space ID where the project resides.
             project_name: A unique name to identify your project in the Arize platform.
-            dataframe: A dataframe containing LLM evaluations data.
+            dataframe (:class:`pandas.DataFrame`): A dataframe containing LLM evaluations data.
             validate: When set to True, validation is run before sending data.
                 Defaults to True.
             force_http: Force the use of HTTP for data upload. Defaults to False.
@@ -453,7 +454,7 @@ class SpansClient:
         Args:
             space_id: The space ID where the project resides.
             project_name: A unique name to identify your project in the Arize platform.
-            dataframe: A dataframe containing LLM annotation data.
+            dataframe (:class:`pandas.DataFrame`): A dataframe containing LLM annotation data.
             validate: When set to True, validation is run before sending data.
                 Defaults to True.
         """
@@ -684,7 +685,8 @@ class SpansClient:
         Args:
             space_id: The space ID where the project resides.
             project_name: A unique name to identify your project in the Arize platform.
-            dataframe: DataFrame with span_ids and either patch documents or metadata field columns.
+            dataframe (:class:`pandas.DataFrame`): DataFrame with span_ids and either patch
+                documents or metadata field columns.
             patch_document_column_name: Name of the column containing JSON patch documents.
                 Defaults to "patch_document".
             validate: When set to True, validation is run before sending data.
@@ -1004,14 +1006,14 @@ class SpansClient:
         columns: list | None = None,
         stream_chunk_size: int | None = None,
     ) -> pd.DataFrame:
-        """Export span data from Arize to a pandas DataFrame.
+        """Export span data from Arize to a :class:`pandas.DataFrame`.
 
         Retrieves trace/span data from the specified project within a time range
-        and returns it as a pandas DataFrame. Supports filtering with SQL-like
+        and returns it as a :class:`pandas.DataFrame`. Supports filtering with SQL-like
         WHERE clauses and similarity search for semantic retrieval.
 
         Returns:
-            DataFrame containing the requested span data with columns
+            :class:`pandas.DataFrame`: DataFrame containing the requested span data with columns
            for span metadata, attributes, events, and any custom fields.
        """
        with ArizeFlightClient(
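The evals_dataframe docstring describes a left outer join on `context.span_id`. A runnable pandas illustration of those semantics (column names other than `context.span_id` are invented for the demo; the SDK's actual merge code is not shown in this diff):

import pandas as pd

spans_df = pd.DataFrame(
    {"context.span_id": ["s1", "s2", "s3"], "name": ["llm", "retriever", "tool"]}
)
evals_df = pd.DataFrame(
    {"context.span_id": ["s1", "s3"], "eval.quality.label": ["good", "bad"]}
)

# Left outer join: every span is kept; s2 gets NaN, and eval rows whose
# span_id is absent from spans_df would be dropped.
joined = spans_df.merge(evals_df, on="context.span_id", how="left")
print(joined)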
arize/spans/columns.py CHANGED
@@ -39,8 +39,6 @@ class SpanColumn:
         self.data_type = data_type
 
 
-#
-#
 # Root level columns
 SPAN_TRACE_ID_COL = SpanColumn(
     name="context.trace_id",
@@ -96,18 +94,18 @@ SPAN_KIND_COL = SpanColumn(
     data_type=SpanColumnDataType.STRING,
 )
 # Attributes Exception columns
-# SPAN_ATTRIBUTES_EXCEPTION_TYPE_COL = SpanColumn(
-#     name=f"attributes.{otel.SpanAttributes.EXCEPTION_TYPE}",
-#     data_type=SpanColumnDataType.STRING,
-# )
+SPAN_ATTRIBUTES_EXCEPTION_TYPE_COL = SpanColumn(
+    name=f"attributes.{otel.SpanAttributes.EXCEPTION_TYPE}",
+    data_type=SpanColumnDataType.STRING,
+)
 SPAN_ATTRIBUTES_EXCEPTION_MESSAGE_COL = SpanColumn(
     name=f"attributes.{otel.SpanAttributes.EXCEPTION_MESSAGE}",
     data_type=SpanColumnDataType.STRING,
 )
-# SPAN_ATTRIBUTES_EXCEPTION_ESCAPED_COL = SpanColumn(
-#     name=f"attributes.{otel.SpanAttributes.EXCEPTION_ESCAPED}",
-#     data_type=SpanColumnDataType.BOOL,
-# )
+SPAN_ATTRIBUTES_EXCEPTION_ESCAPED_COL = SpanColumn(
+    name=f"attributes.{otel.SpanAttributes.EXCEPTION_ESCAPED}",
+    data_type=SpanColumnDataType.BOOL,
+)
 SPAN_ATTRIBUTES_EXCEPTION_STACKTRACE_COL = SpanColumn(
     name=f"attributes.{otel.SpanAttributes.EXCEPTION_STACKTRACE}",
     data_type=SpanColumnDataType.STRING,
@@ -176,20 +174,19 @@ SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VERSION_COL = SpanColumn(
     name=f"attributes.{oinf.SpanAttributes.LLM_PROMPT_TEMPLATE_VERSION}",
     data_type=SpanColumnDataType.STRING,
 )
-# SPAN_ATTRIBUTES_LLM_PROMPT_TOKEN_COUNT_COL = SpanColumn(
-#     name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_PROMPT}",
-#     data_type=SpanColumnDataType.NUMERIC,
-# )
-# SPAN_ATTRIBUTES_LLM_COMPLETION_TOKEN_COUNT_COL = SpanColumn(
-#     name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_COMPLETION}",
-#     data_type=SpanColumnDataType.NUMERIC,
-# )
-# SPAN_ATTRIBUTES_LLM_TOTAL_TOKEN_COUNT_COL = SpanColumn(
-#     name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_TOTAL}",
-#     data_type=SpanColumnDataType.NUMERIC,
-# )
+SPAN_ATTRIBUTES_LLM_PROMPT_TOKEN_COUNT_COL = SpanColumn(
+    name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_PROMPT}",
+    data_type=SpanColumnDataType.NUMERIC,
+)
+SPAN_ATTRIBUTES_LLM_COMPLETION_TOKEN_COUNT_COL = SpanColumn(
+    name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_COMPLETION}",
+    data_type=SpanColumnDataType.NUMERIC,
+)
+SPAN_ATTRIBUTES_LLM_TOTAL_TOKEN_COUNT_COL = SpanColumn(
+    name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_TOTAL}",
+    data_type=SpanColumnDataType.NUMERIC,
+)
 # Attributes Message Keys
-# SPAN_ATTRIBUTES_MESSAGE_NAME_KEY = f"{oinf.MessageAttributes.MESSAGE_NAME}"
 SPAN_ATTRIBUTES_MESSAGE_ROLE_KEY = f"{oinf.MessageAttributes.MESSAGE_ROLE}"
 SPAN_ATTRIBUTES_MESSAGE_CONTENT_KEY = (
     f"{oinf.MessageAttributes.MESSAGE_CONTENT}"
@@ -223,7 +220,6 @@ SPAN_ATTRIBUTES_RETRIEVAL_DOCUMENTS_COL = SpanColumn(
 )
 # Document Object Keys
 SPAN_ATTRIBUTES_DOCUMENT_ID_KEY = f"{oinf.DocumentAttributes.DOCUMENT_ID}"
-# SPAN_ATTRIBUTES_DOCUMENT_SCORE_KEY = f"{oinf.DocumentAttributes.DOCUMENT_SCORE}"
 SPAN_ATTRIBUTES_DOCUMENT_CONTENT_KEY = (
     f"{oinf.DocumentAttributes.DOCUMENT_CONTENT}"
 )
@@ -247,10 +243,10 @@ SPAN_ATTRIBUTES_RERANKER_MODEL_NAME_COL = SpanColumn(
     name=f"attributes.{oinf.RerankerAttributes.RERANKER_MODEL_NAME}",
     data_type=SpanColumnDataType.STRING,
 )
-# SPAN_ATTRIBUTES_RERANKER_TOP_K_COL = SpanColumn(
-#     name=f"attributes.{oinf.RerankerAttributes.RERANKER_TOP_K}",
-#     data_type=SpanColumnDataType.NUMERIC,
-# )
+SPAN_ATTRIBUTES_RERANKER_TOP_K_COL = SpanColumn(
+    name=f"attributes.{oinf.RerankerAttributes.RERANKER_TOP_K}",
+    data_type=SpanColumnDataType.NUMERIC,
+)
 SPAN_ATTRIBUTES_SESSION_ID = SpanColumn(
     name=f"attributes.{oinf.SpanAttributes.SESSION_ID}",
     data_type=SpanColumnDataType.STRING,
@@ -281,9 +277,9 @@ SPAN_OPENINFERENCE_COLUMNS = [
     SPAN_STATUS_CODE_COL,
     SPAN_STATUS_MESSAGE_COL,
     SPAN_EVENTS_COL,
-    # SPAN_ATTRIBUTES_EXCEPTION_TYPE_COL,
+    SPAN_ATTRIBUTES_EXCEPTION_TYPE_COL,
     SPAN_ATTRIBUTES_EXCEPTION_MESSAGE_COL,
-    # SPAN_ATTRIBUTES_EXCEPTION_ESCAPED_COL,
+    SPAN_ATTRIBUTES_EXCEPTION_ESCAPED_COL,
     SPAN_ATTRIBUTES_EXCEPTION_STACKTRACE_COL,
     SPAN_ATTRIBUTES_INPUT_VALUE_COL,
     SPAN_ATTRIBUTES_INPUT_MIME_TYPE_COL,
@@ -297,9 +293,9 @@ SPAN_OPENINFERENCE_COLUMNS = [
     SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_TEMPLATE_COL,
     SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VARIABLES_COL,
     SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VERSION_COL,
-    # SPAN_ATTRIBUTES_LLM_PROMPT_TOKEN_COUNT_COL,
-    # SPAN_ATTRIBUTES_LLM_COMPLETION_TOKEN_COUNT_COL,
-    # SPAN_ATTRIBUTES_LLM_TOTAL_TOKEN_COUNT_COL,
+    SPAN_ATTRIBUTES_LLM_PROMPT_TOKEN_COUNT_COL,
+    SPAN_ATTRIBUTES_LLM_COMPLETION_TOKEN_COUNT_COL,
+    SPAN_ATTRIBUTES_LLM_TOTAL_TOKEN_COUNT_COL,
     SPAN_ATTRIBUTES_TOOL_NAME_COL,
     SPAN_ATTRIBUTES_TOOL_DESCRIPTION_COL,
     SPAN_ATTRIBUTES_TOOL_PARAMETERS_COL,
@@ -308,18 +304,18 @@ SPAN_OPENINFERENCE_COLUMNS = [
     SPAN_ATTRIBUTES_RERANKER_OUTPUT_DOCUMENTS_COL,
     SPAN_ATTRIBUTES_RERANKER_QUERY_COL,
     SPAN_ATTRIBUTES_RERANKER_MODEL_NAME_COL,
-    # SPAN_ATTRIBUTES_RERANKER_TOP_K_COL,
+    SPAN_ATTRIBUTES_RERANKER_TOP_K_COL,
     SPAN_ATTRIBUTES_SESSION_ID,
     SPAN_ATTRIBUTES_USER_ID,
     SPAN_ATTRIBUTES_METADATA,
     SPAN_ATTRIBUTES_LLM_TOOLS_COL,
 ]
-#
+
 # List of columns that must be present in the dataframe
 SPAN_OPENINFERENCE_REQUIRED_COLUMNS = [
     col for col in SPAN_OPENINFERENCE_COLUMNS if col.required
 ]
-#
+
 # Eval columns
 # EVAL_COLUMN_PREFIX = "eval."
 # SESSION_EVAL_COLUMN_PREFIX = "session_eval."
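With the definitions above re-enabled and added to SPAN_OPENINFERENCE_COLUMNS, exported and validated dataframes gain these attribute columns. The string values below assume the standard OTel/OpenInference semantic-convention names behind the f-strings in the diff; verify against your installed packages:

new_span_columns = [
    "attributes.exception.type",
    "attributes.exception.escaped",
    "attributes.llm.token_count.prompt",
    "attributes.llm.token_count.completion",
    "attributes.llm.token_count.total",
    "attributes.reranker.top_k",
]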
arize/spans/conversion.py CHANGED
@@ -7,22 +7,21 @@ from datetime import datetime, timezone
 import numpy as np
 import pandas as pd
 
-# from arize.utils.logging import logger
 from arize.spans.columns import SPAN_OPENINFERENCE_COLUMNS, SpanColumnDataType
 
 
 def convert_timestamps(df: pd.DataFrame, fmt: str = "") -> pd.DataFrame:
-    """Convert timestamp columns in a DataFrame to nanoseconds.
+    """Convert timestamp columns in a :class:`pandas.DataFrame` to nanoseconds.
 
     Args:
-        df: The pandas DataFrame containing timestamp columns.
+        df: The :class:`pandas.DataFrame` containing timestamp columns.
         fmt: Optional datetime format string for parsing string timestamps. Defaults to "".
 
     Returns:
-        The DataFrame with timestamp columns converted to nanoseconds.
+        The :class:`pandas.DataFrame` with timestamp columns converted to nanoseconds.
 
     Raises:
-        KeyError: If required timestamp column is not found in DataFrame.
+        KeyError: If required timestamp column is not found in :class:`pandas.DataFrame`.
     """
     for col in SPAN_OPENINFERENCE_COLUMNS:
         if col.data_type != SpanColumnDataType.TIMESTAMP:
@@ -70,7 +69,7 @@ def jsonify_dictionaries(df: pd.DataFrame) -> pd.DataFrame:
     """Convert dictionary and list-of-dictionary columns to JSON strings.
 
     Args:
-        df: The pandas DataFrame containing dictionary columns.
+        df: The :class:`pandas.DataFrame` containing dictionary columns.
 
     Returns:
        The DataFrame with dictionary columns converted to JSON strings.
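A runnable sketch of the nanosecond target that convert_timestamps documents. This shows the documented behavior with plain pandas, not the SDK's exact code path:

import pandas as pd

ts = pd.Series(["2024-01-01T00:00:00.000000"])

# String timestamps -> datetime64[ns] -> int64 epoch nanoseconds,
# the representation the docstring describes.
ns = pd.to_datetime(ts).astype("int64")
print(ns.iloc[0])  # 1704067200000000000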
arize/spans/validation/common/argument_validation.py CHANGED
@@ -39,13 +39,13 @@ def check_field_convertible_to_str(
 def check_dataframe_type(
     dataframe: object,
 ) -> list[InvalidTypeArgument]:
-    """Validates that the provided argument is a pandas DataFrame.
+    """Validates that the provided argument is a :class:`pandas.DataFrame`.
 
     Args:
-        dataframe: The object to validate as a pandas DataFrame.
+        dataframe: The object to validate as a :class:`pandas.DataFrame`.
 
     Returns:
-        List of validation errors if not a DataFrame (empty if valid).
+        List of validation errors if not a :class:`pandas.DataFrame` (empty if valid).
     """
     if not isinstance(dataframe, pd.DataFrame):
         return [
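The validator's shape (a list of errors, empty when valid) composes well because callers can extend one list across many checks. A minimal runnable analogue (the function name and error strings are illustrative):

import pandas as pd

def check_dataframe_type_analogue(obj: object) -> list[str]:
    # Empty list means valid, matching the validator above.
    if not isinstance(obj, pd.DataFrame):
        return [f"expected pandas.DataFrame, got {type(obj).__name__}"]
    return []

errors: list[str] = []
errors.extend(check_dataframe_type_analogue(pd.DataFrame()))
errors.extend(check_dataframe_type_analogue([1, 2, 3]))
print(errors)  # ['expected pandas.DataFrame, got list']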