arize 8.0.0b0__py3-none-any.whl → 8.0.0b2__py3-none-any.whl

This diff represents the changes between publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
Files changed (74)
  1. arize/__init__.py +1 -1
  2. arize/_client_factory.py +50 -0
  3. arize/_flight/client.py +4 -4
  4. arize/_generated/api_client/__init__.py +0 -2
  5. arize/_generated/api_client/api/datasets_api.py +6 -6
  6. arize/_generated/api_client/api/experiments_api.py +6 -6
  7. arize/_generated/api_client/api/projects_api.py +3 -3
  8. arize/_generated/api_client/models/__init__.py +0 -1
  9. arize/_generated/api_client/models/datasets_create_request.py +2 -10
  10. arize/_generated/api_client/models/datasets_examples_insert_request.py +2 -10
  11. arize/_generated/api_client/test/test_datasets_create_request.py +2 -6
  12. arize/_generated/api_client/test/test_datasets_examples_insert_request.py +2 -6
  13. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +2 -6
  14. arize/_generated/api_client/test/test_datasets_examples_update_request.py +2 -6
  15. arize/_generated/api_client/test/test_experiments_create_request.py +2 -6
  16. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +2 -6
  17. arize/_generated/api_client_README.md +0 -1
  18. arize/_lazy.py +25 -9
  19. arize/client.py +16 -52
  20. arize/config.py +9 -36
  21. arize/constants/ml.py +9 -16
  22. arize/constants/spans.py +5 -10
  23. arize/datasets/client.py +13 -9
  24. arize/datasets/errors.py +1 -1
  25. arize/datasets/validation.py +2 -2
  26. arize/embeddings/auto_generator.py +2 -2
  27. arize/embeddings/errors.py +2 -2
  28. arize/embeddings/tabular_generators.py +1 -1
  29. arize/exceptions/base.py +0 -52
  30. arize/exceptions/parameters.py +0 -329
  31. arize/experiments/__init__.py +2 -2
  32. arize/experiments/client.py +16 -10
  33. arize/experiments/evaluators/base.py +6 -6
  34. arize/experiments/evaluators/executors.py +10 -3
  35. arize/experiments/evaluators/types.py +2 -2
  36. arize/experiments/functions.py +24 -17
  37. arize/experiments/types.py +6 -8
  38. arize/logging.py +1 -1
  39. arize/ml/batch_validation/errors.py +10 -1004
  40. arize/ml/batch_validation/validator.py +273 -225
  41. arize/ml/casting.py +7 -7
  42. arize/ml/client.py +12 -11
  43. arize/ml/proto.py +6 -6
  44. arize/ml/stream_validation.py +2 -3
  45. arize/ml/surrogate_explainer/mimic.py +3 -3
  46. arize/ml/types.py +1 -55
  47. arize/pre_releases.py +6 -3
  48. arize/projects/client.py +9 -4
  49. arize/regions.py +2 -2
  50. arize/spans/client.py +14 -12
  51. arize/spans/columns.py +32 -36
  52. arize/spans/conversion.py +5 -6
  53. arize/spans/validation/common/argument_validation.py +3 -3
  54. arize/spans/validation/common/dataframe_form_validation.py +6 -6
  55. arize/spans/validation/common/value_validation.py +1 -1
  56. arize/spans/validation/evals/dataframe_form_validation.py +4 -4
  57. arize/spans/validation/evals/evals_validation.py +6 -6
  58. arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
  59. arize/spans/validation/spans/dataframe_form_validation.py +2 -2
  60. arize/spans/validation/spans/spans_validation.py +6 -6
  61. arize/utils/arrow.py +2 -2
  62. arize/utils/cache.py +2 -2
  63. arize/utils/dataframe.py +4 -4
  64. arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
  65. arize/utils/openinference_conversion.py +10 -10
  66. arize/utils/proto.py +1 -1
  67. arize/version.py +1 -1
  68. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/METADATA +71 -63
  69. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/RECORD +72 -73
  70. arize/_generated/api_client/models/primitive_value.py +0 -172
  71. arize/_generated/api_client/test/test_primitive_value.py +0 -50
  72. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
  73. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
  74. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0
arize/spans/validation/common/dataframe_form_validation.py CHANGED
@@ -17,10 +17,10 @@ if TYPE_CHECKING:
 def check_dataframe_index(
     dataframe: pd.DataFrame,
 ) -> list[InvalidDataFrameIndex]:
-    """Validates that the DataFrame has a default integer index.
+    """Validates that the :class:`pandas.DataFrame` has a default integer index.
 
     Args:
-        dataframe: The DataFrame to validate.
+        dataframe: The :class:`pandas.DataFrame` to validate.
 
     Returns:
         List of validation errors if index is not default (empty if valid).
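A "default integer index" here means a `RangeIndex` over `0..len(df)-1`. A standalone sketch of such a check — illustrative only, with a hypothetical stand-in for the SDK's `InvalidDataFrameIndex` error type:

```python
# Illustrative sketch -- InvalidDataFrameIndex is a stand-in, not the SDK's class.
from dataclasses import dataclass

import pandas as pd


@dataclass
class InvalidDataFrameIndex:  # hypothetical stand-in for the SDK error type
    error_message: str


def check_default_index(dataframe: pd.DataFrame) -> list[InvalidDataFrameIndex]:
    # A default index is a RangeIndex covering 0..len(df)-1 with step 1.
    if dataframe.index.equals(pd.RangeIndex(len(dataframe))):
        return []
    return [InvalidDataFrameIndex("index is not a default RangeIndex")]


print(check_default_index(pd.DataFrame({"a": [1, 2]})))                # []
print(check_default_index(pd.DataFrame({"a": [1, 2]}, index=[5, 9])))  # one error
```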
@@ -34,10 +34,10 @@ def check_dataframe_required_column_set(
     df: pd.DataFrame,
     required_columns: list[str],
 ) -> list[InvalidDataFrameMissingColumns]:
-    """Validates that the DataFrame contains all required columns.
+    """Validates that the :class:`pandas.DataFrame` contains all required columns.
 
     Args:
-        df: The DataFrame to validate.
+        df: The :class:`pandas.DataFrame` to validate.
         required_columns: List of column names that must be present.
 
     Returns:
@@ -56,10 +56,10 @@ def check_dataframe_required_column_set(
 def check_dataframe_for_duplicate_columns(
     df: pd.DataFrame,
 ) -> list[InvalidDataFrameDuplicateColumns]:
-    """Validates that the DataFrame has no duplicate column names.
+    """Validates that the :class:`pandas.DataFrame` has no duplicate column names.
 
     Args:
-        df: The DataFrame to validate.
+        df: The :class:`pandas.DataFrame` to validate.
 
     Returns:
         List of validation errors if duplicate columns exist (empty if valid).
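pandas itself happily accepts duplicate column labels, which is why a form check like the one above is needed; detection reduces to `Index.duplicated`. A minimal illustration (not the SDK's code):

```python
import pandas as pd

# pandas permits duplicate column labels, so validators reject them up front.
df = pd.DataFrame([[1, 2, 3]], columns=["a", "b", "a"])
duplicates = df.columns[df.columns.duplicated()].unique().tolist()
print(duplicates)  # ['a']
```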
arize/spans/validation/common/value_validation.py CHANGED
@@ -56,7 +56,7 @@ def check_invalid_model_version(
         model_version: The optional model version to validate.
 
     Returns:
-        List of validation errors if model version is invalid (empty if valid or None).
+        List of validation errors if model version is invalid (empty if valid or :obj:`None`).
     """
     if model_version is None:
         return []
arize/spans/validation/evals/dataframe_form_validation.py CHANGED
@@ -27,10 +27,10 @@ def log_info_dataframe_extra_column_names(
     """Logs informational message about columns that don't follow evaluation naming conventions.
 
     Args:
-        df: DataFrame to check for extra column names, or None.
+        df: DataFrame to check for extra column names, or :obj:`None`.
 
     Returns:
-        None.
+        :obj:`None`.
     """
     if df is None:
         return
@@ -57,13 +57,13 @@ def log_info_dataframe_extra_column_names(
 def check_dataframe_column_content_type(
     df: pd.DataFrame,
 ) -> list[InvalidDataFrameColumnContentTypes]:
-    """Validates that evaluation DataFrame columns contain expected data types.
+    """Validates that evaluation :class:`pandas.DataFrame` columns contain expected data types.
 
     Checks that label columns contain strings, score columns contain numbers,
     and explanation columns contain strings.
 
     Args:
-        df: The DataFrame to validate.
+        df: The :class:`pandas.DataFrame` to validate.
 
     Returns:
         List of validation errors for columns with incorrect types.
arize/spans/validation/evals/evals_validation.py CHANGED
@@ -55,13 +55,13 @@ def validate_argument_types(
 def validate_dataframe_form(
     evals_dataframe: pd.DataFrame,
 ) -> list[ValidationError]:
-    """Validate the structure and form of an evaluations DataFrame.
+    """Validate the structure and form of an evaluations :class:`pandas.DataFrame`.
 
     Args:
-        evals_dataframe: The DataFrame containing evaluation data to validate.
+        evals_dataframe: The :class:`pandas.DataFrame` containing evaluation data to validate.
 
     Returns:
-        List of validation errors found in the DataFrame structure.
+        List of validation errors found in the :class:`pandas.DataFrame` structure.
     """
     df_validation.log_info_dataframe_extra_column_names(evals_dataframe)
     checks = chain(
@@ -84,15 +84,15 @@ def validate_values(
     project_name: str,
     model_version: str | None = None,
 ) -> list[ValidationError]:
-    """Validate the values within an evaluations DataFrame.
+    """Validate the values within an evaluations :class:`pandas.DataFrame`.
 
     Args:
-        evals_dataframe: The DataFrame containing evaluation data to validate.
+        evals_dataframe: The :class:`pandas.DataFrame` containing evaluation data to validate.
         project_name: The project name associated with the evaluations.
         model_version: Optional model version. Defaults to None.
 
     Returns:
-        List of validation errors found in DataFrame values.
+        List of validation errors found in :class:`pandas.DataFrame` values.
     """
     checks = chain(
         # Common
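Both `validate_dataframe_form` and `validate_values` (here and in the spans validators below) end by building `checks = chain(...)`: each check returns a list of errors, and `itertools.chain` concatenates them into a single result. A sketch of that composition pattern, with hypothetical checks:

```python
from itertools import chain

import pandas as pd


def check_not_empty(df: pd.DataFrame) -> list[str]:
    return [] if len(df) else ["dataframe is empty"]


def check_has_label(df: pd.DataFrame) -> list[str]:
    return [] if "label" in df.columns else ["missing 'label' column"]


def validate(df: pd.DataFrame) -> list[str]:
    # chain() flattens the per-check error lists; an empty result means valid.
    return list(chain(check_not_empty(df), check_has_label(df)))


print(validate(pd.DataFrame({"score": [0.9]})))  # ["missing 'label' column"]
```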
arize/spans/validation/metadata/dataframe_form_validation.py CHANGED
@@ -7,7 +7,7 @@ from arize.spans.columns import SPAN_SPAN_ID_COL
 
 
 class MetadataFormError(ValidationError):
-    """Raised when metadata DataFrame structure or format is invalid."""
+    """Raised when metadata :class:`pandas.DataFrame` structure or format is invalid."""
 
     def __init__(self, message: str, resolution: str) -> None:
         """Initialize the exception with metadata form error context.
arize/spans/validation/spans/dataframe_form_validation.py CHANGED
@@ -50,13 +50,13 @@ def log_info_dataframe_extra_column_names(
 def check_dataframe_column_content_type(
     df: pd.DataFrame,
 ) -> list[InvalidDataFrameColumnContentTypes]:
-    """Validates that span DataFrame columns contain data types matching Open Inference Specification.
+    """Validates span :class:`pandas.DataFrame` columns match OpenInference types.
 
     Checks that columns have appropriate data types: lists of dicts, dicts, numeric,
     boolean, timestamp, JSON strings, or plain strings based on column specifications.
 
     Args:
-        df: The DataFrame to validate.
+        df: The :class:`pandas.DataFrame` to validate.
 
     Returns:
         List of validation errors for columns with incorrect types.
arize/spans/validation/spans/spans_validation.py CHANGED
@@ -56,13 +56,13 @@ def validate_argument_types(
 def validate_dataframe_form(
     spans_dataframe: pd.DataFrame,
 ) -> list[ValidationError]:
-    """Validate the structure and form of a spans DataFrame.
+    """Validate the structure and form of a spans :class:`pandas.DataFrame`.
 
     Args:
-        spans_dataframe: The DataFrame containing spans data to validate.
+        spans_dataframe: The :class:`pandas.DataFrame` containing spans data to validate.
 
     Returns:
-        List of validation errors found in the DataFrame structure.
+        List of validation errors found in the :class:`pandas.DataFrame` structure.
     """
     df_validation.log_info_dataframe_extra_column_names(spans_dataframe)
     checks = chain(
@@ -88,15 +88,15 @@ def validate_values(
     project_name: str,
     model_version: str | None = None,
 ) -> list[ValidationError]:
-    """Validate the values within a spans DataFrame.
+    """Validate the values within a spans :class:`pandas.DataFrame`.
 
     Args:
-        spans_dataframe: The DataFrame containing spans data to validate.
+        spans_dataframe: The :class:`pandas.DataFrame` containing spans data to validate.
         project_name: The project name associated with the spans.
         model_version: Optional model version. Defaults to None.
 
     Returns:
-        List of validation errors found in DataFrame values.
+        List of validation errors found in :class:`pandas.DataFrame` values.
     """
     checks = chain(
         # Common
arize/utils/arrow.py CHANGED
@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Apache Arrow utilities for data serialization and file operations."""
 
-# type: ignore[pb2]
 from __future__ import annotations
 
 import base64
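Moving `# type: ignore[pb2]` above the module docstring is functional, not cosmetic: at least for mypy, a file-level `# type: ignore` is only honored when it precedes every statement, including the docstring. (The `[pb2]` error code is presumably project-specific.) The required ordering, sketched:

```python
# type: ignore[pb2]  # file-level ignores must precede all statements, even the docstring
"""Module docstring follows the ignore comment."""
```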
@@ -38,7 +38,7 @@ def post_arrow_table(
         pa_table: The PyArrow table containing the data.
         proto_schema: The protobuf schema for the data.
         headers: HTTP headers for the request.
-        timeout: Request timeout in seconds, or None for no timeout.
+        timeout: Request timeout in seconds, or :obj:`None` for no timeout.
         verify: Whether to verify SSL certificates.
         max_chunksize: Maximum chunk size for splitting large tables.
         tmp_dir: Temporary directory for serialization. Defaults to "".
arize/utils/cache.py CHANGED
@@ -31,7 +31,7 @@ def load_cached_resource(
         format: File format for cached data. Defaults to "parquet".
 
     Returns:
-        The cached DataFrame if found and valid, None otherwise.
+        The cached :class:`pandas.DataFrame` if found and valid, :obj:`None` otherwise.
     """
     key = _get_cache_key(resource, resource_id, resource_updated_at)
     filepath = _get_abs_file_path(cache_dir, f"{key}.{format}", resource)
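As the visible lines suggest, cache entries are keyed on `(resource, resource_id, resource_updated_at)`, so a change in the update timestamp produces a new key and stale files are simply never read again. A sketch of that keying idea (hypothetical hashing; the SDK's `_get_cache_key` may differ):

```python
import hashlib


def get_cache_key(resource: str, resource_id: str, updated_at: str | None) -> str:
    # Any change in updated_at yields a new key, so stale cache files
    # are bypassed rather than explicitly invalidated.
    raw = f"{resource}:{resource_id}:{updated_at}"
    return hashlib.sha256(raw.encode()).hexdigest()[:16]


print(get_cache_key("dataset", "ds-123", "2025-01-01T00:00:00Z"))
```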
@@ -59,7 +59,7 @@ def cache_resource(
         resource: Resource type name (e.g., "dataset", "experiment").
         resource_id: Unique identifier for the resource.
         resource_updated_at: Optional timestamp of last resource update.
-        resource_data: DataFrame containing the resource data.
+        resource_data: :class:`pandas.DataFrame` containing the resource data.
         format: File format for cached data. Defaults to "parquet".
     """
     key = _get_cache_key(resource, resource_id, resource_updated_at)
arize/utils/dataframe.py CHANGED
@@ -9,10 +9,10 @@ from arize.ml.types import BaseSchema
 
 # Resets the dataframe index if it is not a RangeIndex
 def reset_dataframe_index(dataframe: pd.DataFrame) -> None:
-    """Reset the DataFrame index in-place if it is not a RangeIndex.
+    """Reset the :class:`pandas.DataFrame` index in-place if it is not a RangeIndex.
 
     Args:
-        dataframe: The pandas DataFrame to reset.
+        dataframe: The :class:`pandas.DataFrame` to reset.
     """
     if not isinstance(dataframe.index, pd.RangeIndex):
         drop = dataframe.index.name in dataframe.columns
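The visible body hints at the mechanics: for a non-`RangeIndex`, the old index is dropped when a column of the same name already exists, otherwise it is kept as a new column. The underlying pandas calls, demonstrated standalone (illustrative):

```python
import pandas as pd

df = pd.DataFrame({"a": [10, 20]}, index=pd.Index([7, 8], name="row"))
assert not isinstance(df.index, pd.RangeIndex)

# reset_index(drop=False) moves the old index into a "row" column;
# inplace=True mirrors the function's in-place contract.
df.reset_index(drop=False, inplace=True)
assert isinstance(df.index, pd.RangeIndex)
print(df.columns.tolist())  # ['row', 'a']
```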
@@ -25,10 +25,10 @@ def remove_extraneous_columns(
     column_list: list[str] | None = None,
     regex: str | None = None,
 ) -> pd.DataFrame:
-    """Filter DataFrame to keep only relevant columns based on schema, list, or regex.
+    """Filter :class:`pandas.DataFrame` to keep only relevant columns based on schema, list, or regex.
 
     Args:
-        df: The pandas DataFrame to filter.
+        df: The :class:`pandas.DataFrame` to filter.
         schema: Optional schema defining used columns. Defaults to None.
         column_list: Optional explicit list of columns to keep. Defaults to None.
         regex: Optional regex pattern to match column names. Defaults to None.
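Of the three filter modes, the regex path maps directly onto `DataFrame.filter`; a minimal sketch of the list and regex modes (not the SDK's implementation; column names are made up):

```python
import pandas as pd

df = pd.DataFrame({"attributes.input": [1], "attributes.output": [2], "debug": [3]})

# Keep an explicit list of columns ...
by_list = df[["debug"]]
# ... or keep every column whose name matches a regex.
by_regex = df.filter(regex=r"^attributes\.")
print(by_regex.columns.tolist())  # ['attributes.input', 'attributes.output']
```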
arize/utils/online_tasks/dataframe_preprocessor.py CHANGED
@@ -10,7 +10,7 @@ logger = logging.getLogger(__name__)
 
 
 class ColumnNotFoundError(Exception):
-    """Raised when a specified column is not found in the DataFrame."""
+    """Raised when a specified column is not found in the :class:`pandas.DataFrame`."""
 
     def __init__(self, attribute: str) -> None:
         """Initialize with the attribute that couldn't be mapped to a column.
@@ -27,13 +27,13 @@ class ColumnNotFoundError(Exception):
 def extract_nested_data_to_column(
     attributes: list[str], df: pd.DataFrame
 ) -> pd.DataFrame:
-    """Extract nested attributes from complex data structures into new DataFrame columns.
+    """Extract nested attributes from complex data structures into new :class:`pandas.DataFrame` columns.
 
     This function, used in Online Tasks, is typically run on data exported from Arize.
-    It prepares the DataFrame by extracting relevant attributes from complex, deeply
+    It prepares the :class:`pandas.DataFrame` by extracting relevant attributes from complex, deeply
     nested data structures, such as those found in LLM outputs or JSON-like records.
     It helps extract specific values from these nested structures by identifying the
-    longest matching column name in the DataFrame and recursively accessing the desired
+    longest matching column name in the :class:`pandas.DataFrame` and recursively accessing the desired
     attribute path within each row. This preprocessing step ensures that the extracted
     values are available as new columns, allowing evaluators to process and assess
     these values effectively.
@@ -127,7 +127,7 @@ def _introspect_arize_attribute(value: object, attribute: str) -> object:
         attribute: "0.message.content"
         Returns: 'The capital of China is Beijing.'
 
-    - Returns None immediately when a key or index is not found
+    - Returns :obj:`None` immediately when a key or index is not found
     - Handles integer parts for lists
     - Parses JSON strings
     - Converts NumPy arrays to lists
@@ -174,10 +174,10 @@
     2) Else if `current_value` is a dict, check if `attribute_parts_unprocessed[0]` is a key.
        If not found, try combining `attribute_parts_unprocessed[0] + '.' + attribute_parts_unprocessed[1]`...
       to handle dotted keys in the dict.
-    3) If none match, return (None, 1) to signal "not found, consume 1 part."
+    3) If none match, return (:obj:`None`, 1) to signal "not found, consume 1 part."
 
     Returns (parsed_value, num_parts_processed):
-    - parsed_value: the found value or None if not found
+    - parsed_value: the found value or :obj:`None` if not found
     - num_parts_processed: how many parts were processed (1 or more)
     """
     if not attribute_parts_unprocessed:
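Putting the two docstrings together: traversal consumes the dotted path one part at a time, treats leading integers as list indices, parses JSON strings on the way down, and joins adjacent parts with `.` to cope with dotted dict keys. A simplified re-implementation of that idea, reproducing the documented example (a sketch, not the SDK's code; the real implementation also handles NumPy arrays and more edge cases):

```python
import json


def get_nested(value: object, attribute: str) -> object:
    parts = attribute.split(".")
    i = 0
    while i < len(parts):
        if isinstance(value, str):
            try:
                value = json.loads(value)  # parse JSON strings on the way down
            except ValueError:
                return None
        if isinstance(value, list) and parts[i].isdigit():
            value = value[int(parts[i])]
            i += 1
        elif isinstance(value, dict):
            # Try "message", then "message.content", ... to handle dotted dict keys.
            for j in range(i + 1, len(parts) + 1):
                key = ".".join(parts[i:j])
                if key in value:
                    value, i = value[key], j
                    break
            else:
                return None  # key not found: give up immediately
        else:
            return None
    return value


data = [{"message": {"content": "The capital of China is Beijing."}}]
print(get_nested(data, "0.message.content"))  # The capital of China is Beijing.
```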
arize/utils/openinference_conversion.py CHANGED
@@ -11,13 +11,13 @@ logger = logging.getLogger(__name__)
 
 
 def convert_datetime_columns_to_int(df: pd.DataFrame) -> pd.DataFrame:
-    """Convert datetime columns in a DataFrame to milliseconds since epoch.
+    """Convert datetime columns in a :class:`pandas.DataFrame` to milliseconds since epoch.
 
     Args:
-        df: The pandas DataFrame to convert.
+        df: The :class:`pandas.DataFrame` to convert.
 
     Returns:
-        The DataFrame with datetime columns converted to integers.
+        The :class:`pandas.DataFrame` with datetime columns converted to integers.
     """
     for col in df.select_dtypes(
         include=["datetime64[ns]", "datetime64[ns, UTC]"]
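For reference, the conversion this loop is driving at: casting a `datetime64[ns]` (or tz-aware) series to `int64` yields nanoseconds since epoch, so milliseconds are that cast floor-divided by 1,000,000 (illustrative, assuming the SDK does something equivalent):

```python
import pandas as pd

df = pd.DataFrame({"ts": pd.to_datetime(["2024-01-01", "2024-01-02"], utc=True)})

# Casting datetime64[ns, UTC] to int64 gives nanoseconds since epoch; // 1_000_000 -> ms.
df["ts"] = df["ts"].astype("int64") // 1_000_000
print(df["ts"].tolist())  # [1704067200000, 1704153600000]
```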
@@ -27,13 +27,13 @@ def convert_datetime_columns_to_int(df: pd.DataFrame) -> pd.DataFrame:
 
 
 def convert_boolean_columns_to_str(df: pd.DataFrame) -> pd.DataFrame:
-    """Convert boolean columns in a DataFrame to string type.
+    """Convert boolean columns in a :class:`pandas.DataFrame` to string type.
 
     Args:
-        df: The pandas DataFrame to convert.
+        df: The :class:`pandas.DataFrame` to convert.
 
     Returns:
-        The DataFrame with boolean columns converted to strings.
+        The :class:`pandas.DataFrame` with boolean columns converted to strings.
     """
     for col in df.columns:
         if df[col].dtype == "bool":
@@ -45,10 +45,10 @@ def convert_default_columns_to_json_str(df: pd.DataFrame) -> pd.DataFrame:
     """Convert dictionary values in specific columns to JSON strings.
 
     Args:
-        df: The pandas DataFrame to convert.
+        df: The :class:`pandas.DataFrame` to convert.
 
     Returns:
-        The DataFrame with dictionaries in eligible columns converted to JSON strings.
+        The :class:`pandas.DataFrame` with dictionaries in eligible columns converted to JSON strings.
     """
     for col in df.columns:
         if _should_convert_json(col):
@@ -68,10 +68,10 @@ def convert_json_str_to_dict(df: pd.DataFrame) -> pd.DataFrame:
     """Convert JSON string values in specific columns to Python dictionaries.
 
     Args:
-        df: The pandas DataFrame to convert.
+        df: The :class:`pandas.DataFrame` to convert.
 
     Returns:
-        The DataFrame with JSON strings in eligible columns converted to dictionaries.
+        The :class:`pandas.DataFrame` with JSON strings in eligible columns converted to dictionaries.
     """
     for col in df.columns:
         if _should_convert_json(col):
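These last two functions are inverses gated by the same `_should_convert_json` column predicate; the per-column work is element-wise `json.dumps`/`json.loads`. A round-trip sketch (the column name is made up):

```python
import json

import pandas as pd

df = pd.DataFrame({"attributes.llm.input": [{"prompt": "hi"}, {"prompt": "bye"}]})

# dict -> JSON string (as in convert_default_columns_to_json_str) ...
df["attributes.llm.input"] = df["attributes.llm.input"].apply(json.dumps)
# ... and back again (as in convert_json_str_to_dict).
df["attributes.llm.input"] = df["attributes.llm.input"].apply(json.loads)
print(df.iloc[0, 0])  # {'prompt': 'hi'}
```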
arize/utils/proto.py CHANGED
@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Protocol buffer schema utilities for tracing data."""
 
-# type: ignore[pb2]
 from arize._generated.protocol.rec import public_pb2 as pb2
 
 
arize/version.py CHANGED
@@ -1,3 +1,3 @@
 """Version information for the Arize SDK."""
 
-__version__ = "8.0.0b0"
+__version__ = "8.0.0b2"
{arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arize
-Version: 8.0.0b0
+Version: 8.0.0b2
 Summary: A helper library to interact with Arize AI APIs
 Project-URL: Homepage, https://arize.com
 Project-URL: Documentation, https://docs.arize.com/arize
@@ -26,47 +26,36 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: System :: Logging
 Classifier: Topic :: System :: Monitoring
 Requires-Python: >=3.10
-Requires-Dist: lazy-imports
 Requires-Dist: numpy>=2.0.0
-Provides-Extra: auto-embeddings
-Requires-Dist: datasets!=2.14.*,<3,>=2.8; extra == 'auto-embeddings'
-Requires-Dist: pandas<3,>=1.0.0; extra == 'auto-embeddings'
-Requires-Dist: pillow<11,>=8.4.0; extra == 'auto-embeddings'
-Requires-Dist: tokenizers<1,>=0.13; extra == 'auto-embeddings'
-Requires-Dist: torch<3,>=1.13; extra == 'auto-embeddings'
-Requires-Dist: transformers<5,>=4.25; extra == 'auto-embeddings'
-Provides-Extra: datasets-experiments
-Requires-Dist: numpy>=2.0.0; extra == 'datasets-experiments'
-Requires-Dist: openinference-semantic-conventions<1,>=0.1.21; extra == 'datasets-experiments'
-Requires-Dist: opentelemetry-api>=1.38.0; extra == 'datasets-experiments'
-Requires-Dist: opentelemetry-exporter-otlp-proto-common>=1.38.0; extra == 'datasets-experiments'
-Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.38.0; extra == 'datasets-experiments'
-Requires-Dist: opentelemetry-proto>=1.38.0; extra == 'datasets-experiments'
-Requires-Dist: opentelemetry-sdk>=1.38.0; extra == 'datasets-experiments'
-Requires-Dist: pydantic; extra == 'datasets-experiments'
-Requires-Dist: wrapt<2.0.0,>=1.0.0; extra == 'datasets-experiments'
+Requires-Dist: openinference-semantic-conventions<1,>=0.1.25
+Requires-Dist: opentelemetry-exporter-otlp-proto-common>=1.38.0
+Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.38.0
+Requires-Dist: opentelemetry-sdk>=1.38.0
+Requires-Dist: opentelemetry-semantic-conventions<1,>=0.43b0
+Requires-Dist: pandas<3,>=2.0.0
+Requires-Dist: protobuf<6,>=4.21.0
+Requires-Dist: pyarrow>=0.15.0
+Requires-Dist: pydantic<3,>=2
+Requires-Dist: python-dateutil<3,>=2.8.2
+Requires-Dist: requests-futures<2,>=1.0.0
+Requires-Dist: requests<3,>=2.0.0
+Requires-Dist: tqdm<5,>4
+Requires-Dist: typing-extensions<5,>=4.7.1
+Requires-Dist: urllib3<3,>=2.1.0
+Requires-Dist: wrapt<2.0.0,>=1.0.0
 Provides-Extra: dev
 Requires-Dist: pytest==8.4.2; extra == 'dev'
 Requires-Dist: ruff==0.13.2; extra == 'dev'
-Provides-Extra: mimic-explainer
-Requires-Dist: interpret-community[mimic]<1,>=0.22.0; extra == 'mimic-explainer'
-Provides-Extra: ml-batch
-Requires-Dist: pandas<3,>=1.0.0; extra == 'ml-batch'
-Requires-Dist: protobuf<6,>=4.21.0; extra == 'ml-batch'
-Requires-Dist: pyarrow>=0.15.0; extra == 'ml-batch'
-Requires-Dist: requests<3,>=2.0.0; extra == 'ml-batch'
-Requires-Dist: tqdm; extra == 'ml-batch'
-Provides-Extra: ml-stream
-Requires-Dist: protobuf<6,>=4.21.0; extra == 'ml-stream'
-Requires-Dist: requests-futures<2,>=1.0.0; extra == 'ml-stream'
-Provides-Extra: spans
-Requires-Dist: openinference-semantic-conventions<1,>=0.1.21; extra == 'spans'
-Requires-Dist: opentelemetry-semantic-conventions<1,>=0.43b0; extra == 'spans'
-Requires-Dist: pandas<3,>=1.0.0; extra == 'spans'
-Requires-Dist: protobuf<6,>=4.21.0; extra == 'spans'
-Requires-Dist: pyarrow>=0.15.0; extra == 'spans'
-Requires-Dist: requests<3,>=2.0.0; extra == 'spans'
-Requires-Dist: tqdm; extra == 'spans'
+Provides-Extra: embeddings
+Requires-Dist: datasets!=2.14.*,<3,>=2.8; extra == 'embeddings'
+Requires-Dist: pillow<11,>=8.4.0; extra == 'embeddings'
+Requires-Dist: tokenizers<1,>=0.13; extra == 'embeddings'
+Requires-Dist: torch<3,>=1.13; extra == 'embeddings'
+Requires-Dist: transformers<5,>=4.25; extra == 'embeddings'
+Provides-Extra: mimic
+Requires-Dist: interpret-community[mimic]<1,>=0.22.0; extra == 'mimic'
+Provides-Extra: otel
+Requires-Dist: arize-otel<1,>=0.11.0; extra == 'otel'
 Description-Content-Type: text/markdown
 
 <p align="center">
@@ -92,6 +81,8 @@ Description-Content-Type: text/markdown
 - [Overview](#overview)
 - [Key Features](#key-features)
 - [Installation](#installation)
+  - [Optional Dependencies](#optional-dependencies)
+  - [Migrating from Version 7](#migrating-from-version-7)
 - [Usage](#usage)
 - [Instrumentation](#instrumentation)
 - [Operations on Spans](#operations-on-spans)
@@ -130,16 +121,18 @@ Description-Content-Type: text/markdown
 
 A helper package to interact with Arize AI APIs.
 
-Arize is an AI engineering platform. It helps engineers develop, evaluate, and observe AI applications and agents. 
+Arize is an AI engineering platform. It helps engineers develop, evaluate, and observe AI applications and agents.
+
+Arize has both Enterprise and OSS products to support this goal:
 
-Arize has both Enterprise and OSS products to support this goal:
 - [Arize AX](https://arize.com/) — an enterprise AI engineering platform from development to production, with an embedded AI Copilot
 - [Phoenix](https://github.com/Arize-ai/phoenix) — a lightweight, open-source project for tracing, prompt engineering, and evaluation
 - [OpenInference](https://github.com/Arize-ai/openinference) — an open-source instrumentation package to trace LLM applications across models and frameworks
 
-We log over 1 trillion inferences and spans, 10 million evaluation runs, and 2 million OSS downloads every month. 
+We log over 1 trillion inferences and spans, 10 million evaluation runs, and 2 million OSS downloads every month.
 
 # Key Features
+
 - [**_Tracing_**](https://docs.arize.com/arize/observe/tracing) - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
 - [**_Evaluation_**](https://docs.arize.com/arize/evaluate/online-evals) - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
 - [**_Datasets_**](https://docs.arize.com/arize/develop/datasets) - Create versioned datasets of examples for experimentation, evaluation, and fine-tuning.
@@ -149,19 +142,36 @@ We log over 1 trillion inferences and spans, 10 million evaluation runs, and 2 m
 
 # Installation
 
-Install Arize (version 8 is currently under alpha release) via `pip` or `conda`:
+Install the base package:
 
 ```bash
-pip install arize==8.0.0ax
+pip install arize
 ```
-where `x` denotes the specific alpha release. Install the `arize-otel` package for auto-instrumentation of your LLM library:
+
+## Optional Dependencies
+
+The following optional extras provide specialized functionality:
+
+> **Note:** The `otel` extra installs the `arize-otel` package, which is also available as a standalone package. If you only need auto-instrumentation without the full SDK, install `arize-otel` directly.
+
+| Extra | Install Command | What It Provides |
+|-------|----------------|------------------|
+| **otel** | `pip install arize[otel]` | OpenTelemetry auto-instrumentation package (arize-otel) for automatic tracing |
+| **embeddings** | `pip install arize[embeddings]` | Automatic embedding generation for NLP, CV, and structured data (Pillow, datasets, tokenizers, torch, transformers) |
+| **mimic** | `pip install arize[mimic]` | MIMIC explainer for model interpretability |
+
+Install multiple extras:
 
 ```bash
-pip install arize-otel
+pip install arize[otel,embeddings,mimic]
 ```
 
+## Migrating from Version 7
+
+If you're upgrading from version 7, please refer to the [Migration Guide](https://arize.com/docs/api-clients/python/version-8/migration) for detailed migration steps and breaking changes.
+
 # Usage
-
+ 
 ## Instrumentation
 
 See [arize-otel in PyPI](https://pypi.org/project/arize-otel/):
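With heavyweight dependencies like `torch` now behind extras rather than in the core install (see the Optional Dependencies hunk above), code that touches an optional feature typically guards the import and points at the extra to install. A sketch of that conventional pattern (not SDK code; the message is illustrative):

```python
try:
    import torch  # provided by: pip install arize[embeddings]
except ImportError as exc:
    raise ImportError(
        "Embedding generation requires the 'embeddings' extra: "
        "pip install arize[embeddings]"
    ) from exc
```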
@@ -183,9 +193,8 @@ OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)
 
 ## Operations on Spans
 
-Use `arize.spans` to interact with spans: log spans into Arize, update the span's evaluations, annotations and metadata in bulk.
-
-> **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
+Use `arize.spans` to interact with spans: log spans into Arize, update the span's
+evaluations, annotations and metadata in bulk.
 
 ### Logging spans
 
@@ -259,9 +268,8 @@ df = client.spans.export_to_df(
 
 ## Operations on ML Models
 
-Use `arize.models` to interact with ML models: log ML data (traininv, validation, production) into Arize, either streaming or in batches.
-
-> **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
+Use `arize.ml` to interact with ML models: log ML data (training, validation, production)
+into Arize, either streaming or in batches.
 
 ### Stream log ML Data for a Classification use-case
 
@@ -276,7 +284,7 @@ MODEL_NAME = "<your-model-name>"
 features=...
 embedding_features=...
 
-response = client.models.log_stream(
+response = client.ml.log_stream(
     space_id=SPACE_ID,
     model_name=MODEL_NAME,
     model_type=ModelTypes.SCORE_CATEGORICAL,
@@ -328,7 +336,7 @@ schema = Schema(
 )
 
 # Logging Production DataFrame
-response = client.models.log_batch(
+response = client.ml.log_batch(
     space_id=SPACE_ID,
     model_name=MODEL_NAME,
     model_type=ModelTypes.OBJECT_DETECTION,
@@ -356,7 +364,7 @@ SPACE_ID = "<your-space-id>"
 MODEL_NAME = "<your-model-name>"
 MODEL_VERSION = "1.0"
 
-df = client.models.export_to_df(
+df = client.ml.export_to_df(
     space_id=SPACE_ID,
     model_name=MODEL_NAME,
     environment=Environments.TRAINING,
@@ -414,7 +422,7 @@ The response is an object of type `DatasetsList200Response`, and you can access
 
 ```python
 # Get the list of datasets from the response
-dataset_list = resp.datasets 
+dataset_list = resp.datasets
 # Get the response as a dictionary
 resp_dict = resp.to_dict()
 # Get the response in JSON format
@@ -530,7 +538,7 @@ The response is an object of type `ExperimentsList200Response`, and you can acce
 
 ```python
 # Get the list of datasets from the response
-experiment_list = resp.experiments 
+experiment_list = resp.experiments
 # Get the response as a dictionary
 resp_dict = resp.to_dict()
 # Get the response in JSON format
@@ -554,7 +562,7 @@ experiment, experiment_df = client.run_experiment(
     concurrency=..., # The number of concurrent tasks to run. Defaults to 3.
     set_global_tracer_provider=..., # If True, sets the global tracer provider for the experiment. Defaults to False
     exit_on_error=..., # If True, the experiment will stop running on first occurrence of an error. Defaults to False
-) 
+)
 ```
 
 The `Experiment` object also counts with convenience method similar to `List***` objects:
@@ -649,7 +657,7 @@ resp_df = resp.to_df()
 
 ### In Code
 
-You can use `configure_logging` to set up the logging behavior of the Arize package to your needs. 
+You can use `configure_logging` to set up the logging behavior of the Arize package to your needs.
 
 ```python
 from arize.logging import configure_logging
@@ -657,7 +665,7 @@ from arize.logging import configure_logging
 configure_logging(
     level=..., # Defaults to logging.INFO
     structured=..., # if True, emit JSON logs. Defaults to False
-) 
+)
 ```
 
 ### Via Environment Variables
@@ -668,11 +676,11 @@ Configure the same options as the section above, via:
 import os
 
 # Whether or not you want to disable logging altogether
-os.environ["ARIZE_LOG_ENABLE"] = "true" 
+os.environ["ARIZE_LOG_ENABLE"] = "true"
 # Set up the logging level
-os.environ["ARIZE_LOG_LEVEL"] = "debug" 
+os.environ["ARIZE_LOG_LEVEL"] = "debug"
 # Whether or not you want structured JSON logs
-os.environ["ARIZE_LOG_STRUCTURED"] = "false" 
+os.environ["ARIZE_LOG_STRUCTURED"] = "false"
 ```
 
 The default behavior of Arize's logs is: enabled, `INFO` level, and not structured.
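A common way such string-valued variables get interpreted, matching the stated defaults of enabled, `INFO`, and unstructured (illustrative only; the SDK's actual parsing may differ):

```python
import logging
import os

enabled = os.environ.get("ARIZE_LOG_ENABLE", "true").strip().lower() in ("1", "true", "yes")
level = getattr(logging, os.environ.get("ARIZE_LOG_LEVEL", "info").upper(), logging.INFO)
structured = os.environ.get("ARIZE_LOG_STRUCTURED", "false").strip().lower() == "true"
print(enabled, logging.getLevelName(level), structured)  # True INFO False
```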