arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. arize/__init__.py +17 -9
  2. arize/_exporter/client.py +55 -36
  3. arize/_exporter/parsers/tracing_data_parser.py +41 -30
  4. arize/_exporter/validation.py +3 -3
  5. arize/_flight/client.py +208 -77
  6. arize/_generated/api_client/__init__.py +30 -6
  7. arize/_generated/api_client/api/__init__.py +1 -0
  8. arize/_generated/api_client/api/datasets_api.py +864 -190
  9. arize/_generated/api_client/api/experiments_api.py +167 -131
  10. arize/_generated/api_client/api/projects_api.py +1197 -0
  11. arize/_generated/api_client/api_client.py +2 -2
  12. arize/_generated/api_client/configuration.py +42 -34
  13. arize/_generated/api_client/exceptions.py +2 -2
  14. arize/_generated/api_client/models/__init__.py +15 -4
  15. arize/_generated/api_client/models/dataset.py +10 -10
  16. arize/_generated/api_client/models/dataset_example.py +111 -0
  17. arize/_generated/api_client/models/dataset_example_update.py +100 -0
  18. arize/_generated/api_client/models/dataset_version.py +13 -13
  19. arize/_generated/api_client/models/datasets_create_request.py +16 -8
  20. arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
  21. arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
  22. arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
  23. arize/_generated/api_client/models/datasets_list200_response.py +10 -4
  24. arize/_generated/api_client/models/experiment.py +14 -16
  25. arize/_generated/api_client/models/experiment_run.py +108 -0
  26. arize/_generated/api_client/models/experiment_run_create.py +102 -0
  27. arize/_generated/api_client/models/experiments_create_request.py +16 -10
  28. arize/_generated/api_client/models/experiments_list200_response.py +10 -4
  29. arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
  30. arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
  31. arize/_generated/api_client/models/primitive_value.py +172 -0
  32. arize/_generated/api_client/models/problem.py +100 -0
  33. arize/_generated/api_client/models/project.py +99 -0
  34. arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
  35. arize/_generated/api_client/models/projects_list200_response.py +106 -0
  36. arize/_generated/api_client/rest.py +2 -2
  37. arize/_generated/api_client/test/test_dataset.py +4 -2
  38. arize/_generated/api_client/test/test_dataset_example.py +56 -0
  39. arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
  40. arize/_generated/api_client/test/test_dataset_version.py +7 -2
  41. arize/_generated/api_client/test/test_datasets_api.py +27 -13
  42. arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
  43. arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
  44. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
  45. arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
  46. arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
  47. arize/_generated/api_client/test/test_experiment.py +2 -4
  48. arize/_generated/api_client/test/test_experiment_run.py +56 -0
  49. arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
  50. arize/_generated/api_client/test/test_experiments_api.py +6 -6
  51. arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
  52. arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
  53. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
  54. arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
  55. arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
  56. arize/_generated/api_client/test/test_problem.py +57 -0
  57. arize/_generated/api_client/test/test_project.py +58 -0
  58. arize/_generated/api_client/test/test_projects_api.py +59 -0
  59. arize/_generated/api_client/test/test_projects_create_request.py +54 -0
  60. arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
  61. arize/_generated/api_client_README.md +43 -29
  62. arize/_generated/protocol/flight/flight_pb2.py +400 -0
  63. arize/_lazy.py +27 -19
  64. arize/client.py +269 -55
  65. arize/config.py +365 -116
  66. arize/constants/__init__.py +1 -0
  67. arize/constants/config.py +11 -4
  68. arize/constants/ml.py +6 -4
  69. arize/constants/openinference.py +2 -0
  70. arize/constants/pyarrow.py +2 -0
  71. arize/constants/spans.py +3 -1
  72. arize/datasets/__init__.py +1 -0
  73. arize/datasets/client.py +299 -84
  74. arize/datasets/errors.py +32 -2
  75. arize/datasets/validation.py +18 -8
  76. arize/embeddings/__init__.py +2 -0
  77. arize/embeddings/auto_generator.py +23 -19
  78. arize/embeddings/base_generators.py +89 -36
  79. arize/embeddings/constants.py +2 -0
  80. arize/embeddings/cv_generators.py +26 -4
  81. arize/embeddings/errors.py +27 -5
  82. arize/embeddings/nlp_generators.py +31 -12
  83. arize/embeddings/tabular_generators.py +32 -20
  84. arize/embeddings/usecases.py +12 -2
  85. arize/exceptions/__init__.py +1 -0
  86. arize/exceptions/auth.py +11 -1
  87. arize/exceptions/base.py +29 -4
  88. arize/exceptions/models.py +21 -2
  89. arize/exceptions/parameters.py +31 -0
  90. arize/exceptions/spaces.py +12 -1
  91. arize/exceptions/types.py +86 -7
  92. arize/exceptions/values.py +220 -20
  93. arize/experiments/__init__.py +1 -0
  94. arize/experiments/client.py +390 -286
  95. arize/experiments/evaluators/__init__.py +1 -0
  96. arize/experiments/evaluators/base.py +74 -41
  97. arize/experiments/evaluators/exceptions.py +6 -3
  98. arize/experiments/evaluators/executors.py +121 -73
  99. arize/experiments/evaluators/rate_limiters.py +106 -57
  100. arize/experiments/evaluators/types.py +34 -7
  101. arize/experiments/evaluators/utils.py +65 -27
  102. arize/experiments/functions.py +103 -101
  103. arize/experiments/tracing.py +52 -44
  104. arize/experiments/types.py +56 -31
  105. arize/logging.py +54 -22
  106. arize/models/__init__.py +1 -0
  107. arize/models/batch_validation/__init__.py +1 -0
  108. arize/models/batch_validation/errors.py +543 -65
  109. arize/models/batch_validation/validator.py +339 -300
  110. arize/models/bounded_executor.py +20 -7
  111. arize/models/casting.py +75 -29
  112. arize/models/client.py +326 -107
  113. arize/models/proto.py +95 -40
  114. arize/models/stream_validation.py +42 -14
  115. arize/models/surrogate_explainer/__init__.py +1 -0
  116. arize/models/surrogate_explainer/mimic.py +24 -13
  117. arize/pre_releases.py +43 -0
  118. arize/projects/__init__.py +1 -0
  119. arize/projects/client.py +129 -0
  120. arize/regions.py +40 -0
  121. arize/spans/__init__.py +1 -0
  122. arize/spans/client.py +130 -106
  123. arize/spans/columns.py +13 -0
  124. arize/spans/conversion.py +54 -38
  125. arize/spans/validation/__init__.py +1 -0
  126. arize/spans/validation/annotations/__init__.py +1 -0
  127. arize/spans/validation/annotations/annotations_validation.py +6 -4
  128. arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
  129. arize/spans/validation/annotations/value_validation.py +35 -11
  130. arize/spans/validation/common/__init__.py +1 -0
  131. arize/spans/validation/common/argument_validation.py +33 -8
  132. arize/spans/validation/common/dataframe_form_validation.py +35 -9
  133. arize/spans/validation/common/errors.py +211 -11
  134. arize/spans/validation/common/value_validation.py +80 -13
  135. arize/spans/validation/evals/__init__.py +1 -0
  136. arize/spans/validation/evals/dataframe_form_validation.py +28 -8
  137. arize/spans/validation/evals/evals_validation.py +34 -4
  138. arize/spans/validation/evals/value_validation.py +26 -3
  139. arize/spans/validation/metadata/__init__.py +1 -1
  140. arize/spans/validation/metadata/argument_validation.py +14 -5
  141. arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
  142. arize/spans/validation/metadata/value_validation.py +24 -10
  143. arize/spans/validation/spans/__init__.py +1 -0
  144. arize/spans/validation/spans/dataframe_form_validation.py +34 -13
  145. arize/spans/validation/spans/spans_validation.py +35 -4
  146. arize/spans/validation/spans/value_validation.py +76 -7
  147. arize/types.py +293 -157
  148. arize/utils/__init__.py +1 -0
  149. arize/utils/arrow.py +31 -15
  150. arize/utils/cache.py +34 -6
  151. arize/utils/dataframe.py +19 -2
  152. arize/utils/online_tasks/__init__.py +2 -0
  153. arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
  154. arize/utils/openinference_conversion.py +44 -5
  155. arize/utils/proto.py +10 -0
  156. arize/utils/size.py +5 -3
  157. arize/version.py +3 -1
  158. {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
  159. arize-8.0.0a23.dist-info/RECORD +174 -0
  160. {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
  161. arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
  162. arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
  163. arize/_generated/protocol/flight/export_pb2.py +0 -61
  164. arize/_generated/protocol/flight/ingest_pb2.py +0 -365
  165. arize-8.0.0a21.dist-info/RECORD +0 -146
  166. arize-8.0.0a21.dist-info/licenses/LICENSE.md +0 -12
@@ -1,6 +1,7 @@
1
+ """DataFrame form validation for span evaluations."""
2
+
1
3
  import logging
2
4
  import re
3
- from typing import List
4
5
 
5
6
  import pandas as pd
6
7
 
@@ -12,7 +13,7 @@ from arize.spans.columns import (
12
13
  EVAL_SCORE_PATTERN,
13
14
  SPAN_SPAN_ID_COL,
14
15
  )
15
- from arize.spans.conversion import isMissingValue
16
+ from arize.spans.conversion import is_missing_value
16
17
  from arize.spans.validation.common.errors import (
17
18
  InvalidDataFrameColumnContentTypes,
18
19
  )
@@ -23,8 +24,16 @@ logger = logging.getLogger(__name__)
23
24
  def log_info_dataframe_extra_column_names(
24
25
  df: pd.DataFrame | None,
25
26
  ) -> None:
27
+ """Logs informational message about columns that don't follow evaluation naming conventions.
28
+
29
+ Args:
30
+ df: DataFrame to check for extra column names, or None.
31
+
32
+ Returns:
33
+ None.
34
+ """
26
35
  if df is None:
27
- return None
36
+ return
28
37
  irrelevant_columns = [
29
38
  col
30
39
  for col in df.columns
@@ -42,12 +51,23 @@ def log_info_dataframe_extra_column_names(
42
51
  "- eval.<your-eval-name>.score"
43
52
  "- eval.<your-eval-name>.explanation"
44
53
  )
45
- return None
54
+ return
46
55
 
47
56
 
48
57
  def check_dataframe_column_content_type(
49
58
  df: pd.DataFrame,
50
- ) -> List[InvalidDataFrameColumnContentTypes]:
59
+ ) -> list[InvalidDataFrameColumnContentTypes]:
60
+ """Validates that evaluation DataFrame columns contain expected data types.
61
+
62
+ Checks that label columns contain strings, score columns contain numbers,
63
+ and explanation columns contain strings.
64
+
65
+ Args:
66
+ df: The DataFrame to validate.
67
+
68
+ Returns:
69
+ List of validation errors for columns with incorrect types.
70
+ """
51
71
  wrong_labels_cols = []
52
72
  wrong_scores_cols = []
53
73
  wrong_explanations_cols = []
@@ -67,18 +87,18 @@ def check_dataframe_column_content_type(
67
87
  )
68
88
  if eval_label_re.match(column):
69
89
  if not all(
70
- isinstance(value, str) or isMissingValue(value)
90
+ isinstance(value, str) or is_missing_value(value)
71
91
  for value in df[column]
72
92
  ):
73
93
  wrong_labels_cols.append(column)
74
94
  elif eval_score_re.match(column):
75
95
  if not all(
76
- isinstance(value, (int, float)) or isMissingValue(value)
96
+ isinstance(value, (int, float)) or is_missing_value(value)
77
97
  for value in df[column]
78
98
  ):
79
99
  wrong_scores_cols.append(column)
80
100
  elif eval_explanation_re.match(column) and not all(
81
- isinstance(value, str) or isMissingValue(value)
101
+ isinstance(value, str) or is_missing_value(value)
82
102
  for value in df[column]
83
103
  ):
84
104
  wrong_explanations_cols.append(column)
@@ -1,7 +1,9 @@
1
+ """Evaluation validation orchestration for spans."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
5
  from itertools import chain
4
- from typing import TYPE_CHECKING, List
6
+ from typing import TYPE_CHECKING
5
7
 
6
8
  from arize.spans.columns import SPAN_SPAN_ID_COL
7
9
  from arize.spans.validation.common import (
@@ -30,7 +32,17 @@ def validate_argument_types(
30
32
  evals_dataframe: pd.DataFrame,
31
33
  project_name: str,
32
34
  model_version: str | None = None,
33
- ) -> List[ValidationError]:
35
+ ) -> list[ValidationError]:
36
+ """Validate argument types for evaluation data submission.
37
+
38
+ Args:
39
+ evals_dataframe: The DataFrame containing evaluation data.
40
+ project_name: The project name to validate.
41
+ model_version: Optional model version to validate. Defaults to None.
42
+
43
+ Returns:
44
+ List of validation errors found in argument types.
45
+ """
34
46
  checks = chain(
35
47
  common_arg_validation.check_field_convertible_to_str(
36
48
  project_name, model_version
@@ -42,7 +54,15 @@ def validate_argument_types(
42
54
 
43
55
  def validate_dataframe_form(
44
56
  evals_dataframe: pd.DataFrame,
45
- ) -> List[ValidationError]:
57
+ ) -> list[ValidationError]:
58
+ """Validate the structure and form of an evaluations DataFrame.
59
+
60
+ Args:
61
+ evals_dataframe: The DataFrame containing evaluation data to validate.
62
+
63
+ Returns:
64
+ List of validation errors found in the DataFrame structure.
65
+ """
46
66
  df_validation.log_info_dataframe_extra_column_names(evals_dataframe)
47
67
  checks = chain(
48
68
  # Common
@@ -63,7 +83,17 @@ def validate_values(
63
83
  evals_dataframe: pd.DataFrame,
64
84
  project_name: str,
65
85
  model_version: str | None = None,
66
- ) -> List[ValidationError]:
86
+ ) -> list[ValidationError]:
87
+ """Validate the values within an evaluations DataFrame.
88
+
89
+ Args:
90
+ evals_dataframe: The DataFrame containing evaluation data to validate.
91
+ project_name: The project name associated with the evaluations.
92
+ model_version: Optional model version. Defaults to None.
93
+
94
+ Returns:
95
+ List of validation errors found in DataFrame values.
96
+ """
67
97
  checks = chain(
68
98
  # Common
69
99
  common_value_validation.check_invalid_project_name(project_name),
@@ -1,8 +1,10 @@
1
+ """Value validation logic for span evaluation data."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
5
  import re
4
6
  from itertools import chain
5
- from typing import TYPE_CHECKING, List
7
+ from typing import TYPE_CHECKING
6
8
 
7
9
  from arize.constants.spans import (
8
10
  EVAL_EXPLANATION_MAX_STR_LENGTH,
@@ -26,7 +28,18 @@ if TYPE_CHECKING:
26
28
 
27
29
  def check_eval_cols(
28
30
  dataframe: pd.DataFrame,
29
- ) -> List[ValidationError]:
31
+ ) -> list[ValidationError]:
32
+ """Validates evaluation column values for proper length and format.
33
+
34
+ Checks label strings for length constraints, scores for valid numeric values,
35
+ and explanations for length constraints.
36
+
37
+ Args:
38
+ dataframe: The DataFrame containing evaluation columns.
39
+
40
+ Returns:
41
+ List of validation errors found in evaluation columns.
42
+ """
30
43
  checks = []
31
44
  for col in dataframe.columns:
32
45
  if col.endswith(EVAL_LABEL_SUFFIX):
@@ -64,7 +77,17 @@ def check_eval_cols(
64
77
  # is not null
65
78
  def check_eval_columns_null_values(
66
79
  dataframe: pd.DataFrame,
67
- ) -> List[ValidationError]:
80
+ ) -> list[ValidationError]:
81
+ """Validates that evaluation columns don't have orphan explanations without labels or scores.
82
+
83
+ Ensures that if an explanation exists, at least one of label or score is non-null.
84
+
85
+ Args:
86
+ dataframe: The DataFrame containing evaluation columns.
87
+
88
+ Returns:
89
+ List of validation errors for evaluations with invalid null combinations.
90
+ """
68
91
  invalid_eval_names = []
69
92
  eval_prefix_and_name = set()
70
93
  for col in dataframe.columns:
@@ -1 +1 @@
1
-
1
+ """Metadata validation for LLM tracing spans."""
@@ -1,4 +1,4 @@
1
- from typing import List
1
+ """Argument validation for span metadata."""
2
2
 
3
3
  import pandas as pd
4
4
 
@@ -6,22 +6,31 @@ from arize.exceptions.base import ValidationError
6
6
 
7
7
 
8
8
  class MetadataArgumentError(ValidationError):
9
+ """Raised when metadata arguments are invalid or incorrectly specified."""
10
+
9
11
  def __init__(self, message: str, resolution: str) -> None:
12
+ """Initialize the exception with metadata argument error context.
13
+
14
+ Args:
15
+ message: Error message describing the invalid argument.
16
+ resolution: Guidance on how to resolve the error.
17
+ """
10
18
  self.message = message
11
19
  self.resolution = resolution
12
20
 
13
21
  def __repr__(self) -> str:
22
+ """Return a string representation for debugging and logging."""
14
23
  return "Metadata_Argument_Error"
15
24
 
16
25
  def error_message(self) -> str:
26
+ """Return the error message for this exception."""
17
27
  return f"{self.message} {self.resolution}"
18
28
 
19
29
 
20
30
  def validate_argument_types(
21
- metadata_dataframe, project_name
22
- ) -> List[ValidationError]:
23
- """
24
- Validates the types of arguments passed to update_spans_metadata.
31
+ metadata_dataframe: object, project_name: object
32
+ ) -> list[ValidationError]:
33
+ """Validates the types of arguments passed to update_spans_metadata.
25
34
 
26
35
  Args:
27
36
  metadata_dataframe: DataFrame with span IDs and patch documents
@@ -1,26 +1,38 @@
1
- from typing import List
1
+ """DataFrame form validation for span metadata."""
2
+
3
+ import pandas as pd
2
4
 
3
5
  from arize.exceptions.base import ValidationError
4
6
  from arize.spans.columns import SPAN_SPAN_ID_COL
5
7
 
6
8
 
7
9
  class MetadataFormError(ValidationError):
10
+ """Raised when metadata DataFrame structure or format is invalid."""
11
+
8
12
  def __init__(self, message: str, resolution: str) -> None:
13
+ """Initialize the exception with metadata form error context.
14
+
15
+ Args:
16
+ message: Error message describing the invalid DataFrame structure.
17
+ resolution: Guidance on how to resolve the error.
18
+ """
9
19
  self.message = message
10
20
  self.resolution = resolution
11
21
 
12
22
  def __repr__(self) -> str:
23
+ """Return a string representation for debugging and logging."""
13
24
  return "Metadata_Form_Error"
14
25
 
15
26
  def error_message(self) -> str:
27
+ """Return the error message for this exception."""
16
28
  return f"{self.message} {self.resolution}"
17
29
 
18
30
 
19
31
  def validate_dataframe_form(
20
- metadata_dataframe, patch_document_column_name="patch_document"
21
- ) -> List[ValidationError]:
22
- """
23
- Validates the structure of the metadata update dataframe.
32
+ metadata_dataframe: pd.DataFrame,
33
+ patch_document_column_name: str = "patch_document",
34
+ ) -> list[ValidationError]:
35
+ """Validates the structure of the metadata update dataframe.
24
36
 
25
37
  Args:
26
38
  metadata_dataframe: DataFrame with span IDs and patch documents or attributes.metadata.* columns
@@ -90,11 +102,15 @@ def validate_dataframe_form(
90
102
 
91
103
  # If using metadata fields, check each one
92
104
  if has_metadata_fields:
93
- for col in metadata_columns:
94
- if (
95
- metadata_dataframe[col].isna().all()
96
- ): # All values in column are null
97
- null_columns.append(col)
105
+ null_columns.extend(
106
+ [
107
+ col
108
+ for col in metadata_columns
109
+ if metadata_dataframe[col]
110
+ .isna()
111
+ .all() # All values in column are null
112
+ ]
113
+ )
98
114
 
99
115
  if null_columns:
100
116
  errors.append(
@@ -1,5 +1,8 @@
1
+ """Value validation logic for span metadata."""
2
+
1
3
  import json
2
- from typing import List
4
+
5
+ import pandas as pd
3
6
 
4
7
  from arize.constants.spans import (
5
8
  MAX_JSON_NESTING_DEPTH,
@@ -10,20 +13,32 @@ from arize.spans.columns import SPAN_SPAN_ID_COL
10
13
 
11
14
 
12
15
  class MetadataValueError(ValidationError):
16
+ """Raised when metadata values are invalid or violate constraints."""
17
+
13
18
  def __init__(self, message: str, resolution: str) -> None:
19
+ """Initialize the exception with metadata value error context.
20
+
21
+ Args:
22
+ message: Error message describing the invalid value.
23
+ resolution: Guidance on how to resolve the error.
24
+ """
14
25
  self.message = message
15
26
  self.resolution = resolution
16
27
 
17
28
  def __repr__(self) -> str:
29
+ """Return a string representation for debugging and logging."""
18
30
  return "Metadata_Value_Error"
19
31
 
20
32
  def error_message(self) -> str:
33
+ """Return the error message for this exception."""
21
34
  return f"{self.message} {self.resolution}"
22
35
 
23
36
 
24
- def calculate_json_depth(obj, current_depth=1):
37
+ def calculate_json_depth(obj: object, current_depth: int = 1) -> int:
25
38
  """Calculate the maximum nesting depth of a JSON object.
26
- Stops recursing once MAX_JSON_NESTING_DEPTH + 1 is reached for efficiency."""
39
+
40
+ Stops recursing once MAX_JSON_NESTING_DEPTH + 1 is reached for efficiency.
41
+ """
27
42
  # If we've already exceeded the max depth, return the current depth to avoid unnecessary recursion
28
43
  if current_depth > MAX_JSON_NESTING_DEPTH:
29
44
  return current_depth
@@ -32,19 +47,18 @@ def calculate_json_depth(obj, current_depth=1):
32
47
  return max(
33
48
  [calculate_json_depth(v, current_depth + 1) for v in obj.values()]
34
49
  )
35
- elif isinstance(obj, list) and obj:
50
+ if isinstance(obj, list) and obj:
36
51
  return max(
37
52
  [calculate_json_depth(item, current_depth + 1) for item in obj]
38
53
  )
39
- else:
40
- return current_depth
54
+ return current_depth
41
55
 
42
56
 
43
57
  def validate_values(
44
- metadata_dataframe, patch_document_column_name="patch_document"
45
- ) -> List[ValidationError]:
46
- """
47
- Validates the values in the metadata update dataframe.
58
+ metadata_dataframe: pd.DataFrame,
59
+ patch_document_column_name: str = "patch_document",
60
+ ) -> list[ValidationError]:
61
+ """Validates the values in the metadata update dataframe.
48
62
 
49
63
  Args:
50
64
  metadata_dataframe: DataFrame with span IDs and patch documents or attributes.metadata.* columns
@@ -0,0 +1 @@
1
+ """Span data validation for LLM tracing."""
@@ -1,15 +1,14 @@
1
+ """DataFrame form validation for spans."""
2
+
1
3
  import logging
4
+ from collections.abc import Iterable
2
5
  from datetime import datetime
3
- from typing import Iterable, List
4
6
 
5
7
  import pandas as pd
6
8
  from pandas.api.types import is_bool_dtype, is_numeric_dtype
7
9
 
8
- from arize.spans.columns import (
9
- SPAN_OPENINFERENCE_COLUMNS,
10
- SpanColumnDataType,
11
- )
12
- from arize.spans.conversion import isMissingValue
10
+ from arize.spans.columns import SPAN_OPENINFERENCE_COLUMNS, SpanColumnDataType
11
+ from arize.spans.conversion import is_missing_value
13
12
  from arize.spans.validation.common.errors import (
14
13
  InvalidDataFrameColumnContentTypes,
15
14
  )
@@ -21,6 +20,14 @@ logger = logging.getLogger(__name__)
21
20
  def log_info_dataframe_extra_column_names(
22
21
  df: pd.DataFrame,
23
22
  ) -> None:
23
+ """Logs informational message about columns not part of Open Inference Specification.
24
+
25
+ Args:
26
+ df: DataFrame to check for extra column names.
27
+
28
+ Returns:
29
+ None.
30
+ """
24
31
  min_col_set = [col.name for col in SPAN_OPENINFERENCE_COLUMNS]
25
32
  extra_col_names = [col for col in df.columns if col not in min_col_set]
26
33
  if extra_col_names:
@@ -31,7 +38,7 @@ def log_info_dataframe_extra_column_names(
31
38
  "extra_columns": extra_col_names,
32
39
  },
33
40
  )
34
- return None
41
+ return
35
42
 
36
43
 
37
44
  # TODO(Kiko): Performance improvements
@@ -42,7 +49,18 @@ def log_info_dataframe_extra_column_names(
42
49
  # https://github.com/pandas-dev/pandas/blob/f538741432edf55c6b9fb5d0d496d2dd1d7c2457/pandas/core/dtypes/common.py
43
50
  def check_dataframe_column_content_type(
44
51
  df: pd.DataFrame,
45
- ) -> List[InvalidDataFrameColumnContentTypes]:
52
+ ) -> list[InvalidDataFrameColumnContentTypes]:
53
+ """Validates that span DataFrame columns contain data types matching Open Inference Specification.
54
+
55
+ Checks that columns have appropriate data types: lists of dicts, dicts, numeric,
56
+ boolean, timestamp, JSON strings, or plain strings based on column specifications.
57
+
58
+ Args:
59
+ df: The DataFrame to validate.
60
+
61
+ Returns:
62
+ List of validation errors for columns with incorrect types.
63
+ """
46
64
  # We let this values be in the dataframe and don't use them to verify type
47
65
  # They will be serialized by arrow and understood as missing values
48
66
  wrong_lists_of_dicts_cols = []
@@ -57,7 +75,7 @@ def check_dataframe_column_content_type(
57
75
  continue
58
76
  if col.data_type == SpanColumnDataType.LIST_DICT:
59
77
  for row in df[col.name]:
60
- if not isinstance(row, Iterable) and isMissingValue(row):
78
+ if not isinstance(row, Iterable) and is_missing_value(row):
61
79
  continue
62
80
  if not (
63
81
  is_list_of(row, dict) or is_array_of(row, dict)
@@ -68,7 +86,10 @@ def check_dataframe_column_content_type(
68
86
  break
69
87
  elif col.data_type == SpanColumnDataType.DICT:
70
88
  if not all(
71
- (isMissingValue(row) or is_dict_of(row, key_allowed_types=str))
89
+ (
90
+ is_missing_value(row)
91
+ or is_dict_of(row, key_allowed_types=str)
92
+ )
72
93
  for row in df[col.name]
73
94
  ):
74
95
  wrong_dicts_cols.append(col.name)
@@ -82,7 +103,7 @@ def check_dataframe_column_content_type(
82
103
  # Accept strings and datetime objects, and int64
83
104
  if not all(
84
105
  (
85
- isMissingValue(row)
106
+ is_missing_value(row)
86
107
  or isinstance(row, (str, datetime, pd.Timestamp, int))
87
108
  )
88
109
  for row in df[col.name]
@@ -92,12 +113,12 @@ def check_dataframe_column_content_type(
92
113
  # We check the correctness of the JSON strings when we check the values
93
114
  # of the data in the dataframe
94
115
  if not all(
95
- (isMissingValue(row) or isinstance(row, str))
116
+ (is_missing_value(row) or isinstance(row, str))
96
117
  for row in df[col.name]
97
118
  ):
98
119
  wrong_JSON_cols.append(col.name)
99
120
  elif col.data_type == SpanColumnDataType.STRING and not all(
100
- (isMissingValue(row) or isinstance(row, str))
121
+ (is_missing_value(row) or isinstance(row, str))
101
122
  for row in df[col.name]
102
123
  ):
103
124
  wrong_string_cols.append(col.name)
@@ -1,7 +1,9 @@
1
+ """Span validation orchestration and coordination."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
5
  from itertools import chain
4
- from typing import TYPE_CHECKING, List
6
+ from typing import TYPE_CHECKING
5
7
 
6
8
  from arize.spans.columns import SPAN_OPENINFERENCE_REQUIRED_COLUMNS
7
9
  from arize.spans.validation.common import (
@@ -29,7 +31,18 @@ def validate_argument_types(
29
31
  project_name: str,
30
32
  dt_fmt: str,
31
33
  model_version: str | None = None,
32
- ) -> List[ValidationError]:
34
+ ) -> list[ValidationError]:
35
+ """Validate argument types for spans data submission.
36
+
37
+ Args:
38
+ spans_dataframe: The DataFrame containing spans data.
39
+ project_name: The project name to validate.
40
+ dt_fmt: The datetime format string to validate.
41
+ model_version: Optional model version to validate. Defaults to None.
42
+
43
+ Returns:
44
+ List of validation errors found in argument types.
45
+ """
33
46
  checks = chain(
34
47
  common_arg_validation.check_field_convertible_to_str(
35
48
  project_name, model_version
@@ -42,7 +55,15 @@ def validate_argument_types(
42
55
 
43
56
  def validate_dataframe_form(
44
57
  spans_dataframe: pd.DataFrame,
45
- ) -> List[ValidationError]:
58
+ ) -> list[ValidationError]:
59
+ """Validate the structure and form of a spans DataFrame.
60
+
61
+ Args:
62
+ spans_dataframe: The DataFrame containing spans data to validate.
63
+
64
+ Returns:
65
+ List of validation errors found in the DataFrame structure.
66
+ """
46
67
  df_validation.log_info_dataframe_extra_column_names(spans_dataframe)
47
68
  checks = chain(
48
69
  # Common
@@ -66,7 +87,17 @@ def validate_values(
66
87
  spans_dataframe: pd.DataFrame,
67
88
  project_name: str,
68
89
  model_version: str | None = None,
69
- ) -> List[ValidationError]:
90
+ ) -> list[ValidationError]:
91
+ """Validate the values within a spans DataFrame.
92
+
93
+ Args:
94
+ spans_dataframe: The DataFrame containing spans data to validate.
95
+ project_name: The project name associated with the spans.
96
+ model_version: Optional model version. Defaults to None.
97
+
98
+ Returns:
99
+ List of validation errors found in DataFrame values.
100
+ """
70
101
  checks = chain(
71
102
  # Common
72
103
  common_value_validation.check_invalid_project_name(project_name),
@@ -1,7 +1,9 @@
1
+ """Value validation logic for span data."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
5
  from itertools import chain
4
- from typing import TYPE_CHECKING, List
6
+ from typing import TYPE_CHECKING
5
7
 
6
8
  from arize.constants import spans as tracing_constants
7
9
  from arize.constants.ml import MAX_EMBEDDING_DIMENSIONALITY
@@ -23,7 +25,18 @@ if TYPE_CHECKING:
23
25
 
24
26
  def check_span_root_field_values(
25
27
  dataframe: pd.DataFrame,
26
- ) -> List[ValidationError]:
28
+ ) -> list[ValidationError]:
29
+ """Validates root-level span field values for proper format and constraints.
30
+
31
+ Checks span ID, trace ID, parent span ID, name, status code, status message,
32
+ timestamps, and events for conformance to specification limits.
33
+
34
+ Args:
35
+ dataframe: The DataFrame containing span data.
36
+
37
+ Returns:
38
+ List of validation errors found in root span fields.
39
+ """
27
40
  return list(
28
41
  chain(
29
42
  value_validation.check_string_column_value_length(
@@ -77,7 +90,18 @@ def check_span_root_field_values(
77
90
 
78
91
  def check_span_attributes_values(
79
92
  dataframe: pd.DataFrame,
80
- ) -> List[ValidationError]:
93
+ ) -> list[ValidationError]:
94
+ """Validates span attribute values for proper format and constraints.
95
+
96
+ Checks all span attributes including LLM parameters, embeddings, documents,
97
+ tools, and other metadata fields for conformance to specification limits.
98
+
99
+ Args:
100
+ dataframe: The DataFrame containing span data.
101
+
102
+ Returns:
103
+ List of validation errors found in span attributes.
104
+ """
81
105
  return list(
82
106
  chain(
83
107
  value_validation.check_string_column_value_length(
@@ -242,7 +266,17 @@ def check_span_attributes_values(
242
266
 
243
267
  def check_event_column_value(
244
268
  df: pd.DataFrame,
245
- ) -> List[InvalidEventValueInColumn]:
269
+ ) -> list[InvalidEventValueInColumn]:
270
+ """Validates span event column values for proper format and length constraints.
271
+
272
+ Checks event names for length limits and attributes for proper dictionary structure.
273
+
274
+ Args:
275
+ df: The DataFrame containing span events.
276
+
277
+ Returns:
278
+ List of validation errors found in event column values.
279
+ """
246
280
  col_name = tracing_cols.SPAN_EVENTS_COL.name
247
281
  if col_name not in df.columns:
248
282
  return []
@@ -284,7 +318,18 @@ def check_event_column_value(
284
318
 
285
319
  def check_embeddings_column_value(
286
320
  df: pd.DataFrame,
287
- ) -> List[InvalidEmbeddingValueInColumn]:
321
+ ) -> list[InvalidEmbeddingValueInColumn]:
322
+ """Validates embedding column values for proper vector dimensions and text length.
323
+
324
+ Checks that embedding vectors are within dimensionality limits and text
325
+ values don't exceed maximum length.
326
+
327
+ Args:
328
+ df: The DataFrame containing embedding data.
329
+
330
+ Returns:
331
+ List of validation errors found in embedding column values.
332
+ """
288
333
  col_name = tracing_cols.SPAN_ATTRIBUTES_EMBEDDING_EMBEDDINGS_COL.name
289
334
  if col_name not in df.columns:
290
335
  return []
@@ -332,7 +377,19 @@ def check_embeddings_column_value(
332
377
  def check_LLM_IO_messages_column_value(
333
378
  df: pd.DataFrame,
334
379
  col_name: str,
335
- ) -> List[InvalidLLMMessageValueInColumn]:
380
+ ) -> list[InvalidLLMMessageValueInColumn]:
381
+ """Validates LLM input/output message column values for proper format and length.
382
+
383
+ Checks message role, content, and tool calls for conformance to length limits
384
+ and proper JSON formatting.
385
+
386
+ Args:
387
+ df: The DataFrame containing LLM messages.
388
+ col_name: Name of the message column to validate.
389
+
390
+ Returns:
391
+ List of validation errors found in message column values.
392
+ """
336
393
  if col_name not in df.columns:
337
394
  return []
338
395
 
@@ -407,7 +464,19 @@ def check_LLM_IO_messages_column_value(
407
464
  def check_documents_column_value(
408
465
  df: pd.DataFrame,
409
466
  col_name: str,
410
- ) -> List[InvalidDocumentValueInColumn]:
467
+ ) -> list[InvalidDocumentValueInColumn]:
468
+ """Validates document column values for proper format and length constraints.
469
+
470
+ Checks document ID, content, and metadata for conformance to length limits
471
+ and proper data type requirements.
472
+
473
+ Args:
474
+ df: The DataFrame containing documents.
475
+ col_name: Name of the document column to validate.
476
+
477
+ Returns:
478
+ List of validation errors found in document column values.
479
+ """
411
480
  if col_name not in df.columns:
412
481
  return []
413
482