arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. arize/__init__.py +17 -9
  2. arize/_exporter/client.py +55 -36
  3. arize/_exporter/parsers/tracing_data_parser.py +41 -30
  4. arize/_exporter/validation.py +3 -3
  5. arize/_flight/client.py +208 -77
  6. arize/_generated/api_client/__init__.py +30 -6
  7. arize/_generated/api_client/api/__init__.py +1 -0
  8. arize/_generated/api_client/api/datasets_api.py +864 -190
  9. arize/_generated/api_client/api/experiments_api.py +167 -131
  10. arize/_generated/api_client/api/projects_api.py +1197 -0
  11. arize/_generated/api_client/api_client.py +2 -2
  12. arize/_generated/api_client/configuration.py +42 -34
  13. arize/_generated/api_client/exceptions.py +2 -2
  14. arize/_generated/api_client/models/__init__.py +15 -4
  15. arize/_generated/api_client/models/dataset.py +10 -10
  16. arize/_generated/api_client/models/dataset_example.py +111 -0
  17. arize/_generated/api_client/models/dataset_example_update.py +100 -0
  18. arize/_generated/api_client/models/dataset_version.py +13 -13
  19. arize/_generated/api_client/models/datasets_create_request.py +16 -8
  20. arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
  21. arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
  22. arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
  23. arize/_generated/api_client/models/datasets_list200_response.py +10 -4
  24. arize/_generated/api_client/models/experiment.py +14 -16
  25. arize/_generated/api_client/models/experiment_run.py +108 -0
  26. arize/_generated/api_client/models/experiment_run_create.py +102 -0
  27. arize/_generated/api_client/models/experiments_create_request.py +16 -10
  28. arize/_generated/api_client/models/experiments_list200_response.py +10 -4
  29. arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
  30. arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
  31. arize/_generated/api_client/models/primitive_value.py +172 -0
  32. arize/_generated/api_client/models/problem.py +100 -0
  33. arize/_generated/api_client/models/project.py +99 -0
  34. arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
  35. arize/_generated/api_client/models/projects_list200_response.py +106 -0
  36. arize/_generated/api_client/rest.py +2 -2
  37. arize/_generated/api_client/test/test_dataset.py +4 -2
  38. arize/_generated/api_client/test/test_dataset_example.py +56 -0
  39. arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
  40. arize/_generated/api_client/test/test_dataset_version.py +7 -2
  41. arize/_generated/api_client/test/test_datasets_api.py +27 -13
  42. arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
  43. arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
  44. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
  45. arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
  46. arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
  47. arize/_generated/api_client/test/test_experiment.py +2 -4
  48. arize/_generated/api_client/test/test_experiment_run.py +56 -0
  49. arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
  50. arize/_generated/api_client/test/test_experiments_api.py +6 -6
  51. arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
  52. arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
  53. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
  54. arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
  55. arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
  56. arize/_generated/api_client/test/test_problem.py +57 -0
  57. arize/_generated/api_client/test/test_project.py +58 -0
  58. arize/_generated/api_client/test/test_projects_api.py +59 -0
  59. arize/_generated/api_client/test/test_projects_create_request.py +54 -0
  60. arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
  61. arize/_generated/api_client_README.md +43 -29
  62. arize/_generated/protocol/flight/flight_pb2.py +400 -0
  63. arize/_lazy.py +27 -19
  64. arize/client.py +269 -55
  65. arize/config.py +365 -116
  66. arize/constants/__init__.py +1 -0
  67. arize/constants/config.py +11 -4
  68. arize/constants/ml.py +6 -4
  69. arize/constants/openinference.py +2 -0
  70. arize/constants/pyarrow.py +2 -0
  71. arize/constants/spans.py +3 -1
  72. arize/datasets/__init__.py +1 -0
  73. arize/datasets/client.py +299 -84
  74. arize/datasets/errors.py +32 -2
  75. arize/datasets/validation.py +18 -8
  76. arize/embeddings/__init__.py +2 -0
  77. arize/embeddings/auto_generator.py +23 -19
  78. arize/embeddings/base_generators.py +89 -36
  79. arize/embeddings/constants.py +2 -0
  80. arize/embeddings/cv_generators.py +26 -4
  81. arize/embeddings/errors.py +27 -5
  82. arize/embeddings/nlp_generators.py +31 -12
  83. arize/embeddings/tabular_generators.py +32 -20
  84. arize/embeddings/usecases.py +12 -2
  85. arize/exceptions/__init__.py +1 -0
  86. arize/exceptions/auth.py +11 -1
  87. arize/exceptions/base.py +29 -4
  88. arize/exceptions/models.py +21 -2
  89. arize/exceptions/parameters.py +31 -0
  90. arize/exceptions/spaces.py +12 -1
  91. arize/exceptions/types.py +86 -7
  92. arize/exceptions/values.py +220 -20
  93. arize/experiments/__init__.py +1 -0
  94. arize/experiments/client.py +390 -286
  95. arize/experiments/evaluators/__init__.py +1 -0
  96. arize/experiments/evaluators/base.py +74 -41
  97. arize/experiments/evaluators/exceptions.py +6 -3
  98. arize/experiments/evaluators/executors.py +121 -73
  99. arize/experiments/evaluators/rate_limiters.py +106 -57
  100. arize/experiments/evaluators/types.py +34 -7
  101. arize/experiments/evaluators/utils.py +65 -27
  102. arize/experiments/functions.py +103 -101
  103. arize/experiments/tracing.py +52 -44
  104. arize/experiments/types.py +56 -31
  105. arize/logging.py +54 -22
  106. arize/models/__init__.py +1 -0
  107. arize/models/batch_validation/__init__.py +1 -0
  108. arize/models/batch_validation/errors.py +543 -65
  109. arize/models/batch_validation/validator.py +339 -300
  110. arize/models/bounded_executor.py +20 -7
  111. arize/models/casting.py +75 -29
  112. arize/models/client.py +326 -107
  113. arize/models/proto.py +95 -40
  114. arize/models/stream_validation.py +42 -14
  115. arize/models/surrogate_explainer/__init__.py +1 -0
  116. arize/models/surrogate_explainer/mimic.py +24 -13
  117. arize/pre_releases.py +43 -0
  118. arize/projects/__init__.py +1 -0
  119. arize/projects/client.py +129 -0
  120. arize/regions.py +40 -0
  121. arize/spans/__init__.py +1 -0
  122. arize/spans/client.py +130 -106
  123. arize/spans/columns.py +13 -0
  124. arize/spans/conversion.py +54 -38
  125. arize/spans/validation/__init__.py +1 -0
  126. arize/spans/validation/annotations/__init__.py +1 -0
  127. arize/spans/validation/annotations/annotations_validation.py +6 -4
  128. arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
  129. arize/spans/validation/annotations/value_validation.py +35 -11
  130. arize/spans/validation/common/__init__.py +1 -0
  131. arize/spans/validation/common/argument_validation.py +33 -8
  132. arize/spans/validation/common/dataframe_form_validation.py +35 -9
  133. arize/spans/validation/common/errors.py +211 -11
  134. arize/spans/validation/common/value_validation.py +80 -13
  135. arize/spans/validation/evals/__init__.py +1 -0
  136. arize/spans/validation/evals/dataframe_form_validation.py +28 -8
  137. arize/spans/validation/evals/evals_validation.py +34 -4
  138. arize/spans/validation/evals/value_validation.py +26 -3
  139. arize/spans/validation/metadata/__init__.py +1 -1
  140. arize/spans/validation/metadata/argument_validation.py +14 -5
  141. arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
  142. arize/spans/validation/metadata/value_validation.py +24 -10
  143. arize/spans/validation/spans/__init__.py +1 -0
  144. arize/spans/validation/spans/dataframe_form_validation.py +34 -13
  145. arize/spans/validation/spans/spans_validation.py +35 -4
  146. arize/spans/validation/spans/value_validation.py +76 -7
  147. arize/types.py +293 -157
  148. arize/utils/__init__.py +1 -0
  149. arize/utils/arrow.py +31 -15
  150. arize/utils/cache.py +34 -6
  151. arize/utils/dataframe.py +19 -2
  152. arize/utils/online_tasks/__init__.py +2 -0
  153. arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
  154. arize/utils/openinference_conversion.py +44 -5
  155. arize/utils/proto.py +10 -0
  156. arize/utils/size.py +5 -3
  157. arize/version.py +3 -1
  158. {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
  159. arize-8.0.0a23.dist-info/RECORD +174 -0
  160. {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
  161. arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
  162. arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
  163. arize/_generated/protocol/flight/export_pb2.py +0 -61
  164. arize/_generated/protocol/flight/ingest_pb2.py +0 -365
  165. arize-8.0.0a21.dist-info/RECORD +0 -146
  166. arize-8.0.0a21.dist-info/licenses/LICENSE.md +0 -12
arize/spans/conversion.py CHANGED
@@ -1,6 +1,8 @@
1
+ """Span data conversion utilities for transforming and normalizing span data."""
2
+
1
3
  import json
2
- from datetime import datetime
3
- from typing import Any, Dict, Iterable, List
4
+ from collections.abc import Iterable
5
+ from datetime import datetime, timezone
4
6
 
5
7
  import numpy as np
6
8
  import pandas as pd
@@ -10,53 +12,62 @@ from arize.spans.columns import SPAN_OPENINFERENCE_COLUMNS, SpanColumnDataType
10
12
 
11
13
 
12
14
  def convert_timestamps(df: pd.DataFrame, fmt: str = "") -> pd.DataFrame:
15
+ """Convert timestamp columns in a DataFrame to nanoseconds.
16
+
17
+ Args:
18
+ df: The pandas DataFrame containing timestamp columns.
19
+ fmt: Optional datetime format string for parsing string timestamps. Defaults to "".
20
+
21
+ Returns:
22
+ The DataFrame with timestamp columns converted to nanoseconds.
23
+
24
+ Raises:
25
+ KeyError: If required timestamp column is not found in DataFrame.
26
+ """
13
27
  for col in SPAN_OPENINFERENCE_COLUMNS:
14
28
  if col.data_type != SpanColumnDataType.TIMESTAMP:
15
29
  continue
30
+ if col.name not in df.columns:
31
+ raise KeyError(f"Column '{col.name}' not found in DataFrame")
16
32
  df[col.name] = df[col.name].apply(lambda dt: _datetime_to_ns(dt, fmt))
17
33
  return df
18
34
 
19
35
 
20
36
  def _datetime_to_ns(dt: object, fmt: str) -> int:
21
37
  if isinstance(dt, str):
22
- try:
23
- ts = int(datetime.timestamp(datetime.strptime(dt, fmt)) * 1e9)
24
- except Exception as e:
25
- # logger.error(
26
- # f"Error parsing string '{dt}' to timestamp in nanoseconds "
27
- # f"using the format '{fmt}': {e}"
28
- # )
29
- raise e
30
- return ts
31
- elif isinstance(dt, datetime):
32
- try:
33
- ts = int(datetime.timestamp(dt) * 1e9)
34
- except Exception as e:
35
- # logger.error(
36
- # f"Error converting datetime object to nanoseconds: {e}"
37
- # )
38
- raise e
39
- return ts
40
- elif isinstance(dt, pd.Timestamp):
38
+ return int(
39
+ datetime.strptime(dt, fmt).replace(tzinfo=timezone.utc).timestamp()
40
+ * 1e9
41
+ )
42
+ if isinstance(dt, datetime):
43
+ return int(datetime.timestamp(dt) * 1e9)
44
+ if isinstance(dt, pd.Timestamp):
41
45
  return int(dt.value)
42
- elif isinstance(dt, pd.DatetimeIndex):
46
+ if isinstance(dt, pd.DatetimeIndex):
43
47
  # Only allow a single element; otherwise ambiguous for a scalar function
44
48
  if len(dt) != 1:
45
49
  raise TypeError(
46
50
  f"Expected a single timestamp in DatetimeIndex, got length={len(dt)}"
47
51
  )
48
52
  return int(dt.to_numpy(dtype="datetime64[ns]").astype("int64")[0])
49
- elif isinstance(dt, (int, float)):
53
+ if isinstance(dt, (int, float)):
50
54
  # Assume value already in nanoseconds,
51
55
  # validate timestamps in validate_values
52
56
  return int(dt)
53
- else:
54
- e = TypeError(f"Cannot convert type {type(dt)} to nanoseconds")
55
- # logger.error(f"Error converting pandas Timestamp to nanoseconds: {e}")
56
- raise e
57
+ e = TypeError(f"Cannot convert type {type(dt)} to nanoseconds")
58
+ # logger.error(f"Error converting pandas Timestamp to nanoseconds: {e}")
59
+ raise e
57
60
 
58
61
 
59
62
  def jsonify_dictionaries(df: pd.DataFrame) -> pd.DataFrame:
63
+ """Convert dictionary and list-of-dictionary columns to JSON strings.
64
+
65
+ Args:
66
+ df: The pandas DataFrame containing dictionary columns.
67
+
68
+ Returns:
69
+ The DataFrame with dictionary columns converted to JSON strings.
70
+ """
60
71
  # NOTE: numpy arrays are not json serializable. Hence, we assume the
61
72
  # embeddings come as lists, not arrays
62
73
  dict_cols = [
@@ -90,7 +101,15 @@ def jsonify_dictionaries(df: pd.DataFrame) -> pd.DataFrame:
90
101
 
91
102
 
92
103
  # Defines what is considered a missing value
93
- def isMissingValue(value: Any) -> bool:
104
+ def is_missing_value(value: object) -> bool:
105
+ """Check if a value should be considered missing or invalid.
106
+
107
+ Args:
108
+ value: The value to check.
109
+
110
+ Returns:
111
+ True if the value is missing (NaN, infinity, or pandas NA), False otherwise.
112
+ """
94
113
  assumed_missing_values = (
95
114
  np.inf,
96
115
  -np.inf,
@@ -99,22 +118,19 @@ def isMissingValue(value: Any) -> bool:
99
118
 
100
119
 
101
120
  def _jsonify_list_of_dicts(
102
- list_of_dicts: Iterable[Dict[str, Any]] | None,
103
- ) -> List[str]:
104
- if not isinstance(list_of_dicts, Iterable) and isMissingValue(
121
+ list_of_dicts: Iterable[dict[str, object]] | None,
122
+ ) -> list[str]:
123
+ if not isinstance(list_of_dicts, Iterable) and is_missing_value(
105
124
  list_of_dicts
106
125
  ):
107
126
  return []
108
- list_of_json = []
109
- for d in list_of_dicts:
110
- list_of_json.append(_jsonify_dict(d))
111
- return list_of_json
127
+ return [_jsonify_dict(d) for d in list_of_dicts]
112
128
 
113
129
 
114
- def _jsonify_dict(d: Dict[str, Any] | None) -> str | None:
130
+ def _jsonify_dict(d: dict[str, object] | None) -> str | None:
115
131
  if d is None:
116
- return
117
- if isMissingValue(d):
132
+ return None
133
+ if is_missing_value(d):
118
134
  return None
119
135
  d = d.copy() # avoid side effects
120
136
  for k, v in d.items():
@@ -0,0 +1 @@
1
+ """Validation utilities for LLM tracing spans data."""
@@ -0,0 +1 @@
1
+ """Annotation validation for LLM tracing spans."""
@@ -1,7 +1,9 @@
1
+ """Annotation validation orchestration for spans."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
5
  from itertools import chain
4
- from typing import TYPE_CHECKING, List
6
+ from typing import TYPE_CHECKING
5
7
 
6
8
  from arize.spans.columns import SPAN_SPAN_ID_COL
7
9
  from arize.spans.validation.annotations import (
@@ -27,7 +29,7 @@ if TYPE_CHECKING:
27
29
  def validate_argument_types(
28
30
  annotations_dataframe: pd.DataFrame,
29
31
  project_name: str,
30
- ) -> List[ValidationError]:
32
+ ) -> list[ValidationError]:
31
33
  """Validates argument types for log_annotations."""
32
34
  checks = chain(
33
35
  common_arg_validation.check_field_convertible_to_str(project_name),
@@ -40,7 +42,7 @@ def validate_argument_types(
40
42
 
41
43
  def validate_dataframe_form(
42
44
  annotations_dataframe: pd.DataFrame,
43
- ) -> List[ValidationError]:
45
+ ) -> list[ValidationError]:
44
46
  """Validates the form/structure of the annotation dataframe."""
45
47
  # Call annotation-specific function (to be created)
46
48
  df_validation.log_info_dataframe_extra_column_names(annotations_dataframe)
@@ -64,7 +66,7 @@ def validate_dataframe_form(
64
66
  def validate_values(
65
67
  annotations_dataframe: pd.DataFrame,
66
68
  project_name: str,
67
- ) -> List[ValidationError]:
69
+ ) -> list[ValidationError]:
68
70
  """Validates the values within the annotation dataframe."""
69
71
  checks = chain(
70
72
  # Common checks remain the same
@@ -1,8 +1,10 @@
1
+ """DataFrame form validation for span annotations."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
5
  import logging
4
6
  import re
5
- from typing import TYPE_CHECKING, List
7
+ from typing import TYPE_CHECKING
6
8
 
7
9
  import pandas as pd
8
10
 
@@ -19,7 +21,7 @@ from arize.spans.columns import (
19
21
  ANNOTATION_UPDATED_BY_SUFFIX,
20
22
  SPAN_SPAN_ID_COL,
21
23
  )
22
- from arize.spans.conversion import isMissingValue
24
+ from arize.spans.conversion import is_missing_value
23
25
  from arize.spans.validation.common.errors import (
24
26
  InvalidAnnotationColumnFormat,
25
27
  InvalidDataFrameColumnContentTypes,
@@ -36,7 +38,7 @@ def log_info_dataframe_extra_column_names(
36
38
  ) -> None:
37
39
  """Logs columns that don't match expected annotation or context patterns."""
38
40
  if df is None:
39
- return None
41
+ return
40
42
  # Check against annotation pattern, span id, and note column
41
43
  irrelevant_columns = [
42
44
  col
@@ -56,12 +58,12 @@ def log_info_dataframe_extra_column_names(
56
58
  "- annotation.<your-annotation-name>.score"
57
59
  f"An optional '{ANNOTATION_NOTES_COLUMN_NAME}' column can also be included."
58
60
  )
59
- return None
61
+ return
60
62
 
61
63
 
62
64
  def check_invalid_annotation_column_names(
63
65
  df: pd.DataFrame,
64
- ) -> List[ValidationError]:
66
+ ) -> list[ValidationError]:
65
67
  """Checks for columns that start with 'annotation.' but don't match the expected pattern."""
66
68
  errors = []
67
69
 
@@ -86,7 +88,7 @@ def check_invalid_annotation_column_names(
86
88
 
87
89
  def check_dataframe_column_content_type(
88
90
  df: pd.DataFrame,
89
- ) -> List[ValidationError]:
91
+ ) -> list[ValidationError]:
90
92
  """Checks that columns matching annotation patterns have the correct data types."""
91
93
  wrong_labels_cols = []
92
94
  wrong_scores_cols = []
@@ -128,14 +130,14 @@ def check_dataframe_column_content_type(
128
130
  # Check annotation label column type (string or missing)
129
131
  elif annotation_label_re.match(column):
130
132
  if not all(
131
- isinstance(value, str) or isMissingValue(value)
133
+ isinstance(value, str) or is_missing_value(value)
132
134
  for value in df[column]
133
135
  ):
134
136
  wrong_labels_cols.append(column)
135
137
  # Check annotation score column type (numeric or missing)
136
138
  elif annotation_score_re.match(column):
137
139
  if not all(
138
- isinstance(value, (int, float)) or isMissingValue(value)
140
+ isinstance(value, (int, float)) or is_missing_value(value)
139
141
  for value in df[column]
140
142
  ):
141
143
  wrong_scores_cols.append(column)
@@ -144,21 +146,21 @@ def check_dataframe_column_content_type(
144
146
  if not all(
145
147
  # Note: After formatting, this column holds list<string> (JSON), not just string.
146
148
  # We rely on later schema inference/validation. Keep basic check for now.
147
- isinstance(value, list) or isMissingValue(value)
149
+ isinstance(value, list) or is_missing_value(value)
148
150
  for value in df[column]
149
151
  ):
150
152
  wrong_notes_cols.append(column)
151
153
  # Check annotation updated_by column type (string or missing)
152
154
  elif annotation_updated_by_re.match(column):
153
155
  if not all(
154
- isinstance(value, str) or isMissingValue(value)
156
+ isinstance(value, str) or is_missing_value(value)
155
157
  for value in df[column]
156
158
  ):
157
159
  wrong_updated_by_cols.append(column)
158
160
  # Check annotation updated_at column type (numeric or missing)
159
161
  elif annotation_updated_at_re.match(column) and not all(
160
162
  # Allow int, float (e.g., Unix timestamp millis)
161
- isinstance(value, (int, float)) or isMissingValue(value)
163
+ isinstance(value, (int, float)) or is_missing_value(value)
162
164
  for value in df[column]
163
165
  ):
164
166
  wrong_updated_at_cols.append(column)
@@ -1,10 +1,12 @@
1
+ """Value validation logic for span annotation data."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
5
  import logging
4
6
  import re
5
- from datetime import datetime
7
+ from datetime import datetime, timezone
6
8
  from itertools import chain
7
- from typing import TYPE_CHECKING, List
9
+ from typing import TYPE_CHECKING
8
10
 
9
11
  from arize.constants.spans import (
10
12
  ANNOTATION_LABEL_MAX_STR_LENGTH,
@@ -41,33 +43,55 @@ logger = logging.getLogger(__name__)
41
43
 
42
44
 
43
45
  class InvalidAnnotationTimestamp(ValidationError):
46
+ """Raised when annotation timestamp is invalid or out of acceptable range."""
47
+
44
48
  def __repr__(self) -> str:
49
+ """Return a string representation for debugging and logging."""
45
50
  return "Invalid_Annotation_Timestamp"
46
51
 
47
52
  def __init__(self, timestamp_col_name: str, error_type: str) -> None:
53
+ """Initialize the exception with timestamp validation context.
54
+
55
+ Args:
56
+ timestamp_col_name: Name of the annotation timestamp column.
57
+ error_type: Type of timestamp error (e.g., 'future').
58
+ """
48
59
  self.timestamp_col_name = timestamp_col_name
49
60
  self.error_type = error_type
50
61
 
51
62
  def error_message(self) -> str:
63
+ """Return the error message for this exception."""
52
64
  if self.error_type == "future":
53
65
  return (
54
66
  f"At least one timestamp in the annotation column '{self.timestamp_col_name}' "
55
67
  f"is in the future. Annotation timestamps cannot be in the future."
56
68
  )
57
- elif self.error_type == "non_positive":
69
+ if self.error_type == "non_positive":
58
70
  return (
59
71
  f"At least one timestamp in the annotation column '{self.timestamp_col_name}' "
60
72
  f"is zero or negative. Annotation timestamps must be positive values."
61
73
  )
62
- else:
63
- return f"Invalid timestamp in annotation column '{self.timestamp_col_name}'."
74
+ return f"Invalid timestamp in annotation column '{self.timestamp_col_name}'."
64
75
 
65
76
 
66
77
  def check_annotation_updated_at_timestamp(
67
78
  df: pd.DataFrame,
68
79
  col_name: str,
69
80
  is_required: bool,
70
- ) -> List[InvalidMissingValueInColumn | InvalidAnnotationTimestamp]:
81
+ ) -> list[InvalidMissingValueInColumn | InvalidAnnotationTimestamp]:
82
+ """Validates annotation timestamp values for validity and acceptable ranges.
83
+
84
+ Checks that timestamp values are positive, not in the future, and satisfy
85
+ required constraints if specified.
86
+
87
+ Args:
88
+ df: DataFrame containing the annotation timestamp column.
89
+ col_name: Name of the timestamp column to validate.
90
+ is_required: Whether the column must have non-null values in all rows.
91
+
92
+ Returns:
93
+ List of validation errors found (empty if valid).
94
+ """
71
95
  # This check expects that timestamps have previously been converted to milliseconds
72
96
  if col_name not in df.columns:
73
97
  return []
@@ -83,7 +107,7 @@ def check_annotation_updated_at_timestamp(
83
107
  if df[col_name].isnull().all():
84
108
  return errors
85
109
 
86
- now_ms = datetime.now().timestamp() * 1000
110
+ now_ms = datetime.now(tz=timezone.utc).timestamp() * 1000
87
111
 
88
112
  if df[col_name].max() > now_ms:
89
113
  logger.warning(f"Detected future timestamp in column '{col_name}'.")
@@ -105,7 +129,7 @@ def check_annotation_updated_at_timestamp(
105
129
 
106
130
  def check_annotation_cols(
107
131
  dataframe: pd.DataFrame,
108
- ) -> List[ValidationError]:
132
+ ) -> list[ValidationError]:
109
133
  """Checks value length and validity for columns matching annotation patterns."""
110
134
  checks = []
111
135
  for col in dataframe.columns:
@@ -150,7 +174,7 @@ def check_annotation_cols(
150
174
 
151
175
  def check_annotation_columns_null_values(
152
176
  dataframe: pd.DataFrame,
153
- ) -> List[ValidationError]:
177
+ ) -> list[ValidationError]:
154
178
  """Checks that for a given annotation name, at least one of label or score is non-null per row."""
155
179
  invalid_annotation_names = []
156
180
  annotation_names = set()
@@ -190,7 +214,7 @@ def check_annotation_columns_null_values(
190
214
  invalid_annotation_names.append(ann_name)
191
215
 
192
216
  # Use set to report each name only once
193
- unique_invalid_names = sorted(list(set(invalid_annotation_names)))
217
+ unique_invalid_names = sorted(set(invalid_annotation_names))
194
218
  if unique_invalid_names:
195
219
  return [
196
220
  InvalidNullAnnotationLabelAndScore(
@@ -202,7 +226,7 @@ def check_annotation_columns_null_values(
202
226
 
203
227
  def check_annotation_notes_column(
204
228
  dataframe: pd.DataFrame,
205
- ) -> List[ValidationError]:
229
+ ) -> list[ValidationError]:
206
230
  """Checks the value length for the optional annotation.notes column (raw string)."""
207
231
  col_name = ANNOTATION_NOTES_COLUMN_NAME
208
232
  if col_name in dataframe.columns:
@@ -0,0 +1 @@
1
+ """Common validation utilities shared across spans validation."""
@@ -1,4 +1,4 @@
1
- from typing import Any, List
1
+ """Common argument validation utilities for spans."""
2
2
 
3
3
  import pandas as pd
4
4
 
@@ -7,9 +7,18 @@ from arize.spans.validation.common.errors import InvalidTypeArgument
7
7
 
8
8
 
9
9
  def check_field_convertible_to_str(
10
- project_name: Any,
11
- model_version: Any = None,
12
- ) -> List[InvalidFieldTypeConversion]:
10
+ project_name: object,
11
+ model_version: object = None,
12
+ ) -> list[InvalidFieldTypeConversion]:
13
+ """Validates that field arguments can be converted to strings.
14
+
15
+ Args:
16
+ project_name: The project name value to validate for string conversion.
17
+ model_version: Optional model version value to validate for string conversion.
18
+
19
+ Returns:
20
+ List of validation errors for fields that cannot be converted to strings.
21
+ """
13
22
  wrong_fields = []
14
23
  if project_name is not None and not isinstance(project_name, str):
15
24
  try:
@@ -28,8 +37,16 @@ def check_field_convertible_to_str(
28
37
 
29
38
 
30
39
  def check_dataframe_type(
31
- dataframe,
32
- ) -> List[InvalidTypeArgument]:
40
+ dataframe: object,
41
+ ) -> list[InvalidTypeArgument]:
42
+ """Validates that the provided argument is a pandas DataFrame.
43
+
44
+ Args:
45
+ dataframe: The object to validate as a pandas DataFrame.
46
+
47
+ Returns:
48
+ List of validation errors if not a DataFrame (empty if valid).
49
+ """
33
50
  if not isinstance(dataframe, pd.DataFrame):
34
51
  return [
35
52
  InvalidTypeArgument(
@@ -42,8 +59,16 @@ def check_dataframe_type(
42
59
 
43
60
 
44
61
  def check_datetime_format_type(
45
- dt_fmt: Any,
46
- ) -> List[InvalidTypeArgument]:
62
+ dt_fmt: object,
63
+ ) -> list[InvalidTypeArgument]:
64
+ """Validates that the datetime format argument is a string.
65
+
66
+ Args:
67
+ dt_fmt: The datetime format value to validate.
68
+
69
+ Returns:
70
+ List of validation errors if not a string (empty if valid).
71
+ """
47
72
  if not isinstance(dt_fmt, str):
48
73
  return [
49
74
  InvalidTypeArgument(
@@ -1,6 +1,8 @@
1
+ """Common DataFrame form validation for spans."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
- from typing import TYPE_CHECKING, List
5
+ from typing import TYPE_CHECKING
4
6
 
5
7
  from arize.exceptions.base import InvalidDataFrameIndex
6
8
  from arize.spans.validation.common.errors import (
@@ -14,7 +16,15 @@ if TYPE_CHECKING:
14
16
 
15
17
  def check_dataframe_index(
16
18
  dataframe: pd.DataFrame,
17
- ) -> List[InvalidDataFrameIndex]:
19
+ ) -> list[InvalidDataFrameIndex]:
20
+ """Validates that the DataFrame has a default integer index.
21
+
22
+ Args:
23
+ dataframe: The DataFrame to validate.
24
+
25
+ Returns:
26
+ List of validation errors if index is not default (empty if valid).
27
+ """
18
28
  if (dataframe.index != dataframe.reset_index(drop=True).index).any():
19
29
  return [InvalidDataFrameIndex()]
20
30
  return []
@@ -22,13 +32,21 @@ def check_dataframe_index(
22
32
 
23
33
  def check_dataframe_required_column_set(
24
34
  df: pd.DataFrame,
25
- required_columns: List[str],
26
- ) -> List[InvalidDataFrameMissingColumns]:
35
+ required_columns: list[str],
36
+ ) -> list[InvalidDataFrameMissingColumns]:
37
+ """Validates that the DataFrame contains all required columns.
38
+
39
+ Args:
40
+ df: The DataFrame to validate.
41
+ required_columns: List of column names that must be present.
42
+
43
+ Returns:
44
+ List of validation errors for missing columns (empty if valid).
45
+ """
27
46
  existing_columns = set(df.columns)
28
- missing_cols = []
29
- for col in required_columns:
30
- if col not in existing_columns:
31
- missing_cols.append(col)
47
+ missing_cols = [
48
+ col for col in required_columns if col not in existing_columns
49
+ ]
32
50
 
33
51
  if missing_cols:
34
52
  return [InvalidDataFrameMissingColumns(missing_cols=missing_cols)]
@@ -37,7 +55,15 @@ def check_dataframe_required_column_set(
37
55
 
38
56
  def check_dataframe_for_duplicate_columns(
39
57
  df: pd.DataFrame,
40
- ) -> List[InvalidDataFrameDuplicateColumns]:
58
+ ) -> list[InvalidDataFrameDuplicateColumns]:
59
+ """Validates that the DataFrame has no duplicate column names.
60
+
61
+ Args:
62
+ df: The DataFrame to validate.
63
+
64
+ Returns:
65
+ List of validation errors if duplicate columns exist (empty if valid).
66
+ """
41
67
  # Get the duplicated column names from the dataframe
42
68
  duplicate_columns = df.columns[df.columns.duplicated()]
43
69
  if not duplicate_columns.empty: