arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. arize/__init__.py +17 -9
  2. arize/_exporter/client.py +55 -36
  3. arize/_exporter/parsers/tracing_data_parser.py +41 -30
  4. arize/_exporter/validation.py +3 -3
  5. arize/_flight/client.py +208 -77
  6. arize/_generated/api_client/__init__.py +30 -6
  7. arize/_generated/api_client/api/__init__.py +1 -0
  8. arize/_generated/api_client/api/datasets_api.py +864 -190
  9. arize/_generated/api_client/api/experiments_api.py +167 -131
  10. arize/_generated/api_client/api/projects_api.py +1197 -0
  11. arize/_generated/api_client/api_client.py +2 -2
  12. arize/_generated/api_client/configuration.py +42 -34
  13. arize/_generated/api_client/exceptions.py +2 -2
  14. arize/_generated/api_client/models/__init__.py +15 -4
  15. arize/_generated/api_client/models/dataset.py +10 -10
  16. arize/_generated/api_client/models/dataset_example.py +111 -0
  17. arize/_generated/api_client/models/dataset_example_update.py +100 -0
  18. arize/_generated/api_client/models/dataset_version.py +13 -13
  19. arize/_generated/api_client/models/datasets_create_request.py +16 -8
  20. arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
  21. arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
  22. arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
  23. arize/_generated/api_client/models/datasets_list200_response.py +10 -4
  24. arize/_generated/api_client/models/experiment.py +14 -16
  25. arize/_generated/api_client/models/experiment_run.py +108 -0
  26. arize/_generated/api_client/models/experiment_run_create.py +102 -0
  27. arize/_generated/api_client/models/experiments_create_request.py +16 -10
  28. arize/_generated/api_client/models/experiments_list200_response.py +10 -4
  29. arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
  30. arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
  31. arize/_generated/api_client/models/primitive_value.py +172 -0
  32. arize/_generated/api_client/models/problem.py +100 -0
  33. arize/_generated/api_client/models/project.py +99 -0
  34. arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
  35. arize/_generated/api_client/models/projects_list200_response.py +106 -0
  36. arize/_generated/api_client/rest.py +2 -2
  37. arize/_generated/api_client/test/test_dataset.py +4 -2
  38. arize/_generated/api_client/test/test_dataset_example.py +56 -0
  39. arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
  40. arize/_generated/api_client/test/test_dataset_version.py +7 -2
  41. arize/_generated/api_client/test/test_datasets_api.py +27 -13
  42. arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
  43. arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
  44. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
  45. arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
  46. arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
  47. arize/_generated/api_client/test/test_experiment.py +2 -4
  48. arize/_generated/api_client/test/test_experiment_run.py +56 -0
  49. arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
  50. arize/_generated/api_client/test/test_experiments_api.py +6 -6
  51. arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
  52. arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
  53. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
  54. arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
  55. arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
  56. arize/_generated/api_client/test/test_problem.py +57 -0
  57. arize/_generated/api_client/test/test_project.py +58 -0
  58. arize/_generated/api_client/test/test_projects_api.py +59 -0
  59. arize/_generated/api_client/test/test_projects_create_request.py +54 -0
  60. arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
  61. arize/_generated/api_client_README.md +43 -29
  62. arize/_generated/protocol/flight/flight_pb2.py +400 -0
  63. arize/_lazy.py +27 -19
  64. arize/client.py +269 -55
  65. arize/config.py +365 -116
  66. arize/constants/__init__.py +1 -0
  67. arize/constants/config.py +11 -4
  68. arize/constants/ml.py +6 -4
  69. arize/constants/openinference.py +2 -0
  70. arize/constants/pyarrow.py +2 -0
  71. arize/constants/spans.py +3 -1
  72. arize/datasets/__init__.py +1 -0
  73. arize/datasets/client.py +299 -84
  74. arize/datasets/errors.py +32 -2
  75. arize/datasets/validation.py +18 -8
  76. arize/embeddings/__init__.py +2 -0
  77. arize/embeddings/auto_generator.py +23 -19
  78. arize/embeddings/base_generators.py +89 -36
  79. arize/embeddings/constants.py +2 -0
  80. arize/embeddings/cv_generators.py +26 -4
  81. arize/embeddings/errors.py +27 -5
  82. arize/embeddings/nlp_generators.py +31 -12
  83. arize/embeddings/tabular_generators.py +32 -20
  84. arize/embeddings/usecases.py +12 -2
  85. arize/exceptions/__init__.py +1 -0
  86. arize/exceptions/auth.py +11 -1
  87. arize/exceptions/base.py +29 -4
  88. arize/exceptions/models.py +21 -2
  89. arize/exceptions/parameters.py +31 -0
  90. arize/exceptions/spaces.py +12 -1
  91. arize/exceptions/types.py +86 -7
  92. arize/exceptions/values.py +220 -20
  93. arize/experiments/__init__.py +1 -0
  94. arize/experiments/client.py +390 -286
  95. arize/experiments/evaluators/__init__.py +1 -0
  96. arize/experiments/evaluators/base.py +74 -41
  97. arize/experiments/evaluators/exceptions.py +6 -3
  98. arize/experiments/evaluators/executors.py +121 -73
  99. arize/experiments/evaluators/rate_limiters.py +106 -57
  100. arize/experiments/evaluators/types.py +34 -7
  101. arize/experiments/evaluators/utils.py +65 -27
  102. arize/experiments/functions.py +103 -101
  103. arize/experiments/tracing.py +52 -44
  104. arize/experiments/types.py +56 -31
  105. arize/logging.py +54 -22
  106. arize/models/__init__.py +1 -0
  107. arize/models/batch_validation/__init__.py +1 -0
  108. arize/models/batch_validation/errors.py +543 -65
  109. arize/models/batch_validation/validator.py +339 -300
  110. arize/models/bounded_executor.py +20 -7
  111. arize/models/casting.py +75 -29
  112. arize/models/client.py +326 -107
  113. arize/models/proto.py +95 -40
  114. arize/models/stream_validation.py +42 -14
  115. arize/models/surrogate_explainer/__init__.py +1 -0
  116. arize/models/surrogate_explainer/mimic.py +24 -13
  117. arize/pre_releases.py +43 -0
  118. arize/projects/__init__.py +1 -0
  119. arize/projects/client.py +129 -0
  120. arize/regions.py +40 -0
  121. arize/spans/__init__.py +1 -0
  122. arize/spans/client.py +130 -106
  123. arize/spans/columns.py +13 -0
  124. arize/spans/conversion.py +54 -38
  125. arize/spans/validation/__init__.py +1 -0
  126. arize/spans/validation/annotations/__init__.py +1 -0
  127. arize/spans/validation/annotations/annotations_validation.py +6 -4
  128. arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
  129. arize/spans/validation/annotations/value_validation.py +35 -11
  130. arize/spans/validation/common/__init__.py +1 -0
  131. arize/spans/validation/common/argument_validation.py +33 -8
  132. arize/spans/validation/common/dataframe_form_validation.py +35 -9
  133. arize/spans/validation/common/errors.py +211 -11
  134. arize/spans/validation/common/value_validation.py +80 -13
  135. arize/spans/validation/evals/__init__.py +1 -0
  136. arize/spans/validation/evals/dataframe_form_validation.py +28 -8
  137. arize/spans/validation/evals/evals_validation.py +34 -4
  138. arize/spans/validation/evals/value_validation.py +26 -3
  139. arize/spans/validation/metadata/__init__.py +1 -1
  140. arize/spans/validation/metadata/argument_validation.py +14 -5
  141. arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
  142. arize/spans/validation/metadata/value_validation.py +24 -10
  143. arize/spans/validation/spans/__init__.py +1 -0
  144. arize/spans/validation/spans/dataframe_form_validation.py +34 -13
  145. arize/spans/validation/spans/spans_validation.py +35 -4
  146. arize/spans/validation/spans/value_validation.py +76 -7
  147. arize/types.py +293 -157
  148. arize/utils/__init__.py +1 -0
  149. arize/utils/arrow.py +31 -15
  150. arize/utils/cache.py +34 -6
  151. arize/utils/dataframe.py +19 -2
  152. arize/utils/online_tasks/__init__.py +2 -0
  153. arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
  154. arize/utils/openinference_conversion.py +44 -5
  155. arize/utils/proto.py +10 -0
  156. arize/utils/size.py +5 -3
  157. arize/version.py +3 -1
  158. {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
  159. arize-8.0.0a23.dist-info/RECORD +174 -0
  160. {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
  161. arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
  162. arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
  163. arize/_generated/protocol/flight/export_pb2.py +0 -61
  164. arize/_generated/protocol/flight/ingest_pb2.py +0 -365
  165. arize-8.0.0a21.dist-info/RECORD +0 -146
  166. arize-8.0.0a21.dist-info/licenses/LICENSE.md +0 -12
@@ -1,23 +1,35 @@
1
+ """Bounded thread pool executor with queue size limits."""
2
+
3
+ from collections.abc import Callable
1
4
  from concurrent.futures import ThreadPoolExecutor
2
5
  from threading import BoundedSemaphore
3
6
 
4
7
 
5
8
  class BoundedExecutor:
6
- """
7
- BoundedExecutor behaves as a ThreadPoolExecutor which will block on
8
- calls to submit() once the limit given as "bound" work items are queued for
9
- execution.
9
+ """BoundedExecutor behaves as a ThreadPoolExecutor which will block on calls to submit().
10
+
11
+ Blocks once the limit given as "bound" work items are queued for execution.
12
+
10
13
  :param bound: Integer - the maximum number of items in the work queue
11
14
  :param max_workers: Integer - the size of the thread pool
12
15
  """
13
16
 
14
- def __init__(self, bound, max_workers):
17
+ def __init__(self, bound: int, max_workers: int) -> None:
18
+ """Initialize the bounded executor.
19
+
20
+ Args:
21
+ bound: Maximum number of items in the work queue.
22
+ max_workers: Size of the thread pool.
23
+ """
15
24
  self.executor = ThreadPoolExecutor(max_workers=max_workers)
16
25
  self.semaphore = BoundedSemaphore(bound + max_workers)
17
26
 
18
27
  """See concurrent.futures.Executor#submit"""
19
28
 
20
- def submit(self, fn, *args, **kwargs):
29
+ def submit(
30
+ self, fn: Callable[..., object], *args: object, **kwargs: object
31
+ ) -> object:
32
+ """Submit a callable to be executed with bounded concurrency."""
21
33
  self.semaphore.acquire()
22
34
  try:
23
35
  future = self.executor.submit(fn, *args, **kwargs)
@@ -30,5 +42,6 @@ class BoundedExecutor:
30
42
 
31
43
  """See concurrent.futures.Executor#shutdown"""
32
44
 
33
- def shutdown(self, wait=True):
45
+ def shutdown(self, wait: bool = True) -> None:
46
+ """Shutdown the executor, optionally waiting for pending tasks to complete."""
34
47
  self.executor.shutdown(wait)
arize/models/casting.py CHANGED
@@ -1,8 +1,10 @@
1
+ """Type casting utilities for ML model data conversion."""
2
+
1
3
  # type: ignore[pb2]
2
4
  from __future__ import annotations
3
5
 
4
6
  import math
5
- from typing import TYPE_CHECKING, List, Tuple, Union
7
+ from typing import TYPE_CHECKING
6
8
 
7
9
  import numpy as np
8
10
 
@@ -14,14 +16,24 @@ if TYPE_CHECKING:
14
16
 
15
17
 
16
18
  class CastingError(Exception):
19
+ """Raised when type casting fails for a value."""
20
+
17
21
  def __str__(self) -> str:
22
+ """Return a human-readable error message."""
18
23
  return self.error_message()
19
24
 
20
25
  def __init__(self, error_msg: str, typed_value: TypedValue) -> None:
26
+ """Initialize the exception with type casting context.
27
+
28
+ Args:
29
+ error_msg: Description of the casting failure.
30
+ typed_value: The TypedValue that failed to cast.
31
+ """
21
32
  self.error_msg = error_msg
22
33
  self.typed_value = typed_value
23
34
 
24
35
  def error_message(self) -> str:
36
+ """Return the error message for this exception."""
25
37
  return (
26
38
  f"Failed to cast value {self.typed_value.value} of type {type(self.typed_value.value)} "
27
39
  f"to type {self.typed_value.type}. "
@@ -30,7 +42,10 @@ class CastingError(Exception):
30
42
 
31
43
 
32
44
  class ColumnCastingError(Exception):
45
+ """Raised when type casting fails for a column."""
46
+
33
47
  def __str__(self) -> str:
48
+ """Return a human-readable error message."""
34
49
  return self.error_message()
35
50
 
36
51
  def __init__(
@@ -39,11 +54,19 @@ class ColumnCastingError(Exception):
39
54
  attempted_columns: str,
40
55
  attempted_type: TypedColumns,
41
56
  ) -> None:
57
+ """Initialize the exception with column casting context.
58
+
59
+ Args:
60
+ error_msg: Description of the casting failure.
61
+ attempted_columns: Columns that failed to cast.
62
+ attempted_type: The TypedColumns type that was attempted.
63
+ """
42
64
  self.error_msg = error_msg
43
65
  self.attempted_casting_columns = attempted_columns
44
66
  self.attempted_casting_type = attempted_type
45
67
 
46
68
  def error_message(self) -> str:
69
+ """Return the error message for this exception."""
47
70
  return (
48
71
  f"Failed to cast to type {self.attempted_casting_type} "
49
72
  f"for columns: {log_a_list(self.attempted_casting_columns, 'and')}. "
@@ -52,36 +75,55 @@ class ColumnCastingError(Exception):
52
75
 
53
76
 
54
77
  class InvalidTypedColumnsError(Exception):
78
+ """Raised when typed columns are invalid or incorrectly specified."""
79
+
55
80
  def __str__(self) -> str:
81
+ """Return a human-readable error message."""
56
82
  return self.error_message()
57
83
 
58
84
  def __init__(self, field_name: str, reason: str) -> None:
85
+ """Initialize the exception with typed columns validation context.
86
+
87
+ Args:
88
+ field_name: Name of the schema field with invalid typed columns.
89
+ reason: Description of why the typed columns are invalid.
90
+ """
59
91
  self.field_name = field_name
60
92
  self.reason = reason
61
93
 
62
94
  def error_message(self) -> str:
95
+ """Return the error message for this exception."""
63
96
  return f"The {self.field_name} TypedColumns object {self.reason}."
64
97
 
65
98
 
66
99
  class InvalidSchemaFieldTypeError(Exception):
100
+ """Raised when schema field has invalid or unexpected type."""
101
+
67
102
  def __str__(self) -> str:
103
+ """Return a human-readable error message."""
68
104
  return self.error_message()
69
105
 
70
106
  def __init__(self, msg: str) -> None:
107
+ """Initialize the exception with schema field type error message.
108
+
109
+ Args:
110
+ msg: Error message describing the schema field type issue.
111
+ """
71
112
  self.msg = msg
72
113
 
73
114
  def error_message(self) -> str:
115
+ """Return the error message for this exception."""
74
116
  return self.msg
75
117
 
76
118
 
77
119
  def cast_typed_columns(
78
120
  dataframe: pd.DataFrame,
79
121
  schema: Schema,
80
- ) -> Tuple[pd.DataFrame, Schema]:
81
- """
82
- Cast feature and tag columns in the dataframe to the types specified in each TypedColumns config.
83
- This optional feature provides a simple way for users to prevent
84
- type drift within a column across many SDK uploads.
122
+ ) -> tuple[pd.DataFrame, Schema]:
123
+ """Cast feature and tag columns in the dataframe to the types specified in each TypedColumns config.
124
+
125
+ This optional feature provides a simple way for users to prevent type drift within
126
+ a column across many SDK uploads.
85
127
 
86
128
  Arguments:
87
129
  ---------
@@ -120,7 +162,7 @@ def cast_typed_columns(
120
162
  )
121
163
 
122
164
  # Make sure no other schema fields have this type.
123
- if any({f for f in typed_column_fields if f not in allowed_fields}):
165
+ if any(f for f in typed_column_fields if f not in allowed_fields):
124
166
  raise InvalidSchemaFieldTypeError(
125
167
  "Only the feature_column_names and tag_column_names Schema fields can be of type "
126
168
  "TypedColumns. Fields with type TypedColumns:"
@@ -130,10 +172,7 @@ def cast_typed_columns(
130
172
  for field_name in typed_column_fields:
131
173
  f = getattr(schema, field_name)
132
174
  if f:
133
- try:
134
- _validate_typed_columns(field_name, f)
135
- except InvalidTypedColumnsError:
136
- raise
175
+ _validate_typed_columns(field_name, f)
137
176
  dataframe = _cast_columns(dataframe, f)
138
177
 
139
178
  # Now that the dataframe values have been cast to the specified types:
@@ -144,6 +183,14 @@ def cast_typed_columns(
144
183
 
145
184
 
146
185
  def cast_dictionary(d: dict) -> dict:
186
+ """Cast TypedValue entries in a dictionary to their appropriate Python types.
187
+
188
+ Args:
189
+ d: Dictionary that may contain TypedValue objects as values.
190
+
191
+ Returns:
192
+ Dictionary with TypedValue objects cast to their native Python types.
193
+ """
147
194
  cast_dict = {}
148
195
  for k, v in d.items():
149
196
  if isinstance(v, TypedValue):
@@ -154,9 +201,8 @@ def cast_dictionary(d: dict) -> dict:
154
201
 
155
202
  def _cast_value(
156
203
  typed_value: TypedValue,
157
- ) -> Union[str, int, float, List[str], None]:
158
- """
159
- Casts a TypedValue to its provided type, preserving all null values as None or float('nan').
204
+ ) -> str | int | float | list[str] | None:
205
+ """Casts a TypedValue to its provided type, preserving all null values as None or float('nan').
160
206
 
161
207
  Arguments:
162
208
  ---------
@@ -179,22 +225,21 @@ def _cast_value(
179
225
 
180
226
  if typed_value.type == ArizeTypes.FLOAT:
181
227
  return _cast_to_float(typed_value)
182
- elif typed_value.type == ArizeTypes.INT:
228
+ if typed_value.type == ArizeTypes.INT:
183
229
  return _cast_to_int(typed_value)
184
- elif typed_value.type == ArizeTypes.STR:
230
+ if typed_value.type == ArizeTypes.STR:
185
231
  return _cast_to_str(typed_value)
186
- else:
187
- raise CastingError("Unknown casting type", typed_value)
232
+ raise CastingError("Unknown casting type", typed_value)
188
233
 
189
234
 
190
- def _cast_to_float(typed_value: TypedValue) -> Union[float, None]:
235
+ def _cast_to_float(typed_value: TypedValue) -> float | None:
191
236
  try:
192
237
  return float(typed_value.value)
193
238
  except Exception as e:
194
239
  raise CastingError(str(e), typed_value) from e
195
240
 
196
241
 
197
- def _cast_to_int(typed_value: TypedValue) -> Union[int, None]:
242
+ def _cast_to_int(typed_value: TypedValue) -> int | None:
198
243
  # a NaN float can't be cast to an int. Proactively return None instead.
199
244
  if isinstance(typed_value.value, float) and math.isnan(typed_value.value):
200
245
  return None
@@ -214,7 +259,7 @@ def _cast_to_int(typed_value: TypedValue) -> Union[int, None]:
214
259
  raise CastingError(str(e), typed_value) from e
215
260
 
216
261
 
217
- def _cast_to_str(typed_value: TypedValue) -> Union[str, None]:
262
+ def _cast_to_str(typed_value: TypedValue) -> str | None:
218
263
  # a NaN float can't be cast to a string. Proactively return None instead.
219
264
  if isinstance(typed_value.value, float) and math.isnan(typed_value.value):
220
265
  return None
@@ -227,8 +272,7 @@ def _cast_to_str(typed_value: TypedValue) -> Union[str, None]:
227
272
  def _validate_typed_columns(
228
273
  field_name: str, typed_columns: TypedColumns
229
274
  ) -> None:
230
- """
231
- Validate a TypedColumns object.
275
+ """Validate a TypedColumns object.
232
276
 
233
277
  Arguments:
234
278
  ---------
@@ -256,8 +300,8 @@ def _validate_typed_columns(
256
300
  def _cast_columns(
257
301
  dataframe: pd.DataFrame, columns: TypedColumns
258
302
  ) -> pd.DataFrame:
259
- """
260
- Cast columns corresponding to a single TypedColumns object and a single Arize Schema field.
303
+ """Cast columns corresponding to a single TypedColumns object and a single Arize Schema field.
304
+
261
305
  (feature_column_names or tag_column_names)
262
306
 
263
307
  Arguments:
@@ -324,9 +368,10 @@ def _cast_columns(
324
368
 
325
369
 
326
370
  def _cast_df(
327
- df: pd.DataFrame, cols: List[str], target_type_str: str
371
+ df: pd.DataFrame, cols: list[str], target_type_str: str
328
372
  ) -> pd.DataFrame:
329
- """
373
+ """Cast columns in a dataframe to the specified type.
374
+
330
375
  Arguments:
331
376
  ---------
332
377
  df: pd.DataFrame
@@ -351,13 +396,14 @@ def _cast_df(
351
396
  df = df.replace(nan_mapping)
352
397
 
353
398
  # None or NaN-based values (including np.nan) are automatically converted to pandas pd.NA type
354
- return df.astype({col: target_type_str for col in cols})
399
+ return df.astype(dict.fromkeys(cols, target_type_str))
355
400
 
356
401
 
357
402
  def _convert_schema_field_types(
358
403
  schema: Schema,
359
404
  ) -> Schema:
360
- """
405
+ """Convert schema field types from TypedColumns to List[string] format.
406
+
361
407
  Arguments:
362
408
  ---------
363
409
  schema: Schema