arize 8.0.0a22__py3-none-any.whl → 8.0.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. arize/__init__.py +28 -19
  2. arize/_exporter/client.py +56 -37
  3. arize/_exporter/parsers/tracing_data_parser.py +41 -30
  4. arize/_exporter/validation.py +3 -3
  5. arize/_flight/client.py +207 -76
  6. arize/_generated/api_client/__init__.py +30 -6
  7. arize/_generated/api_client/api/__init__.py +1 -0
  8. arize/_generated/api_client/api/datasets_api.py +864 -190
  9. arize/_generated/api_client/api/experiments_api.py +167 -131
  10. arize/_generated/api_client/api/projects_api.py +1197 -0
  11. arize/_generated/api_client/api_client.py +2 -2
  12. arize/_generated/api_client/configuration.py +42 -34
  13. arize/_generated/api_client/exceptions.py +2 -2
  14. arize/_generated/api_client/models/__init__.py +15 -4
  15. arize/_generated/api_client/models/dataset.py +10 -10
  16. arize/_generated/api_client/models/dataset_example.py +111 -0
  17. arize/_generated/api_client/models/dataset_example_update.py +100 -0
  18. arize/_generated/api_client/models/dataset_version.py +13 -13
  19. arize/_generated/api_client/models/datasets_create_request.py +16 -8
  20. arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
  21. arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
  22. arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
  23. arize/_generated/api_client/models/datasets_list200_response.py +10 -4
  24. arize/_generated/api_client/models/experiment.py +14 -16
  25. arize/_generated/api_client/models/experiment_run.py +108 -0
  26. arize/_generated/api_client/models/experiment_run_create.py +102 -0
  27. arize/_generated/api_client/models/experiments_create_request.py +16 -10
  28. arize/_generated/api_client/models/experiments_list200_response.py +10 -4
  29. arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
  30. arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
  31. arize/_generated/api_client/models/primitive_value.py +172 -0
  32. arize/_generated/api_client/models/problem.py +100 -0
  33. arize/_generated/api_client/models/project.py +99 -0
  34. arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
  35. arize/_generated/api_client/models/projects_list200_response.py +106 -0
  36. arize/_generated/api_client/rest.py +2 -2
  37. arize/_generated/api_client/test/test_dataset.py +4 -2
  38. arize/_generated/api_client/test/test_dataset_example.py +56 -0
  39. arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
  40. arize/_generated/api_client/test/test_dataset_version.py +7 -2
  41. arize/_generated/api_client/test/test_datasets_api.py +27 -13
  42. arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
  43. arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
  44. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
  45. arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
  46. arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
  47. arize/_generated/api_client/test/test_experiment.py +2 -4
  48. arize/_generated/api_client/test/test_experiment_run.py +56 -0
  49. arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
  50. arize/_generated/api_client/test/test_experiments_api.py +6 -6
  51. arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
  52. arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
  53. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
  54. arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
  55. arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
  56. arize/_generated/api_client/test/test_problem.py +57 -0
  57. arize/_generated/api_client/test/test_project.py +58 -0
  58. arize/_generated/api_client/test/test_projects_api.py +59 -0
  59. arize/_generated/api_client/test/test_projects_create_request.py +54 -0
  60. arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
  61. arize/_generated/api_client_README.md +43 -29
  62. arize/_generated/protocol/flight/flight_pb2.py +400 -0
  63. arize/_lazy.py +27 -19
  64. arize/client.py +181 -58
  65. arize/config.py +324 -116
  66. arize/constants/__init__.py +1 -0
  67. arize/constants/config.py +11 -4
  68. arize/constants/ml.py +6 -4
  69. arize/constants/openinference.py +2 -0
  70. arize/constants/pyarrow.py +2 -0
  71. arize/constants/spans.py +3 -1
  72. arize/datasets/__init__.py +1 -0
  73. arize/datasets/client.py +304 -84
  74. arize/datasets/errors.py +32 -2
  75. arize/datasets/validation.py +18 -8
  76. arize/embeddings/__init__.py +2 -0
  77. arize/embeddings/auto_generator.py +23 -19
  78. arize/embeddings/base_generators.py +89 -36
  79. arize/embeddings/constants.py +2 -0
  80. arize/embeddings/cv_generators.py +26 -4
  81. arize/embeddings/errors.py +27 -5
  82. arize/embeddings/nlp_generators.py +43 -18
  83. arize/embeddings/tabular_generators.py +46 -31
  84. arize/embeddings/usecases.py +12 -2
  85. arize/exceptions/__init__.py +1 -0
  86. arize/exceptions/auth.py +11 -1
  87. arize/exceptions/base.py +29 -4
  88. arize/exceptions/models.py +21 -2
  89. arize/exceptions/parameters.py +31 -0
  90. arize/exceptions/spaces.py +12 -1
  91. arize/exceptions/types.py +86 -7
  92. arize/exceptions/values.py +220 -20
  93. arize/experiments/__init__.py +13 -0
  94. arize/experiments/client.py +394 -285
  95. arize/experiments/evaluators/__init__.py +1 -0
  96. arize/experiments/evaluators/base.py +74 -41
  97. arize/experiments/evaluators/exceptions.py +6 -3
  98. arize/experiments/evaluators/executors.py +121 -73
  99. arize/experiments/evaluators/rate_limiters.py +106 -57
  100. arize/experiments/evaluators/types.py +34 -7
  101. arize/experiments/evaluators/utils.py +65 -27
  102. arize/experiments/functions.py +103 -101
  103. arize/experiments/tracing.py +52 -44
  104. arize/experiments/types.py +56 -31
  105. arize/logging.py +54 -22
  106. arize/ml/__init__.py +1 -0
  107. arize/ml/batch_validation/__init__.py +1 -0
  108. arize/{models → ml}/batch_validation/errors.py +545 -67
  109. arize/{models → ml}/batch_validation/validator.py +344 -303
  110. arize/ml/bounded_executor.py +47 -0
  111. arize/{models → ml}/casting.py +118 -108
  112. arize/{models → ml}/client.py +339 -118
  113. arize/{models → ml}/proto.py +97 -42
  114. arize/{models → ml}/stream_validation.py +43 -15
  115. arize/ml/surrogate_explainer/__init__.py +1 -0
  116. arize/{models → ml}/surrogate_explainer/mimic.py +25 -10
  117. arize/{types.py → ml/types.py} +355 -354
  118. arize/pre_releases.py +44 -0
  119. arize/projects/__init__.py +1 -0
  120. arize/projects/client.py +134 -0
  121. arize/regions.py +40 -0
  122. arize/spans/__init__.py +1 -0
  123. arize/spans/client.py +204 -175
  124. arize/spans/columns.py +13 -0
  125. arize/spans/conversion.py +60 -37
  126. arize/spans/validation/__init__.py +1 -0
  127. arize/spans/validation/annotations/__init__.py +1 -0
  128. arize/spans/validation/annotations/annotations_validation.py +6 -4
  129. arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
  130. arize/spans/validation/annotations/value_validation.py +35 -11
  131. arize/spans/validation/common/__init__.py +1 -0
  132. arize/spans/validation/common/argument_validation.py +33 -8
  133. arize/spans/validation/common/dataframe_form_validation.py +35 -9
  134. arize/spans/validation/common/errors.py +211 -11
  135. arize/spans/validation/common/value_validation.py +81 -14
  136. arize/spans/validation/evals/__init__.py +1 -0
  137. arize/spans/validation/evals/dataframe_form_validation.py +28 -8
  138. arize/spans/validation/evals/evals_validation.py +34 -4
  139. arize/spans/validation/evals/value_validation.py +26 -3
  140. arize/spans/validation/metadata/__init__.py +1 -1
  141. arize/spans/validation/metadata/argument_validation.py +14 -5
  142. arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
  143. arize/spans/validation/metadata/value_validation.py +24 -10
  144. arize/spans/validation/spans/__init__.py +1 -0
  145. arize/spans/validation/spans/dataframe_form_validation.py +35 -14
  146. arize/spans/validation/spans/spans_validation.py +35 -4
  147. arize/spans/validation/spans/value_validation.py +78 -8
  148. arize/utils/__init__.py +1 -0
  149. arize/utils/arrow.py +31 -15
  150. arize/utils/cache.py +34 -6
  151. arize/utils/dataframe.py +20 -3
  152. arize/utils/online_tasks/__init__.py +2 -0
  153. arize/utils/online_tasks/dataframe_preprocessor.py +58 -47
  154. arize/utils/openinference_conversion.py +44 -5
  155. arize/utils/proto.py +10 -0
  156. arize/utils/size.py +5 -3
  157. arize/utils/types.py +105 -0
  158. arize/version.py +3 -1
  159. {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/METADATA +13 -6
  160. arize-8.0.0b0.dist-info/RECORD +175 -0
  161. {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/WHEEL +1 -1
  162. arize-8.0.0b0.dist-info/licenses/LICENSE +176 -0
  163. arize-8.0.0b0.dist-info/licenses/NOTICE +13 -0
  164. arize/_generated/protocol/flight/export_pb2.py +0 -61
  165. arize/_generated/protocol/flight/ingest_pb2.py +0 -365
  166. arize/models/__init__.py +0 -0
  167. arize/models/batch_validation/__init__.py +0 -0
  168. arize/models/bounded_executor.py +0 -34
  169. arize/models/surrogate_explainer/__init__.py +0 -0
  170. arize-8.0.0a22.dist-info/RECORD +0 -146
  171. arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12
@@ -0,0 +1 @@
1
+ """Evaluator implementations for experiment evaluation in the Arize SDK."""
@@ -1,10 +1,13 @@
1
+ """Base evaluator classes for experiment evaluation."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
5
  import functools
4
6
  import inspect
5
7
  from abc import ABC
8
+ from collections.abc import Awaitable, Callable, Mapping, Sequence
6
9
  from types import MappingProxyType
7
- from typing import Any, Awaitable, Callable, Mapping, Sequence
10
+ from typing import TYPE_CHECKING
8
11
 
9
12
  from arize.experiments.evaluators.types import (
10
13
  AnnotatorKind,
@@ -14,17 +17,19 @@ from arize.experiments.evaluators.types import (
14
17
  EvaluatorOutput,
15
18
  JSONSerializable,
16
19
  )
17
- from arize.experiments.types import (
18
- ExampleInput,
19
- ExampleMetadata,
20
- ExampleOutput,
21
- TaskOutput,
22
- )
20
+
21
+ if TYPE_CHECKING:
22
+ from arize.experiments.types import (
23
+ ExampleInput,
24
+ ExampleMetadata,
25
+ ExampleOutput,
26
+ TaskOutput,
27
+ )
23
28
 
24
29
 
25
30
  class Evaluator(ABC):
26
- """
27
- A helper super class to guide the implementation of an `Evaluator` object.
31
+ """A helper super class to guide the implementation of an `Evaluator` object.
32
+
28
33
  Subclasses must implement either the `evaluate` or `async_evaluate` method.
29
34
  Implementing both methods is recommended, but not required.
30
35
 
@@ -36,17 +41,20 @@ class Evaluator(ABC):
36
41
 
37
42
  @functools.cached_property
38
43
  def name(self) -> EvaluatorName:
44
+ """Return the name of this evaluator."""
39
45
  if hasattr(self, "_name"):
40
46
  return self._name
41
47
  return self.__class__.__name__
42
48
 
43
49
  @functools.cached_property
44
50
  def kind(self) -> EvaluatorKind:
51
+ """Return the kind of this evaluator (CODE or LLM)."""
45
52
  if hasattr(self, "_kind"):
46
53
  return self._kind
47
54
  return AnnotatorKind.CODE.value
48
55
 
49
- def __new__(cls, *args: Any, **kwargs: Any) -> Evaluator:
56
+ def __new__(cls, *args: object, **kwargs: object) -> Evaluator:
57
+ """Create a new evaluator instance, preventing direct instantiation of abstract class."""
50
58
  if cls is Evaluator:
51
59
  raise TypeError(
52
60
  f"{cls.__name__} is an abstract class and should not be instantiated."
@@ -62,20 +70,23 @@ class Evaluator(ABC):
62
70
  experiment_output: TaskOutput | None = None,
63
71
  dataset_output: ExampleOutput = MappingProxyType({}),
64
72
  metadata: ExampleMetadata = MappingProxyType({}),
65
- **kwargs: Any,
73
+ **kwargs: object,
66
74
  ) -> EvaluationResult:
67
- """
68
- Evaluate the given inputs and produce an evaluation result.
75
+ """Evaluate the given inputs and produce an evaluation result.
76
+
69
77
  This method should be implemented by subclasses to perform the actual
70
78
  evaluation logic. It is recommended to implement both this synchronous
71
79
  method and the asynchronous `async_evaluate` method, but it is not required.
80
+
72
81
  Args:
73
- output (Optional[TaskOutput]): The output produced by the task.
74
- expected (Optional[ExampleOutput]): The expected output for comparison.
75
82
  dataset_row (Optional[Mapping[str, JSONSerializable]]): A row from the dataset.
76
- metadata (ExampleMetadata): Metadata associated with the example.
77
83
  input (ExampleInput): The input provided for evaluation.
84
+ output (Optional[TaskOutput]): The output produced by the task.
85
+ experiment_output (Optional[TaskOutput]): The experiment output for comparison.
86
+ dataset_output (ExampleOutput): The expected output from the dataset.
87
+ metadata (ExampleMetadata): Metadata associated with the example.
78
88
  **kwargs (Any): Additional keyword arguments.
89
+
79
90
  Raises:
80
91
  NotImplementedError: If the method is not implemented by the subclass.
81
92
  """
@@ -92,22 +103,26 @@ class Evaluator(ABC):
92
103
  experiment_output: TaskOutput | None = None,
93
104
  dataset_output: ExampleOutput = MappingProxyType({}),
94
105
  metadata: ExampleMetadata = MappingProxyType({}),
95
- **kwargs: Any,
106
+ **kwargs: object,
96
107
  ) -> EvaluationResult:
97
- """
98
- Asynchronously evaluate the given inputs and produce an evaluation result.
108
+ """Asynchronously evaluate the given inputs and produce an evaluation result.
109
+
99
110
  This method should be implemented by subclasses to perform the actual
100
111
  evaluation logic. It is recommended to implement both this asynchronous
101
112
  method and the synchronous `evaluate` method, but it is not required.
113
+
102
114
  Args:
103
- output (Optional[TaskOutput]): The output produced by the task.
104
- expected (Optional[ExampleOutput]): The expected output for comparison.
105
115
  dataset_row (Optional[Mapping[str, JSONSerializable]]): A row from the dataset.
106
- metadata (ExampleMetadata): Metadata associated with the example.
107
116
  input (ExampleInput): The input provided for evaluation.
117
+ output (Optional[TaskOutput]): The output produced by the task.
118
+ experiment_output (Optional[TaskOutput]): The experiment output for comparison.
119
+ dataset_output (ExampleOutput): The expected output from the dataset.
120
+ metadata (ExampleMetadata): Metadata associated with the example.
108
121
  **kwargs (Any): Additional keyword arguments.
122
+
109
123
  Returns:
110
124
  EvaluationResult: The result of the evaluation.
125
+
111
126
  Raises:
112
127
  NotImplementedError: If the method is not implemented by the subclass.
113
128
  """
@@ -124,8 +139,14 @@ class Evaluator(ABC):
124
139
  )
125
140
 
126
141
  def __init_subclass__(
127
- cls, is_abstract: bool = False, **kwargs: Any
142
+ cls, is_abstract: bool = False, **kwargs: object
128
143
  ) -> None:
144
+ """Validate subclass implementation when inherited.
145
+
146
+ Args:
147
+ is_abstract: Whether the subclass is abstract and should skip validation.
148
+ **kwargs: Additional keyword arguments for parent class.
149
+ """
129
150
  super().__init_subclass__(**kwargs)
130
151
  if is_abstract:
131
152
  return
@@ -136,9 +157,10 @@ class Evaluator(ABC):
136
157
  if evaluate := super_cls.__dict__.get(Evaluator.evaluate.__name__):
137
158
  if isinstance(evaluate, classmethod):
138
159
  evaluate = evaluate.__func__
139
- assert callable(evaluate), (
140
- "`evaluate()` method should be callable"
141
- )
160
+ if not callable(evaluate):
161
+ raise TypeError(
162
+ f"`evaluate()` method should be callable, got {type(evaluate)}"
163
+ )
142
164
  # need to remove the first param, i.e. `self`
143
165
  _validate_sig(functools.partial(evaluate, None), "evaluate")
144
166
  return
@@ -147,9 +169,10 @@ class Evaluator(ABC):
147
169
  ):
148
170
  if isinstance(async_evaluate, classmethod):
149
171
  async_evaluate = async_evaluate.__func__
150
- assert callable(async_evaluate), (
151
- "`async_evaluate()` method should be callable"
152
- )
172
+ if not callable(async_evaluate):
173
+ raise TypeError(
174
+ f"`async_evaluate()` method should be callable, got {type(async_evaluate)}"
175
+ )
153
176
  # need to remove the first param, i.e. `self`
154
177
  _validate_sig(
155
178
  functools.partial(async_evaluate, None), "async_evaluate"
@@ -162,7 +185,7 @@ class Evaluator(ABC):
162
185
  )
163
186
 
164
187
 
165
- def _validate_sig(fn: Callable[..., Any], fn_name: str) -> None:
188
+ def _validate_sig(fn: Callable[..., object], fn_name: str) -> None:
166
189
  sig = inspect.signature(fn)
167
190
  validate_evaluator_signature(sig)
168
191
  for param in sig.parameters.values():
@@ -175,6 +198,14 @@ def _validate_sig(fn: Callable[..., Any], fn_name: str) -> None:
175
198
 
176
199
 
177
200
  def validate_evaluator_signature(sig: inspect.Signature) -> None:
201
+ """Validate that a function signature is compatible for use as an evaluator.
202
+
203
+ Args:
204
+ sig: The function signature to validate.
205
+
206
+ Raises:
207
+ ValueError: If the signature is invalid for use as an evaluator.
208
+ """
178
209
  # Check that the wrapped function has a valid signature for use as an evaluator
179
210
  # If it does not, raise an error to exit early before running evaluations
180
211
  params = sig.parameters
@@ -199,23 +230,24 @@ def validate_evaluator_signature(sig: inspect.Signature) -> None:
199
230
  ):
200
231
  continue
201
232
  raise ValueError(
202
- f"Invalid parameter names in evaluation function: {', '.join(not_found)}. "
233
+ f"Invalid parameter names in evaluation function: {not_found}. "
203
234
  "Parameters names for multi-argument functions must be "
204
235
  f"any of: {', '.join(valid_named_params)}."
205
236
  )
206
237
 
207
238
 
208
239
  class CodeEvaluator(Evaluator, ABC, is_abstract=True):
209
- """
210
- A convenience super class for defining code evaluators. There are functionally
211
- no differences between this class and the `Evaluator` class, except that this
212
- class has a default `_kind` attribute for AnnotatorKind.CODE.
240
+ """A convenience super class for defining code evaluators.
241
+
242
+ There are functionally no differences between this class and the `Evaluator` class,
243
+ except that this class has a default `_kind` attribute for AnnotatorKind.CODE.
213
244
  This class is intended to be subclassed, and should not be instantiated directly.
214
245
  """
215
246
 
216
247
  _kind = str(AnnotatorKind.CODE)
217
248
 
218
- def __new__(cls, *args: Any, **kwargs: Any) -> CodeEvaluator:
249
+ def __new__(cls, *args: object, **kwargs: object) -> CodeEvaluator:
250
+ """Create a new code evaluator instance, preventing direct instantiation of abstract class."""
219
251
  if cls is CodeEvaluator:
220
252
  raise TypeError(
221
253
  f"{cls.__name__} is an abstract class and should not be instantiated."
@@ -224,16 +256,17 @@ class CodeEvaluator(Evaluator, ABC, is_abstract=True):
224
256
 
225
257
 
226
258
  class LLMEvaluator(Evaluator, ABC, is_abstract=True):
227
- """
228
- A convenience super class for defining LLM evaluators. There are functionally
229
- no differences between this class and the `Evaluator` class, except that this
230
- class has a default `_kind` attribute for AnnotatorKind.LLM.
259
+ """A convenience super class for defining LLM evaluators.
260
+
261
+ There are functionally no differences between this class and the `Evaluator` class,
262
+ except that this class has a default `_kind` attribute for AnnotatorKind.LLM.
231
263
  This class is intended to be subclassed, and should not be instantiated directly.
232
264
  """
233
265
 
234
266
  _kind = str(AnnotatorKind.LLM)
235
267
 
236
- def __new__(cls, *args: Any, **kwargs: Any) -> LLMEvaluator:
268
+ def __new__(cls, *args: object, **kwargs: object) -> LLMEvaluator:
269
+ """Create a new LLM evaluator instance, preventing direct instantiation of abstract class."""
237
270
  if cls is LLMEvaluator:
238
271
  raise TypeError(
239
272
  f"{cls.__name__} is an abstract class and should not be instantiated."
@@ -1,10 +1,13 @@
1
+ """Evaluator-specific exception classes."""
2
+
3
+
1
4
  class ArizeException(Exception):
2
- pass
5
+ """Base exception for Arize experiment evaluator errors."""
3
6
 
4
7
 
5
8
  class ArizeContextLimitExceeded(ArizeException):
6
- pass
9
+ """Raised when context limit is exceeded during evaluation."""
7
10
 
8
11
 
9
12
  class ArizeTemplateMappingError(ArizeException):
10
- pass
13
+ """Raised when template mapping fails during evaluation."""