arize 8.0.0a14__py3-none-any.whl → 8.0.0a16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. arize/__init__.py +70 -1
  2. arize/_flight/client.py +163 -43
  3. arize/_flight/types.py +1 -0
  4. arize/_generated/api_client/__init__.py +5 -1
  5. arize/_generated/api_client/api/datasets_api.py +6 -6
  6. arize/_generated/api_client/api/experiments_api.py +924 -61
  7. arize/_generated/api_client/api_client.py +1 -1
  8. arize/_generated/api_client/configuration.py +1 -1
  9. arize/_generated/api_client/exceptions.py +1 -1
  10. arize/_generated/api_client/models/__init__.py +3 -1
  11. arize/_generated/api_client/models/dataset.py +2 -2
  12. arize/_generated/api_client/models/dataset_version.py +1 -1
  13. arize/_generated/api_client/models/datasets_create_request.py +3 -3
  14. arize/_generated/api_client/models/datasets_list200_response.py +1 -1
  15. arize/_generated/api_client/models/datasets_list_examples200_response.py +1 -1
  16. arize/_generated/api_client/models/error.py +1 -1
  17. arize/_generated/api_client/models/experiment.py +6 -6
  18. arize/_generated/api_client/models/experiments_create_request.py +98 -0
  19. arize/_generated/api_client/models/experiments_list200_response.py +1 -1
  20. arize/_generated/api_client/models/experiments_runs_list200_response.py +92 -0
  21. arize/_generated/api_client/rest.py +1 -1
  22. arize/_generated/api_client/test/test_dataset.py +2 -1
  23. arize/_generated/api_client/test/test_dataset_version.py +1 -1
  24. arize/_generated/api_client/test/test_datasets_api.py +1 -1
  25. arize/_generated/api_client/test/test_datasets_create_request.py +2 -1
  26. arize/_generated/api_client/test/test_datasets_list200_response.py +1 -1
  27. arize/_generated/api_client/test/test_datasets_list_examples200_response.py +1 -1
  28. arize/_generated/api_client/test/test_error.py +1 -1
  29. arize/_generated/api_client/test/test_experiment.py +6 -1
  30. arize/_generated/api_client/test/test_experiments_api.py +23 -2
  31. arize/_generated/api_client/test/test_experiments_create_request.py +61 -0
  32. arize/_generated/api_client/test/test_experiments_list200_response.py +1 -1
  33. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +56 -0
  34. arize/_generated/api_client_README.md +13 -8
  35. arize/client.py +19 -2
  36. arize/config.py +50 -3
  37. arize/constants/config.py +8 -2
  38. arize/constants/openinference.py +14 -0
  39. arize/constants/pyarrow.py +1 -0
  40. arize/datasets/__init__.py +0 -70
  41. arize/datasets/client.py +106 -19
  42. arize/datasets/errors.py +61 -0
  43. arize/datasets/validation.py +46 -0
  44. arize/experiments/client.py +455 -0
  45. arize/experiments/evaluators/__init__.py +0 -0
  46. arize/experiments/evaluators/base.py +255 -0
  47. arize/experiments/evaluators/exceptions.py +10 -0
  48. arize/experiments/evaluators/executors.py +502 -0
  49. arize/experiments/evaluators/rate_limiters.py +277 -0
  50. arize/experiments/evaluators/types.py +122 -0
  51. arize/experiments/evaluators/utils.py +198 -0
  52. arize/experiments/functions.py +920 -0
  53. arize/experiments/tracing.py +276 -0
  54. arize/experiments/types.py +394 -0
  55. arize/models/client.py +4 -1
  56. arize/spans/client.py +16 -20
  57. arize/utils/arrow.py +4 -3
  58. arize/utils/openinference_conversion.py +56 -0
  59. arize/utils/proto.py +13 -0
  60. arize/utils/size.py +22 -0
  61. arize/version.py +1 -1
  62. {arize-8.0.0a14.dist-info → arize-8.0.0a16.dist-info}/METADATA +3 -1
  63. {arize-8.0.0a14.dist-info → arize-8.0.0a16.dist-info}/RECORD +65 -44
  64. {arize-8.0.0a14.dist-info → arize-8.0.0a16.dist-info}/WHEEL +0 -0
  65. {arize-8.0.0a14.dist-info → arize-8.0.0a16.dist-info}/licenses/LICENSE.md +0 -0
arize/experiments/evaluators/base.py
@@ -0,0 +1,255 @@
+ from __future__ import annotations
+
+ import functools
+ import inspect
+ from abc import ABC
+ from types import MappingProxyType
+ from typing import Any, Awaitable, Callable, Mapping, Sequence
+
+ from arize.experiments.evaluators.types import (
+     AnnotatorKind,
+     EvaluationResult,
+     EvaluatorKind,
+     EvaluatorName,
+     EvaluatorOutput,
+     JSONSerializable,
+ )
+ from arize.experiments.types import (
+     ExampleInput,
+     ExampleMetadata,
+     ExampleOutput,
+     TaskOutput,
+ )
+
+
+ class Evaluator(ABC):
+     """
+     A helper super class to guide the implementation of an `Evaluator` object.
+     Subclasses must implement either the `evaluate` or `async_evaluate` method.
+     Implementing both methods is recommended, but not required.
+
+     This class is intended to be subclassed and should not be instantiated directly.
+     """
+
+     _kind: EvaluatorKind
+     _name: EvaluatorName
+
+     @functools.cached_property
+     def name(self) -> EvaluatorName:
+         if hasattr(self, "_name"):
+             return self._name
+         return self.__class__.__name__
+
+     @functools.cached_property
+     def kind(self) -> EvaluatorKind:
+         if hasattr(self, "_kind"):
+             return self._kind
+         return AnnotatorKind.CODE.value
+
+     def __new__(cls, *args: Any, **kwargs: Any) -> Evaluator:
+         if cls is Evaluator:
+             raise TypeError(
+                 f"{cls.__name__} is an abstract class and should not be instantiated."
+             )
+         return object.__new__(cls)
+
+     def evaluate(
+         self,
+         *,
+         dataset_row: Mapping[str, JSONSerializable] | None = None,
+         input: ExampleInput = MappingProxyType({}),
+         output: TaskOutput | None = None,
+         experiment_output: TaskOutput | None = None,
+         dataset_output: ExampleOutput = MappingProxyType({}),
+         metadata: ExampleMetadata = MappingProxyType({}),
+         **kwargs: Any,
+     ) -> EvaluationResult:
+         """
+         Evaluate the given inputs and produce an evaluation result.
+
+         This method should be implemented by subclasses to perform the actual
+         evaluation logic. It is recommended to implement both this synchronous
+         method and the asynchronous `async_evaluate` method, but it is not required.
+
+         Args:
+             dataset_row (Optional[Mapping[str, JSONSerializable]]): A row from the dataset.
+             input (ExampleInput): The input provided for evaluation.
+             output (Optional[TaskOutput]): The output produced by the task.
+             dataset_output (ExampleOutput): The expected output for comparison.
+             metadata (ExampleMetadata): Metadata associated with the example.
+             **kwargs (Any): Additional keyword arguments.
+
+         Raises:
+             NotImplementedError: If the method is not implemented by the subclass.
+         """
+         # For subclassing, one should implement either this sync method or the
+         # async version. Implementing both is recommended but not required.
+         raise NotImplementedError
+
+     async def async_evaluate(
+         self,
+         *,
+         dataset_row: Mapping[str, JSONSerializable] | None = None,
+         input: ExampleInput = MappingProxyType({}),
+         output: TaskOutput | None = None,
+         experiment_output: TaskOutput | None = None,
+         dataset_output: ExampleOutput = MappingProxyType({}),
+         metadata: ExampleMetadata = MappingProxyType({}),
+         **kwargs: Any,
+     ) -> EvaluationResult:
+         """
+         Asynchronously evaluate the given inputs and produce an evaluation result.
+
+         This method should be implemented by subclasses to perform the actual
+         evaluation logic. It is recommended to implement both this asynchronous
+         method and the synchronous `evaluate` method, but it is not required.
+
+         Args:
+             dataset_row (Optional[Mapping[str, JSONSerializable]]): A row from the dataset.
+             input (ExampleInput): The input provided for evaluation.
+             output (Optional[TaskOutput]): The output produced by the task.
+             dataset_output (ExampleOutput): The expected output for comparison.
+             metadata (ExampleMetadata): Metadata associated with the example.
+             **kwargs (Any): Additional keyword arguments.
+
+         Returns:
+             EvaluationResult: The result of the evaluation.
+
+         Raises:
+             NotImplementedError: If the method is not implemented by the subclass.
+         """
+         # For subclassing, one should implement either this async method or the
+         # sync version. Implementing both is recommended but not required.
+         return self.evaluate(
+             dataset_row=dataset_row,
+             input=input,
+             output=output,
+             experiment_output=experiment_output,
+             dataset_output=dataset_output,
+             metadata=metadata,
+             **kwargs,
+         )
+
+     def __init_subclass__(
+         cls, is_abstract: bool = False, **kwargs: Any
+     ) -> None:
+         super().__init_subclass__(**kwargs)
+         if is_abstract:
+             return
+         evaluate_fn_signature = inspect.signature(Evaluator.evaluate)
+         for super_cls in inspect.getmro(cls):
+             if super_cls in (LLMEvaluator, Evaluator):
+                 break
+             if evaluate := super_cls.__dict__.get(Evaluator.evaluate.__name__):
+                 if isinstance(evaluate, classmethod):
+                     evaluate = evaluate.__func__
+                 assert callable(evaluate), (
+                     "`evaluate()` method should be callable"
+                 )
+                 # need to remove the first param, i.e. `self`
+                 _validate_sig(functools.partial(evaluate, None), "evaluate")
+                 return
+             if async_evaluate := super_cls.__dict__.get(
+                 Evaluator.async_evaluate.__name__
+             ):
+                 if isinstance(async_evaluate, classmethod):
+                     async_evaluate = async_evaluate.__func__
+                 assert callable(async_evaluate), (
+                     "`async_evaluate()` method should be callable"
+                 )
+                 # need to remove the first param, i.e. `self`
+                 _validate_sig(
+                     functools.partial(async_evaluate, None), "async_evaluate"
+                 )
+                 return
+         raise ValueError(
+             f"Evaluator must implement either "
+             f"`def evaluate{evaluate_fn_signature}` or "
+             f"`async def async_evaluate{evaluate_fn_signature}`"
+         )
+
+
+ def _validate_sig(fn: Callable[..., Any], fn_name: str) -> None:
+     sig = inspect.signature(fn)
+     validate_evaluator_signature(sig)
+     for param in sig.parameters.values():
+         if param.kind is inspect.Parameter.VAR_KEYWORD:
+             return
+     else:
+         raise ValueError(
+             f"`{fn_name}` should allow variadic keyword arguments `**kwargs`"
+         )
+
+
+ def validate_evaluator_signature(sig: inspect.Signature) -> None:
+     # Check that the wrapped function has a valid signature for use as an evaluator.
+     # If it does not, raise an error to exit early before running evaluations.
+     params = sig.parameters
+     valid_named_params = {
+         "dataset_row",
+         "input",
+         "output",
+         "experiment_output",
+         "dataset_output",
+         "metadata",
+     }
+     if len(params) == 0:
+         raise ValueError(
+             "Evaluation function must have at least one parameter."
+         )
+     if len(params) > 1:
+         for not_found in set(params) - valid_named_params:
+             param = params[not_found]
+             if (
+                 param.kind is inspect.Parameter.VAR_KEYWORD
+                 or param.default is not inspect.Parameter.empty
+             ):
+                 continue
+             raise ValueError(
+                 f"Invalid parameter name in evaluation function: {not_found}. "
+                 "Parameter names for multi-argument functions must be "
+                 f"any of: {', '.join(valid_named_params)}."
+             )
+
+
+ class CodeEvaluator(Evaluator, ABC, is_abstract=True):
+     """
+     A convenience super class for defining code evaluators. There are functionally
+     no differences between this class and the `Evaluator` class, except that this
+     class has a default `_kind` attribute of AnnotatorKind.CODE.
+
+     This class is intended to be subclassed and should not be instantiated directly.
+     """
+
+     _kind = str(AnnotatorKind.CODE)
+
+     def __new__(cls, *args: Any, **kwargs: Any) -> CodeEvaluator:
+         if cls is CodeEvaluator:
+             raise TypeError(
+                 f"{cls.__name__} is an abstract class and should not be instantiated."
+             )
+         return object.__new__(cls)
+
+
+ class LLMEvaluator(Evaluator, ABC, is_abstract=True):
+     """
+     A convenience super class for defining LLM evaluators. There are functionally
+     no differences between this class and the `Evaluator` class, except that this
+     class has a default `_kind` attribute of AnnotatorKind.LLM.
+
+     This class is intended to be subclassed and should not be instantiated directly.
+     """
+
+     _kind = str(AnnotatorKind.LLM)
+
+     def __new__(cls, *args: Any, **kwargs: Any) -> LLMEvaluator:
+         if cls is LLMEvaluator:
+             raise TypeError(
+                 f"{cls.__name__} is an abstract class and should not be instantiated."
+             )
+         return object.__new__(cls)
+
+
+ ExperimentEvaluator = (
+     Evaluator
+     | Callable[..., EvaluatorOutput]
+     | Callable[..., Awaitable[EvaluatorOutput]]
+ )
+
+
+ Evaluators = (
+     ExperimentEvaluator
+     | Sequence[ExperimentEvaluator]
+     | Mapping[EvaluatorName, ExperimentEvaluator]
+ )
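
For orientation, the `Evaluator` base class added above can be used either by subclassing it or by passing a plain callable, since the `ExperimentEvaluator` alias also accepts functions. The sketch below is illustrative only and not part of the diff; in particular, the `EvaluationResult(score=..., explanation=...)` constructor is an assumption, as its definition lives in `arize/experiments/evaluators/types.py`, which is not shown here.

from typing import Any

from arize.experiments.evaluators.base import Evaluator
from arize.experiments.evaluators.types import EvaluationResult


class ExactMatch(Evaluator):
    """Code evaluator that compares the task output to the expected dataset output."""

    def evaluate(
        self, *, output: Any = None, dataset_output: Any = None, **kwargs: Any
    ) -> EvaluationResult:
        matched = output == dataset_output
        # NOTE: EvaluationResult fields are assumed here; adjust to the actual type.
        return EvaluationResult(
            score=float(matched),
            explanation="exact match" if matched else "mismatch",
        )


# A plain function can also serve as an evaluator. Multi-argument functions must
# restrict their named parameters to those accepted by validate_evaluator_signature
# (dataset_row, input, output, experiment_output, dataset_output, metadata).
def output_length(output: Any = None, **kwargs: Any) -> float:
    return float(len(str(output or "")))

Keyword-only parameters plus `**kwargs`, as in `ExactMatch.evaluate`, satisfy both `_validate_sig` (which requires `**kwargs`) and `validate_evaluator_signature` (which restricts named parameters).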
arize/experiments/evaluators/exceptions.py
@@ -0,0 +1,10 @@
+ class ArizeException(Exception):
+     pass
+
+
+ class ArizeContextLimitExceeded(ArizeException):
+     pass
+
+
+ class ArizeTemplateMappingError(ArizeException):
+     pass
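
The exception hierarchy above gives callers a single base class to catch. A minimal illustration, not part of the diff (`run_evaluation` is a hypothetical caller):

from arize.experiments.evaluators.exceptions import ArizeException

try:
    run_evaluation()  # hypothetical application code
except ArizeException as exc:
    # Catches ArizeContextLimitExceeded and ArizeTemplateMappingError alike,
    # since both subclass ArizeException.
    print(f"Evaluation failed: {exc}")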