arize-phoenix 4.4.4rc4__py3-none-any.whl → 4.4.4rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (31) hide show
  1. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/METADATA +2 -2
  2. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/RECORD +30 -28
  3. phoenix/datasets/evaluators/code_evaluators.py +25 -53
  4. phoenix/datasets/evaluators/llm_evaluators.py +63 -32
  5. phoenix/datasets/evaluators/utils.py +292 -0
  6. phoenix/datasets/experiments.py +147 -82
  7. phoenix/datasets/tracing.py +19 -0
  8. phoenix/datasets/types.py +18 -52
  9. phoenix/db/insertion/dataset.py +19 -16
  10. phoenix/db/migrations/versions/10460e46d750_datasets.py +2 -2
  11. phoenix/db/models.py +8 -3
  12. phoenix/server/api/context.py +2 -0
  13. phoenix/server/api/dataloaders/__init__.py +2 -0
  14. phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
  15. phoenix/server/api/helpers/dataset_helpers.py +8 -7
  16. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  17. phoenix/server/api/mutations/project_mutations.py +9 -4
  18. phoenix/server/api/routers/v1/datasets.py +146 -42
  19. phoenix/server/api/routers/v1/experiment_evaluations.py +1 -0
  20. phoenix/server/api/routers/v1/experiment_runs.py +2 -2
  21. phoenix/server/api/types/Experiment.py +5 -0
  22. phoenix/server/api/types/ExperimentRun.py +1 -1
  23. phoenix/server/api/types/ExperimentRunAnnotation.py +1 -1
  24. phoenix/server/app.py +2 -0
  25. phoenix/server/static/index.js +610 -564
  26. phoenix/session/client.py +124 -2
  27. phoenix/version.py +1 -1
  28. phoenix/datasets/evaluators/_utils.py +0 -13
  29. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/WHEEL +0 -0
  30. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/licenses/IP_NOTICE +0 -0
  31. {arize_phoenix-4.4.4rc4.dist-info → arize_phoenix-4.4.4rc5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,292 @@
1
+ import functools
2
+ import inspect
3
+ from abc import ABC
4
+ from types import MappingProxyType
5
+ from typing import Any, Awaitable, Callable, Mapping, Optional, Union
6
+
7
+ from typing_extensions import TypeAlias
8
+
9
+ from phoenix.datasets.types import (
10
+ AnnotatorKind,
11
+ EvaluationResult,
12
+ JSONSerializable,
13
+ TaskOutput,
14
+ )
15
+
16
+
17
+ def _unwrap_json(obj: JSONSerializable) -> JSONSerializable:
18
+ if isinstance(obj, dict):
19
+ if len(obj) == 1:
20
+ key = next(iter(obj.keys()))
21
+ output = obj[key]
22
+ assert isinstance(
23
+ output, (dict, list, str, int, float, bool, type(None))
24
+ ), "Output must be JSON serializable"
25
+ return output
26
+ return obj
27
+
28
+
29
def validate_signature(sig: inspect.Signature) -> None:
    """
    Check that a function signature is usable as an evaluator.

    The check is meant to fail early, before any evaluations are run.

    Rules:
      * the signature must declare at least one parameter;
      * a signature with exactly one parameter is always accepted;
      * with several parameters, each one must be named `input`, `output`,
        `expected` or `metadata`, unless it is a `**kwargs` parameter or
        has a default value.

    Raises:
        ValueError: if the signature has no parameters, or if a
            multi-parameter signature contains names that break the rules
            above. All offending names are listed in the message.
    """
    params = sig.parameters
    # A tuple keeps the order of names in the error message deterministic.
    valid_named_params = ("input", "output", "expected", "metadata")
    if len(params) == 0:
        raise ValueError("Evaluation function must have at least one parameter.")
    if len(params) == 1:
        # A lone parameter may carry any name.
        return
    invalid_names = [
        param_name
        for param_name, param in params.items()
        if param_name not in valid_named_params
        and param.kind is not inspect.Parameter.VAR_KEYWORD
        and param.default is inspect.Parameter.empty
    ]
    if invalid_names:
        # Join the list of names; joining a single name would split it into
        # individual characters.
        raise ValueError(
            (
                f"Invalid parameter names in evaluation function: {', '.join(invalid_names)}. "
                "Parameters names for multi-argument functions must be "
                f"any of: {', '.join(valid_named_params)}."
            )
        )
51
+
52
+
53
+ def _bind_signature(sig: inspect.Signature, **kwargs: Any) -> inspect.BoundArguments:
54
+ parameter_mapping = {
55
+ "input": kwargs.get("input"),
56
+ "output": kwargs.get("output"),
57
+ "expected": kwargs.get("expected"),
58
+ "metadata": kwargs.get("metadata"),
59
+ }
60
+ params = sig.parameters
61
+ if len(params) == 1:
62
+ parameter_name = next(iter(params))
63
+ if parameter_name in parameter_mapping:
64
+ return sig.bind(parameter_mapping[parameter_name])
65
+ else:
66
+ return sig.bind(parameter_mapping["output"])
67
+ return sig.bind_partial(
68
+ **{name: parameter_mapping[name] for name in set(parameter_mapping).intersection(params)}
69
+ )
70
+
71
+
72
def create_evaluator(
    kind: Union[str, AnnotatorKind] = AnnotatorKind.CODE,
    name: Optional[str] = None,
    scorer: Optional[Callable[[Any], EvaluationResult]] = None,
) -> Callable[[Callable[..., Any]], "Evaluator"]:
    """
    Build a decorator that turns a plain function into an `Evaluator`.

    Args:
        kind: Annotator kind of the resulting evaluator. A string is
            upper-cased and converted to `AnnotatorKind`.
        name: Name of the evaluator. When empty, the name is inferred per
            decorated callable: the class name of `__self__` for bound
            methods, otherwise `__name__`, otherwise `str(func)`.
        scorer: Callable converting the raw return value of the decorated
            function into an `EvaluationResult`. Defaults to
            `_default_eval_scorer`.

    Returns:
        A decorator. Applying it validates the signature of the decorated
        function and returns an evaluator instance wrapping it; coroutine
        functions are wrapped by the async wrapper, all others by the sync
        wrapper.

    Raises:
        ValueError: from `validate_signature`, when the decorator is applied
            to a function whose signature is not usable as an evaluator.
    """
    resolved_scorer = _default_eval_scorer if scorer is None else scorer
    resolved_kind = AnnotatorKind(kind.upper()) if isinstance(kind, str) else kind

    def wrapper(func: Callable[..., Any]) -> "Evaluator":
        # Resolve the name into a local instead of rebinding the enclosing
        # `name`: the same decorator may be applied to several functions and
        # each of them must get its own inferred name.
        evaluator_name = name
        if not evaluator_name:
            if hasattr(func, "__self__"):
                evaluator_name = func.__self__.__class__.__name__
            elif hasattr(func, "__name__"):
                evaluator_name = func.__name__
            else:
                evaluator_name = str(func)
        assert evaluator_name is not None

        wrapped_signature = inspect.signature(func)
        validate_signature(wrapped_signature)

        if inspect.iscoroutinefunction(func):
            return _wrap_coroutine_evaluation_function(
                evaluator_name, resolved_kind, wrapped_signature, resolved_scorer
            )(func)
        return _wrap_sync_evaluation_function(
            evaluator_name, resolved_kind, wrapped_signature, resolved_scorer
        )(func)

    return wrapper
103
+
104
+
105
def _wrap_coroutine_evaluation_function(
    name: str,
    annotator_kind: AnnotatorKind,
    sig: inspect.Signature,
    convert_to_score: Callable[[Any], EvaluationResult],
) -> Callable[[Callable[..., Any]], "Evaluator"]:
    """
    Return a decorator that wraps a coroutine function in an `Evaluator`.

    The produced evaluator stores `name` and `annotator_kind`, awaits the
    wrapped function when called directly, and in `async_evaluate` binds the
    evaluation keyword arguments to `sig`, awaits the wrapped function and
    passes its result through `convert_to_score`.
    """

    def build_evaluator(func: Callable[..., Any]) -> "Evaluator":
        class AsyncEvaluator(Evaluator):
            def __init__(self) -> None:
                self._kind = annotator_kind
                self._name = name

            # Calling the evaluator directly behaves like awaiting `func`.
            @functools.wraps(func)
            async def __call__(self, *args: Any, **kwargs: Any) -> Any:
                return await func(*args, **kwargs)

            async def async_evaluate(self, **kwargs: Any) -> EvaluationResult:
                bound = _bind_signature(sig, **kwargs)
                raw_result = await func(*bound.args, **bound.kwargs)
                return convert_to_score(raw_result)

        evaluator = AsyncEvaluator()
        return evaluator

    return build_evaluator
129
+
130
+
131
def _wrap_sync_evaluation_function(
    name: str,
    annotator_kind: AnnotatorKind,
    sig: inspect.Signature,
    convert_to_score: Callable[[Any], EvaluationResult],
) -> Callable[[Callable[..., Any]], "Evaluator"]:
    """
    Return a decorator that wraps a synchronous function in an `Evaluator`.

    The produced evaluator stores `name` and `annotator_kind`, forwards
    direct calls to the wrapped function, and in `evaluate` binds the
    evaluation keyword arguments to `sig`, calls the wrapped function and
    passes its result through `convert_to_score`.
    """

    def build_evaluator(func: Callable[..., Any]) -> "Evaluator":
        class SyncEvaluator(Evaluator):
            def __init__(self) -> None:
                self._kind = annotator_kind
                self._name = name

            # Calling the evaluator directly behaves like calling `func`.
            @functools.wraps(func)
            def __call__(self, *args: Any, **kwargs: Any) -> Any:
                return func(*args, **kwargs)

            def evaluate(self, **kwargs: Any) -> EvaluationResult:
                bound = _bind_signature(sig, **kwargs)
                raw_result = func(*bound.args, **bound.kwargs)
                return convert_to_score(raw_result)

        evaluator = SyncEvaluator()
        return evaluator

    return build_evaluator
155
+
156
+
157
def _default_eval_scorer(result: Any) -> EvaluationResult:
    """
    Convert the raw return value of an evaluation function to a result.

    * `bool`: score is `float(result)` and label is `str(result)`.
    * `int` / `float`: score is `float(result)`.
    * `EvaluationResult`: returned as is.

    Raises:
        ValueError: for any other type.
    """
    # `bool` is tested ahead of `int` because `bool` is a subclass of `int`.
    if isinstance(result, bool):
        return EvaluationResult(score=float(result), label=str(result))
    if isinstance(result, (int, float)):
        return EvaluationResult(score=float(result))
    if isinstance(result, EvaluationResult):
        return result
    raise ValueError(f"Unsupported evaluation result type: {type(result)}")
166
+
167
+
168
# Mapping-shaped pieces of a dataset example.
ExampleInput: TypeAlias = Mapping[str, JSONSerializable]
ExampleOutput: TypeAlias = Mapping[str, JSONSerializable]
ExampleMetadata: TypeAlias = Mapping[str, JSONSerializable]

# Evaluator attributes and the values an evaluation function may return.
EvaluatorName: TypeAlias = str
EvaluatorKind: TypeAlias = str
EvaluatorOutput: TypeAlias = Union[EvaluationResult, bool, int, float, str]
175
+
176
+
177
class Evaluator(ABC):
    """
    A helper base class that guides the implementation of an `Evaluator`.

    Subclasses must implement either the `evaluate` or the `async_evaluate`
    method. Implementing both methods is recommended, but not required.
    Subclass definitions are checked in `__init_subclass__` unless the
    subclass is declared with `is_abstract=True`.

    This class is intended to be subclassed, and should not be instantiated
    directly.
    """

    _kind: AnnotatorKind
    _name: EvaluatorName

    @functools.cached_property
    def name(self) -> EvaluatorName:
        """Return `_name` if it is set, otherwise the name of the class."""
        return getattr(self, "_name", self.__class__.__name__)

    @functools.cached_property
    def kind(self) -> EvaluatorKind:
        """Return the value of `_kind` if it is set, otherwise that of `CODE`."""
        if not hasattr(self, "_kind"):
            return AnnotatorKind.CODE.value
        return self._kind.value

    def __new__(cls, *args: Any, **kwargs: Any) -> "Evaluator":
        # Only subclasses may be instantiated.
        if cls is not Evaluator:
            return object.__new__(cls)
        raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")

    def evaluate(
        self,
        *,
        output: Optional[TaskOutput] = None,
        expected: Optional[ExampleOutput] = None,
        metadata: ExampleMetadata = MappingProxyType({}),
        input: ExampleInput = MappingProxyType({}),
        **kwargs: Any,
    ) -> EvaluationResult:
        # Subclasses implement this sync method or the async counterpart.
        # Implementing both is recommended but not required.
        raise NotImplementedError

    async def async_evaluate(
        self,
        *,
        output: Optional[TaskOutput] = None,
        expected: Optional[ExampleOutput] = None,
        metadata: ExampleMetadata = MappingProxyType({}),
        input: ExampleInput = MappingProxyType({}),
        **kwargs: Any,
    ) -> EvaluationResult:
        # Subclasses implement this async method or the sync counterpart.
        # Implementing both is recommended but not required. The default
        # implementation delegates to the sync method.
        forwarded = dict(output=output, expected=expected, metadata=metadata, input=input)
        return self.evaluate(**forwarded, **kwargs)

    def __init_subclass__(cls, is_abstract: bool = False, **kwargs: Any) -> None:
        super().__init_subclass__(**kwargs)
        if is_abstract:
            return
        evaluate_fn_signature = inspect.signature(Evaluator.evaluate)
        # (attribute to look up, name reported on failure, assertion message);
        # the sync method is looked up before the async one on every class.
        candidates = (
            (
                Evaluator.evaluate.__name__,
                "evaluate",
                "`evaluate()` method should be callable",
            ),
            (
                Evaluator.async_evaluate.__name__,
                "async_evaluate",
                "`async_evaluate()` method should be callable",
            ),
        )
        for klass in inspect.getmro(cls):
            # Stop once the base classes are reached: their own methods do
            # not count as an implementation.
            if klass in (LLMEvaluator, Evaluator):
                break
            namespace = klass.__dict__
            for attribute, reported_name, not_callable_message in candidates:
                implementation = namespace.get(attribute)
                if not implementation:
                    continue
                assert callable(implementation), not_callable_message
                # need to remove the first param, i.e. `self`
                _validate_sig(functools.partial(implementation, None), reported_name)
                return
        raise ValueError(
            f"Evaluator must implement either "
            f"`def evaluate{evaluate_fn_signature}` or "
            f"`async def async_evaluate{evaluate_fn_signature}`"
        )
261
+
262
+
263
def _validate_sig(fn: Callable[..., Any], fn_name: str) -> None:
    """
    Validate the signature of an `evaluate` / `async_evaluate` implementation.

    The signature must pass `validate_signature` and must also declare a
    variadic keyword parameter (`**kwargs`).

    Raises:
        ValueError: if `validate_signature` rejects the signature, or if no
            `**kwargs` parameter is declared. `fn_name` is used in the
            message of the latter error.
    """
    signature = inspect.signature(fn)
    validate_signature(signature)
    accepts_var_keyword = any(
        parameter.kind is inspect.Parameter.VAR_KEYWORD
        for parameter in signature.parameters.values()
    )
    if not accepts_var_keyword:
        raise ValueError(f"`{fn_name}` should allow variadic keyword arguments `**kwargs`")
271
+
272
+
273
class LLMEvaluator(Evaluator, ABC, is_abstract=True):
    """
    A convenience base class whose `kind` is preset to LLM.

    This class is intended to be subclassed, and should not be instantiated
    directly.
    """

    _kind = AnnotatorKind.LLM

    def __new__(cls, *args: Any, **kwargs: Any) -> "LLMEvaluator":
        # Only subclasses may be instantiated.
        if cls is not LLMEvaluator:
            return object.__new__(cls)
        raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
286
+
287
+
288
# Anything accepted as an evaluator for an experiment: an `Evaluator`
# instance, or a plain sync / async callable returning an `EvaluatorOutput`.
ExperimentEvaluator: TypeAlias = Union[
    Evaluator,
    Callable[..., EvaluatorOutput],
    Callable[..., Awaitable[EvaluatorOutput]],
]