arize-phoenix 2.3.0__py3-none-any.whl → 2.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-2.3.0.dist-info → arize_phoenix-2.4.1.dist-info}/METADATA +1 -1
- {arize_phoenix-2.3.0.dist-info → arize_phoenix-2.4.1.dist-info}/RECORD +18 -18
- phoenix/config.py +1 -0
- phoenix/experimental/evals/evaluators.py +3 -0
- phoenix/experimental/evals/functions/classify.py +6 -0
- phoenix/experimental/evals/functions/generate.py +3 -0
- phoenix/experimental/evals/models/base.py +3 -0
- phoenix/experimental/evals/models/openai.py +3 -0
- phoenix/experimental/evals/models/vertex.py +26 -4
- phoenix/server/static/index.js +454 -438
- phoenix/session/evaluation.py +7 -5
- phoenix/trace/__init__.py +8 -2
- phoenix/trace/llama_index/callback.py +17 -8
- phoenix/trace/span_evaluations.py +112 -1
- phoenix/version.py +1 -1
- {arize_phoenix-2.3.0.dist-info → arize_phoenix-2.4.1.dist-info}/WHEEL +0 -0
- {arize_phoenix-2.3.0.dist-info → arize_phoenix-2.4.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-2.3.0.dist-info → arize_phoenix-2.4.1.dist-info}/licenses/LICENSE +0 -0
phoenix/session/evaluation.py
CHANGED
|
@@ -103,13 +103,15 @@ def _extract_subject_id_from_index(
|
|
|
103
103
|
|
|
104
104
|
def _extract_result(row: "pd.Series[Any]") -> Optional[pb.Evaluation.Result]:
|
|
105
105
|
score = cast(Optional[float], row.get("score"))
|
|
106
|
+
if isinstance(score, float) and math.isnan(score):
|
|
107
|
+
score = None
|
|
106
108
|
label = cast(Optional[str], row.get("label"))
|
|
109
|
+
if isinstance(label, float) and math.isnan(label):
|
|
110
|
+
label = None
|
|
107
111
|
explanation = cast(Optional[str], row.get("explanation"))
|
|
108
|
-
if (
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
and not explanation
|
|
112
|
-
):
|
|
112
|
+
if isinstance(explanation, float) and math.isnan(explanation):
|
|
113
|
+
explanation = None
|
|
114
|
+
if score is None and not label and not explanation:
|
|
113
115
|
return None
|
|
114
116
|
return pb.Evaluation.Result(
|
|
115
117
|
score=DoubleValue(value=score) if score is not None else None,
|
phoenix/trace/__init__.py
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
from .span_evaluations import DocumentEvaluations, SpanEvaluations, TraceEvaluations
|
|
1
|
+
from .span_evaluations import DocumentEvaluations, Evaluations, SpanEvaluations, TraceEvaluations
|
|
2
2
|
from .trace_dataset import TraceDataset
|
|
3
3
|
|
|
4
|
-
__all__ = [
|
|
4
|
+
__all__ = [
|
|
5
|
+
"Evaluations",
|
|
6
|
+
"TraceDataset",
|
|
7
|
+
"SpanEvaluations",
|
|
8
|
+
"DocumentEvaluations",
|
|
9
|
+
"TraceEvaluations",
|
|
10
|
+
]
|
phoenix/trace/llama_index/callback.py
CHANGED
|
@@ -214,17 +214,26 @@ def payload_to_semantic_attributes(
|
|
|
214
214
|
if event_type is CBEventType.LLM:
|
|
215
215
|
if model_name := serialized.get("model"):
|
|
216
216
|
attributes[LLM_MODEL_NAME] = model_name
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
"temperature": serialized["temperature"],
|
|
221
|
-
"max_tokens": serialized["max_tokens"],
|
|
222
|
-
**serialized["additional_kwargs"],
|
|
223
|
-
}
|
|
224
|
-
)
|
|
217
|
+
invocation_parameters = _extract_invocation_parameters(serialized)
|
|
218
|
+
invocation_parameters["model"] = model_name
|
|
219
|
+
attributes[LLM_INVOCATION_PARAMETERS] = json.dumps(invocation_parameters)
|
|
225
220
|
return attributes
|
|
226
221
|
|
|
227
222
|
|
|
223
|
+
def _extract_invocation_parameters(serialized: Mapping[str, Any]) -> Dict[str, Any]:
|
|
224
|
+
# FIXME: this is only based on openai. Other models have different parameters.
|
|
225
|
+
if not hasattr(serialized, "get"):
|
|
226
|
+
return {}
|
|
227
|
+
invocation_parameters: Dict[str, Any] = {}
|
|
228
|
+
additional_kwargs = serialized.get("additional_kwargs")
|
|
229
|
+
if additional_kwargs and isinstance(additional_kwargs, Mapping):
|
|
230
|
+
invocation_parameters.update(additional_kwargs)
|
|
231
|
+
for key in ("temperature", "max_tokens"):
|
|
232
|
+
if (value := serialized.get(key)) is not None:
|
|
233
|
+
invocation_parameters[key] = value
|
|
234
|
+
return invocation_parameters
|
|
235
|
+
|
|
236
|
+
|
|
228
237
|
class OpenInferenceTraceCallbackHandler(BaseCallbackHandler):
|
|
229
238
|
"""Callback handler for storing LLM application trace data in OpenInference format.
|
|
230
239
|
OpenInference is an open standard for capturing and storing AI model
|
phoenix/trace/span_evaluations.py
CHANGED
|
@@ -1,15 +1,26 @@
|
|
|
1
|
+
import json
|
|
1
2
|
from abc import ABC
|
|
2
3
|
from dataclasses import dataclass, field
|
|
3
4
|
from itertools import product
|
|
5
|
+
from pathlib import Path
|
|
4
6
|
from types import MappingProxyType
|
|
5
|
-
from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple
|
|
7
|
+
from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Type, Union
|
|
8
|
+
from uuid import UUID, uuid4
|
|
6
9
|
|
|
7
10
|
import pandas as pd
|
|
8
11
|
from pandas.api.types import is_integer_dtype, is_numeric_dtype, is_string_dtype
|
|
12
|
+
from pyarrow import Table, parquet
|
|
13
|
+
|
|
14
|
+
from phoenix.config import TRACE_DATASET_DIR
|
|
15
|
+
from phoenix.exceptions import PhoenixException
|
|
9
16
|
|
|
10
17
|
EVAL_NAME_COLUMN_PREFIX = "eval."
|
|
11
18
|
|
|
12
19
|
|
|
20
|
+
class InvalidParquetMetadataError(PhoenixException):
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
|
|
13
24
|
class NeedsNamedIndex(ABC):
|
|
14
25
|
index_names: Mapping[Tuple[str, ...], Callable[[Any], bool]]
|
|
15
26
|
all_valid_index_name_sorted_combos: Set[Tuple[str, ...]]
|
|
@@ -72,6 +83,7 @@ class NeedsResultColumns(ABC):
|
|
|
72
83
|
class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
|
|
73
84
|
eval_name: str # The name for the evaluation, e.g. 'toxicity'
|
|
74
85
|
dataframe: pd.DataFrame = field(repr=False)
|
|
86
|
+
id: UUID = field(init=False, default_factory=uuid4)
|
|
75
87
|
|
|
76
88
|
def __len__(self) -> int:
|
|
77
89
|
return len(self.dataframe)
|
|
@@ -152,6 +164,58 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
|
|
|
152
164
|
tuple(sorted(prod)) for prod in product(*cls.index_names.keys())
|
|
153
165
|
)
|
|
154
166
|
|
|
167
|
+
def to_parquet(self, directory: Optional[Union[str, Path]] = None) -> Path:
|
|
168
|
+
"""Persists the evaluations to a parquet file.
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
directory (Optional[Union[str, Path]], optional): An optional path
|
|
172
|
+
to a directory where the parquet file will be saved. If not
|
|
173
|
+
provided, the parquet file will be saved to a default location.
|
|
174
|
+
|
|
175
|
+
Returns:
|
|
176
|
+
Path: The path to the parquet file, including a randomly generated
|
|
177
|
+
filename.
|
|
178
|
+
"""
|
|
179
|
+
directory = Path(directory) if directory else TRACE_DATASET_DIR
|
|
180
|
+
path = directory / f"evaluations-{self.id}.parquet"
|
|
181
|
+
table = Table.from_pandas(self.dataframe)
|
|
182
|
+
table = table.replace_schema_metadata(
|
|
183
|
+
{
|
|
184
|
+
**(table.schema.metadata or {}),
|
|
185
|
+
# explicitly encode keys and values, which are automatically encoded regardless
|
|
186
|
+
b"arize": json.dumps(
|
|
187
|
+
{
|
|
188
|
+
"eval_id": str(self.id),
|
|
189
|
+
"eval_name": self.eval_name,
|
|
190
|
+
"eval_type": self.__class__.__name__,
|
|
191
|
+
}
|
|
192
|
+
).encode("utf-8"),
|
|
193
|
+
}
|
|
194
|
+
)
|
|
195
|
+
parquet.write_table(table, path)
|
|
196
|
+
return path
|
|
197
|
+
|
|
198
|
+
@classmethod
|
|
199
|
+
def from_parquet(cls, path: Union[str, Path]) -> "Evaluations":
|
|
200
|
+
"""Loads the evaluations from a parquet file.
|
|
201
|
+
|
|
202
|
+
Args:
|
|
203
|
+
path (Union[str, Path]): Path to a persisted evaluations parquet
|
|
204
|
+
file.
|
|
205
|
+
|
|
206
|
+
Returns:
|
|
207
|
+
Evaluations: The loaded evaluations. The type of the returned
|
|
208
|
+
evaluations will be the same as the type of the evaluations that
|
|
209
|
+
were originally persisted.
|
|
210
|
+
"""
|
|
211
|
+
schema = parquet.read_schema(path)
|
|
212
|
+
eval_id, eval_name, evaluations_cls = _parse_schema_metadata(schema.metadata)
|
|
213
|
+
table = parquet.read_table(path)
|
|
214
|
+
dataframe = table.to_pandas()
|
|
215
|
+
evaluations = evaluations_cls(eval_name=eval_name, dataframe=dataframe)
|
|
216
|
+
object.__setattr__(evaluations, "id", eval_id)
|
|
217
|
+
return evaluations
|
|
218
|
+
|
|
155
219
|
|
|
156
220
|
@dataclass(frozen=True)
|
|
157
221
|
class SpanEvaluations(
|
|
@@ -235,3 +299,50 @@ class TraceEvaluations(
|
|
|
235
299
|
index_names=MappingProxyType({("context.trace_id", "trace_id"): is_string_dtype}),
|
|
236
300
|
):
|
|
237
301
|
...
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _parse_schema_metadata(metadata: Dict[bytes, Any]) -> Tuple[UUID, str, Type[Evaluations]]:
|
|
305
|
+
"""Validates and parses the schema metadata. Raises an exception if the
|
|
306
|
+
metadata is invalid.
|
|
307
|
+
|
|
308
|
+
Args:
|
|
309
|
+
metadata (Dict[bytes, Any]): A dictionary of schema metadata from a
|
|
310
|
+
parquet file.
|
|
311
|
+
|
|
312
|
+
Returns:
|
|
313
|
+
Tuple[UUID, str, Type[Evaluations]]: The evaluation ID, the evaluation name, and the evaluations class.
|
|
314
|
+
"""
|
|
315
|
+
if not (arize_metadata_json := metadata.get(b"arize")):
|
|
316
|
+
raise InvalidParquetMetadataError('Schema metadata is missing "arize" key')
|
|
317
|
+
try:
|
|
318
|
+
arize_metadata = json.loads(arize_metadata_json)
|
|
319
|
+
except json.JSONDecodeError as err:
|
|
320
|
+
raise InvalidParquetMetadataError(
|
|
321
|
+
'Encountered invalid JSON string under "arize" key'
|
|
322
|
+
) from err
|
|
323
|
+
evaluations_classes = {subclass.__name__: subclass for subclass in Evaluations.__subclasses__()}
|
|
324
|
+
if not (
|
|
325
|
+
isinstance(arize_metadata, dict)
|
|
326
|
+
and (eval_id := _to_uuid(arize_metadata.get("eval_id")))
|
|
327
|
+
and isinstance(eval_name := arize_metadata.get("eval_name"), str)
|
|
328
|
+
and (eval_type := arize_metadata.get("eval_type"))
|
|
329
|
+
and (evaluations_cls := evaluations_classes.get(eval_type))
|
|
330
|
+
):
|
|
331
|
+
raise InvalidParquetMetadataError(f"Invalid Arize metadata: {arize_metadata}")
|
|
332
|
+
return eval_id, eval_name, evaluations_cls
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _to_uuid(value: Any) -> Optional[UUID]:
|
|
336
|
+
"""
|
|
337
|
+
Converts an input to a UUID if possible, otherwise returns None.
|
|
338
|
+
|
|
339
|
+
Args:
|
|
340
|
+
value (Any): The value to convert to a UUID.
|
|
341
|
+
|
|
342
|
+
Returns:
|
|
343
|
+
Optional[UUID]: A UUID if the value could be converted, otherwise None.
|
|
344
|
+
"""
|
|
345
|
+
try:
|
|
346
|
+
return UUID(value)
|
|
347
|
+
except Exception:
|
|
348
|
+
return None
|
phoenix/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "2.3.0"
|
|
1
|
+
__version__ = "2.4.1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|