arize-phoenix 4.4.4rc5__py3-none-any.whl → 4.4.4rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/METADATA +11 -5
- {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/RECORD +39 -36
- phoenix/config.py +21 -0
- phoenix/datetime_utils.py +4 -0
- phoenix/db/insertion/evaluation.py +4 -4
- phoenix/db/insertion/helpers.py +4 -12
- phoenix/db/insertion/span.py +3 -3
- phoenix/db/models.py +1 -1
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +153 -0
- phoenix/{datasets → experiments}/evaluators/code_evaluators.py +7 -7
- phoenix/{datasets → experiments}/evaluators/llm_evaluators.py +9 -9
- phoenix/{datasets → experiments}/evaluators/utils.py +38 -141
- phoenix/{datasets/experiments.py → experiments/functions.py} +248 -182
- phoenix/experiments/types.py +722 -0
- phoenix/experiments/utils.py +9 -0
- phoenix/server/api/context.py +2 -0
- phoenix/server/api/dataloaders/__init__.py +2 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/routers/v1/__init__.py +1 -1
- phoenix/server/api/routers/v1/dataset_examples.py +10 -10
- phoenix/server/api/routers/v1/datasets.py +6 -6
- phoenix/server/api/routers/v1/evaluations.py +4 -11
- phoenix/server/api/routers/v1/experiment_evaluations.py +22 -23
- phoenix/server/api/routers/v1/experiment_runs.py +4 -16
- phoenix/server/api/routers/v1/experiments.py +5 -5
- phoenix/server/api/routers/v1/spans.py +6 -4
- phoenix/server/api/types/Experiment.py +7 -0
- phoenix/server/app.py +2 -0
- phoenix/server/static/index.js +648 -570
- phoenix/session/client.py +256 -85
- phoenix/trace/fixtures.py +6 -6
- phoenix/utilities/json.py +8 -8
- phoenix/version.py +1 -1
- phoenix/datasets/__init__.py +0 -0
- phoenix/datasets/evaluators/__init__.py +0 -18
- phoenix/datasets/types.py +0 -178
- {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/LICENSE +0 -0
- phoenix/{datasets → experiments}/tracing.py +0 -0
phoenix/datasets/types.py
DELETED
|
@@ -1,178 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from dataclasses import dataclass, field
|
|
4
|
-
from datetime import datetime
|
|
5
|
-
from enum import Enum
|
|
6
|
-
from typing import (
|
|
7
|
-
Any,
|
|
8
|
-
Awaitable,
|
|
9
|
-
Callable,
|
|
10
|
-
Dict,
|
|
11
|
-
List,
|
|
12
|
-
Mapping,
|
|
13
|
-
Optional,
|
|
14
|
-
Sequence,
|
|
15
|
-
Union,
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
from typing_extensions import TypeAlias
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class AnnotatorKind(Enum):
|
|
22
|
-
CODE = "CODE"
|
|
23
|
-
LLM = "LLM"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
JSONSerializable: TypeAlias = Optional[Union[Dict[str, Any], List[Any], str, int, float, bool]]
|
|
27
|
-
|
|
28
|
-
ExperimentId: TypeAlias = str
|
|
29
|
-
DatasetId: TypeAlias = str
|
|
30
|
-
DatasetVersionId: TypeAlias = str
|
|
31
|
-
ExampleId: TypeAlias = str
|
|
32
|
-
RepetitionNumber: TypeAlias = int
|
|
33
|
-
ExperimentRunId: TypeAlias = str
|
|
34
|
-
TraceId: TypeAlias = str
|
|
35
|
-
|
|
36
|
-
TaskOutput: TypeAlias = JSONSerializable
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
@dataclass(frozen=True)
|
|
40
|
-
class Example:
|
|
41
|
-
id: ExampleId
|
|
42
|
-
updated_at: datetime
|
|
43
|
-
input: Mapping[str, JSONSerializable]
|
|
44
|
-
output: Mapping[str, JSONSerializable]
|
|
45
|
-
metadata: Mapping[str, JSONSerializable] = field(default_factory=dict)
|
|
46
|
-
|
|
47
|
-
@classmethod
|
|
48
|
-
def from_dict(cls, obj: Mapping[str, Any]) -> Example:
|
|
49
|
-
return cls(
|
|
50
|
-
input=obj["input"],
|
|
51
|
-
output=obj["output"],
|
|
52
|
-
metadata=obj.get("metadata") or {},
|
|
53
|
-
id=obj["id"],
|
|
54
|
-
updated_at=obj["updated_at"],
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
@dataclass(frozen=True)
|
|
59
|
-
class Dataset:
|
|
60
|
-
id: DatasetId
|
|
61
|
-
version_id: DatasetVersionId
|
|
62
|
-
examples: Sequence[Example]
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
@dataclass(frozen=True)
|
|
66
|
-
class TestCase:
|
|
67
|
-
example: Example
|
|
68
|
-
repetition_number: RepetitionNumber
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
@dataclass(frozen=True)
|
|
72
|
-
class Experiment:
|
|
73
|
-
id: ExperimentId
|
|
74
|
-
dataset_id: DatasetId
|
|
75
|
-
dataset_version_id: DatasetVersionId
|
|
76
|
-
project_name: Optional[str] = None
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
@dataclass(frozen=True)
|
|
80
|
-
class ExperimentResult:
|
|
81
|
-
result: TaskOutput
|
|
82
|
-
|
|
83
|
-
@classmethod
|
|
84
|
-
def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> Optional[ExperimentResult]:
|
|
85
|
-
if not obj:
|
|
86
|
-
return None
|
|
87
|
-
return cls(result=obj["result"])
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
@dataclass(frozen=True)
|
|
91
|
-
class ExperimentRun:
|
|
92
|
-
start_time: datetime
|
|
93
|
-
end_time: datetime
|
|
94
|
-
experiment_id: ExperimentId
|
|
95
|
-
dataset_example_id: ExampleId
|
|
96
|
-
repetition_number: RepetitionNumber
|
|
97
|
-
output: Optional[ExperimentResult] = None
|
|
98
|
-
error: Optional[str] = None
|
|
99
|
-
id: Optional[ExperimentRunId] = None
|
|
100
|
-
trace_id: Optional[TraceId] = None
|
|
101
|
-
|
|
102
|
-
@classmethod
|
|
103
|
-
def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentRun:
|
|
104
|
-
return cls(
|
|
105
|
-
start_time=obj["start_time"],
|
|
106
|
-
end_time=obj["end_time"],
|
|
107
|
-
experiment_id=obj["experiment_id"],
|
|
108
|
-
dataset_example_id=obj["dataset_example_id"],
|
|
109
|
-
repetition_number=obj.get("repetition_number") or 1,
|
|
110
|
-
output=ExperimentResult.from_dict(obj["output"]),
|
|
111
|
-
error=obj.get("error"),
|
|
112
|
-
id=obj.get("id"),
|
|
113
|
-
trace_id=obj.get("trace_id"),
|
|
114
|
-
)
|
|
115
|
-
|
|
116
|
-
def __post_init__(self) -> None:
|
|
117
|
-
if bool(self.output) == bool(self.error):
|
|
118
|
-
ValueError("Must specify either result or error")
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
@dataclass(frozen=True)
|
|
122
|
-
class EvaluationResult:
|
|
123
|
-
score: Optional[float] = None
|
|
124
|
-
label: Optional[str] = None
|
|
125
|
-
explanation: Optional[str] = None
|
|
126
|
-
metadata: Mapping[str, JSONSerializable] = field(default_factory=dict)
|
|
127
|
-
|
|
128
|
-
@classmethod
|
|
129
|
-
def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> Optional[EvaluationResult]:
|
|
130
|
-
if not obj:
|
|
131
|
-
return None
|
|
132
|
-
return cls(
|
|
133
|
-
score=obj.get("score"),
|
|
134
|
-
label=obj.get("label"),
|
|
135
|
-
explanation=obj.get("explanation"),
|
|
136
|
-
metadata=obj.get("metadata") or {},
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
def __post_init__(self) -> None:
|
|
140
|
-
if self.score is None and not self.label and not self.explanation:
|
|
141
|
-
ValueError("Must specify one of score, label, or explanation")
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
@dataclass(frozen=True)
|
|
145
|
-
class ExperimentEvaluationRun:
|
|
146
|
-
experiment_run_id: ExperimentRunId
|
|
147
|
-
start_time: datetime
|
|
148
|
-
end_time: datetime
|
|
149
|
-
name: str
|
|
150
|
-
annotator_kind: str
|
|
151
|
-
error: Optional[str] = None
|
|
152
|
-
result: Optional[EvaluationResult] = None
|
|
153
|
-
id: Optional[str] = None
|
|
154
|
-
trace_id: Optional[TraceId] = None
|
|
155
|
-
|
|
156
|
-
@classmethod
|
|
157
|
-
def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentEvaluationRun:
|
|
158
|
-
return cls(
|
|
159
|
-
experiment_run_id=obj["experiment_run_id"],
|
|
160
|
-
start_time=obj["start_time"],
|
|
161
|
-
end_time=obj["end_time"],
|
|
162
|
-
name=obj["name"],
|
|
163
|
-
annotator_kind=obj["annotator_kind"],
|
|
164
|
-
error=obj.get("error"),
|
|
165
|
-
result=EvaluationResult.from_dict(obj.get("result")),
|
|
166
|
-
id=obj.get("id"),
|
|
167
|
-
trace_id=obj.get("trace_id"),
|
|
168
|
-
)
|
|
169
|
-
|
|
170
|
-
def __post_init__(self) -> None:
|
|
171
|
-
if bool(self.result) == bool(self.error):
|
|
172
|
-
ValueError("Must specify either result or error")
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
ExperimentTask: TypeAlias = Union[
|
|
176
|
-
Callable[[Example], TaskOutput],
|
|
177
|
-
Callable[[Example], Awaitable[TaskOutput]],
|
|
178
|
-
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|