arize-phoenix 4.4.4rc5__py3-none-any.whl → 4.4.4rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (42)
  1. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/METADATA +11 -5
  2. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/RECORD +39 -36
  3. phoenix/config.py +21 -0
  4. phoenix/datetime_utils.py +4 -0
  5. phoenix/db/insertion/evaluation.py +4 -4
  6. phoenix/db/insertion/helpers.py +4 -12
  7. phoenix/db/insertion/span.py +3 -3
  8. phoenix/db/models.py +1 -1
  9. phoenix/experiments/__init__.py +6 -0
  10. phoenix/experiments/evaluators/__init__.py +29 -0
  11. phoenix/experiments/evaluators/base.py +153 -0
  12. phoenix/{datasets → experiments}/evaluators/code_evaluators.py +7 -7
  13. phoenix/{datasets → experiments}/evaluators/llm_evaluators.py +9 -9
  14. phoenix/{datasets → experiments}/evaluators/utils.py +38 -141
  15. phoenix/{datasets/experiments.py → experiments/functions.py} +248 -182
  16. phoenix/experiments/types.py +722 -0
  17. phoenix/experiments/utils.py +9 -0
  18. phoenix/server/api/context.py +2 -0
  19. phoenix/server/api/dataloaders/__init__.py +2 -0
  20. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  21. phoenix/server/api/routers/v1/__init__.py +1 -1
  22. phoenix/server/api/routers/v1/dataset_examples.py +10 -10
  23. phoenix/server/api/routers/v1/datasets.py +6 -6
  24. phoenix/server/api/routers/v1/evaluations.py +4 -11
  25. phoenix/server/api/routers/v1/experiment_evaluations.py +22 -23
  26. phoenix/server/api/routers/v1/experiment_runs.py +4 -16
  27. phoenix/server/api/routers/v1/experiments.py +5 -5
  28. phoenix/server/api/routers/v1/spans.py +6 -4
  29. phoenix/server/api/types/Experiment.py +7 -0
  30. phoenix/server/app.py +2 -0
  31. phoenix/server/static/index.js +648 -570
  32. phoenix/session/client.py +256 -85
  33. phoenix/trace/fixtures.py +6 -6
  34. phoenix/utilities/json.py +8 -8
  35. phoenix/version.py +1 -1
  36. phoenix/datasets/__init__.py +0 -0
  37. phoenix/datasets/evaluators/__init__.py +0 -18
  38. phoenix/datasets/types.py +0 -178
  39. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/WHEEL +0 -0
  40. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/IP_NOTICE +0 -0
  41. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.4.4rc6.dist-info}/licenses/LICENSE +0 -0
  42. phoenix/{datasets → experiments}/tracing.py +0 -0
phoenix/datasets/types.py DELETED
@@ -1,178 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from dataclasses import dataclass, field
4
- from datetime import datetime
5
- from enum import Enum
6
- from typing import (
7
- Any,
8
- Awaitable,
9
- Callable,
10
- Dict,
11
- List,
12
- Mapping,
13
- Optional,
14
- Sequence,
15
- Union,
16
- )
17
-
18
- from typing_extensions import TypeAlias
19
-
20
-
21
- class AnnotatorKind(Enum):
22
- CODE = "CODE"
23
- LLM = "LLM"
24
-
25
-
26
- JSONSerializable: TypeAlias = Optional[Union[Dict[str, Any], List[Any], str, int, float, bool]]
27
-
28
- ExperimentId: TypeAlias = str
29
- DatasetId: TypeAlias = str
30
- DatasetVersionId: TypeAlias = str
31
- ExampleId: TypeAlias = str
32
- RepetitionNumber: TypeAlias = int
33
- ExperimentRunId: TypeAlias = str
34
- TraceId: TypeAlias = str
35
-
36
- TaskOutput: TypeAlias = JSONSerializable
37
-
38
-
39
- @dataclass(frozen=True)
40
- class Example:
41
- id: ExampleId
42
- updated_at: datetime
43
- input: Mapping[str, JSONSerializable]
44
- output: Mapping[str, JSONSerializable]
45
- metadata: Mapping[str, JSONSerializable] = field(default_factory=dict)
46
-
47
- @classmethod
48
- def from_dict(cls, obj: Mapping[str, Any]) -> Example:
49
- return cls(
50
- input=obj["input"],
51
- output=obj["output"],
52
- metadata=obj.get("metadata") or {},
53
- id=obj["id"],
54
- updated_at=obj["updated_at"],
55
- )
56
-
57
-
58
- @dataclass(frozen=True)
59
- class Dataset:
60
- id: DatasetId
61
- version_id: DatasetVersionId
62
- examples: Sequence[Example]
63
-
64
-
65
- @dataclass(frozen=True)
66
- class TestCase:
67
- example: Example
68
- repetition_number: RepetitionNumber
69
-
70
-
71
- @dataclass(frozen=True)
72
- class Experiment:
73
- id: ExperimentId
74
- dataset_id: DatasetId
75
- dataset_version_id: DatasetVersionId
76
- project_name: Optional[str] = None
77
-
78
-
79
- @dataclass(frozen=True)
80
- class ExperimentResult:
81
- result: TaskOutput
82
-
83
- @classmethod
84
- def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> Optional[ExperimentResult]:
85
- if not obj:
86
- return None
87
- return cls(result=obj["result"])
88
-
89
-
90
- @dataclass(frozen=True)
91
- class ExperimentRun:
92
- start_time: datetime
93
- end_time: datetime
94
- experiment_id: ExperimentId
95
- dataset_example_id: ExampleId
96
- repetition_number: RepetitionNumber
97
- output: Optional[ExperimentResult] = None
98
- error: Optional[str] = None
99
- id: Optional[ExperimentRunId] = None
100
- trace_id: Optional[TraceId] = None
101
-
102
- @classmethod
103
- def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentRun:
104
- return cls(
105
- start_time=obj["start_time"],
106
- end_time=obj["end_time"],
107
- experiment_id=obj["experiment_id"],
108
- dataset_example_id=obj["dataset_example_id"],
109
- repetition_number=obj.get("repetition_number") or 1,
110
- output=ExperimentResult.from_dict(obj["output"]),
111
- error=obj.get("error"),
112
- id=obj.get("id"),
113
- trace_id=obj.get("trace_id"),
114
- )
115
-
116
- def __post_init__(self) -> None:
117
- if bool(self.output) == bool(self.error):
118
- ValueError("Must specify either result or error")
119
-
120
-
121
- @dataclass(frozen=True)
122
- class EvaluationResult:
123
- score: Optional[float] = None
124
- label: Optional[str] = None
125
- explanation: Optional[str] = None
126
- metadata: Mapping[str, JSONSerializable] = field(default_factory=dict)
127
-
128
- @classmethod
129
- def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> Optional[EvaluationResult]:
130
- if not obj:
131
- return None
132
- return cls(
133
- score=obj.get("score"),
134
- label=obj.get("label"),
135
- explanation=obj.get("explanation"),
136
- metadata=obj.get("metadata") or {},
137
- )
138
-
139
- def __post_init__(self) -> None:
140
- if self.score is None and not self.label and not self.explanation:
141
- ValueError("Must specify one of score, label, or explanation")
142
-
143
-
144
- @dataclass(frozen=True)
145
- class ExperimentEvaluationRun:
146
- experiment_run_id: ExperimentRunId
147
- start_time: datetime
148
- end_time: datetime
149
- name: str
150
- annotator_kind: str
151
- error: Optional[str] = None
152
- result: Optional[EvaluationResult] = None
153
- id: Optional[str] = None
154
- trace_id: Optional[TraceId] = None
155
-
156
- @classmethod
157
- def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentEvaluationRun:
158
- return cls(
159
- experiment_run_id=obj["experiment_run_id"],
160
- start_time=obj["start_time"],
161
- end_time=obj["end_time"],
162
- name=obj["name"],
163
- annotator_kind=obj["annotator_kind"],
164
- error=obj.get("error"),
165
- result=EvaluationResult.from_dict(obj.get("result")),
166
- id=obj.get("id"),
167
- trace_id=obj.get("trace_id"),
168
- )
169
-
170
- def __post_init__(self) -> None:
171
- if bool(self.result) == bool(self.error):
172
- ValueError("Must specify either result or error")
173
-
174
-
175
- ExperimentTask: TypeAlias = Union[
176
- Callable[[Example], TaskOutput],
177
- Callable[[Example], Awaitable[TaskOutput]],
178
- ]
File without changes