arize-phoenix 4.5.0__py3-none-any.whl → 4.6.2__py3-none-any.whl
This diff compares the contents of two publicly available versions of the package as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
Potentially problematic release: this version of arize-phoenix might be problematic.
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/METADATA +16 -8
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/RECORD +122 -58
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/WHEEL +1 -1
- phoenix/__init__.py +0 -27
- phoenix/config.py +42 -7
- phoenix/core/model.py +25 -25
- phoenix/core/model_schema.py +64 -62
- phoenix/core/model_schema_adapter.py +27 -25
- phoenix/datetime_utils.py +4 -0
- phoenix/db/bulk_inserter.py +54 -14
- phoenix/db/insertion/dataset.py +237 -0
- phoenix/db/insertion/evaluation.py +10 -10
- phoenix/db/insertion/helpers.py +17 -14
- phoenix/db/insertion/span.py +3 -3
- phoenix/db/migrations/types.py +29 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +291 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +2 -28
- phoenix/db/models.py +236 -4
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +153 -0
- phoenix/experiments/evaluators/code_evaluators.py +99 -0
- phoenix/experiments/evaluators/llm_evaluators.py +244 -0
- phoenix/experiments/evaluators/utils.py +186 -0
- phoenix/experiments/functions.py +757 -0
- phoenix/experiments/tracing.py +85 -0
- phoenix/experiments/types.py +753 -0
- phoenix/experiments/utils.py +24 -0
- phoenix/inferences/fixtures.py +23 -23
- phoenix/inferences/inferences.py +7 -7
- phoenix/inferences/validation.py +1 -1
- phoenix/server/api/context.py +20 -0
- phoenix/server/api/dataloaders/__init__.py +20 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +100 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +43 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +85 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +43 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +49 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +2 -3
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/trace_row_ids.py +39 -0
- phoenix/server/api/helpers/dataset_helpers.py +179 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +9 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/mutations/__init__.py +13 -0
- phoenix/server/api/mutations/auth.py +11 -0
- phoenix/server/api/mutations/dataset_mutations.py +520 -0
- phoenix/server/api/mutations/experiment_mutations.py +65 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +17 -14
- phoenix/server/api/mutations/project_mutations.py +47 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +6 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +503 -0
- phoenix/server/api/routers/v1/__init__.py +77 -2
- phoenix/server/api/routers/v1/dataset_examples.py +178 -0
- phoenix/server/api/routers/v1/datasets.py +965 -0
- phoenix/server/api/routers/v1/evaluations.py +8 -13
- phoenix/server/api/routers/v1/experiment_evaluations.py +143 -0
- phoenix/server/api/routers/v1/experiment_runs.py +220 -0
- phoenix/server/api/routers/v1/experiments.py +302 -0
- phoenix/server/api/routers/v1/spans.py +9 -5
- phoenix/server/api/routers/v1/traces.py +1 -4
- phoenix/server/api/schema.py +2 -303
- phoenix/server/api/types/AnnotatorKind.py +10 -0
- phoenix/server/api/types/Cluster.py +19 -19
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/Dataset.py +282 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +30 -29
- phoenix/server/api/types/EmbeddingDimension.py +40 -34
- phoenix/server/api/types/Event.py +16 -16
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +147 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +19 -0
- phoenix/server/api/types/ExperimentRun.py +91 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +57 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/Model.py +43 -42
- phoenix/server/api/types/Project.py +26 -12
- phoenix/server/api/types/Span.py +79 -2
- phoenix/server/api/types/TimeSeries.py +6 -6
- phoenix/server/api/types/Trace.py +15 -4
- phoenix/server/api/types/UMAPPoints.py +1 -1
- phoenix/server/api/types/node.py +5 -111
- phoenix/server/api/types/pagination.py +10 -52
- phoenix/server/app.py +103 -49
- phoenix/server/main.py +49 -27
- phoenix/server/openapi/docs.py +3 -0
- phoenix/server/static/index.js +2300 -1294
- phoenix/server/templates/index.html +1 -0
- phoenix/services.py +15 -15
- phoenix/session/client.py +581 -22
- phoenix/session/session.py +47 -37
- phoenix/trace/exporter.py +14 -9
- phoenix/trace/fixtures.py +133 -7
- phoenix/trace/schemas.py +1 -2
- phoenix/trace/span_evaluations.py +3 -3
- phoenix/trace/trace_dataset.py +6 -6
- phoenix/utilities/json.py +61 -0
- phoenix/utilities/re.py +50 -0
- phoenix/version.py +1 -1
- phoenix/server/api/types/DatasetRole.py +0 -23
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/LICENSE +0 -0
- /phoenix/server/api/{helpers.py → helpers/__init__.py} +0 -0
phoenix/db/bulk_inserter.py
CHANGED

@@ -1,5 +1,6 @@
 import asyncio
 import logging
+from asyncio import Queue
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from itertools import islice
@@ -14,6 +15,7 @@ from typing import (
     Optional,
     Set,
     Tuple,
+    cast,
 )
 
 from cachetools import LRUCache
@@ -22,10 +24,11 @@ from typing_extensions import TypeAlias
 
 import phoenix.trace.v1 as pb
 from phoenix.db.insertion.evaluation import (
-    …
+    EvaluationInsertionEvent,
     InsertEvaluationError,
     insert_evaluation,
 )
+from phoenix.db.insertion.helpers import DataManipulation, DataManipulationEvent
 from phoenix.db.insertion.span import SpanInsertionEvent, insert_span
 from phoenix.server.api.dataloaders import CacheForDataLoaders
 from phoenix.trace.schemas import Span
@@ -46,23 +49,29 @@ class BulkInserter:
         db: Callable[[], AsyncContextManager[AsyncSession]],
         *,
         cache_for_dataloaders: Optional[CacheForDataLoaders] = None,
+        initial_batch_of_operations: Iterable[DataManipulation] = (),
         initial_batch_of_spans: Optional[Iterable[Tuple[Span, str]]] = None,
         initial_batch_of_evaluations: Optional[Iterable[pb.Evaluation]] = None,
         sleep: float = 0.1,
-        …
+        max_ops_per_transaction: int = 1000,
+        max_queue_size: int = 1000,
         enable_prometheus: bool = False,
     ) -> None:
         """
         :param db: A function to initiate a new database session.
         :param initial_batch_of_spans: Initial batch of spans to insert.
         :param sleep: The time to sleep between bulk insertions
-        :param …
-        …
+        :param max_ops_per_transaction: The maximum number of operations to dequeue from
+            the operations queue for each transaction.
+        :param max_queue_size: The maximum length of the operations queue.
+        :param enable_prometheus: Whether Prometheus is enabled.
         """
         self._db = db
         self._running = False
         self._sleep = sleep
-        self.…
+        self._max_ops_per_transaction = max_ops_per_transaction
+        self._operations: Optional[Queue[DataManipulation]] = None
+        self._max_queue_size = max_queue_size
         self._spans: List[Tuple[Span, str]] = (
             [] if initial_batch_of_spans is None else list(initial_batch_of_spans)
         )
@@ -81,27 +90,58 @@ class BulkInserter:
 
     async def __aenter__(
         self,
-    ) -> Tuple[…
+    ) -> Tuple[
+        Callable[[Span, str], Awaitable[None]],
+        Callable[[pb.Evaluation], Awaitable[None]],
+        Callable[[DataManipulation], None],
+    ]:
         self._running = True
+        self._operations = Queue(maxsize=self._max_queue_size)
         self._task = asyncio.create_task(self._bulk_insert())
-        return …
+        return (
+            self._queue_span,
+            self._queue_evaluation,
+            self._enqueue_operation,
+        )
 
     async def __aexit__(self, *args: Any) -> None:
+        self._operations = None
         self._running = False
 
+    def _enqueue_operation(self, operation: DataManipulation) -> None:
+        cast("Queue[DataManipulation]", self._operations).put_nowait(operation)
+
     async def _queue_span(self, span: Span, project_name: str) -> None:
         self._spans.append((span, project_name))
 
     async def _queue_evaluation(self, evaluation: pb.Evaluation) -> None:
         self._evaluations.append(evaluation)
 
+    async def _process_events(self, events: Iterable[Optional[DataManipulationEvent]]) -> None: ...
+
     async def _bulk_insert(self) -> None:
+        assert isinstance(self._operations, Queue)
         spans_buffer, evaluations_buffer = None, None
         # start first insert immediately if the inserter has not run recently
-        while self.…
-            if not (self._spans or self._evaluations):
+        while self._running or not self._operations.empty() or self._spans or self._evaluations:
+            if self._operations.empty() and not (self._spans or self._evaluations):
                 await asyncio.sleep(self._sleep)
                 continue
+            ops_remaining, events = self._max_ops_per_transaction, []
+            async with self._db() as session:
+                while ops_remaining and not self._operations.empty():
+                    ops_remaining -= 1
+                    op = await self._operations.get()
+                    try:
+                        async with session.begin_nested():
+                            events.append(await op(session))
+                    except Exception as e:
+                        if self._enable_prometheus:
+                            from phoenix.server.prometheus import BULK_LOADER_EXCEPTIONS
+
+                            BULK_LOADER_EXCEPTIONS.inc()
+                        logger.exception(str(e))
+            await self._process_events(events)
             # It's important to grab the buffers at the same time so there's
             # no race condition, since an eval insertion will fail if the span
             # it references doesn't exist. Grabbing the eval buffer later may
@@ -130,11 +170,11 @@ class BulkInserter:
 
     async def _insert_spans(self, spans: List[Tuple[Span, str]]) -> TransactionResult:
         transaction_result = TransactionResult()
-        for i in range(0, len(spans), self.…
+        for i in range(0, len(spans), self._max_ops_per_transaction):
             try:
                 start = perf_counter()
                 async with self._db() as session:
-                    for span, project_name in islice(spans, i, i + self.…
+                    for span, project_name in islice(spans, i, i + self._max_ops_per_transaction):
                         if self._enable_prometheus:
                             from phoenix.server.prometheus import BULK_LOADER_SPAN_INSERTIONS
 
@@ -169,16 +209,16 @@ class BulkInserter:
 
     async def _insert_evaluations(self, evaluations: List[pb.Evaluation]) -> TransactionResult:
         transaction_result = TransactionResult()
-        for i in range(0, len(evaluations), self.…
+        for i in range(0, len(evaluations), self._max_ops_per_transaction):
             try:
                 start = perf_counter()
                 async with self._db() as session:
-                    for evaluation in islice(evaluations, i, i + self.…
+                    for evaluation in islice(evaluations, i, i + self._max_ops_per_transaction):
                         if self._enable_prometheus:
                             from phoenix.server.prometheus import BULK_LOADER_EVALUATION_INSERTIONS
 
                             BULK_LOADER_EVALUATION_INSERTIONS.inc()
-                        result: Optional[…
+                        result: Optional[EvaluationInsertionEvent] = None
                         try:
                             async with session.begin_nested():
                                 result = await insert_evaluation(session, evaluation)
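A minimal sketch (not part of the diff) of how a caller might use the new operations queue: __aenter__ now returns a third callable that enqueues an arbitrary DataManipulation, i.e. an async function that receives an AsyncSession and optionally returns a DataManipulationEvent. The names my_operation and db_factory below are hypothetical.

    from typing import Optional

    from sqlalchemy.ext.asyncio import AsyncSession

    from phoenix.db.bulk_inserter import BulkInserter
    from phoenix.db.insertion.helpers import DataManipulationEvent


    async def my_operation(session: AsyncSession) -> Optional[DataManipulationEvent]:
        # any DML against the session; the inserter wraps it in session.begin_nested()
        return None


    async def main(db_factory) -> None:  # db_factory: Callable[[], AsyncContextManager[AsyncSession]]
        async with BulkInserter(db_factory) as (queue_span, queue_evaluation, enqueue_operation):
            # fire-and-forget: _bulk_insert drains the queue in batches of at
            # most max_ops_per_transaction operations per transaction
            enqueue_operation(my_operation)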
phoenix/db/insertion/dataset.py
ADDED

@@ -0,0 +1,237 @@
+import logging
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from itertools import chain
+from typing import (
+    Any,
+    Awaitable,
+    Dict,
+    FrozenSet,
+    Iterable,
+    Iterator,
+    Mapping,
+    Optional,
+    Union,
+    cast,
+)
+
+from sqlalchemy import insert, select
+from sqlalchemy.ext.asyncio import AsyncSession
+from typing_extensions import TypeAlias
+
+from phoenix.db import models
+from phoenix.db.insertion.helpers import DataManipulationEvent
+
+logger = logging.getLogger(__name__)
+
+DatasetId: TypeAlias = int
+DatasetVersionId: TypeAlias = int
+DatasetExampleId: TypeAlias = int
+DatasetExampleRevisionId: TypeAlias = int
+SpanRowId: TypeAlias = int
+
+
+@dataclass(frozen=True)
+class ExampleContent:
+    input: Dict[str, Any] = field(default_factory=dict)
+    output: Dict[str, Any] = field(default_factory=dict)
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+
+Examples: TypeAlias = Iterable[ExampleContent]
+
+
+@dataclass(frozen=True)
+class DatasetExampleAdditionEvent(DataManipulationEvent):
+    dataset_id: DatasetId
+
+
+async def insert_dataset(
+    session: AsyncSession,
+    name: str,
+    description: Optional[str] = None,
+    metadata: Optional[Mapping[str, Any]] = None,
+    created_at: Optional[datetime] = None,
+) -> DatasetId:
+    id_ = await session.scalar(
+        insert(models.Dataset)
+        .values(
+            name=name,
+            description=description,
+            metadata_=metadata,
+            created_at=created_at,
+        )
+        .returning(models.Dataset.id)
+    )
+    return cast(DatasetId, id_)
+
+
+async def insert_dataset_version(
+    session: AsyncSession,
+    dataset_id: DatasetId,
+    description: Optional[str] = None,
+    metadata: Optional[Mapping[str, Any]] = None,
+    created_at: Optional[datetime] = None,
+) -> DatasetVersionId:
+    id_ = await session.scalar(
+        insert(models.DatasetVersion)
+        .values(
+            dataset_id=dataset_id,
+            description=description,
+            metadata_=metadata,
+            created_at=created_at,
+        )
+        .returning(models.DatasetVersion.id)
+    )
+    return cast(DatasetVersionId, id_)
+
+
+async def insert_dataset_example(
+    session: AsyncSession,
+    dataset_id: DatasetId,
+    span_rowid: Optional[SpanRowId] = None,
+    created_at: Optional[datetime] = None,
+) -> DatasetExampleId:
+    id_ = await session.scalar(
+        insert(models.DatasetExample)
+        .values(
+            dataset_id=dataset_id,
+            span_rowid=span_rowid,
+            created_at=created_at,
+        )
+        .returning(models.DatasetExample.id)
+    )
+    return cast(DatasetExampleId, id_)
+
+
+class RevisionKind(Enum):
+    CREATE = "CREATE"
+    PATCH = "PATCH"
+    DELETE = "DELETE"
+
+    @classmethod
+    def _missing_(cls, v: Any) -> "RevisionKind":
+        if isinstance(v, str) and v and v.isascii() and not v.isupper():
+            return cls(v.upper())
+        raise ValueError(f"Invalid revision kind: {v}")
+
+
+async def insert_dataset_example_revision(
+    session: AsyncSession,
+    dataset_version_id: DatasetVersionId,
+    dataset_example_id: DatasetExampleId,
+    input: Mapping[str, Any],
+    output: Mapping[str, Any],
+    metadata: Optional[Mapping[str, Any]] = None,
+    revision_kind: RevisionKind = RevisionKind.CREATE,
+    created_at: Optional[datetime] = None,
+) -> DatasetExampleRevisionId:
+    id_ = await session.scalar(
+        insert(models.DatasetExampleRevision)
+        .values(
+            dataset_version_id=dataset_version_id,
+            dataset_example_id=dataset_example_id,
+            input=input,
+            output=output,
+            metadata_=metadata,
+            revision_kind=revision_kind.value,
+            created_at=created_at,
+        )
+        .returning(models.DatasetExampleRevision.id)
+    )
+    return cast(DatasetExampleRevisionId, id_)
+
+
+class DatasetAction(Enum):
+    CREATE = "create"
+    APPEND = "append"
+
+    @classmethod
+    def _missing_(cls, v: Any) -> "DatasetAction":
+        if isinstance(v, str) and v and v.isascii() and not v.islower():
+            return cls(v.lower())
+        raise ValueError(f"Invalid dataset action: {v}")
+
+
+async def add_dataset_examples(
+    session: AsyncSession,
+    name: str,
+    examples: Union[Examples, Awaitable[Examples]],
+    description: Optional[str] = None,
+    metadata: Optional[Mapping[str, Any]] = None,
+    action: DatasetAction = DatasetAction.CREATE,
+) -> Optional[DatasetExampleAdditionEvent]:
+    created_at = datetime.now(timezone.utc)
+    dataset_id: Optional[DatasetId] = None
+    if action is DatasetAction.APPEND and name:
+        dataset_id = await session.scalar(
+            select(models.Dataset.id).where(models.Dataset.name == name)
+        )
+    if action is DatasetAction.CREATE or dataset_id is None:
+        try:
+            dataset_id = await insert_dataset(
+                session=session,
+                name=name,
+                description=description,
+                metadata=metadata,
+                created_at=created_at,
+            )
+        except Exception:
+            logger.exception(f"Failed to insert dataset: {name=}")
+            raise
+    try:
+        dataset_version_id = await insert_dataset_version(
+            session=session,
+            dataset_id=dataset_id,
+            created_at=created_at,
+        )
+    except Exception:
+        logger.exception(f"Failed to insert dataset version for {dataset_id=}")
+        raise
+    for example in (await examples) if isinstance(examples, Awaitable) else examples:
+        try:
+            dataset_example_id = await insert_dataset_example(
+                session=session,
+                dataset_id=dataset_id,
+                created_at=created_at,
+            )
+        except Exception:
+            logger.exception(f"Failed to insert dataset example for {dataset_id=}")
+            raise
+        try:
+            await insert_dataset_example_revision(
+                session=session,
+                dataset_version_id=dataset_version_id,
+                dataset_example_id=dataset_example_id,
+                input=example.input,
+                output=example.output,
+                metadata=example.metadata,
+                created_at=created_at,
+            )
+        except Exception:
+            logger.exception(
+                f"Failed to insert dataset example revision for {dataset_version_id=}, "
+                f"{dataset_example_id=}"
+            )
+            raise
+    return DatasetExampleAdditionEvent(dataset_id=dataset_id)
+
+
+@dataclass(frozen=True)
+class DatasetKeys:
+    input: FrozenSet[str]
+    output: FrozenSet[str]
+    metadata: FrozenSet[str]
+
+    def __iter__(self) -> Iterator[str]:
+        yield from sorted(set(chain(self.input, self.output, self.metadata)))
+
+    def check_differences(self, column_headers_set: FrozenSet[str]) -> None:
+        for category, keys in (
+            ("input", self.input),
+            ("output", self.output),
+            ("metadata", self.metadata),
+        ):
+            if diff := keys.difference(column_headers_set):
+                raise ValueError(f"{category} keys not found in table column headers: {diff}")
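A usage sketch of the new helpers above (not from the diff); the session is assumed to come from the caller, and the dataset name and example contents here are made up:

    from phoenix.db.insertion.dataset import (
        DatasetAction,
        ExampleContent,
        add_dataset_examples,
    )


    async def seed(session) -> None:  # session: AsyncSession
        event = await add_dataset_examples(
            session,
            name="golden-questions",  # hypothetical dataset name
            examples=[
                ExampleContent(
                    input={"question": "What is Phoenix?"},
                    output={"answer": "An open-source LLM observability tool."},
                    metadata={"source": "docs"},
                ),
            ],
            action=DatasetAction.CREATE,
        )
        if event is not None:
            print(f"added examples to dataset {event.dataset_id}")

Note that DatasetAction("CREATE") also resolves: the _missing_ hook lowercases non-lowercase ASCII strings before lookup.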
phoenix/db/insertion/evaluation.py
CHANGED

@@ -6,7 +6,7 @@ from typing_extensions import assert_never
 
 from phoenix.db import models
 from phoenix.db.helpers import SupportedSQLDialect, num_docs_col
-from phoenix.db.insertion.helpers import OnConflict, …
+from phoenix.db.insertion.helpers import OnConflict, insert_on_conflict
 from phoenix.exceptions import PhoenixException
 from phoenix.trace import v1 as pb
 
@@ -15,24 +15,24 @@ class InsertEvaluationError(PhoenixException):
     pass
 
 
-class …
+class EvaluationInsertionEvent(NamedTuple):
     project_rowid: int
     evaluation_name: str
 
 
-class SpanEvaluationInsertionEvent(…
+class SpanEvaluationInsertionEvent(EvaluationInsertionEvent): ...
 
 
-class TraceEvaluationInsertionEvent(…
+class TraceEvaluationInsertionEvent(EvaluationInsertionEvent): ...
 
 
-class DocumentEvaluationInsertionEvent(…
+class DocumentEvaluationInsertionEvent(EvaluationInsertionEvent): ...
 
 
 async def insert_evaluation(
     session: AsyncSession,
     evaluation: pb.Evaluation,
-) -> Optional[…
+) -> Optional[EvaluationInsertionEvent]:
     evaluation_name = evaluation.name
     result = evaluation.result
     label = result.label.value if result.HasField("label") else None
@@ -91,7 +91,7 @@ async def _insert_trace_evaluation(
     set_.pop("metadata_")
     set_["metadata"] = values["metadata_"]  # `metadata` must match database
     await session.execute(
-        …
+        insert_on_conflict(
             dialect=dialect,
             table=models.TraceAnnotation,
             values=values,
@@ -139,7 +139,7 @@ async def _insert_span_evaluation(
     set_.pop("metadata_")
     set_["metadata"] = values["metadata_"]  # `metadata` must match database
     await session.execute(
-        …
+        insert_on_conflict(
            dialect=dialect,
            table=models.SpanAnnotation,
            values=values,
@@ -160,7 +160,7 @@ async def _insert_document_evaluation(
     label: Optional[str],
     score: Optional[float],
     explanation: Optional[str],
-) -> …
+) -> EvaluationInsertionEvent:
     dialect = SupportedSQLDialect(session.bind.dialect.name)
     stmt = (
         select(
@@ -196,7 +196,7 @@ async def _insert_document_evaluation(
     set_.pop("metadata_")
     set_["metadata"] = values["metadata_"]  # `metadata` must match database
     await session.execute(
-        …
+        insert_on_conflict(
             dialect=dialect,
             table=models.DocumentAnnotation,
             values=values,
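Since all three insertion events now subclass the EvaluationInsertionEvent NamedTuple, a caller of insert_evaluation can branch on the concrete type. A sketch, where the record wrapper is hypothetical:

    from phoenix.db.insertion.evaluation import (
        DocumentEvaluationInsertionEvent,
        SpanEvaluationInsertionEvent,
        TraceEvaluationInsertionEvent,
        insert_evaluation,
    )


    async def record(session, evaluation) -> None:
        event = await insert_evaluation(session, evaluation)
        if event is None:
            return  # nothing was inserted
        # every event carries the fields of the shared base NamedTuple
        kind = (
            "span" if isinstance(event, SpanEvaluationInsertionEvent)
            else "trace" if isinstance(event, TraceEvaluationInsertionEvent)
            else "document" if isinstance(event, DocumentEvaluationInsertionEvent)
            else "unknown"
        )
        print(f"{kind}: {event.evaluation_name} (project {event.project_rowid})")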
phoenix/db/insertion/helpers.py
CHANGED

@@ -1,39 +1,42 @@
+from abc import ABC
 from enum import Enum, auto
-from typing import Any, Mapping, Optional, Sequence
+from typing import Any, Awaitable, Callable, Mapping, Optional, Sequence
 
-from sqlalchemy import Insert…
+from sqlalchemy import Insert
 from sqlalchemy.dialects.postgresql import insert as insert_postgresql
 from sqlalchemy.dialects.sqlite import insert as insert_sqlite
-from …
+from sqlalchemy.ext.asyncio import AsyncSession
+from typing_extensions import TypeAlias, assert_never
 
 from phoenix.db.helpers import SupportedSQLDialect
 
 
+class DataManipulationEvent(ABC):
+    """
+    Execution of DML (Data Manipulation Language) statements.
+    """
+
+
+DataManipulation: TypeAlias = Callable[[AsyncSession], Awaitable[Optional[DataManipulationEvent]]]
+
+
 class OnConflict(Enum):
     DO_NOTHING = auto()
     DO_UPDATE = auto()
 
 
-def …
+def insert_on_conflict(
     dialect: SupportedSQLDialect,
     table: Any,
     values: Mapping[str, Any],
-    constraint: …
-    column_names: Sequence[str]…
+    constraint: str,
+    column_names: Sequence[str],
     on_conflict: OnConflict = OnConflict.DO_NOTHING,
     set_: Optional[Mapping[str, Any]] = None,
 ) -> Insert:
     """
     Dialect specific insertion statement using ON CONFLICT DO syntax.
     """
-    if bool(constraint) != bool(column_names):
-        raise ValueError(
-            "Both `constraint` and `column_names` must be provided or omitted at the same time."
-        )
-    if (dialect is SupportedSQLDialect.POSTGRESQL and constraint is None) or (
-        dialect is SupportedSQLDialect.SQLITE and not column_names
-    ):
-        return insert(table).values(values)
     if dialect is SupportedSQLDialect.POSTGRESQL:
         stmt_postgresql = insert_postgresql(table).values(values)
         if on_conflict is OnConflict.DO_NOTHING or not set_:
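An illustrative sketch of the renamed helper; the values and column names below are assumptions (the constraint name appears in phoenix/db/insertion/span.py further down):

    from phoenix.db import models
    from phoenix.db.helpers import SupportedSQLDialect
    from phoenix.db.insertion.helpers import OnConflict, insert_on_conflict

    stmt = insert_on_conflict(
        dialect=SupportedSQLDialect.SQLITE,
        table=models.Project,
        values={"name": "default"},
        constraint="uq_projects_name",  # assumption: consumed by the PostgreSQL branch
        column_names=("name",),         # assumption: consumed by the SQLite branch
        on_conflict=OnConflict.DO_NOTHING,
    )
    # stmt is a SQLAlchemy Insert; execute it as insert_span does, e.g.
    #     project_rowid = await session.scalar(stmt.returning(models.Project.id))

Note that constraint and column_names are now required: the 4.5.0 fallback to a plain INSERT when both were omitted has been removed.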
phoenix/db/insertion/span.py
CHANGED

@@ -7,7 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from phoenix.db import models
 from phoenix.db.helpers import SupportedSQLDialect
-from phoenix.db.insertion.helpers import OnConflict, …
+from phoenix.db.insertion.helpers import OnConflict, insert_on_conflict
 from phoenix.trace.attributes import get_attribute_value
 from phoenix.trace.schemas import Span, SpanStatusCode
 
@@ -27,7 +27,7 @@ async def insert_span(
 ) -> Optional[SpanInsertionEvent]:
     dialect = SupportedSQLDialect(session.bind.dialect.name)
     project_rowid = await session.scalar(
-        …
+        insert_on_conflict(
             dialect=dialect,
             table=models.Project,
             constraint="uq_projects_name",
@@ -87,7 +87,7 @@ async def insert_span(
     cumulative_llm_token_count_prompt += cast(int, accumulation[1] or 0)
     cumulative_llm_token_count_completion += cast(int, accumulation[2] or 0)
     span_rowid = await session.scalar(
-        …
+        insert_on_conflict(
             dialect=dialect,
             table=models.Span,
             constraint="uq_spans_span_id",
phoenix/db/migrations/types.py
ADDED

@@ -0,0 +1,29 @@
+from typing import Any
+
+from sqlalchemy import JSON
+from sqlalchemy.dialects import postgresql
+from sqlalchemy.ext.compiler import compiles
+
+
+class JSONB(JSON):
+    # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
+    __visit_name__ = "JSONB"
+
+
+@compiles(JSONB, "sqlite")  # type: ignore
+def _(*args: Any, **kwargs: Any) -> str:
+    # See https://docs.sqlalchemy.org/en/20/core/custom_types.html
+    return "JSONB"
+
+
+JSON_ = (
+    JSON()
+    .with_variant(
+        postgresql.JSONB(),  # type: ignore
+        "postgresql",
+    )
+    .with_variant(
+        JSONB(),
+        "sqlite",
+    )
+)
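A minimal sketch of how the JSON_ type above might be used in a table definition; the table and column names here are hypothetical. The type renders as JSONB on PostgreSQL, compiles to JSONB on SQLite via the @compiles hook, and falls back to plain JSON elsewhere.

    import sqlalchemy as sa

    from phoenix.db.migrations.types import JSON_

    metadata_obj = sa.MetaData()

    examples = sa.Table(
        "examples",  # hypothetical table
        metadata_obj,
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("metadata", JSON_),  # JSONB on PostgreSQL/SQLite, JSON elsewhere
    )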