arize-phoenix 11.32.1__py3-none-any.whl → 11.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-11.32.1.dist-info → arize_phoenix-11.34.0.dist-info}/METADATA +1 -1
- {arize_phoenix-11.32.1.dist-info → arize_phoenix-11.34.0.dist-info}/RECORD +57 -50
- phoenix/config.py +44 -0
- phoenix/db/bulk_inserter.py +111 -116
- phoenix/inferences/inferences.py +1 -2
- phoenix/server/api/context.py +20 -0
- phoenix/server/api/dataloaders/__init__.py +20 -0
- phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
- phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
- phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
- phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +59 -0
- phoenix/server/api/dataloaders/experiment_repetition_counts.py +39 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
- phoenix/server/api/helpers/playground_clients.py +4 -0
- phoenix/server/api/mutations/prompt_label_mutations.py +67 -58
- phoenix/server/api/queries.py +52 -37
- phoenix/server/api/routers/v1/documents.py +1 -1
- phoenix/server/api/routers/v1/evaluations.py +4 -4
- phoenix/server/api/routers/v1/experiment_runs.py +1 -1
- phoenix/server/api/routers/v1/experiments.py +1 -1
- phoenix/server/api/routers/v1/spans.py +2 -2
- phoenix/server/api/routers/v1/traces.py +18 -3
- phoenix/server/api/types/DatasetExample.py +49 -1
- phoenix/server/api/types/Experiment.py +12 -2
- phoenix/server/api/types/ExperimentComparison.py +3 -9
- phoenix/server/api/types/ExperimentRepeatedRunGroup.py +146 -0
- phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
- phoenix/server/api/types/ExperimentRun.py +12 -19
- phoenix/server/api/types/Prompt.py +11 -0
- phoenix/server/api/types/PromptLabel.py +2 -19
- phoenix/server/api/types/node.py +10 -0
- phoenix/server/app.py +78 -20
- phoenix/server/cost_tracking/model_cost_manifest.json +1 -1
- phoenix/server/daemons/span_cost_calculator.py +10 -8
- phoenix/server/grpc_server.py +9 -9
- phoenix/server/prometheus.py +30 -6
- phoenix/server/static/.vite/manifest.json +43 -43
- phoenix/server/static/assets/components-CdQiQTvs.js +5778 -0
- phoenix/server/static/assets/{index-D1FDMBMV.js → index-B1VuXYRI.js} +12 -21
- phoenix/server/static/assets/pages-CnfZ3RhB.js +9163 -0
- phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
- phoenix/server/static/assets/vendor-Cfrr9FCF.js +903 -0
- phoenix/server/static/assets/{vendor-arizeai-DsYDNOqt.js → vendor-arizeai-Dz0kN-lQ.js} +4 -4
- phoenix/server/static/assets/vendor-codemirror-ClqtONZQ.js +25 -0
- phoenix/server/static/assets/{vendor-recharts-BTHn5Y2R.js → vendor-recharts-D6kvOpmb.js} +2 -2
- phoenix/server/static/assets/{vendor-shiki-BAcocHFl.js → vendor-shiki-xSOiKxt0.js} +1 -1
- phoenix/session/client.py +55 -1
- phoenix/session/data_extractor.py +5 -0
- phoenix/session/evaluation.py +8 -4
- phoenix/session/session.py +13 -0
- phoenix/trace/projects.py +1 -2
- phoenix/version.py +1 -1
- phoenix/server/static/assets/components-Cs9c4Nxp.js +0 -5698
- phoenix/server/static/assets/pages-Cbj9SjBx.js +0 -8928
- phoenix/server/static/assets/vendor-CqDb5u4o.css +0 -1
- phoenix/server/static/assets/vendor-RdRDaQiR.js +0 -905
- phoenix/server/static/assets/vendor-codemirror-BzJDUbEx.js +0 -25
- phoenix/utilities/deprecation.py +0 -31
- {arize_phoenix-11.32.1.dist-info → arize_phoenix-11.34.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-11.32.1.dist-info → arize_phoenix-11.34.0.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-11.32.1.dist-info → arize_phoenix-11.34.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-11.32.1.dist-info → arize_phoenix-11.34.0.dist-info}/licenses/LICENSE +0 -0
phoenix/db/bulk_inserter.py
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import logging
|
|
3
3
|
from asyncio import Queue, as_completed
|
|
4
|
-
from collections
|
|
4
|
+
from collections import deque
|
|
5
5
|
from dataclasses import dataclass, field
|
|
6
6
|
from functools import singledispatchmethod
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
from typing import Any, Optional, cast
|
|
7
|
+
from time import perf_counter, time
|
|
8
|
+
from typing import Any, AsyncIterator, Awaitable, Callable, Iterable, Optional, cast
|
|
10
9
|
|
|
11
10
|
from openinference.semconv.trace import SpanAttributes
|
|
12
11
|
from typing_extensions import TypeAlias
|
|
13
12
|
|
|
14
13
|
import phoenix.trace.v1 as pb
|
|
14
|
+
from phoenix.db import models
|
|
15
15
|
from phoenix.db.insertion.constants import DEFAULT_RETRY_ALLOWANCE, DEFAULT_RETRY_DELAY_SEC
|
|
16
16
|
from phoenix.db.insertion.document_annotation import DocumentAnnotationQueueInserter
|
|
17
17
|
from phoenix.db.insertion.evaluation import (
|
|
@@ -29,15 +29,23 @@ from phoenix.db.insertion.trace_annotation import TraceAnnotationQueueInserter
|
|
|
29
29
|
from phoenix.db.insertion.types import Insertables, Precursors
|
|
30
30
|
from phoenix.server.daemons.span_cost_calculator import (
|
|
31
31
|
SpanCostCalculator,
|
|
32
|
-
SpanCostCalculatorQueueItem,
|
|
33
32
|
)
|
|
34
33
|
from phoenix.server.dml_event import DmlEvent, SpanInsertEvent
|
|
34
|
+
from phoenix.server.prometheus import (
|
|
35
|
+
BULK_LOADER_EVALUATION_INSERTIONS,
|
|
36
|
+
BULK_LOADER_EXCEPTIONS,
|
|
37
|
+
BULK_LOADER_LAST_ACTIVITY,
|
|
38
|
+
BULK_LOADER_SPAN_EXCEPTIONS,
|
|
39
|
+
BULK_LOADER_SPAN_INSERTION_TIME,
|
|
40
|
+
SPAN_QUEUE_SIZE,
|
|
41
|
+
)
|
|
35
42
|
from phoenix.server.types import CanPutItem, DbSessionFactory
|
|
36
43
|
from phoenix.trace.schemas import Span
|
|
37
44
|
|
|
38
45
|
logger = logging.getLogger(__name__)
|
|
39
46
|
|
|
40
47
|
ProjectRowId: TypeAlias = int
|
|
48
|
+
ProjectName: TypeAlias = str
|
|
41
49
|
|
|
42
50
|
|
|
43
51
|
@dataclass(frozen=True)
|
|
@@ -52,12 +60,12 @@ class BulkInserter:
|
|
|
52
60
|
*,
|
|
53
61
|
event_queue: CanPutItem[DmlEvent],
|
|
54
62
|
span_cost_calculator: SpanCostCalculator,
|
|
55
|
-
initial_batch_of_spans:
|
|
56
|
-
initial_batch_of_evaluations:
|
|
63
|
+
initial_batch_of_spans: Iterable[tuple[Span, ProjectName]] = (),
|
|
64
|
+
initial_batch_of_evaluations: Iterable[pb.Evaluation] = (),
|
|
57
65
|
sleep: float = 0.1,
|
|
58
66
|
max_ops_per_transaction: int = 1000,
|
|
59
67
|
max_queue_size: int = 1000,
|
|
60
|
-
|
|
68
|
+
max_spans_queue_size: Optional[int] = None,
|
|
61
69
|
retry_delay_sec: float = DEFAULT_RETRY_DELAY_SEC,
|
|
62
70
|
retry_allowance: int = DEFAULT_RETRY_ALLOWANCE,
|
|
63
71
|
) -> None:
|
|
@@ -68,7 +76,6 @@ class BulkInserter:
|
|
|
68
76
|
:param max_ops_per_transaction: The maximum number of operations to dequeue from
|
|
69
77
|
the operations queue for each transaction.
|
|
70
78
|
:param max_queue_size: The maximum length of the operations queue.
|
|
71
|
-
:param enable_prometheus: Whether Prometheus is enabled.
|
|
72
79
|
"""
|
|
73
80
|
self._db = db
|
|
74
81
|
self._running = False
|
|
@@ -76,20 +83,20 @@ class BulkInserter:
|
|
|
76
83
|
self._max_ops_per_transaction = max_ops_per_transaction
|
|
77
84
|
self._operations: Optional[Queue[DataManipulation]] = None
|
|
78
85
|
self._max_queue_size = max_queue_size
|
|
79
|
-
self.
|
|
80
|
-
|
|
81
|
-
)
|
|
82
|
-
self._evaluations: list[pb.Evaluation] = (
|
|
83
|
-
[] if initial_batch_of_evaluations is None else list(initial_batch_of_evaluations)
|
|
84
|
-
)
|
|
86
|
+
self._max_spans_queue_size = max_spans_queue_size
|
|
87
|
+
self._spans: deque[tuple[Span, ProjectName]] = deque(initial_batch_of_spans)
|
|
88
|
+
self._evaluations: deque[pb.Evaluation] = deque(initial_batch_of_evaluations)
|
|
85
89
|
self._task: Optional[asyncio.Task[None]] = None
|
|
86
90
|
self._event_queue = event_queue
|
|
87
|
-
self._enable_prometheus = enable_prometheus
|
|
88
91
|
self._retry_delay_sec = retry_delay_sec
|
|
89
92
|
self._retry_allowance = retry_allowance
|
|
90
93
|
self._queue_inserters = _QueueInserters(db, self._retry_delay_sec, self._retry_allowance)
|
|
91
94
|
self._span_cost_calculator = span_cost_calculator
|
|
92
95
|
|
|
96
|
+
@property
|
|
97
|
+
def is_full(self) -> bool:
|
|
98
|
+
return bool(self._max_spans_queue_size and self._max_spans_queue_size <= len(self._spans))
|
|
99
|
+
|
|
93
100
|
async def __aenter__(
|
|
94
101
|
self,
|
|
95
102
|
) -> tuple[
|
|
@@ -102,9 +109,9 @@ class BulkInserter:
|
|
|
102
109
|
self._operations = Queue(maxsize=self._max_queue_size)
|
|
103
110
|
self._task = asyncio.create_task(self._bulk_insert())
|
|
104
111
|
return (
|
|
105
|
-
self.
|
|
106
|
-
self.
|
|
107
|
-
self.
|
|
112
|
+
self._enqueue_annotations,
|
|
113
|
+
self._enqueue_span,
|
|
114
|
+
self._enqueue_evaluation,
|
|
108
115
|
self._enqueue_operation,
|
|
109
116
|
)
|
|
110
117
|
|
|
@@ -114,23 +121,22 @@ class BulkInserter:
|
|
|
114
121
|
self._task.cancel()
|
|
115
122
|
self._task = None
|
|
116
123
|
|
|
117
|
-
async def
|
|
124
|
+
async def _enqueue_annotations(self, *items: Any) -> None:
|
|
118
125
|
await self._queue_inserters.enqueue(*items)
|
|
119
126
|
|
|
120
127
|
def _enqueue_operation(self, operation: DataManipulation) -> None:
|
|
121
128
|
cast("Queue[DataManipulation]", self._operations).put_nowait(operation)
|
|
122
129
|
|
|
123
|
-
async def
|
|
130
|
+
async def _enqueue_span(self, span: Span, project_name: str) -> None:
|
|
124
131
|
self._spans.append((span, project_name))
|
|
125
132
|
|
|
126
|
-
async def
|
|
133
|
+
async def _enqueue_evaluation(self, evaluation: pb.Evaluation) -> None:
|
|
127
134
|
self._evaluations.append(evaluation)
|
|
128
135
|
|
|
129
136
|
async def _process_events(self, events: Iterable[Optional[DataManipulationEvent]]) -> None: ...
|
|
130
137
|
|
|
131
138
|
async def _bulk_insert(self) -> None:
|
|
132
139
|
assert isinstance(self._operations, Queue)
|
|
133
|
-
spans_buffer, evaluations_buffer = None, None
|
|
134
140
|
# start first insert immediately if the inserter has not run recently
|
|
135
141
|
while (
|
|
136
142
|
self._running
|
|
@@ -139,6 +145,8 @@ class BulkInserter:
|
|
|
139
145
|
or self._spans
|
|
140
146
|
or self._evaluations
|
|
141
147
|
):
|
|
148
|
+
BULK_LOADER_LAST_ACTIVITY.set(time())
|
|
149
|
+
SPAN_QUEUE_SIZE.set(len(self._spans))
|
|
142
150
|
if (
|
|
143
151
|
self._queue_inserters.empty
|
|
144
152
|
and self._operations.empty()
|
|
@@ -156,113 +164,100 @@ class BulkInserter:
|
|
|
156
164
|
async with session.begin_nested():
|
|
157
165
|
await op(session)
|
|
158
166
|
except Exception as e:
|
|
159
|
-
|
|
160
|
-
from phoenix.server.prometheus import BULK_LOADER_EXCEPTIONS
|
|
161
|
-
|
|
162
|
-
BULK_LOADER_EXCEPTIONS.inc()
|
|
167
|
+
BULK_LOADER_EXCEPTIONS.inc()
|
|
163
168
|
logger.exception(str(e))
|
|
164
169
|
# It's important to grab the buffers at the same time so there's
|
|
165
170
|
# no race condition, since an eval insertion will fail if the span
|
|
166
171
|
# it references doesn't exist. Grabbing the eval buffer later may
|
|
167
172
|
# include an eval whose span is in the queue but missed being
|
|
168
173
|
# included in the span buffer that was grabbed previously.
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
self._spans = []
|
|
172
|
-
if self._evaluations:
|
|
173
|
-
evaluations_buffer = self._evaluations
|
|
174
|
-
self._evaluations = []
|
|
174
|
+
num_spans_to_insert = min(self._max_ops_per_transaction, len(self._spans))
|
|
175
|
+
num_evals_to_insert = min(self._max_ops_per_transaction, len(self._evaluations))
|
|
175
176
|
# Spans should be inserted before the evaluations, since an evaluation
|
|
176
177
|
# insertion will fail if the span it references doesn't exist.
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
spans_buffer = None
|
|
180
|
-
if evaluations_buffer:
|
|
181
|
-
await self._insert_evaluations(evaluations_buffer)
|
|
182
|
-
evaluations_buffer = None
|
|
178
|
+
await self._insert_spans(num_spans_to_insert)
|
|
179
|
+
await self._insert_evaluations(num_evals_to_insert)
|
|
183
180
|
async for event in self._queue_inserters.insert():
|
|
184
181
|
self._event_queue.put(event)
|
|
185
182
|
await asyncio.sleep(self._sleep)
|
|
186
183
|
|
|
187
|
-
async def _insert_spans(self,
|
|
184
|
+
async def _insert_spans(self, num_spans_to_insert: int) -> None:
|
|
185
|
+
if not num_spans_to_insert or not self._spans:
|
|
186
|
+
return
|
|
188
187
|
project_ids = set()
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
if
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
async with self._db() as session:
|
|
242
|
-
for evaluation in islice(evaluations, i, i + self._max_ops_per_transaction):
|
|
243
|
-
if self._enable_prometheus:
|
|
244
|
-
from phoenix.server.prometheus import BULK_LOADER_EVALUATION_INSERTIONS
|
|
245
|
-
|
|
246
|
-
BULK_LOADER_EVALUATION_INSERTIONS.inc()
|
|
247
|
-
try:
|
|
248
|
-
async with session.begin_nested():
|
|
249
|
-
await insert_evaluation(session, evaluation)
|
|
250
|
-
except InsertEvaluationError as error:
|
|
251
|
-
if self._enable_prometheus:
|
|
252
|
-
from phoenix.server.prometheus import BULK_LOADER_EXCEPTIONS
|
|
253
|
-
|
|
254
|
-
BULK_LOADER_EXCEPTIONS.inc()
|
|
255
|
-
logger.exception(f"Failed to insert evaluation: {str(error)}")
|
|
256
|
-
if self._enable_prometheus:
|
|
257
|
-
from phoenix.server.prometheus import BULK_LOADER_INSERTION_TIME
|
|
258
|
-
|
|
259
|
-
BULK_LOADER_INSERTION_TIME.observe(perf_counter() - start)
|
|
260
|
-
except Exception:
|
|
261
|
-
if self._enable_prometheus:
|
|
262
|
-
from phoenix.server.prometheus import BULK_LOADER_EXCEPTIONS
|
|
188
|
+
span_costs: list[models.SpanCost] = []
|
|
189
|
+
try:
|
|
190
|
+
start = perf_counter()
|
|
191
|
+
async with self._db() as session:
|
|
192
|
+
while num_spans_to_insert > 0:
|
|
193
|
+
num_spans_to_insert -= 1
|
|
194
|
+
if not self._spans:
|
|
195
|
+
break
|
|
196
|
+
span, project_name = self._spans.popleft()
|
|
197
|
+
result: Optional[SpanInsertionEvent] = None
|
|
198
|
+
try:
|
|
199
|
+
async with session.begin_nested():
|
|
200
|
+
result = await insert_span(session, span, project_name)
|
|
201
|
+
except Exception:
|
|
202
|
+
BULK_LOADER_SPAN_EXCEPTIONS.inc()
|
|
203
|
+
logger.exception(
|
|
204
|
+
f"Failed to insert span with span_id={span.context.span_id}"
|
|
205
|
+
)
|
|
206
|
+
if result is None:
|
|
207
|
+
continue
|
|
208
|
+
project_ids.add(result.project_rowid)
|
|
209
|
+
try:
|
|
210
|
+
if not should_calculate_span_cost(span.attributes):
|
|
211
|
+
continue
|
|
212
|
+
span_cost = self._span_cost_calculator.calculate_cost(
|
|
213
|
+
span.start_time,
|
|
214
|
+
span.attributes,
|
|
215
|
+
)
|
|
216
|
+
except Exception:
|
|
217
|
+
logger.exception(
|
|
218
|
+
f"Failed to calculate span cost for span with "
|
|
219
|
+
f"span_id={span.context.span_id}"
|
|
220
|
+
)
|
|
221
|
+
else:
|
|
222
|
+
if span_cost is None:
|
|
223
|
+
continue
|
|
224
|
+
span_cost.span_rowid = result.span_rowid
|
|
225
|
+
span_cost.trace_rowid = result.trace_rowid
|
|
226
|
+
span_costs.append(span_cost)
|
|
227
|
+
BULK_LOADER_SPAN_INSERTION_TIME.observe(perf_counter() - start)
|
|
228
|
+
except Exception:
|
|
229
|
+
BULK_LOADER_SPAN_EXCEPTIONS.inc()
|
|
230
|
+
logger.exception("Failed to insert spans")
|
|
231
|
+
if project_ids:
|
|
232
|
+
self._event_queue.put(SpanInsertEvent(tuple(project_ids)))
|
|
233
|
+
if not span_costs:
|
|
234
|
+
return
|
|
235
|
+
try:
|
|
236
|
+
async with self._db() as session:
|
|
237
|
+
session.add_all(span_costs)
|
|
238
|
+
except Exception:
|
|
239
|
+
logger.exception("Failed to insert span costs")
|
|
263
240
|
|
|
264
|
-
|
|
265
|
-
|
|
241
|
+
async def _insert_evaluations(self, num_evals_to_insert: int) -> None:
|
|
242
|
+
if not num_evals_to_insert or not self._evaluations:
|
|
243
|
+
return
|
|
244
|
+
try:
|
|
245
|
+
async with self._db() as session:
|
|
246
|
+
while num_evals_to_insert > 0:
|
|
247
|
+
num_evals_to_insert -= 1
|
|
248
|
+
if not self._evaluations:
|
|
249
|
+
break
|
|
250
|
+
evaluation = self._evaluations.popleft()
|
|
251
|
+
BULK_LOADER_EVALUATION_INSERTIONS.inc()
|
|
252
|
+
try:
|
|
253
|
+
async with session.begin_nested():
|
|
254
|
+
await insert_evaluation(session, evaluation)
|
|
255
|
+
except InsertEvaluationError as error:
|
|
256
|
+
BULK_LOADER_EXCEPTIONS.inc()
|
|
257
|
+
logger.exception(f"Failed to insert evaluation: {str(error)}")
|
|
258
|
+
except Exception:
|
|
259
|
+
BULK_LOADER_EXCEPTIONS.inc()
|
|
260
|
+
logger.exception("Failed to insert evaluations")
|
|
266
261
|
|
|
267
262
|
|
|
268
263
|
class _QueueInserters:
|
phoenix/inferences/inferences.py
CHANGED
|
@@ -13,11 +13,10 @@ from pandas import DataFrame, Series, Timestamp, read_parquet
|
|
|
13
13
|
from pandas.api.types import (
|
|
14
14
|
is_numeric_dtype,
|
|
15
15
|
)
|
|
16
|
-
from typing_extensions import TypeAlias
|
|
16
|
+
from typing_extensions import TypeAlias, deprecated
|
|
17
17
|
|
|
18
18
|
from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX, INFERENCES_DIR
|
|
19
19
|
from phoenix.datetime_utils import normalize_timestamps
|
|
20
|
-
from phoenix.utilities.deprecation import deprecated
|
|
21
20
|
|
|
22
21
|
from . import errors as err
|
|
23
22
|
from .schema import (
|
phoenix/server/api/context.py
CHANGED
|
@@ -17,15 +17,20 @@ from phoenix.db import models
|
|
|
17
17
|
from phoenix.server.api.dataloaders import (
|
|
18
18
|
AnnotationConfigsByProjectDataLoader,
|
|
19
19
|
AnnotationSummaryDataLoader,
|
|
20
|
+
AverageExperimentRepeatedRunGroupLatencyDataLoader,
|
|
20
21
|
AverageExperimentRunLatencyDataLoader,
|
|
21
22
|
CacheForDataLoaders,
|
|
22
23
|
DatasetExampleRevisionsDataLoader,
|
|
24
|
+
DatasetExamplesAndVersionsByExperimentRunDataLoader,
|
|
23
25
|
DatasetExampleSpansDataLoader,
|
|
24
26
|
DocumentEvaluationsDataLoader,
|
|
25
27
|
DocumentEvaluationSummaryDataLoader,
|
|
26
28
|
DocumentRetrievalMetricsDataLoader,
|
|
27
29
|
ExperimentAnnotationSummaryDataLoader,
|
|
28
30
|
ExperimentErrorRatesDataLoader,
|
|
31
|
+
ExperimentRepeatedRunGroupAnnotationSummariesDataLoader,
|
|
32
|
+
ExperimentRepeatedRunGroupsDataLoader,
|
|
33
|
+
ExperimentRepetitionCountsDataLoader,
|
|
29
34
|
ExperimentRunAnnotations,
|
|
30
35
|
ExperimentRunCountsDataLoader,
|
|
31
36
|
ExperimentSequenceNumberDataLoader,
|
|
@@ -52,6 +57,7 @@ from phoenix.server.api.dataloaders import (
|
|
|
52
57
|
SpanCostDetailSummaryEntriesBySpanDataLoader,
|
|
53
58
|
SpanCostDetailSummaryEntriesByTraceDataLoader,
|
|
54
59
|
SpanCostSummaryByExperimentDataLoader,
|
|
60
|
+
SpanCostSummaryByExperimentRepeatedRunGroupDataLoader,
|
|
55
61
|
SpanCostSummaryByExperimentRunDataLoader,
|
|
56
62
|
SpanCostSummaryByGenerativeModelDataLoader,
|
|
57
63
|
SpanCostSummaryByProjectDataLoader,
|
|
@@ -85,14 +91,25 @@ from phoenix.server.types import (
|
|
|
85
91
|
class DataLoaders:
|
|
86
92
|
annotation_configs_by_project: AnnotationConfigsByProjectDataLoader
|
|
87
93
|
annotation_summaries: AnnotationSummaryDataLoader
|
|
94
|
+
average_experiment_repeated_run_group_latency: (
|
|
95
|
+
AverageExperimentRepeatedRunGroupLatencyDataLoader
|
|
96
|
+
)
|
|
88
97
|
average_experiment_run_latency: AverageExperimentRunLatencyDataLoader
|
|
89
98
|
dataset_example_revisions: DatasetExampleRevisionsDataLoader
|
|
90
99
|
dataset_example_spans: DatasetExampleSpansDataLoader
|
|
100
|
+
dataset_examples_and_versions_by_experiment_run: (
|
|
101
|
+
DatasetExamplesAndVersionsByExperimentRunDataLoader
|
|
102
|
+
)
|
|
91
103
|
document_evaluation_summaries: DocumentEvaluationSummaryDataLoader
|
|
92
104
|
document_evaluations: DocumentEvaluationsDataLoader
|
|
93
105
|
document_retrieval_metrics: DocumentRetrievalMetricsDataLoader
|
|
94
106
|
experiment_annotation_summaries: ExperimentAnnotationSummaryDataLoader
|
|
95
107
|
experiment_error_rates: ExperimentErrorRatesDataLoader
|
|
108
|
+
experiment_repeated_run_group_annotation_summaries: (
|
|
109
|
+
ExperimentRepeatedRunGroupAnnotationSummariesDataLoader
|
|
110
|
+
)
|
|
111
|
+
experiment_repeated_run_groups: ExperimentRepeatedRunGroupsDataLoader
|
|
112
|
+
experiment_repetition_counts: ExperimentRepetitionCountsDataLoader
|
|
96
113
|
experiment_run_annotations: ExperimentRunAnnotations
|
|
97
114
|
experiment_run_counts: ExperimentRunCountsDataLoader
|
|
98
115
|
experiment_sequence_number: ExperimentSequenceNumberDataLoader
|
|
@@ -128,6 +145,9 @@ class DataLoaders:
|
|
|
128
145
|
span_cost_details_by_span_cost: SpanCostDetailsBySpanCostDataLoader
|
|
129
146
|
span_cost_fields: TableFieldsDataLoader
|
|
130
147
|
span_cost_summary_by_experiment: SpanCostSummaryByExperimentDataLoader
|
|
148
|
+
span_cost_summary_by_experiment_repeated_run_group: (
|
|
149
|
+
SpanCostSummaryByExperimentRepeatedRunGroupDataLoader
|
|
150
|
+
)
|
|
131
151
|
span_cost_summary_by_experiment_run: SpanCostSummaryByExperimentRunDataLoader
|
|
132
152
|
span_cost_summary_by_generative_model: SpanCostSummaryByGenerativeModelDataLoader
|
|
133
153
|
span_cost_summary_by_project: SpanCostSummaryByProjectDataLoader
|
|
@@ -6,9 +6,15 @@ from phoenix.server.api.dataloaders.span_cost_detail_summary_entries_by_project_
|
|
|
6
6
|
|
|
7
7
|
from .annotation_configs_by_project import AnnotationConfigsByProjectDataLoader
|
|
8
8
|
from .annotation_summaries import AnnotationSummaryCache, AnnotationSummaryDataLoader
|
|
9
|
+
from .average_experiment_repeated_run_group_latency import (
|
|
10
|
+
AverageExperimentRepeatedRunGroupLatencyDataLoader,
|
|
11
|
+
)
|
|
9
12
|
from .average_experiment_run_latency import AverageExperimentRunLatencyDataLoader
|
|
10
13
|
from .dataset_example_revisions import DatasetExampleRevisionsDataLoader
|
|
11
14
|
from .dataset_example_spans import DatasetExampleSpansDataLoader
|
|
15
|
+
from .dataset_examples_and_versions_by_experiment_run import (
|
|
16
|
+
DatasetExamplesAndVersionsByExperimentRunDataLoader,
|
|
17
|
+
)
|
|
12
18
|
from .document_evaluation_summaries import (
|
|
13
19
|
DocumentEvaluationSummaryCache,
|
|
14
20
|
DocumentEvaluationSummaryDataLoader,
|
|
@@ -17,6 +23,11 @@ from .document_evaluations import DocumentEvaluationsDataLoader
|
|
|
17
23
|
from .document_retrieval_metrics import DocumentRetrievalMetricsDataLoader
|
|
18
24
|
from .experiment_annotation_summaries import ExperimentAnnotationSummaryDataLoader
|
|
19
25
|
from .experiment_error_rates import ExperimentErrorRatesDataLoader
|
|
26
|
+
from .experiment_repeated_run_group_annotation_summaries import (
|
|
27
|
+
ExperimentRepeatedRunGroupAnnotationSummariesDataLoader,
|
|
28
|
+
)
|
|
29
|
+
from .experiment_repeated_run_groups import ExperimentRepeatedRunGroupsDataLoader
|
|
30
|
+
from .experiment_repetition_counts import ExperimentRepetitionCountsDataLoader
|
|
20
31
|
from .experiment_run_annotations import ExperimentRunAnnotations
|
|
21
32
|
from .experiment_run_counts import ExperimentRunCountsDataLoader
|
|
22
33
|
from .experiment_sequence_number import ExperimentSequenceNumberDataLoader
|
|
@@ -44,6 +55,9 @@ from .span_cost_detail_summary_entries_by_span import SpanCostDetailSummaryEntri
|
|
|
44
55
|
from .span_cost_detail_summary_entries_by_trace import SpanCostDetailSummaryEntriesByTraceDataLoader
|
|
45
56
|
from .span_cost_details_by_span_cost import SpanCostDetailsBySpanCostDataLoader
|
|
46
57
|
from .span_cost_summary_by_experiment import SpanCostSummaryByExperimentDataLoader
|
|
58
|
+
from .span_cost_summary_by_experiment_repeated_run_group import (
|
|
59
|
+
SpanCostSummaryByExperimentRepeatedRunGroupDataLoader,
|
|
60
|
+
)
|
|
47
61
|
from .span_cost_summary_by_experiment_run import SpanCostSummaryByExperimentRunDataLoader
|
|
48
62
|
from .span_cost_summary_by_generative_model import SpanCostSummaryByGenerativeModelDataLoader
|
|
49
63
|
from .span_cost_summary_by_project import SpanCostSummaryByProjectDataLoader, SpanCostSummaryCache
|
|
@@ -64,15 +78,20 @@ from .users import UsersDataLoader
|
|
|
64
78
|
__all__ = [
|
|
65
79
|
"AnnotationConfigsByProjectDataLoader",
|
|
66
80
|
"AnnotationSummaryDataLoader",
|
|
81
|
+
"AverageExperimentRepeatedRunGroupLatencyDataLoader",
|
|
67
82
|
"AverageExperimentRunLatencyDataLoader",
|
|
68
83
|
"CacheForDataLoaders",
|
|
69
84
|
"DatasetExampleRevisionsDataLoader",
|
|
70
85
|
"DatasetExampleSpansDataLoader",
|
|
86
|
+
"DatasetExamplesAndVersionsByExperimentRunDataLoader",
|
|
71
87
|
"DocumentEvaluationSummaryDataLoader",
|
|
72
88
|
"DocumentEvaluationsDataLoader",
|
|
73
89
|
"DocumentRetrievalMetricsDataLoader",
|
|
74
90
|
"ExperimentAnnotationSummaryDataLoader",
|
|
75
91
|
"ExperimentErrorRatesDataLoader",
|
|
92
|
+
"ExperimentRepeatedRunGroupsDataLoader",
|
|
93
|
+
"ExperimentRepeatedRunGroupAnnotationSummariesDataLoader",
|
|
94
|
+
"ExperimentRepetitionCountsDataLoader",
|
|
76
95
|
"ExperimentRunAnnotations",
|
|
77
96
|
"ExperimentRunCountsDataLoader",
|
|
78
97
|
"ExperimentSequenceNumberDataLoader",
|
|
@@ -99,6 +118,7 @@ __all__ = [
|
|
|
99
118
|
"SpanCostDetailSummaryEntriesByTraceDataLoader",
|
|
100
119
|
"SpanCostDetailsBySpanCostDataLoader",
|
|
101
120
|
"SpanCostSummaryByExperimentDataLoader",
|
|
121
|
+
"SpanCostSummaryByExperimentRepeatedRunGroupDataLoader",
|
|
102
122
|
"SpanCostSummaryByExperimentRunDataLoader",
|
|
103
123
|
"SpanCostSummaryByGenerativeModelDataLoader",
|
|
104
124
|
"SpanCostSummaryByProjectDataLoader",
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from sqlalchemy import func, select, tuple_
|
|
4
|
+
from strawberry.dataloader import DataLoader
|
|
5
|
+
from typing_extensions import TypeAlias
|
|
6
|
+
|
|
7
|
+
from phoenix.db import models
|
|
8
|
+
from phoenix.server.types import DbSessionFactory
|
|
9
|
+
|
|
10
|
+
ExperimentID: TypeAlias = int
|
|
11
|
+
DatasetExampleID: TypeAlias = int
|
|
12
|
+
RunLatency: TypeAlias = float
|
|
13
|
+
Key: TypeAlias = tuple[ExperimentID, DatasetExampleID]
|
|
14
|
+
Result: TypeAlias = Optional[RunLatency]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AverageExperimentRepeatedRunGroupLatencyDataLoader(DataLoader[Key, Result]):
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
db: DbSessionFactory,
|
|
21
|
+
) -> None:
|
|
22
|
+
super().__init__(load_fn=self._load_fn)
|
|
23
|
+
self._db = db
|
|
24
|
+
|
|
25
|
+
async def _load_fn(self, keys: list[Key]) -> list[Result]:
|
|
26
|
+
average_latency_query = (
|
|
27
|
+
select(
|
|
28
|
+
models.ExperimentRun.experiment_id.label("experiment_id"),
|
|
29
|
+
models.ExperimentRun.dataset_example_id.label("example_id"),
|
|
30
|
+
func.avg(models.ExperimentRun.latency_ms).label("average_repetition_latency_ms"),
|
|
31
|
+
)
|
|
32
|
+
.select_from(models.ExperimentRun)
|
|
33
|
+
.where(
|
|
34
|
+
tuple_(
|
|
35
|
+
models.ExperimentRun.experiment_id, models.ExperimentRun.dataset_example_id
|
|
36
|
+
).in_(set(keys))
|
|
37
|
+
)
|
|
38
|
+
.group_by(models.ExperimentRun.experiment_id, models.ExperimentRun.dataset_example_id)
|
|
39
|
+
)
|
|
40
|
+
async with self._db() as session:
|
|
41
|
+
average_run_latencies_ms = {
|
|
42
|
+
(experiment_id, example_id): average_run_latency_ms
|
|
43
|
+
async for experiment_id, example_id, average_run_latency_ms in await session.stream(
|
|
44
|
+
average_latency_query
|
|
45
|
+
)
|
|
46
|
+
}
|
|
47
|
+
return [
|
|
48
|
+
average_run_latencies_ms.get((experiment_id, example_id))
|
|
49
|
+
for experiment_id, example_id in keys
|
|
50
|
+
]
|
|
@@ -91,7 +91,6 @@ class DatasetExampleRevisionsDataLoader(DataLoader[Key, Result]):
|
|
|
91
91
|
onclause=revision_ids.c.version_id == models.DatasetVersion.id,
|
|
92
92
|
isouter=True, # keep rows where the version id is null
|
|
93
93
|
)
|
|
94
|
-
.where(models.DatasetExampleRevision.revision_kind != "DELETE")
|
|
95
94
|
)
|
|
96
95
|
async with self._db() as session:
|
|
97
96
|
results = {
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from sqlalchemy import select
|
|
2
|
+
from strawberry.dataloader import DataLoader
|
|
3
|
+
from typing_extensions import TypeAlias
|
|
4
|
+
|
|
5
|
+
from phoenix.db import models
|
|
6
|
+
from phoenix.server.types import DbSessionFactory
|
|
7
|
+
|
|
8
|
+
ExperimentRunID: TypeAlias = int
|
|
9
|
+
DatasetExampleID: TypeAlias = int
|
|
10
|
+
DatasetVersionID: TypeAlias = int
|
|
11
|
+
Key: TypeAlias = ExperimentRunID
|
|
12
|
+
Result: TypeAlias = tuple[models.DatasetExample, DatasetVersionID]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DatasetExamplesAndVersionsByExperimentRunDataLoader(DataLoader[Key, Result]):
|
|
16
|
+
def __init__(self, db: DbSessionFactory) -> None:
|
|
17
|
+
super().__init__(load_fn=self._load_fn)
|
|
18
|
+
self._db = db
|
|
19
|
+
|
|
20
|
+
async def _load_fn(self, keys: list[Key]) -> list[Result]:
|
|
21
|
+
experiment_run_ids = set(keys)
|
|
22
|
+
examples_and_versions_query = (
|
|
23
|
+
select(
|
|
24
|
+
models.ExperimentRun.id.label("experiment_run_id"),
|
|
25
|
+
models.DatasetExample,
|
|
26
|
+
models.Experiment.dataset_version_id.label("dataset_version_id"),
|
|
27
|
+
)
|
|
28
|
+
.select_from(models.ExperimentRun)
|
|
29
|
+
.join(
|
|
30
|
+
models.DatasetExample,
|
|
31
|
+
models.DatasetExample.id == models.ExperimentRun.dataset_example_id,
|
|
32
|
+
)
|
|
33
|
+
.join(
|
|
34
|
+
models.Experiment,
|
|
35
|
+
models.Experiment.id == models.ExperimentRun.experiment_id,
|
|
36
|
+
)
|
|
37
|
+
.where(models.ExperimentRun.id.in_(experiment_run_ids))
|
|
38
|
+
)
|
|
39
|
+
async with self._db() as session:
|
|
40
|
+
examples_and_versions = {
|
|
41
|
+
experiment_run_id: (example, version_id)
|
|
42
|
+
for experiment_run_id, example, version_id in (
|
|
43
|
+
await session.execute(examples_and_versions_query)
|
|
44
|
+
).all()
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
return [examples_and_versions[key] for key in keys]
|