arize-phoenix 3.25.0__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of arize-phoenix might be problematic.
Files changed (113)
  1. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/METADATA +26 -4
  2. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/RECORD +80 -75
  3. phoenix/__init__.py +9 -5
  4. phoenix/config.py +109 -53
  5. phoenix/datetime_utils.py +18 -1
  6. phoenix/db/README.md +25 -0
  7. phoenix/db/__init__.py +4 -0
  8. phoenix/db/alembic.ini +119 -0
  9. phoenix/db/bulk_inserter.py +206 -0
  10. phoenix/db/engines.py +152 -0
  11. phoenix/db/helpers.py +47 -0
  12. phoenix/db/insertion/evaluation.py +209 -0
  13. phoenix/db/insertion/helpers.py +51 -0
  14. phoenix/db/insertion/span.py +142 -0
  15. phoenix/db/migrate.py +71 -0
  16. phoenix/db/migrations/env.py +121 -0
  17. phoenix/db/migrations/script.py.mako +26 -0
  18. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  19. phoenix/db/models.py +371 -0
  20. phoenix/exceptions.py +5 -1
  21. phoenix/server/api/context.py +40 -3
  22. phoenix/server/api/dataloaders/__init__.py +97 -0
  23. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  24. phoenix/server/api/dataloaders/cache/two_tier_cache.py +67 -0
  25. phoenix/server/api/dataloaders/document_evaluation_summaries.py +152 -0
  26. phoenix/server/api/dataloaders/document_evaluations.py +37 -0
  27. phoenix/server/api/dataloaders/document_retrieval_metrics.py +98 -0
  28. phoenix/server/api/dataloaders/evaluation_summaries.py +151 -0
  29. phoenix/server/api/dataloaders/latency_ms_quantile.py +198 -0
  30. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +93 -0
  31. phoenix/server/api/dataloaders/record_counts.py +125 -0
  32. phoenix/server/api/dataloaders/span_descendants.py +64 -0
  33. phoenix/server/api/dataloaders/span_evaluations.py +37 -0
  34. phoenix/server/api/dataloaders/token_counts.py +138 -0
  35. phoenix/server/api/dataloaders/trace_evaluations.py +37 -0
  36. phoenix/server/api/input_types/SpanSort.py +138 -68
  37. phoenix/server/api/routers/v1/__init__.py +11 -0
  38. phoenix/server/api/routers/v1/evaluations.py +275 -0
  39. phoenix/server/api/routers/v1/spans.py +126 -0
  40. phoenix/server/api/routers/v1/traces.py +82 -0
  41. phoenix/server/api/schema.py +112 -48
  42. phoenix/server/api/types/DocumentEvaluationSummary.py +1 -1
  43. phoenix/server/api/types/Evaluation.py +29 -12
  44. phoenix/server/api/types/EvaluationSummary.py +29 -44
  45. phoenix/server/api/types/MimeType.py +2 -2
  46. phoenix/server/api/types/Model.py +9 -9
  47. phoenix/server/api/types/Project.py +240 -171
  48. phoenix/server/api/types/Span.py +87 -131
  49. phoenix/server/api/types/Trace.py +29 -20
  50. phoenix/server/api/types/pagination.py +151 -10
  51. phoenix/server/app.py +263 -35
  52. phoenix/server/grpc_server.py +93 -0
  53. phoenix/server/main.py +75 -60
  54. phoenix/server/openapi/docs.py +218 -0
  55. phoenix/server/prometheus.py +23 -7
  56. phoenix/server/static/index.js +662 -643
  57. phoenix/server/telemetry.py +68 -0
  58. phoenix/services.py +4 -0
  59. phoenix/session/client.py +34 -30
  60. phoenix/session/data_extractor.py +8 -3
  61. phoenix/session/session.py +176 -155
  62. phoenix/settings.py +13 -0
  63. phoenix/trace/attributes.py +349 -0
  64. phoenix/trace/dsl/README.md +116 -0
  65. phoenix/trace/dsl/filter.py +660 -192
  66. phoenix/trace/dsl/helpers.py +24 -5
  67. phoenix/trace/dsl/query.py +562 -185
  68. phoenix/trace/fixtures.py +69 -7
  69. phoenix/trace/otel.py +44 -200
  70. phoenix/trace/schemas.py +14 -8
  71. phoenix/trace/span_evaluations.py +5 -2
  72. phoenix/utilities/__init__.py +0 -26
  73. phoenix/utilities/span_store.py +0 -23
  74. phoenix/version.py +1 -1
  75. phoenix/core/project.py +0 -773
  76. phoenix/core/traces.py +0 -96
  77. phoenix/datasets/dataset.py +0 -214
  78. phoenix/datasets/fixtures.py +0 -24
  79. phoenix/datasets/schema.py +0 -31
  80. phoenix/experimental/evals/__init__.py +0 -73
  81. phoenix/experimental/evals/evaluators.py +0 -413
  82. phoenix/experimental/evals/functions/__init__.py +0 -4
  83. phoenix/experimental/evals/functions/classify.py +0 -453
  84. phoenix/experimental/evals/functions/executor.py +0 -353
  85. phoenix/experimental/evals/functions/generate.py +0 -138
  86. phoenix/experimental/evals/functions/processing.py +0 -76
  87. phoenix/experimental/evals/models/__init__.py +0 -14
  88. phoenix/experimental/evals/models/anthropic.py +0 -175
  89. phoenix/experimental/evals/models/base.py +0 -170
  90. phoenix/experimental/evals/models/bedrock.py +0 -221
  91. phoenix/experimental/evals/models/litellm.py +0 -134
  92. phoenix/experimental/evals/models/openai.py +0 -453
  93. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  94. phoenix/experimental/evals/models/vertex.py +0 -173
  95. phoenix/experimental/evals/models/vertexai.py +0 -186
  96. phoenix/experimental/evals/retrievals.py +0 -96
  97. phoenix/experimental/evals/templates/__init__.py +0 -50
  98. phoenix/experimental/evals/templates/default_templates.py +0 -472
  99. phoenix/experimental/evals/templates/template.py +0 -195
  100. phoenix/experimental/evals/utils/__init__.py +0 -172
  101. phoenix/experimental/evals/utils/threads.py +0 -27
  102. phoenix/server/api/routers/evaluation_handler.py +0 -110
  103. phoenix/server/api/routers/span_handler.py +0 -70
  104. phoenix/server/api/routers/trace_handler.py +0 -60
  105. phoenix/storage/span_store/__init__.py +0 -23
  106. phoenix/storage/span_store/text_file.py +0 -85
  107. phoenix/trace/dsl/missing.py +0 -60
  108. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/WHEEL +0 -0
  109. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/IP_NOTICE +0 -0
  110. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/LICENSE +0 -0
  111. /phoenix/{datasets → db/insertion}/__init__.py +0 -0
  112. /phoenix/{experimental → db/migrations}/__init__.py +0 -0
  113. /phoenix/{storage → server/openapi}/__init__.py +0 -0
phoenix/core/project.py DELETED
@@ -1,773 +0,0 @@
import logging
from collections import defaultdict
from datetime import datetime, timezone
from threading import RLock
from types import MappingProxyType
from typing import (
    Any,
    DefaultDict,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Sized,
    Tuple,
    Union,
    cast,
)

import numpy as np
from google.protobuf.json_format import MessageToDict
from openinference.semconv.trace import SpanAttributes
from pandas import DataFrame, Index, MultiIndex
from sortedcontainers import SortedKeyList
from typing_extensions import TypeAlias, assert_never
from wrapt import ObjectProxy

import phoenix.trace.v1 as pb
from phoenix.datetime_utils import right_open_time_range
from phoenix.trace import DocumentEvaluations, Evaluations, SpanEvaluations
from phoenix.trace.schemas import (
    ComputedAttributes,
    Span,
    SpanID,
    SpanStatusCode,
    TraceID,
)

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

END_OF_QUEUE = None  # sentinel value for queue termination


class WrappedSpan(ObjectProxy):  # type: ignore
    """
    A wrapped Span object with __getitem__ and __setitem__ methods for accessing
    computed attributes.
    """

    def __init__(self, span: Span) -> None:
        super().__init__(span)
        self._self_computed_values: Dict[ComputedAttributes, Union[float, int]] = {}

    def get_computed_value(self, key: str) -> Optional[Union[float, int]]:
        try:
            attr = ComputedAttributes(key)
        except Exception:
            return None
        return self._self_computed_values.get(attr)

    def __getitem__(self, key: Union[str, ComputedAttributes]) -> Any:
        if isinstance(key, ComputedAttributes):
            return self._self_computed_values.get(key)
        return self.__wrapped__.attributes.get(key)

    def __setitem__(self, key: ComputedAttributes, value: Any) -> None:
        if not isinstance(key, ComputedAttributes):
            raise KeyError(f"{key} is not a computed value")
        self._self_computed_values[key] = value

    def __eq__(self, other: Any) -> bool:
        return self is other

    def __hash__(self) -> int:
        return id(self)


_ParentSpanID: TypeAlias = SpanID
_ChildSpanID: TypeAlias = SpanID
_ProjectName: TypeAlias = str


EvaluationName: TypeAlias = str
DocumentPosition: TypeAlias = int


class Project:
    def __init__(self) -> None:
        self._spans = _Spans()
        self._evals = _Evals()
        self._is_archived = False

    @property
    def last_updated_at(self) -> Optional[datetime]:
        spans_last_updated_at = self._spans.last_updated_at
        evals_last_updated_at = self._evals.last_updated_at
        if (
            not spans_last_updated_at
            or evals_last_updated_at
            and evals_last_updated_at > spans_last_updated_at
        ):
            return evals_last_updated_at
        return spans_last_updated_at

    def add_span(self, span: Span) -> None:
        self._spans.add(WrappedSpan(span))

    def add_eval(self, pb_eval: pb.Evaluation) -> None:
        self._evals.add(pb_eval)

    def has_trace(self, trace_id: TraceID) -> bool:
        return self._spans.has_trace(trace_id)

    def get_trace(self, trace_id: TraceID) -> Iterator[WrappedSpan]:
        yield from self._spans.get_trace(trace_id)

    def get_trace_ids(
        self,
        start_time: Optional[datetime] = None,
        stop_time: Optional[datetime] = None,
        trace_ids: Optional[Iterable[TraceID]] = None,
    ) -> Iterator[TraceID]:
        yield from self._spans.get_trace_ids(start_time, stop_time, trace_ids)

    def get_spans(
        self,
        start_time: Optional[datetime] = None,
        stop_time: Optional[datetime] = None,
        root_spans_only: Optional[bool] = False,
        span_ids: Optional[Iterable[SpanID]] = None,
    ) -> Iterator[WrappedSpan]:
        yield from self._spans.get_spans(start_time, stop_time, root_spans_only, span_ids)

    def get_num_documents(self, span_id: SpanID) -> int:
        return self._spans.get_num_documents(span_id)

    def root_span_latency_ms_quantiles(self, probability: float) -> Optional[float]:
        """Root span latency quantiles in milliseconds"""
        return self._spans.root_span_latency_ms_quantiles(probability)

    def get_descendant_spans(self, span_id: SpanID) -> Iterator[WrappedSpan]:
        yield from self._spans.get_descendant_spans(span_id)

    def span_count(
        self,
        start_time: Optional[datetime] = None,
        stop_time: Optional[datetime] = None,
    ) -> int:
        return self._spans.span_count(start_time, stop_time)

    def trace_count(
        self,
        start_time: Optional[datetime] = None,
        stop_time: Optional[datetime] = None,
    ) -> int:
        return self._spans.trace_count(start_time, stop_time)

    @property
    def token_count_total(self) -> int:
        return self._spans.token_count_total

    @property
    def right_open_time_range(self) -> Tuple[Optional[datetime], Optional[datetime]]:
        return self._spans.right_open_time_range

    def get_trace_evaluation(self, trace_id: TraceID, name: str) -> Optional[pb.Evaluation]:
        return self._evals.get_trace_evaluation(trace_id, name)

    def get_trace_evaluation_names(self) -> List[EvaluationName]:
        return self._evals.get_trace_evaluation_names()

    def get_trace_evaluation_labels(self, name: EvaluationName) -> Tuple[str, ...]:
        return self._evals.get_trace_evaluation_labels(name)

    def get_trace_evaluation_trace_ids(self, name: EvaluationName) -> Tuple[TraceID, ...]:
        return self._evals.get_trace_evaluation_trace_ids(name)

    def get_evaluations_by_trace_id(self, trace_id: TraceID) -> List[pb.Evaluation]:
        return self._evals.get_evaluations_by_trace_id(trace_id)

    def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]:
        return self._evals.get_span_evaluation(span_id, name)

    def get_span_evaluation_names(self) -> List[EvaluationName]:
        return self._evals.get_span_evaluation_names()

    def get_document_evaluation_names(
        self,
        span_id: Optional[SpanID] = None,
    ) -> List[EvaluationName]:
        return self._evals.get_document_evaluation_names(span_id)

    def get_span_evaluation_labels(self, name: EvaluationName) -> Tuple[str, ...]:
        return self._evals.get_span_evaluation_labels(name)

    def get_span_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
        return self._evals.get_span_evaluation_span_ids(name)

    def get_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
        return self._evals.get_evaluations_by_span_id(span_id)

    def get_document_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
        return self._evals.get_document_evaluation_span_ids(name)

    def get_document_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
        return self._evals.get_document_evaluations_by_span_id(span_id)

    def get_document_evaluation_scores(
        self,
        span_id: SpanID,
        evaluation_name: str,
        num_documents: int,
    ) -> List[float]:
        return self._evals.get_document_evaluation_scores(span_id, evaluation_name, num_documents)

    def export_evaluations(self) -> List[Evaluations]:
        return self._evals.export_evaluations()

    def archive(self) -> None:
        self._is_archived = True

    @property
    def is_archived(self) -> bool:
        return self._is_archived


class _Trace:
    def __init__(self, span: WrappedSpan) -> None:
        self._trace_id: TraceID = span.context.trace_id
        self._min_start_time: datetime = span.start_time
        self._max_end_time: datetime = span.end_time
        self._spans: List[WrappedSpan] = [span]

    @property
    def trace_id(self) -> TraceID:
        return self._trace_id

    @property
    def start_time(self) -> datetime:
        return self._min_start_time

    @property
    def latency_ms(self) -> float:
        return (self._max_end_time - self._min_start_time).total_seconds() * 1000

    def add(self, span: WrappedSpan) -> None:
        self._min_start_time = min(self._min_start_time, span.start_time)
        self._max_end_time = max(self._max_end_time, span.end_time)
        self._spans.append(span)

    def __eq__(self, other: Any) -> bool:
        return self is other

    def __iter__(self) -> Iterator[WrappedSpan]:
        yield from self._spans


class _Spans:
    def __init__(self) -> None:
        self._lock = RLock()
        self._spans: Dict[SpanID, WrappedSpan] = {}
        self._parent_span_ids: Dict[SpanID, _ParentSpanID] = {}
        self._traces: Dict[TraceID, _Trace] = {}
        self._child_spans: DefaultDict[SpanID, Set[WrappedSpan]] = defaultdict(set)
        self._num_documents: DefaultDict[SpanID, int] = defaultdict(int)
        self._start_time_sorted_spans: SortedKeyList[WrappedSpan] = SortedKeyList(
            key=lambda span: span.start_time,
        )
        self._start_time_sorted_root_spans: SortedKeyList[WrappedSpan] = SortedKeyList(
            key=lambda span: span.start_time,
        )
        """
        A root span is defined to be a span whose parent span is not in our collection.
        This includes spans whose parent is None and spans whose parent has not arrived
        (or will not arrive). For spans whose parent is not None, the root span status
        is temporary and will be revoked when its parent span arrives.
        """
        self._latency_sorted_traces: SortedKeyList[_Trace] = SortedKeyList(
            key=lambda trace: trace.latency_ms,
        )
        self._start_time_sorted_traces: SortedKeyList[_Trace] = SortedKeyList(
            key=lambda trace: trace.start_time,
        )
        self._token_count_total: int = 0
        self._last_updated_at: Optional[datetime] = None

    def has_trace(self, trace_id: TraceID) -> bool:
        return trace_id in self._traces

    def get_trace(self, trace_id: TraceID) -> Iterator[WrappedSpan]:
        with self._lock:
            # make a copy because source data can mutate during iteration
            if not (trace := self._traces.get(trace_id)):
                return
            spans = tuple(trace)
        for span in spans:
            yield span

    def get_trace_ids(
        self,
        start_time: Optional[datetime] = None,
        stop_time: Optional[datetime] = None,
        trace_ids: Optional[Iterable[TraceID]] = None,
    ) -> Iterator[TraceID]:
        if not self._spans:
            return
        if start_time is None or stop_time is None:
            min_start_time, max_stop_time = cast(
                Tuple[datetime, datetime],
                self.right_open_time_range,
            )
            start_time = start_time or min_start_time
            stop_time = stop_time or max_stop_time
        if trace_ids is not None:
            with self._lock:
                traces = tuple(
                    trace
                    for trace_id in trace_ids
                    if (
                        (trace := self._traces.get(trace_id))
                        and start_time <= trace.start_time < stop_time
                    )
                )
        else:
            sorted_traces = self._start_time_sorted_traces
            # make a copy because source data can mutate during iteration
            with self._lock:
                traces = tuple(
                    sorted_traces.irange_key(
                        start_time.astimezone(timezone.utc),
                        stop_time.astimezone(timezone.utc),
                        inclusive=(True, False),
                        reverse=True,  # most recent traces first
                    )
                )
        for trace in traces:
            yield trace.trace_id

    def get_spans(
        self,
        start_time: Optional[datetime] = None,
        stop_time: Optional[datetime] = None,
        root_spans_only: Optional[bool] = False,
        span_ids: Optional[Iterable[SpanID]] = None,
    ) -> Iterator[WrappedSpan]:
        if not self._spans:
            return
        if start_time is None or stop_time is None:
            min_start_time, max_stop_time = cast(
                Tuple[datetime, datetime],
                self.right_open_time_range,
            )
            start_time = start_time or min_start_time
            stop_time = stop_time or max_stop_time
        if span_ids is not None:
            with self._lock:
                spans = tuple(
                    span
                    for span_id in span_ids
                    if (
                        (span := self._spans.get(span_id))
                        and start_time <= span.start_time < stop_time
                        and (not root_spans_only or span.parent_id is None)
                    )
                )
        else:
            sorted_spans = (
                self._start_time_sorted_root_spans
                if root_spans_only
                else self._start_time_sorted_spans
            )
            # make a copy because source data can mutate during iteration
            with self._lock:
                spans = tuple(
                    sorted_spans.irange_key(
                        start_time.astimezone(timezone.utc),
                        stop_time.astimezone(timezone.utc),
                        inclusive=(True, False),
                        reverse=True,  # most recent spans first
                    )
                )
        for span in spans:
            yield span

    def get_num_documents(self, span_id: SpanID) -> int:
        with self._lock:
            return self._num_documents.get(span_id) or 0

    def root_span_latency_ms_quantiles(self, probability: float) -> Optional[float]:
        """Root span latency quantiles in milliseconds"""
        with self._lock:
            traces = self._latency_sorted_traces
            if not (n := len(traces)):
                return None
            if probability >= 1:
                return cast(float, traces[-1].latency_ms)
            if probability <= 0:
                return cast(float, traces[0].latency_ms)
            k = max(0, round(n * probability) - 1)
            return cast(float, traces[k].latency_ms)

    def get_descendant_spans(self, span_id: SpanID) -> Iterator[WrappedSpan]:
        for span in self._get_descendant_spans(span_id):
            yield span

    def _get_descendant_spans(self, span_id: SpanID) -> Iterator[WrappedSpan]:
        with self._lock:
            # make a copy because source data can mutate during iteration
            if not (child_spans := self._child_spans.get(span_id)):
                return
            spans = tuple(child_spans)
        for child_span in spans:
            yield child_span
            yield from self._get_descendant_spans(child_span.context.span_id)

    @property
    def last_updated_at(self) -> Optional[datetime]:
        return self._last_updated_at

    def span_count(
        self,
        start_time: Optional[datetime] = None,
        stop_time: Optional[datetime] = None,
    ) -> int:
        _index = self._start_time_sorted_spans.bisect_key_left
        with self._lock:
            start: int = _index(start_time) if start_time else 0
            stop: int = _index(stop_time) if stop_time else len(self._spans)
            return stop - start

    def trace_count(
        self,
        start_time: Optional[datetime] = None,
        stop_time: Optional[datetime] = None,
    ) -> int:
        _index = self._start_time_sorted_root_spans.bisect_key_left
        with self._lock:
            start: int = _index(start_time) if start_time else 0
            stop: int = _index(stop_time) if stop_time else len(self._traces)
            return stop - start

    @property
    def token_count_total(self) -> int:
        return self._token_count_total

    @property
    def right_open_time_range(self) -> Tuple[Optional[datetime], Optional[datetime]]:
        with self._lock:
            if not self._start_time_sorted_spans:
                return None, None
            first_span = self._start_time_sorted_spans[0]
            last_span = self._start_time_sorted_spans[-1]
            min_start_time = first_span.start_time
            max_start_time = last_span.start_time
            return right_open_time_range(min_start_time, max_start_time)

    def add(self, span: WrappedSpan) -> None:
        with self._lock:
            self._add_span(span)

    def _add_span(self, span: WrappedSpan) -> None:
        span_id = span.context.span_id
        if span_id in self._spans:
            # Update is not allowed.
            return

        parent_span_id = span.parent_id
        if parent_span_id is not None:
            self._child_spans[parent_span_id].add(span)
            self._parent_span_ids[span_id] = parent_span_id

        for child_span in self._child_spans.get(span_id, ()):
            # A root span is a span whose parent span is not in our collection.
            # Now that their parent span has arrived, they are no longer root spans.
            self._start_time_sorted_root_spans.remove(child_span)

        # Add computed attributes to span
        start_time = span.start_time
        end_time = span.end_time
        span[ComputedAttributes.LATENCY_MS] = (end_time - start_time).total_seconds() * 1000
        span[ComputedAttributes.ERROR_COUNT] = int(span.status_code is SpanStatusCode.ERROR)

        # Store the new span (after adding computed attributes)
        self._spans[span_id] = span
        self._add_span_to_trace(span)
        self._start_time_sorted_spans.add(span)
        if parent_span_id is None or parent_span_id not in self._spans:
            self._start_time_sorted_root_spans.add(span)
        self._propagate_cumulative_values(span)
        self._update_cached_statistics(span)

        # Update last updated timestamp, letting users know
        # when they should refresh the page.
        self._last_updated_at = datetime.now(timezone.utc)

    def _add_span_to_trace(self, span: WrappedSpan) -> None:
        trace_id = span.context.trace_id
        if (trace := self._traces.get(trace_id)) is None:
            self._traces[trace_id] = trace = _Trace(span)
        else:
            # Must remove trace before mutating it.
            self._latency_sorted_traces.remove(trace)
            self._start_time_sorted_traces.remove(trace)
            trace.add(span)
        self._latency_sorted_traces.add(trace)
        self._start_time_sorted_traces.add(trace)

    def _update_cached_statistics(self, span: WrappedSpan) -> None:
        # Update statistics for quick access later
        span_id = span.context.span_id
        if token_count_update := span.attributes.get(SpanAttributes.LLM_TOKEN_COUNT_TOTAL):
            self._token_count_total += token_count_update
        if isinstance(
            (retrieval_documents := span.attributes.get(SpanAttributes.RETRIEVAL_DOCUMENTS)),
            Sized,
        ) and (num_documents_update := len(retrieval_documents)):
            self._num_documents[span_id] += num_documents_update

    def _propagate_cumulative_values(self, span: WrappedSpan) -> None:
        child_spans: Iterable[WrappedSpan] = self._child_spans.get(span.context.span_id) or ()
        for cumulative_attribute, attribute in _CUMULATIVE_ATTRIBUTES.items():
            span[cumulative_attribute] = span[attribute] or 0
            for child_span in child_spans:
                span[cumulative_attribute] += child_span[cumulative_attribute] or 0
        self._update_ancestors(span)

    def _update_ancestors(self, span: WrappedSpan) -> None:
        # Add cumulative values to each of the span's ancestors.
        span_id = span.context.span_id
        for attribute in _CUMULATIVE_ATTRIBUTES.keys():
            value = span[attribute] or 0
            self._add_value_to_span_ancestors(span_id, attribute, value)

    def _add_value_to_span_ancestors(
        self,
        span_id: SpanID,
        attribute: ComputedAttributes,
        value: float,
    ) -> None:
        while parent_span_id := self._parent_span_ids.get(span_id):
            if not (parent_span := self._spans.get(parent_span_id)):
                return
            cumulative_value = parent_span[attribute] or 0
            parent_span[attribute] = cumulative_value + value
            span_id = parent_span_id


class _Evals:
    def __init__(self) -> None:
        self._lock = RLock()
        self._trace_evaluations_by_name: DefaultDict[
            EvaluationName, Dict[TraceID, pb.Evaluation]
        ] = defaultdict(dict)
        self._evaluations_by_trace_id: DefaultDict[TraceID, Dict[EvaluationName, pb.Evaluation]] = (
            defaultdict(dict)
        )
        self._trace_evaluation_labels: DefaultDict[EvaluationName, Set[str]] = defaultdict(set)
        self._span_evaluations_by_name: DefaultDict[EvaluationName, Dict[SpanID, pb.Evaluation]] = (
            defaultdict(dict)
        )
        self._evaluations_by_span_id: DefaultDict[SpanID, Dict[EvaluationName, pb.Evaluation]] = (
            defaultdict(dict)
        )
        self._span_evaluation_labels: DefaultDict[EvaluationName, Set[str]] = defaultdict(set)
        self._document_evaluations_by_span_id: DefaultDict[
            SpanID, DefaultDict[EvaluationName, Dict[DocumentPosition, pb.Evaluation]]
        ] = defaultdict(lambda: defaultdict(dict))
        self._document_evaluations_by_name: DefaultDict[
            EvaluationName, DefaultDict[SpanID, Dict[DocumentPosition, pb.Evaluation]]
        ] = defaultdict(lambda: defaultdict(dict))
        self._last_updated_at: Optional[datetime] = None

    def add(self, evaluation: pb.Evaluation) -> None:
        with self._lock:
            self._add(evaluation)

    def _add(self, evaluation: pb.Evaluation) -> None:
        subject_id = evaluation.subject_id
        name = evaluation.name
        subject_id_kind = subject_id.WhichOneof("kind")
        if subject_id_kind == "document_retrieval_id":
            document_retrieval_id = subject_id.document_retrieval_id
            span_id = SpanID(document_retrieval_id.span_id)
            document_position = document_retrieval_id.document_position
            self._document_evaluations_by_span_id[span_id][name][document_position] = evaluation
            self._document_evaluations_by_name[name][span_id][document_position] = evaluation
        elif subject_id_kind == "span_id":
            span_id = SpanID(subject_id.span_id)
            self._evaluations_by_span_id[span_id][name] = evaluation
            self._span_evaluations_by_name[name][span_id] = evaluation
            if evaluation.result.HasField("label"):
                label = evaluation.result.label.value
                self._span_evaluation_labels[name].add(label)
        elif subject_id_kind == "trace_id":
            trace_id = TraceID(subject_id.trace_id)
            self._evaluations_by_trace_id[trace_id][name] = evaluation
            self._trace_evaluations_by_name[name][trace_id] = evaluation
            if evaluation.result.HasField("label"):
                label = evaluation.result.label.value
                self._trace_evaluation_labels[name].add(label)
        elif subject_id_kind is None:
            logger.warning(
                f"discarding evaluation with missing subject_id: {MessageToDict(evaluation)}"
            )
        else:
            assert_never(subject_id_kind)
        self._last_updated_at = datetime.now(timezone.utc)

    @property
    def last_updated_at(self) -> Optional[datetime]:
        return self._last_updated_at

    def get_trace_evaluation(self, trace_id: TraceID, name: str) -> Optional[pb.Evaluation]:
        with self._lock:
            trace_evaluations = self._evaluations_by_trace_id.get(trace_id)
            return trace_evaluations.get(name) if trace_evaluations else None

    def get_trace_evaluation_names(self) -> List[EvaluationName]:
        with self._lock:
            return list(self._trace_evaluations_by_name)

    def get_trace_evaluation_labels(self, name: EvaluationName) -> Tuple[str, ...]:
        with self._lock:
            labels = self._trace_evaluation_labels.get(name)
            return tuple(labels) if labels else ()

    def get_trace_evaluation_trace_ids(self, name: EvaluationName) -> Tuple[TraceID, ...]:
        with self._lock:
            trace_evaluations = self._trace_evaluations_by_name.get(name)
            return tuple(trace_evaluations.keys()) if trace_evaluations else ()

    def get_evaluations_by_trace_id(self, trace_id: TraceID) -> List[pb.Evaluation]:
        with self._lock:
            evaluations = self._evaluations_by_trace_id.get(trace_id)
            return list(evaluations.values()) if evaluations else []

    def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]:
        with self._lock:
            span_evaluations = self._evaluations_by_span_id.get(span_id)
            return span_evaluations.get(name) if span_evaluations else None

    def get_span_evaluation_names(self) -> List[EvaluationName]:
        with self._lock:
            return list(self._span_evaluations_by_name)

    def get_document_evaluation_names(
        self,
        span_id: Optional[SpanID] = None,
    ) -> List[EvaluationName]:
        with self._lock:
            if span_id is None:
                return list(self._document_evaluations_by_name)
            document_evaluations = self._document_evaluations_by_span_id.get(span_id)
            return list(document_evaluations) if document_evaluations else []

    def get_span_evaluation_labels(self, name: EvaluationName) -> Tuple[str, ...]:
        with self._lock:
            labels = self._span_evaluation_labels.get(name)
            return tuple(labels) if labels else ()

    def get_span_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
        with self._lock:
            span_evaluations = self._span_evaluations_by_name.get(name)
            return tuple(span_evaluations.keys()) if span_evaluations else ()

    def get_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
        with self._lock:
            evaluations = self._evaluations_by_span_id.get(span_id)
            return list(evaluations.values()) if evaluations else []

    def get_document_evaluation_span_ids(self, name: EvaluationName) -> Tuple[SpanID, ...]:
        with self._lock:
            document_evaluations = self._document_evaluations_by_name.get(name)
            return tuple(document_evaluations.keys()) if document_evaluations else ()

    def get_document_evaluations_by_span_id(self, span_id: SpanID) -> List[pb.Evaluation]:
        all_evaluations: List[pb.Evaluation] = []
        with self._lock:
            document_evaluations = self._document_evaluations_by_span_id.get(span_id)
            if not document_evaluations:
                return all_evaluations
            for evaluations in document_evaluations.values():
                all_evaluations.extend(evaluations.values())
        return all_evaluations

    def get_document_evaluation_scores(
        self,
        span_id: SpanID,
        evaluation_name: str,
        num_documents: int,
    ) -> List[float]:
        # num_documents is needed as argument because the document position values
        # are not checked during ingestion: e.g. if there exists a position value
        # of one trillion, we would not want to create a result that large.
        scores: List[float] = [np.nan] * num_documents
        with self._lock:
            document_evaluations = self._document_evaluations_by_span_id.get(span_id)
            if not document_evaluations:
                return scores
            evaluations = document_evaluations.get(evaluation_name)
            if not evaluations:
                return scores
            for document_position, evaluation in evaluations.items():
                result = evaluation.result
                if result.HasField("score") and document_position < num_documents:
                    scores[document_position] = result.score.value
        return scores

    def export_evaluations(self) -> List[Evaluations]:
        evaluations: List[Evaluations] = []
        evaluations.extend(self._export_span_evaluations())
        evaluations.extend(self._export_document_evaluations())
        return evaluations

    def _export_span_evaluations(self) -> List[SpanEvaluations]:
        span_evaluations = []
        with self._lock:
            span_evaluations_by_name = tuple(self._span_evaluations_by_name.items())
        for eval_name, _span_evaluations_by_id in span_evaluations_by_name:
            span_ids = []
            rows = []
            with self._lock:
                span_evaluations_by_id = tuple(_span_evaluations_by_id.items())
            for span_id, pb_eval in span_evaluations_by_id:
                span_ids.append(span_id)
                rows.append(MessageToDict(pb_eval.result))
            dataframe = DataFrame(rows, index=Index(span_ids, name="context.span_id"))
            span_evaluations.append(SpanEvaluations(eval_name, dataframe))
        return span_evaluations

    def _export_document_evaluations(self) -> List[DocumentEvaluations]:
        evaluations = []
        with self._lock:
            document_evaluations_by_name = tuple(self._document_evaluations_by_name.items())
        for eval_name, _document_evaluations_by_id in document_evaluations_by_name:
            span_ids = []
            document_positions = []
            rows = []
            with self._lock:
                document_evaluations_by_id = tuple(_document_evaluations_by_id.items())
            for span_id, _document_evaluations_by_position in document_evaluations_by_id:
                with self._lock:
                    document_evaluations_by_position = sorted(
                        _document_evaluations_by_position.items()
                    )  # ensure the evals are sorted by document position
                for document_position, pb_eval in document_evaluations_by_position:
                    span_ids.append(span_id)
                    document_positions.append(document_position)
                    rows.append(MessageToDict(pb_eval.result))
            dataframe = DataFrame(
                rows,
                index=MultiIndex.from_arrays(
                    (span_ids, document_positions),
                    names=("context.span_id", "document_position"),
                ),
            )
            evaluations.append(DocumentEvaluations(eval_name, dataframe))
        return evaluations


_CUMULATIVE_ATTRIBUTES: Mapping[ComputedAttributes, Union[str, ComputedAttributes]] = (
    MappingProxyType(
        {
            ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_TOTAL: SpanAttributes.LLM_TOKEN_COUNT_TOTAL,  # noqa: E501
            ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_PROMPT: SpanAttributes.LLM_TOKEN_COUNT_PROMPT,  # noqa: E501
            ComputedAttributes.CUMULATIVE_LLM_TOKEN_COUNT_COMPLETION: SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,  # noqa: E501
            ComputedAttributes.CUMULATIVE_ERROR_COUNT: ComputedAttributes.ERROR_COUNT,
        }
    )
)
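
A note on the removed module's latency statistics: _Spans.root_span_latency_ms_quantiles selects a nearest-rank index into the latency-sorted trace list rather than interpolating. A minimal, self-contained sketch of that selection rule follows; the function name and sample latencies are illustrative only and are not part of the package.

from typing import List, Optional

def nearest_rank_quantile(latencies_ms_ascending: List[float], probability: float) -> Optional[float]:
    # Mirrors the removed root_span_latency_ms_quantiles logic: clamp at the
    # extremes, otherwise pick index round(n * probability) - 1, floored at 0.
    n = len(latencies_ms_ascending)
    if n == 0:
        return None
    if probability >= 1:
        return latencies_ms_ascending[-1]
    if probability <= 0:
        return latencies_ms_ascending[0]
    k = max(0, round(n * probability) - 1)
    return latencies_ms_ascending[k]

# Example: with five latencies, p=0.75 picks index round(5 * 0.75) - 1 == 3.
print(nearest_rank_quantile([12.0, 30.5, 47.0, 90.2, 250.0], 0.75))  # 90.2

In 4.0.1 this in-memory aggregation is gone; judging by the file list above, the new phoenix/server/api/dataloaders/latency_ms_quantile.py presumably takes over this computation against the new database layer.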