arize-phoenix 3.25.0__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.0.dist-info}/METADATA +26 -4
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.0.dist-info}/RECORD +80 -75
- phoenix/__init__.py +9 -5
- phoenix/config.py +109 -53
- phoenix/datetime_utils.py +18 -1
- phoenix/db/README.md +25 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +119 -0
- phoenix/db/bulk_inserter.py +206 -0
- phoenix/db/engines.py +152 -0
- phoenix/db/helpers.py +47 -0
- phoenix/db/insertion/evaluation.py +209 -0
- phoenix/db/insertion/helpers.py +54 -0
- phoenix/db/insertion/span.py +142 -0
- phoenix/db/migrate.py +71 -0
- phoenix/db/migrations/env.py +121 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +371 -0
- phoenix/exceptions.py +5 -1
- phoenix/server/api/context.py +40 -3
- phoenix/server/api/dataloaders/__init__.py +97 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +67 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +152 -0
- phoenix/server/api/dataloaders/document_evaluations.py +37 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +98 -0
- phoenix/server/api/dataloaders/evaluation_summaries.py +151 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +198 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +93 -0
- phoenix/server/api/dataloaders/record_counts.py +125 -0
- phoenix/server/api/dataloaders/span_descendants.py +64 -0
- phoenix/server/api/dataloaders/span_evaluations.py +37 -0
- phoenix/server/api/dataloaders/token_counts.py +138 -0
- phoenix/server/api/dataloaders/trace_evaluations.py +37 -0
- phoenix/server/api/input_types/SpanSort.py +138 -68
- phoenix/server/api/routers/v1/__init__.py +11 -0
- phoenix/server/api/routers/v1/evaluations.py +275 -0
- phoenix/server/api/routers/v1/spans.py +126 -0
- phoenix/server/api/routers/v1/traces.py +82 -0
- phoenix/server/api/schema.py +112 -48
- phoenix/server/api/types/DocumentEvaluationSummary.py +1 -1
- phoenix/server/api/types/Evaluation.py +29 -12
- phoenix/server/api/types/EvaluationSummary.py +29 -44
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +9 -9
- phoenix/server/api/types/Project.py +240 -171
- phoenix/server/api/types/Span.py +87 -131
- phoenix/server/api/types/Trace.py +29 -20
- phoenix/server/api/types/pagination.py +151 -10
- phoenix/server/app.py +263 -35
- phoenix/server/grpc_server.py +93 -0
- phoenix/server/main.py +75 -60
- phoenix/server/openapi/docs.py +218 -0
- phoenix/server/prometheus.py +23 -7
- phoenix/server/static/index.js +662 -643
- phoenix/server/telemetry.py +68 -0
- phoenix/services.py +4 -0
- phoenix/session/client.py +34 -30
- phoenix/session/data_extractor.py +8 -3
- phoenix/session/session.py +176 -155
- phoenix/settings.py +13 -0
- phoenix/trace/attributes.py +349 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +660 -192
- phoenix/trace/dsl/helpers.py +24 -5
- phoenix/trace/dsl/query.py +562 -185
- phoenix/trace/fixtures.py +69 -7
- phoenix/trace/otel.py +33 -199
- phoenix/trace/schemas.py +14 -8
- phoenix/trace/span_evaluations.py +5 -2
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/span_store.py +0 -23
- phoenix/version.py +1 -1
- phoenix/core/project.py +0 -773
- phoenix/core/traces.py +0 -96
- phoenix/datasets/dataset.py +0 -214
- phoenix/datasets/fixtures.py +0 -24
- phoenix/datasets/schema.py +0 -31
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -453
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/routers/evaluation_handler.py +0 -110
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → server/openapi}/__init__.py +0 -0
phoenix/trace/fixtures.py
CHANGED
```diff
@@ -1,11 +1,15 @@
-from
-from
+from binascii import hexlify
+from dataclasses import dataclass, field, replace
+from datetime import datetime, timezone
+from random import getrandbits
+from typing import Dict, Iterable, Iterator, List, NamedTuple, Optional, Tuple, cast
 from urllib import request
 
 import pandas as pd
 from google.protobuf.wrappers_pb2 import DoubleValue, StringValue
 
 import phoenix.trace.v1 as pb
+from phoenix.trace.schemas import Span
 from phoenix.trace.trace_dataset import TraceDataset
 from phoenix.trace.utils import json_lines_to_df
@@ -105,7 +109,7 @@ TRACES_FIXTURES: List[TracesFixture] = [
 NAME_TO_TRACES_FIXTURE = {fixture.name: fixture for fixture in TRACES_FIXTURES}
 
 
-def _get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
+def get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
     """
     Returns the fixture whose name matches the input name.
 
@@ -120,7 +124,7 @@ def _get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
     return NAME_TO_TRACES_FIXTURE[fixture_name]
 
 
-def
+def download_traces_fixture(
     fixture: TracesFixture,
     host: Optional[str] = "https://storage.googleapis.com/",
     bucket: Optional[str] = "arize-assets",
@@ -138,12 +142,12 @@ def load_example_traces(use_case: str) -> TraceDataset:
     """
     Loads a trace dataframe by name.
     """
-    fixture = _get_trace_fixture_by_name(use_case)
-    return TraceDataset(json_lines_to_df(
+    fixture = get_trace_fixture_by_name(use_case)
+    return TraceDataset(json_lines_to_df(download_traces_fixture(fixture)))
 
 
 def get_evals_from_fixture(use_case: str) -> Iterator[pb.Evaluation]:
-    fixture = _get_trace_fixture_by_name(use_case)
+    fixture = get_trace_fixture_by_name(use_case)
     for eval_fixture in fixture.evaluation_fixtures:
         yield from _read_eval_fixture(eval_fixture)
@@ -195,3 +199,61 @@ def _url(
     prefix: Optional[str] = "phoenix/traces/",
 ) -> str:
     return f"{host}{bucket}/{prefix}{file_name}"
+
+
+def reset_fixture_span_ids_and_timestamps(
+    spans: Iterable[Span],
+    evals: Iterable[pb.Evaluation] = (),
+) -> Tuple[List[Span], List[pb.Evaluation]]:
+    old_spans, old_evals = list(spans), list(evals)
+    new_trace_ids: Dict[str, str] = {}
+    new_span_ids: Dict[str, str] = {}
+    for old_span in old_spans:
+        new_trace_ids[old_span.context.trace_id] = _new_trace_id()
+        new_span_ids[old_span.context.span_id] = _new_span_id()
+        if old_span.parent_id:
+            new_span_ids[old_span.parent_id] = _new_span_id()
+    for old_eval in old_evals:
+        subject_id = old_eval.subject_id
+        if trace_id := subject_id.trace_id:
+            new_trace_ids[trace_id] = _new_trace_id()
+        elif span_id := subject_id.span_id:
+            new_span_ids[span_id] = _new_span_id()
+        elif span_id := subject_id.document_retrieval_id.span_id:
+            new_span_ids[span_id] = _new_span_id()
+    max_end_time = max(old_span.end_time for old_span in old_spans)
+    time_diff = datetime.now(timezone.utc) - max_end_time
+    new_spans: List[Span] = []
+    new_evals: List[pb.Evaluation] = []
+    for old_span in old_spans:
+        new_trace_id = new_trace_ids[old_span.context.trace_id]
+        new_span_id = new_span_ids[old_span.context.span_id]
+        new_parent_id = new_span_ids[old_span.parent_id] if old_span.parent_id else None
+        new_span = replace(
+            old_span,
+            context=replace(old_span.context, trace_id=new_trace_id, span_id=new_span_id),
+            parent_id=new_parent_id,
+            start_time=old_span.start_time + time_diff,
+            end_time=old_span.end_time + time_diff,
+        )
+        new_spans.append(new_span)
+    for old_eval in old_evals:
+        new_eval = pb.Evaluation()
+        new_eval.CopyFrom(old_eval)
+        subject_id = new_eval.subject_id
+        if trace_id := subject_id.trace_id:
+            subject_id.trace_id = new_trace_ids[trace_id]
+        elif span_id := subject_id.span_id:
+            subject_id.span_id = new_span_ids[span_id]
+        elif span_id := subject_id.document_retrieval_id.span_id:
+            subject_id.document_retrieval_id.span_id = new_span_ids[span_id]
+        new_evals.append(new_eval)
+    return new_spans, new_evals
+
+
+def _new_trace_id() -> str:
+    return hexlify(getrandbits(128).to_bytes(16, "big")).decode()
+
+
+def _new_span_id() -> str:
+    return hexlify(getrandbits(64).to_bytes(8, "big")).decode()
```
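The headline addition is `reset_fixture_span_ids_and_timestamps`, which re-keys canned fixture data so it can be re-ingested as if freshly generated: every trace and span receives a fresh random identifier at OTLP widths (16-byte trace IDs, 8-byte span IDs, hex-encoded), parent links and evaluation subject IDs are remapped through the same lookup tables, and all timestamps shift by one shared delta so the newest span ends at the current time. A minimal sketch of the identifier and time-shift scheme, assuming nothing beyond the stdlib calls visible in the diff (the example timestamps are invented):

```python
from binascii import hexlify
from datetime import datetime, timezone
from random import getrandbits


def new_trace_id() -> str:
    # 128 random bits -> 16 bytes -> 32 lowercase hex chars (OTLP trace ID width)
    return hexlify(getrandbits(128).to_bytes(16, "big")).decode()


def new_span_id() -> str:
    # 64 random bits -> 8 bytes -> 16 lowercase hex chars (OTLP span ID width)
    return hexlify(getrandbits(64).to_bytes(8, "big")).decode()


assert len(new_trace_id()) == 32 and len(new_span_id()) == 16

# One shared delta shifts every span so the latest end time becomes "now";
# using the same delta everywhere preserves relative ordering and durations.
end_times = [
    datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc),
    datetime(2024, 1, 1, 12, 5, tzinfo=timezone.utc),
]
time_diff = datetime.now(timezone.utc) - max(end_times)
shifted = [t + time_diff for t in end_times]
assert shifted[1] - shifted[0] == end_times[1] - end_times[0]
```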
phoenix/trace/otel.py
CHANGED
```diff
@@ -1,39 +1,40 @@
-import inspect
 import json
 from binascii import hexlify, unhexlify
 from datetime import datetime, timezone
 from types import MappingProxyType
 from typing import (
     Any,
-    DefaultDict,
     Dict,
     Iterable,
     Iterator,
-    List,
     Mapping,
     Optional,
     Sequence,
-    Set,
     SupportsFloat,
     Tuple,
-    Union,
     cast,
 )
 
 import numpy as np
 import opentelemetry.proto.trace.v1.trace_pb2 as otlp
-from openinference.semconv import trace
 from openinference.semconv.trace import DocumentAttributes, SpanAttributes
 from opentelemetry.proto.common.v1.common_pb2 import AnyValue, ArrayValue, KeyValue
 from opentelemetry.util.types import Attributes, AttributeValue
 from typing_extensions import TypeAlias, assert_never
 
+from phoenix.trace.attributes import (
+    JSON_STRING_ATTRIBUTES,
+    flatten,
+    get_attribute_value,
+    has_mapping,
+    load_json_strings,
+    unflatten,
+)
 from phoenix.trace.schemas import (
     EXCEPTION_ESCAPED,
     EXCEPTION_MESSAGE,
     EXCEPTION_STACKTRACE,
     EXCEPTION_TYPE,
-    MimeType,
     Span,
     SpanContext,
     SpanEvent,
@@ -61,16 +62,10 @@ def decode_otlp_span(otlp_span: otlp.Span) -> Span:
     parent_id = _decode_identifier(otlp_span.parent_span_id)
 
     start_time = _decode_unix_nano(otlp_span.start_time_unix_nano)
-    end_time = (
-        _decode_unix_nano(otlp_span.end_time_unix_nano) if otlp_span.end_time_unix_nano else None
-    )
+    end_time = _decode_unix_nano(otlp_span.end_time_unix_nano)
 
-    attributes =
-    span_kind = SpanKind(attributes
-
-    for mime_type in (INPUT_MIME_TYPE, OUTPUT_MIME_TYPE):
-        if mime_type in attributes:
-            attributes[mime_type] = MimeType(attributes[mime_type])
+    attributes = unflatten(load_json_strings(_decode_key_values(otlp_span.attributes)))
+    span_kind = SpanKind(get_attribute_value(attributes, OPENINFERENCE_SPAN_KIND))
 
     status_code, status_message = _decode_status(otlp_span.status)
     events = [_decode_event(event) for event in otlp_span.events]
@@ -152,28 +147,6 @@ def _decode_value(any_value: AnyValue) -> Any:
     assert_never(which)
 
 
-_JSON_STRING_ATTRIBUTES = (
-    DOCUMENT_METADATA,
-    LLM_PROMPT_TEMPLATE_VARIABLES,
-    METADATA,
-    TOOL_PARAMETERS,
-)
-
-
-def _load_json_strings(key_values: Iterable[Tuple[str, Any]]) -> Iterator[Tuple[str, Any]]:
-    for key, value in key_values:
-        if key.endswith(_JSON_STRING_ATTRIBUTES):
-            try:
-                dict_value = json.loads(value)
-            except Exception:
-                yield key, value
-            else:
-                if dict_value:
-                    yield key, dict_value
-                else:
-                    yield key, value
-
-
 StatusMessage: TypeAlias = str
 
 _STATUS_DECODING = MappingProxyType(
@@ -190,120 +163,6 @@ def _decode_status(otlp_status: otlp.Status) -> Tuple[SpanStatusCode, StatusMess
     return status_code, otlp_status.message
 
 
-_SEMANTIC_CONVENTIONS: List[str] = sorted(
-    (
-        getattr(klass, attr)
-        for name in dir(trace)
-        if name.endswith("Attributes") and inspect.isclass(klass := getattr(trace, name))
-        for attr in dir(klass)
-        if attr.isupper()
-    ),
-    reverse=True,
-)  # sorted so the longer strings go first
-
-
-def _semantic_convention_prefix_partition(key: str, separator: str = ".") -> Tuple[str, str, str]:
-    """Return the longest prefix of `key` that is a semantic convention, and the remaining suffix
-    separated by `.`. For example, if `key` is "retrieval.documents.2.document.score", return
-    ("retrieval.documents", ".", "2.document.score"). The return signature is based on Python's
-    `.partition` method for strings.
-    """
-    for prefix in _SEMANTIC_CONVENTIONS:
-        if key == prefix:
-            return key, "", ""
-        if key.startswith(prefix) and key[len(prefix) :].startswith(separator):
-            return prefix, separator, key[len(prefix) + len(separator) :]
-    return "", "", ""
-
-
-class _Trie(DefaultDict[Union[str, int], "_Trie"]):
-    """Prefix Tree with special handling for indices (i.e. all-digit keys)."""
-
-    def __init__(self) -> None:
-        super().__init__(_Trie)
-        self.value: Any = None
-        self.indices: Set[int] = set()
-        self.branches: Set[Union[str, int]] = set()
-
-    def set_value(self, value: Any) -> None:
-        self.value = value
-        # value and indices must not coexist
-        self.branches.update(self.indices)
-        self.indices.clear()
-
-    def add_index(self, index: int) -> "_Trie":
-        if self.value is not None:
-            self.branches.add(index)
-        elif index not in self.branches:
-            self.indices.add(index)
-        return self[index]
-
-    def add_branch(self, branch: Union[str, int]) -> "_Trie":
-        if branch in self.indices:
-            self.indices.discard(cast(int, branch))
-        self.branches.add(branch)
-        return self[branch]
-
-
-# FIXME: Ideally we should not need something so complicated as a Trie, but it's useful here
-# for backward compatibility reasons regarding some deeply nested objects such as TOOL_PARAMETERS.
-# In the future, we should `json_dumps` them and not let things get too deeply nested.
-def _build_trie(
-    key_value_pairs: Iterable[Tuple[str, Any]],
-    separator: str = ".",
-) -> _Trie:
-    """Build a Trie (a.k.a. prefix tree) from `key_value_pairs`, by partitioning the keys by
-    separator. Each partition is a branch in the Trie. Special handling is done for partitions
-    that are all digits, e.g. "0", "12", etc., which are converted to integers and collected
-    as indices.
-    """
-    trie = _Trie()
-    for key, value in key_value_pairs:
-        if value is None:
-            continue
-        t = trie
-        while True:
-            prefix, _, suffix = _semantic_convention_prefix_partition(key, separator)
-            if prefix:
-                t = t.add_branch(prefix)
-            else:
-                prefix, _, suffix = key.partition(separator)
-                if prefix.isdigit():
-                    index = int(prefix)
-                    t = t.add_index(index) if suffix else t.add_branch(index)
-                else:
-                    t = t.add_branch(prefix)
-            if not suffix:
-                break
-            key = suffix
-        t.set_value(value)
-    return trie
-
-
-def _walk(trie: _Trie, prefix: str = "") -> Iterator[Tuple[str, Any]]:
-    if trie.value is not None:
-        yield prefix, trie.value
-    elif prefix and trie.indices:
-        yield prefix, [dict(_walk(trie[index])) for index in sorted(trie.indices)]
-    elif trie.indices:
-        for index in trie.indices:
-            yield from _walk(trie[index], prefix=f"{index}")
-    elif prefix:
-        yield prefix, dict(_walk(trie))
-        return
-    for branch in trie.branches:
-        new_prefix = f"{prefix}.{branch}" if prefix else f"{branch}"
-        yield from _walk(trie[branch], new_prefix)
-
-
-def _unflatten(
-    key_value_pairs: Iterable[Tuple[str, Any]],
-    separator: str = ".",
-) -> Iterator[Tuple[str, Any]]:
-    trie = _build_trie(key_value_pairs, separator)
-    yield from _walk(trie)
-
-
 _BILLION = 1_000_000_000  # for converting seconds to nanoseconds
 
 
@@ -316,11 +175,7 @@ def encode_span_to_otlp(span: Span) -> otlp.Span:
     start_time_unix_nano: int = int(span.start_time.timestamp() * _BILLION)
     end_time_unix_nano: int = int(span.end_time.timestamp() * _BILLION) if span.end_time else 0
 
-    attributes: Dict[str, Any] = span.attributes
-
-    for mime_type in (INPUT_MIME_TYPE, OUTPUT_MIME_TYPE):
-        if mime_type in attributes:
-            attributes[mime_type] = attributes[mime_type].value
+    attributes: Dict[str, Any] = dict(span.attributes)
 
     for key, value in span.attributes.items():
         if value is None:
@@ -328,19 +183,34 @@ def encode_span_to_otlp(span: Span) -> otlp.Span:
             attributes.pop(key, None)
         elif isinstance(value, Mapping):
             attributes.pop(key, None)
-            if key.endswith(_JSON_STRING_ATTRIBUTES):
+            if key.endswith(JSON_STRING_ATTRIBUTES):
                 attributes[key] = json.dumps(value)
             else:
-                attributes.update(_flatten_mapping(value, key))
+                attributes.update(
+                    flatten(
+                        value,
+                        prefix=key,
+                        recurse_on_sequence=True,
+                        json_string_attributes=JSON_STRING_ATTRIBUTES,
+                    )
+                )
         elif (
             not isinstance(value, str)
            and (isinstance(value, Sequence) or isinstance(value, np.ndarray))
-            and _has_mapping(value)
+            and has_mapping(value)
         ):
             attributes.pop(key, None)
-            attributes.update(
-                _flatten_sequence(value, key)
-            )
+            attributes.update(
+                flatten(
+                    value,
+                    prefix=key,
+                    recurse_on_sequence=True,
+                    json_string_attributes=JSON_STRING_ATTRIBUTES,
+                )
+            )
+
+    if OPENINFERENCE_SPAN_KIND not in attributes:
+        attributes[OPENINFERENCE_SPAN_KIND] = span.span_kind.value
 
     status = _encode_status(span.status_code, span.status_message)
     events = map(_encode_event, span.events)
@@ -381,42 +251,6 @@ def _encode_identifier(identifier: Optional[str]) -> bytes:
     return unhexlify(identifier)
 
 
-def _has_mapping(sequence: Sequence[Any]) -> bool:
-    for item in sequence:
-        if isinstance(item, Mapping):
-            return True
-    return False
-
-
-def _flatten_mapping(
-    mapping: Mapping[str, Any],
-    prefix: str,
-) -> Iterator[Tuple[str, Any]]:
-    for key, value in mapping.items():
-        prefixed_key = f"{prefix}.{key}"
-        if isinstance(value, Mapping):
-            if key.endswith(_JSON_STRING_ATTRIBUTES):
-                yield prefixed_key, json.dumps(value)
-            else:
-                yield from _flatten_mapping(value, prefixed_key)
-        elif isinstance(value, Sequence):
-            yield from _flatten_sequence(value, prefixed_key)
-        elif value is not None:
-            yield prefixed_key, value
-
-
-def _flatten_sequence(
-    sequence: Sequence[Any],
-    prefix: str,
-) -> Iterator[Tuple[str, Any]]:
-    if isinstance(sequence, str) or not _has_mapping(sequence):
-        yield prefix, sequence
-    for idx, obj in enumerate(sequence):
-        if not isinstance(obj, Mapping):
-            continue
-        yield from _flatten_mapping(obj, f"{prefix}.{idx}")
-
-
 def _encode_event(event: SpanEvent) -> otlp.Span.Event:
     return otlp.Span.Event(
         name=event.name,
```
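Most of what is deleted here (the semantic-convention prefix partitioning, the `_Trie`, `_unflatten`, and the `_flatten_*`/`_has_mapping`/`_load_json_strings` helpers) is not gone but consolidated into the new `phoenix/trace/attributes.py` module (+349 lines above) behind the public `flatten`, `unflatten`, `get_attribute_value`, `has_mapping`, and `load_json_strings` names. The convention both directions share: OTLP carries a flat mapping with dotted keys, Phoenix works with nested mappings, and all-digit path segments act as list indices. Below is a toy round-trip illustrating just that convention; `toy_unflatten` is a hypothetical stand-in, not the real `phoenix.trace.attributes` implementation, which additionally treats semantic-convention names like `retrieval.documents` as atomic prefixes and special-cases JSON-string attributes:

```python
from typing import Any, Dict, Iterable, Tuple


def toy_unflatten(pairs: Iterable[Tuple[str, Any]]) -> Dict[str, Any]:
    """Nest dotted keys into mappings; all-digit segments become list indices."""
    root: Dict[str, Any] = {}
    for key, value in pairs:
        node = root
        parts = key.split(".")
        for part in parts[:-1]:
            node = node.setdefault(part, {})
        node[parts[-1]] = value

    def listify(obj: Any) -> Any:
        # Convert {"0": x, "1": y} into [x, y], recursively.
        if not isinstance(obj, dict):
            return obj
        obj = {k: listify(v) for k, v in obj.items()}
        if obj and all(k.isdigit() for k in obj):
            return [obj[k] for k in sorted(obj, key=int)]
        return obj

    return {k: listify(v) for k, v in root.items()}


flat = [
    ("retrieval.documents.0.document.score", 0.9),
    ("retrieval.documents.1.document.score", 0.7),
    ("llm.token_count.prompt", 10),
]
nested = toy_unflatten(flat)
assert nested["retrieval"]["documents"][0]["document"]["score"] == 0.9
assert nested["llm"]["token_count"]["prompt"] == 10
```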
phoenix/trace/schemas.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Any,
+from typing import Any, List, Mapping, NamedTuple, Optional
 from uuid import UUID
 
 EXCEPTION_TYPE = "exception.type"
@@ -47,16 +47,14 @@ class SpanKind(Enum):
 
     @classmethod
     def _missing_(cls, v: Any) -> Optional["SpanKind"]:
-        if v and isinstance(v, str) and not v.isupper():
+        if v and isinstance(v, str) and v.isascii() and not v.isupper():
             return cls(v.upper())
-        return
+        return cls.UNKNOWN
 
 
 TraceID = str
 SpanID = str
-
-AttributeValue = Union[AttributePrimitiveValue, List[AttributePrimitiveValue]]
-SpanAttributes = Dict[str, AttributeValue]
+SpanAttributes = Mapping[str, Any]
 
 
 @dataclass(frozen=True)
@@ -73,7 +71,7 @@ class SpanConversationAttributes:
 
 
 @dataclass(frozen=True)
-class SpanEvent
+class SpanEvent:
     """
     A Span Event can be thought of as a structured log message (or annotation)
     on a Span, typically used to denote a meaningful, singular point in time
@@ -142,7 +140,7 @@ class Span:
     "If the parent_id is None, this is the root span"
     parent_id: Optional[SpanID]
     start_time: datetime
-    end_time: Optional[datetime]
+    end_time: datetime
     status_code: SpanStatusCode
     status_message: str
     """
@@ -202,3 +200,11 @@ class ComputedAttributes(Enum):
     CUMULATIVE_LLM_TOKEN_COUNT_COMPLETION = "cumulative_token_count.completion"
     ERROR_COUNT = "error_count"
     CUMULATIVE_ERROR_COUNT = "cumulative_error_count"
+
+
+class ComputedValues(NamedTuple):
+    latency_ms: float
+    cumulative_error_count: int
+    cumulative_llm_token_count_prompt: int
+    cumulative_llm_token_count_completion: int
+    cumulative_llm_token_count_total: int
```
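Two behavioral changes in `SpanKind._missing_` are easy to miss: unrecognized values now resolve to `UNKNOWN` instead of returning `None` (which made `SpanKind(...)` raise `ValueError`), and the case-insensitive retry is limited to ASCII strings, sidestepping surprising Unicode case mappings (e.g. `"ß".upper()` is `"SS"`). A self-contained sketch of the same pattern; the member set here is abbreviated, not the real enum:

```python
from enum import Enum
from typing import Any, Optional


class SpanKind(Enum):
    # Abbreviated member set for illustration; the real enum has more kinds.
    LLM = "LLM"
    CHAIN = "CHAIN"
    UNKNOWN = "UNKNOWN"

    @classmethod
    def _missing_(cls, v: Any) -> Optional["SpanKind"]:
        # Retry lowercase ASCII input in uppercase; otherwise fall back to
        # UNKNOWN instead of letting Enum raise ValueError.
        if v and isinstance(v, str) and v.isascii() and not v.isupper():
            return cls(v.upper())
        return cls.UNKNOWN


assert SpanKind("llm") is SpanKind.LLM             # case-insensitive lookup
assert SpanKind("tool_call") is SpanKind.UNKNOWN   # unknown value, no error
assert SpanKind(None) is SpanKind.UNKNOWN          # missing value, no error
```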
phoenix/trace/span_evaluations.py
CHANGED
```diff
@@ -12,6 +12,7 @@ from pandas.api.types import is_integer_dtype, is_numeric_dtype, is_string_dtype
 from pyarrow import RecordBatchStreamReader, Schema, Table, parquet
 
 from phoenix.config import TRACE_DATASET_DIR
+from phoenix.exceptions import PhoenixEvaluationNameIsMissing
 from phoenix.trace.errors import InvalidParquetMetadataError
 
 EVAL_NAME_COLUMN_PREFIX = "eval."
@@ -335,8 +336,10 @@ def _parse_schema_metadata(schema: Schema) -> Tuple[UUID, str, Type[Evaluations]
         arize_metadata = json.loads(metadata[b"arize"])
         eval_classes = {subclass.__name__: subclass for subclass in Evaluations.__subclasses__()}
         eval_id = UUID(arize_metadata["eval_id"])
-        if not isinstance((eval_name := arize_metadata["eval_name"]), str):
-            raise
+        if not isinstance((eval_name := arize_metadata["eval_name"]), str) or not eval_name.strip():
+            raise PhoenixEvaluationNameIsMissing(
+                'Arize metadata must contain a non-empty string value for key "eval_name"'
+            )
         evaluations_cls = eval_classes[arize_metadata["eval_type"]]
         return eval_id, eval_name, evaluations_cls
     except Exception as err:
```
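The tightened guard rejects blank or whitespace-only names as well as non-strings, and raises the dedicated `PhoenixEvaluationNameIsMissing` from the expanded `phoenix/exceptions.py` (+5 -1 above). The same walrus-plus-`strip()` validation in isolation; `parse_eval_name` is a hypothetical wrapper and the metadata dicts are toy data:

```python
class PhoenixEvaluationNameIsMissing(Exception):
    """Raised when parquet metadata lacks a usable eval name."""


def parse_eval_name(arize_metadata: dict) -> str:
    # One guard covers missing, non-string, and blank names (the diff uses
    # arize_metadata["eval_name"]; .get() here folds a missing key into the
    # same error path for the sake of the example).
    if not isinstance((eval_name := arize_metadata.get("eval_name")), str) or not eval_name.strip():
        raise PhoenixEvaluationNameIsMissing(
            'Arize metadata must contain a non-empty string value for key "eval_name"'
        )
    return eval_name


assert parse_eval_name({"eval_name": "hallucination"}) == "hallucination"
try:
    parse_eval_name({"eval_name": "   "})
except PhoenixEvaluationNameIsMissing:
    pass  # blank names are rejected
```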
phoenix/utilities/__init__.py
CHANGED
```diff
@@ -1,26 +0,0 @@
-from datetime import datetime
-from typing import List, Optional
-
-import pandas as pd
-
-from phoenix.core.project import Project
-from phoenix.trace.dsl import SpanQuery
-
-
-def query_spans(
-    project: Optional[Project],
-    *queries: SpanQuery,
-    start_time: Optional[datetime] = None,
-    stop_time: Optional[datetime] = None,
-    root_spans_only: Optional[bool] = None,
-) -> List[pd.DataFrame]:
-    if not queries or not project:
-        return []
-    spans = tuple(
-        project.get_spans(
-            start_time=start_time,
-            stop_time=stop_time,
-            root_spans_only=root_spans_only,
-        )
-    )
-    return [query(spans) for query in queries]
```
phoenix/utilities/span_store.py
CHANGED
```diff
@@ -1,23 +0,0 @@
-from typing import Optional
-
-from phoenix.config import get_env_span_storage_type, get_storage_dir
-from phoenix.core.traces import Traces
-from phoenix.storage.span_store import SPAN_STORE_FACTORIES, SpanStore
-from phoenix.trace.otel import decode_otlp_span
-from phoenix.utilities.project import get_project_name
-
-
-def get_span_store() -> Optional[SpanStore]:
-    if span_store_type := get_env_span_storage_type():
-        span_store_factory = SPAN_STORE_FACTORIES[span_store_type]
-        return span_store_factory(get_storage_dir())
-    return None
-
-
-def load_traces_data_from_store(traces: Traces, span_store: SpanStore) -> None:
-    for traces_data in span_store.load():
-        for resource_spans in traces_data.resource_spans:
-            project_name = get_project_name(resource_spans.resource.attributes)
-            for scope_span in resource_spans.scope_spans:
-                for span in scope_span.spans:
-                    traces.put(decode_otlp_span(span), project_name=project_name)
```
phoenix/version.py
CHANGED
```diff
@@ -1 +1 @@
-__version__ = "3.25.0"
+__version__ = "4.0.0"
```