arize-phoenix 3.25.0__py3-none-any.whl → 4.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/METADATA +26 -4
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/RECORD +80 -75
- phoenix/__init__.py +9 -5
- phoenix/config.py +109 -53
- phoenix/datetime_utils.py +18 -1
- phoenix/db/README.md +25 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +119 -0
- phoenix/db/bulk_inserter.py +206 -0
- phoenix/db/engines.py +152 -0
- phoenix/db/helpers.py +47 -0
- phoenix/db/insertion/evaluation.py +209 -0
- phoenix/db/insertion/helpers.py +51 -0
- phoenix/db/insertion/span.py +142 -0
- phoenix/db/migrate.py +71 -0
- phoenix/db/migrations/env.py +121 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +371 -0
- phoenix/exceptions.py +5 -1
- phoenix/server/api/context.py +40 -3
- phoenix/server/api/dataloaders/__init__.py +97 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +67 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +152 -0
- phoenix/server/api/dataloaders/document_evaluations.py +37 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +98 -0
- phoenix/server/api/dataloaders/evaluation_summaries.py +151 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +198 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +93 -0
- phoenix/server/api/dataloaders/record_counts.py +125 -0
- phoenix/server/api/dataloaders/span_descendants.py +64 -0
- phoenix/server/api/dataloaders/span_evaluations.py +37 -0
- phoenix/server/api/dataloaders/token_counts.py +138 -0
- phoenix/server/api/dataloaders/trace_evaluations.py +37 -0
- phoenix/server/api/input_types/SpanSort.py +138 -68
- phoenix/server/api/routers/v1/__init__.py +11 -0
- phoenix/server/api/routers/v1/evaluations.py +275 -0
- phoenix/server/api/routers/v1/spans.py +126 -0
- phoenix/server/api/routers/v1/traces.py +82 -0
- phoenix/server/api/schema.py +112 -48
- phoenix/server/api/types/DocumentEvaluationSummary.py +1 -1
- phoenix/server/api/types/Evaluation.py +29 -12
- phoenix/server/api/types/EvaluationSummary.py +29 -44
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +9 -9
- phoenix/server/api/types/Project.py +240 -171
- phoenix/server/api/types/Span.py +87 -131
- phoenix/server/api/types/Trace.py +29 -20
- phoenix/server/api/types/pagination.py +151 -10
- phoenix/server/app.py +263 -35
- phoenix/server/grpc_server.py +93 -0
- phoenix/server/main.py +75 -60
- phoenix/server/openapi/docs.py +218 -0
- phoenix/server/prometheus.py +23 -7
- phoenix/server/static/index.js +662 -643
- phoenix/server/telemetry.py +68 -0
- phoenix/services.py +4 -0
- phoenix/session/client.py +34 -30
- phoenix/session/data_extractor.py +8 -3
- phoenix/session/session.py +176 -155
- phoenix/settings.py +13 -0
- phoenix/trace/attributes.py +349 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +660 -192
- phoenix/trace/dsl/helpers.py +24 -5
- phoenix/trace/dsl/query.py +562 -185
- phoenix/trace/fixtures.py +69 -7
- phoenix/trace/otel.py +44 -200
- phoenix/trace/schemas.py +14 -8
- phoenix/trace/span_evaluations.py +5 -2
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/span_store.py +0 -23
- phoenix/version.py +1 -1
- phoenix/core/project.py +0 -773
- phoenix/core/traces.py +0 -96
- phoenix/datasets/dataset.py +0 -214
- phoenix/datasets/fixtures.py +0 -24
- phoenix/datasets/schema.py +0 -31
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -453
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/routers/evaluation_handler.py +0 -110
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/WHEEL +0 -0
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → server/openapi}/__init__.py +0 -0
phoenix/trace/fixtures.py
CHANGED
@@ -1,11 +1,15 @@
-from …
-from …
+from binascii import hexlify
+from dataclasses import dataclass, field, replace
+from datetime import datetime, timezone
+from random import getrandbits
+from typing import Dict, Iterable, Iterator, List, NamedTuple, Optional, Tuple, cast
 from urllib import request
 
 import pandas as pd
 from google.protobuf.wrappers_pb2 import DoubleValue, StringValue
 
 import phoenix.trace.v1 as pb
+from phoenix.trace.schemas import Span
 from phoenix.trace.trace_dataset import TraceDataset
 from phoenix.trace.utils import json_lines_to_df
 
@@ -105,7 +109,7 @@ TRACES_FIXTURES: List[TracesFixture] = [
 NAME_TO_TRACES_FIXTURE = {fixture.name: fixture for fixture in TRACES_FIXTURES}
 
 
-def _get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
+def get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
     """
     Returns the fixture whose name matches the input name.
 
@@ -120,7 +124,7 @@ def _get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
     return NAME_TO_TRACES_FIXTURE[fixture_name]
 
 
-def _download_traces_fixture(
+def download_traces_fixture(
     fixture: TracesFixture,
     host: Optional[str] = "https://storage.googleapis.com/",
     bucket: Optional[str] = "arize-assets",
@@ -138,12 +142,12 @@ def load_example_traces(use_case: str) -> TraceDataset:
     """
     Loads a trace dataframe by name.
    """
-    fixture = _get_trace_fixture_by_name(use_case)
-    return TraceDataset(json_lines_to_df(_download_traces_fixture(fixture)))
+    fixture = get_trace_fixture_by_name(use_case)
+    return TraceDataset(json_lines_to_df(download_traces_fixture(fixture)))
 
 
 def get_evals_from_fixture(use_case: str) -> Iterator[pb.Evaluation]:
-    fixture = _get_trace_fixture_by_name(use_case)
+    fixture = get_trace_fixture_by_name(use_case)
     for eval_fixture in fixture.evaluation_fixtures:
         yield from _read_eval_fixture(eval_fixture)
 
@@ -195,3 +199,61 @@ def _url(
     prefix: Optional[str] = "phoenix/traces/",
 ) -> str:
     return f"{host}{bucket}/{prefix}{file_name}"
+
+
+def reset_fixture_span_ids_and_timestamps(
+    spans: Iterable[Span],
+    evals: Iterable[pb.Evaluation] = (),
+) -> Tuple[List[Span], List[pb.Evaluation]]:
+    old_spans, old_evals = list(spans), list(evals)
+    new_trace_ids: Dict[str, str] = {}
+    new_span_ids: Dict[str, str] = {}
+    for old_span in old_spans:
+        new_trace_ids[old_span.context.trace_id] = _new_trace_id()
+        new_span_ids[old_span.context.span_id] = _new_span_id()
+        if old_span.parent_id:
+            new_span_ids[old_span.parent_id] = _new_span_id()
+    for old_eval in old_evals:
+        subject_id = old_eval.subject_id
+        if trace_id := subject_id.trace_id:
+            new_trace_ids[trace_id] = _new_trace_id()
+        elif span_id := subject_id.span_id:
+            new_span_ids[span_id] = _new_span_id()
+        elif span_id := subject_id.document_retrieval_id.span_id:
+            new_span_ids[span_id] = _new_span_id()
+    max_end_time = max(old_span.end_time for old_span in old_spans)
+    time_diff = datetime.now(timezone.utc) - max_end_time
+    new_spans: List[Span] = []
+    new_evals: List[pb.Evaluation] = []
+    for old_span in old_spans:
+        new_trace_id = new_trace_ids[old_span.context.trace_id]
+        new_span_id = new_span_ids[old_span.context.span_id]
+        new_parent_id = new_span_ids[old_span.parent_id] if old_span.parent_id else None
+        new_span = replace(
+            old_span,
+            context=replace(old_span.context, trace_id=new_trace_id, span_id=new_span_id),
+            parent_id=new_parent_id,
+            start_time=old_span.start_time + time_diff,
+            end_time=old_span.end_time + time_diff,
+        )
+        new_spans.append(new_span)
+    for old_eval in old_evals:
+        new_eval = pb.Evaluation()
+        new_eval.CopyFrom(old_eval)
+        subject_id = new_eval.subject_id
+        if trace_id := subject_id.trace_id:
+            subject_id.trace_id = new_trace_ids[trace_id]
+        elif span_id := subject_id.span_id:
+            subject_id.span_id = new_span_ids[span_id]
+        elif span_id := subject_id.document_retrieval_id.span_id:
+            subject_id.document_retrieval_id.span_id = new_span_ids[span_id]
+        new_evals.append(new_eval)
+    return new_spans, new_evals
+
+
+def _new_trace_id() -> str:
+    return hexlify(getrandbits(128).to_bytes(16, "big")).decode()
+
+
+def _new_span_id() -> str:
+    return hexlify(getrandbits(64).to_bytes(8, "big")).decode()
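The new `reset_fixture_span_ids_and_timestamps` helper remints every trace and span ID and shifts all timestamps forward so that re-ingested fixture data looks freshly generated. A minimal, runnable sketch of just the ID-minting scheme it relies on (mirroring the two private helpers in the hunk; nothing outside the diff is assumed):

```python
from binascii import hexlify
from random import getrandbits

def new_trace_id() -> str:
    # 128 random bits -> 16 big-endian bytes -> 32 hex characters
    return hexlify(getrandbits(128).to_bytes(16, "big")).decode()

def new_span_id() -> str:
    # 64 random bits -> 8 big-endian bytes -> 16 hex characters
    return hexlify(getrandbits(64).to_bytes(8, "big")).decode()

# Remap table keyed by old ID, as the helper builds internally
old_to_new = {"0123456789abcdef": new_span_id()}
print(new_trace_id(), old_to_new)
```

OTLP identifiers are fixed-width big-endian byte strings rendered as hex, which is why the trace ID draws 128 bits and the span ID 64.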
phoenix/trace/otel.py
CHANGED
@@ -1,39 +1,44 @@
-import inspect
 import json
 from binascii import hexlify, unhexlify
 from datetime import datetime, timezone
 from types import MappingProxyType
 from typing import (
     Any,
-    DefaultDict,
     Dict,
     Iterable,
     Iterator,
-    List,
     Mapping,
     Optional,
     Sequence,
-    Set,
     SupportsFloat,
     Tuple,
-    Union,
     cast,
 )
 
 import numpy as np
 import opentelemetry.proto.trace.v1.trace_pb2 as otlp
-from openinference.semconv import trace
-…
+from openinference.semconv.trace import (
+    DocumentAttributes,
+    OpenInferenceMimeTypeValues,
+    SpanAttributes,
+)
 from opentelemetry.proto.common.v1.common_pb2 import AnyValue, ArrayValue, KeyValue
 from opentelemetry.util.types import Attributes, AttributeValue
 from typing_extensions import TypeAlias, assert_never
 
+from phoenix.trace.attributes import (
+    JSON_STRING_ATTRIBUTES,
+    flatten,
+    get_attribute_value,
+    has_mapping,
+    load_json_strings,
+    unflatten,
+)
 from phoenix.trace.schemas import (
     EXCEPTION_ESCAPED,
     EXCEPTION_MESSAGE,
     EXCEPTION_STACKTRACE,
     EXCEPTION_TYPE,
-    MimeType,
     Span,
     SpanContext,
     SpanEvent,
@@ -61,20 +66,20 @@ def decode_otlp_span(otlp_span: otlp.Span) -> Span:
     parent_id = _decode_identifier(otlp_span.parent_span_id)
 
     start_time = _decode_unix_nano(otlp_span.start_time_unix_nano)
-    end_time = (
-        _decode_unix_nano(otlp_span.end_time_unix_nano) if otlp_span.end_time_unix_nano else None
-    )
-
-    attributes = dict(_unflatten(_load_json_strings(_decode_key_values(otlp_span.attributes))))
-    span_kind = SpanKind(attributes.pop(OPENINFERENCE_SPAN_KIND, None))
+    end_time = _decode_unix_nano(otlp_span.end_time_unix_nano)
 
-    for mime_type in (INPUT_MIME_TYPE, OUTPUT_MIME_TYPE):
-        if mime_type in attributes:
-            attributes[mime_type] = MimeType(attributes[mime_type])
+    attributes = unflatten(load_json_strings(_decode_key_values(otlp_span.attributes)))
+    span_kind = SpanKind(get_attribute_value(attributes, OPENINFERENCE_SPAN_KIND))
 
     status_code, status_message = _decode_status(otlp_span.status)
     events = [_decode_event(event) for event in otlp_span.events]
 
+    if (input_value := get_attribute_value(attributes, INPUT_VALUE)) and not isinstance(
+        input_value, str
+    ):
+        attributes["input"]["value"] = json.dumps(input_value)
+        attributes["input"]["mime_type"] = OpenInferenceMimeTypeValues.JSON.value
+
     return Span(
         name=otlp_span.name,
         context=SpanContext(
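One behavioral addition in `decode_otlp_span`: a decoded input value that is not already a string is re-serialized as JSON and its mime type updated to match. A hedged standalone illustration of that normalization (the literal "application/json" stands in for `OpenInferenceMimeTypeValues.JSON.value`, which is an assumption here):

```python
import json

# After unflattening, "input.value" may be a nested object rather than a string.
attributes = {"input": {"value": {"query": "What is Phoenix?"}}}

input_value = attributes["input"]["value"]
if input_value and not isinstance(input_value, str):
    attributes["input"]["value"] = json.dumps(input_value)
    # assumed to equal OpenInferenceMimeTypeValues.JSON.value
    attributes["input"]["mime_type"] = "application/json"

print(attributes["input"])
# {'value': '{"query": "What is Phoenix?"}', 'mime_type': 'application/json'}
```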
@@ -152,28 +157,6 @@ def _decode_value(any_value: AnyValue) -> Any:
     assert_never(which)
 
 
-_JSON_STRING_ATTRIBUTES = (
-    DOCUMENT_METADATA,
-    LLM_PROMPT_TEMPLATE_VARIABLES,
-    METADATA,
-    TOOL_PARAMETERS,
-)
-
-
-def _load_json_strings(key_values: Iterable[Tuple[str, Any]]) -> Iterator[Tuple[str, Any]]:
-    for key, value in key_values:
-        if key.endswith(_JSON_STRING_ATTRIBUTES):
-            try:
-                dict_value = json.loads(value)
-            except Exception:
-                yield key, value
-            else:
-                if dict_value:
-                    yield key, dict_value
-        else:
-            yield key, value
-
-
 StatusMessage: TypeAlias = str
 
 _STATUS_DECODING = MappingProxyType(
@@ -190,120 +173,6 @@ def _decode_status(otlp_status: otlp.Status) -> Tuple[SpanStatusCode, StatusMess
     return status_code, otlp_status.message
 
 
-_SEMANTIC_CONVENTIONS: List[str] = sorted(
-    (
-        getattr(klass, attr)
-        for name in dir(trace)
-        if name.endswith("Attributes") and inspect.isclass(klass := getattr(trace, name))
-        for attr in dir(klass)
-        if attr.isupper()
-    ),
-    reverse=True,
-)  # sorted so the longer strings go first
-
-
-def _semantic_convention_prefix_partition(key: str, separator: str = ".") -> Tuple[str, str, str]:
-    """Return the longest prefix of `key` that is a semantic convention, and the remaining suffix
-    separated by `.`. For example, if `key` is "retrieval.documents.2.document.score", return
-    ("retrieval.documents", ".", "2.document.score"). The return signature is based on Python's
-    `.partition` method for strings.
-    """
-    for prefix in _SEMANTIC_CONVENTIONS:
-        if key == prefix:
-            return key, "", ""
-        if key.startswith(prefix) and key[len(prefix) :].startswith(separator):
-            return prefix, separator, key[len(prefix) + len(separator) :]
-    return "", "", ""
-
-
-class _Trie(DefaultDict[Union[str, int], "_Trie"]):
-    """Prefix Tree with special handling for indices (i.e. all-digit keys)."""
-
-    def __init__(self) -> None:
-        super().__init__(_Trie)
-        self.value: Any = None
-        self.indices: Set[int] = set()
-        self.branches: Set[Union[str, int]] = set()
-
-    def set_value(self, value: Any) -> None:
-        self.value = value
-        # value and indices must not coexist
-        self.branches.update(self.indices)
-        self.indices.clear()
-
-    def add_index(self, index: int) -> "_Trie":
-        if self.value is not None:
-            self.branches.add(index)
-        elif index not in self.branches:
-            self.indices.add(index)
-        return self[index]
-
-    def add_branch(self, branch: Union[str, int]) -> "_Trie":
-        if branch in self.indices:
-            self.indices.discard(cast(int, branch))
-        self.branches.add(branch)
-        return self[branch]
-
-
-# FIXME: Ideally we should not need something so complicated as a Trie, but it's useful here
-# for backward compatibility reasons regarding some deeply nested objects such as TOOL_PARAMETERS.
-# In the future, we should `json_dumps` them and not let things get too deeply nested.
-def _build_trie(
-    key_value_pairs: Iterable[Tuple[str, Any]],
-    separator: str = ".",
-) -> _Trie:
-    """Build a Trie (a.k.a. prefix tree) from `key_value_pairs`, by partitioning the keys by
-    separator. Each partition is a branch in the Trie. Special handling is done for partitions
-    that are all digits, e.g. "0", "12", etc., which are converted to integers and collected
-    as indices.
-    """
-    trie = _Trie()
-    for key, value in key_value_pairs:
-        if value is None:
-            continue
-        t = trie
-        while True:
-            prefix, _, suffix = _semantic_convention_prefix_partition(key, separator)
-            if prefix:
-                t = t.add_branch(prefix)
-            else:
-                prefix, _, suffix = key.partition(separator)
-                if prefix.isdigit():
-                    index = int(prefix)
-                    t = t.add_index(index) if suffix else t.add_branch(index)
-                else:
-                    t = t.add_branch(prefix)
-            if not suffix:
-                break
-            key = suffix
-        t.set_value(value)
-    return trie
-
-
-def _walk(trie: _Trie, prefix: str = "") -> Iterator[Tuple[str, Any]]:
-    if trie.value is not None:
-        yield prefix, trie.value
-    elif prefix and trie.indices:
-        yield prefix, [dict(_walk(trie[index])) for index in sorted(trie.indices)]
-    elif trie.indices:
-        for index in trie.indices:
-            yield from _walk(trie[index], prefix=f"{index}")
-    elif prefix:
-        yield prefix, dict(_walk(trie))
-        return
-    for branch in trie.branches:
-        new_prefix = f"{prefix}.{branch}" if prefix else f"{branch}"
-        yield from _walk(trie[branch], new_prefix)
-
-
-def _unflatten(
-    key_value_pairs: Iterable[Tuple[str, Any]],
-    separator: str = ".",
-) -> Iterator[Tuple[str, Any]]:
-    trie = _build_trie(key_value_pairs, separator)
-    yield from _walk(trie)
-
-
 _BILLION = 1_000_000_000  # for converting seconds to nanoseconds
 
 
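The removed trie machinery existed to rebuild nested attributes from dotted OTLP keys while keeping known semantic-convention prefixes such as "retrieval.documents" intact as single keys; that responsibility moves to `phoenix.trace.attributes.unflatten`. A naive sketch of the core unflattening idea, deliberately omitting the semantic-convention prefix handling that motivated the trie:

```python
from typing import Any, Dict

def unflatten_sketch(flat: Dict[str, Any]) -> Dict[Any, Any]:
    # Split dotted keys into nested dicts; all-digit segments become int indices.
    root: Dict[Any, Any] = {}
    for key, value in flat.items():
        node = root
        *parents, leaf = key.split(".")
        for part in parents:
            node = node.setdefault(int(part) if part.isdigit() else part, {})
        node[int(leaf) if leaf.isdigit() else leaf] = value
    return root

print(unflatten_sketch({"retrieval.documents.0.document.score": 0.9}))
# {'retrieval': {'documents': {0: {'document': {'score': 0.9}}}}}
```

A plain split cannot tell that "retrieval.documents" should stay one key, which is exactly the ambiguity the longest-prefix partition over `_SEMANTIC_CONVENTIONS` resolved.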
@@ -316,11 +185,7 @@ def encode_span_to_otlp(span: Span) -> otlp.Span:
     start_time_unix_nano: int = int(span.start_time.timestamp() * _BILLION)
     end_time_unix_nano: int = int(span.end_time.timestamp() * _BILLION) if span.end_time else 0
 
-    attributes: Dict[str, Any] = span.attributes
-
-    for mime_type in (INPUT_MIME_TYPE, OUTPUT_MIME_TYPE):
-        if mime_type in attributes:
-            attributes[mime_type] = attributes[mime_type].value
+    attributes: Dict[str, Any] = dict(span.attributes)
 
     for key, value in span.attributes.items():
         if value is None:
@@ -328,19 +193,34 @@ def encode_span_to_otlp(span: Span) -> otlp.Span:
             attributes.pop(key, None)
         elif isinstance(value, Mapping):
             attributes.pop(key, None)
-            if key.endswith(_JSON_STRING_ATTRIBUTES):
+            if key.endswith(JSON_STRING_ATTRIBUTES):
                 attributes[key] = json.dumps(value)
             else:
-                attributes.update(_flatten_mapping(value, key))
+                attributes.update(
+                    flatten(
+                        value,
+                        prefix=key,
+                        recurse_on_sequence=True,
+                        json_string_attributes=JSON_STRING_ATTRIBUTES,
+                    )
+                )
         elif (
             not isinstance(value, str)
             and (isinstance(value, Sequence) or isinstance(value, np.ndarray))
-            and _has_mapping(value)
+            and has_mapping(value)
         ):
             attributes.pop(key, None)
-            attributes.update(
-                _flatten_sequence(value, key)
-            )
+            attributes.update(
+                flatten(
+                    value,
+                    prefix=key,
+                    recurse_on_sequence=True,
+                    json_string_attributes=JSON_STRING_ATTRIBUTES,
+                )
+            )
+
+    if OPENINFERENCE_SPAN_KIND not in attributes:
+        attributes[OPENINFERENCE_SPAN_KIND] = span.span_kind.value
 
     status = _encode_status(span.status_code, span.status_message)
     events = map(_encode_event, span.events)
@@ -381,42 +261,6 @@ def _encode_identifier(identifier: Optional[str]) -> bytes:
     return unhexlify(identifier)
 
 
-def _has_mapping(sequence: Sequence[Any]) -> bool:
-    for item in sequence:
-        if isinstance(item, Mapping):
-            return True
-    return False
-
-
-def _flatten_mapping(
-    mapping: Mapping[str, Any],
-    prefix: str,
-) -> Iterator[Tuple[str, Any]]:
-    for key, value in mapping.items():
-        prefixed_key = f"{prefix}.{key}"
-        if isinstance(value, Mapping):
-            if key.endswith(_JSON_STRING_ATTRIBUTES):
-                yield prefixed_key, json.dumps(value)
-            else:
-                yield from _flatten_mapping(value, prefixed_key)
-        elif isinstance(value, Sequence):
-            yield from _flatten_sequence(value, prefixed_key)
-        elif value is not None:
-            yield prefixed_key, value
-
-
-def _flatten_sequence(
-    sequence: Sequence[Any],
-    prefix: str,
-) -> Iterator[Tuple[str, Any]]:
-    if isinstance(sequence, str) or not _has_mapping(sequence):
-        yield prefix, sequence
-    for idx, obj in enumerate(sequence):
-        if not isinstance(obj, Mapping):
-            continue
-        yield from _flatten_mapping(obj, f"{prefix}.{idx}")
-
-
 def _encode_event(event: SpanEvent) -> otlp.Span.Event:
     return otlp.Span.Event(
         name=event.name,
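The deleted `_has_mapping`/`_flatten_mapping`/`_flatten_sequence` trio is the inverse direction, now centralized as `flatten` in `phoenix.trace.attributes`: nested mappings and sequences become dotted keys. A compact sketch of that direction, ignoring the JSON-string special cases:

```python
from collections.abc import Mapping, Sequence
from typing import Any, Iterator, Tuple

def flatten_sketch(obj: Any, prefix: str = "") -> Iterator[Tuple[str, Any]]:
    if isinstance(obj, Mapping):
        for k, v in obj.items():
            yield from flatten_sketch(v, f"{prefix}.{k}" if prefix else str(k))
    elif isinstance(obj, Sequence) and not isinstance(obj, str):
        for i, v in enumerate(obj):  # indices become dotted path segments
            yield from flatten_sketch(v, f"{prefix}.{i}")
    else:
        yield prefix, obj

print(dict(flatten_sketch({"llm": {"token_count": {"prompt": 10, "completion": 3}}})))
# {'llm.token_count.prompt': 10, 'llm.token_count.completion': 3}
```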
phoenix/trace/schemas.py
CHANGED
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Any, …
+from typing import Any, List, Mapping, NamedTuple, Optional
 from uuid import UUID
 
 EXCEPTION_TYPE = "exception.type"
@@ -47,16 +47,14 @@ class SpanKind(Enum):
 
     @classmethod
     def _missing_(cls, v: Any) -> Optional["SpanKind"]:
-        if v and isinstance(v, str) and not v.isupper():
+        if v and isinstance(v, str) and v.isascii() and not v.isupper():
             return cls(v.upper())
-        return
+        return cls.UNKNOWN
 
 
 TraceID = str
 SpanID = str
-
-AttributeValue = Union[AttributePrimitiveValue, List[AttributePrimitiveValue]]
-SpanAttributes = Dict[str, AttributeValue]
+SpanAttributes = Mapping[str, Any]
 
 
 @dataclass(frozen=True)
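The `_missing_` change makes span-kind lookup total: lower-case ASCII values are upper-cased and retried, and anything still unrecognized falls back to UNKNOWN instead of raising ValueError. A standalone miniature (not the full phoenix enum) demonstrating the effect:

```python
from enum import Enum
from typing import Any, Optional

class Kind(Enum):
    LLM = "LLM"
    UNKNOWN = "UNKNOWN"

    @classmethod
    def _missing_(cls, v: Any) -> Optional["Kind"]:
        if v and isinstance(v, str) and v.isascii() and not v.isupper():
            return cls(v.upper())  # retry with the canonical upper-case form
        return cls.UNKNOWN          # previously this path returned None -> ValueError

assert Kind("llm") is Kind.LLM             # case-insensitive lookup
assert Kind("not-a-kind") is Kind.UNKNOWN  # unknown kinds no longer raise
```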
@@ -73,7 +71,7 @@ class SpanConversationAttributes:
 
 
 @dataclass(frozen=True)
-class SpanEvent…
+class SpanEvent:
     """
     A Span Event can be thought of as a structured log message (or annotation)
     on a Span, typically used to denote a meaningful, singular point in time
@@ -142,7 +140,7 @@ class Span:
     "If the parent_id is None, this is the root span"
     parent_id: Optional[SpanID]
     start_time: datetime
-    end_time: Optional[datetime]
+    end_time: datetime
     status_code: SpanStatusCode
     status_message: str
     """
@@ -202,3 +200,11 @@ class ComputedAttributes(Enum):
     CUMULATIVE_LLM_TOKEN_COUNT_COMPLETION = "cumulative_token_count.completion"
     ERROR_COUNT = "error_count"
     CUMULATIVE_ERROR_COUNT = "cumulative_error_count"
+
+
+class ComputedValues(NamedTuple):
+    latency_ms: float
+    cumulative_error_count: int
+    cumulative_llm_token_count_prompt: int
+    cumulative_llm_token_count_completion: int
+    cumulative_llm_token_count_total: int
phoenix/trace/span_evaluations.py
CHANGED
@@ -12,6 +12,7 @@ from pandas.api.types import is_integer_dtype, is_numeric_dtype, is_string_dtype
 from pyarrow import RecordBatchStreamReader, Schema, Table, parquet
 
 from phoenix.config import TRACE_DATASET_DIR
+from phoenix.exceptions import PhoenixEvaluationNameIsMissing
 from phoenix.trace.errors import InvalidParquetMetadataError
 
 EVAL_NAME_COLUMN_PREFIX = "eval."
@@ -335,8 +336,10 @@ def _parse_schema_metadata(schema: Schema) -> Tuple[UUID, str, Type[Evaluations]
         arize_metadata = json.loads(metadata[b"arize"])
         eval_classes = {subclass.__name__: subclass for subclass in Evaluations.__subclasses__()}
         eval_id = UUID(arize_metadata["eval_id"])
-        if not isinstance((eval_name := arize_metadata["eval_name"]), str):
-            raise …
+        if not isinstance((eval_name := arize_metadata["eval_name"]), str) or not eval_name.strip():
+            raise PhoenixEvaluationNameIsMissing(
+                'Arize metadata must contain a non-empty string value for key "eval_name"'
+            )
         evaluations_cls = eval_classes[arize_metadata["eval_type"]]
         return eval_id, eval_name, evaluations_cls
     except Exception as err:
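The tightened check rejects blank names as well as non-strings and raises the new `PhoenixEvaluationNameIsMissing` from phoenix/exceptions.py (also touched in this release, per the file list). A sketch of just the validation predicate, with a generic `ValueError` standing in for the library's exception:

```python
def validate_eval_name(eval_name: object) -> str:
    # Non-strings and empty/whitespace-only strings are both rejected now.
    if not isinstance(eval_name, str) or not eval_name.strip():
        # the library raises PhoenixEvaluationNameIsMissing here
        raise ValueError('Arize metadata must contain a non-empty string value for key "eval_name"')
    return eval_name

assert validate_eval_name("Hallucination") == "Hallucination"
```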
phoenix/utilities/__init__.py
CHANGED
@@ -1,26 +0,0 @@
-from datetime import datetime
-from typing import List, Optional
-
-import pandas as pd
-
-from phoenix.core.project import Project
-from phoenix.trace.dsl import SpanQuery
-
-
-def query_spans(
-    project: Optional[Project],
-    *queries: SpanQuery,
-    start_time: Optional[datetime] = None,
-    stop_time: Optional[datetime] = None,
-    root_spans_only: Optional[bool] = None,
-) -> List[pd.DataFrame]:
-    if not queries or not project:
-        return []
-    spans = tuple(
-        project.get_spans(
-            start_time=start_time,
-            stop_time=stop_time,
-            root_spans_only=root_spans_only,
-        )
-    )
-    return [query(spans) for query in queries]
phoenix/utilities/span_store.py
CHANGED
@@ -1,23 +0,0 @@
-from typing import Optional
-
-from phoenix.config import get_env_span_storage_type, get_storage_dir
-from phoenix.core.traces import Traces
-from phoenix.storage.span_store import SPAN_STORE_FACTORIES, SpanStore
-from phoenix.trace.otel import decode_otlp_span
-from phoenix.utilities.project import get_project_name
-
-
-def get_span_store() -> Optional[SpanStore]:
-    if span_store_type := get_env_span_storage_type():
-        span_store_factory = SPAN_STORE_FACTORIES[span_store_type]
-        return span_store_factory(get_storage_dir())
-    return None
-
-
-def load_traces_data_from_store(traces: Traces, span_store: SpanStore) -> None:
-    for traces_data in span_store.load():
-        for resource_spans in traces_data.resource_spans:
-            project_name = get_project_name(resource_spans.resource.attributes)
-            for scope_span in resource_spans.scope_spans:
-                for span in scope_span.spans:
-                    traces.put(decode_otlp_span(span), project_name=project_name)
phoenix/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "3.25.0"
+__version__ = "4.0.1"