arize-phoenix 2.0.0__py3-none-any.whl → 2.2.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-2.0.0.dist-info → arize_phoenix-2.2.0rc0.dist-info}/METADATA +5 -1
- {arize_phoenix-2.0.0.dist-info → arize_phoenix-2.2.0rc0.dist-info}/RECORD +31 -29
- phoenix/__init__.py +2 -2
- phoenix/core/evals.py +29 -8
- phoenix/core/traces.py +45 -34
- phoenix/experimental/evals/__init__.py +4 -1
- phoenix/experimental/evals/evaluators.py +85 -8
- phoenix/experimental/evals/functions/classify.py +16 -41
- phoenix/experimental/evals/functions/executor.py +1 -0
- phoenix/experimental/evals/models/anthropic.py +171 -0
- phoenix/experimental/evals/models/vertex.py +155 -0
- phoenix/experimental/evals/templates/__init__.py +2 -0
- phoenix/experimental/evals/templates/default_templates.py +12 -0
- phoenix/experimental/evals/utils/__init__.py +64 -2
- phoenix/server/api/schema.py +24 -0
- phoenix/server/app.py +6 -5
- phoenix/server/main.py +6 -7
- phoenix/server/span_handler.py +7 -7
- phoenix/server/static/index.js +586 -499
- phoenix/server/templates/index.html +5 -1
- phoenix/server/trace_handler.py +56 -0
- phoenix/session/session.py +2 -1
- phoenix/trace/exporter.py +4 -3
- phoenix/trace/langchain/tracer.py +14 -4
- phoenix/trace/otel.py +409 -0
- phoenix/trace/semantic_conventions.py +2 -0
- phoenix/trace/v1/__init__.py +0 -4
- phoenix/version.py +1 -0
- phoenix/trace/v1/trace_pb2.py +0 -54
- phoenix/trace/v1/trace_pb2.pyi +0 -361
- phoenix/trace/v1/utils.py +0 -538
- {arize_phoenix-2.0.0.dist-info → arize_phoenix-2.2.0rc0.dist-info}/WHEEL +0 -0
- {arize_phoenix-2.0.0.dist-info → arize_phoenix-2.2.0rc0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-2.0.0.dist-info → arize_phoenix-2.2.0rc0.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,8 +4,12 @@
|
|
|
4
4
|
<title>Phoenix</title>
|
|
5
5
|
<link rel="icon" href="{{basename}}/favicon.ico" type="image/x-icon"></link>
|
|
6
6
|
<meta charset="UTF-8" />
|
|
7
|
-
<meta name="
|
|
7
|
+
<meta name="title" content="Arize Phoenix" />
|
|
8
|
+
<meta name="description" content="AI Observability & Evaluation" />
|
|
8
9
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
10
|
+
<meta property="og:title" content="Arize Phoenix" />
|
|
11
|
+
<meta property="og:description" content="AI Observability & Evaluation" />
|
|
12
|
+
<meta property="og:image" content="https://raw.githubusercontent.com/Arize-ai/phoenix-assets/main/images/socal/social-preview-horizontal.jpg" />
|
|
9
13
|
<meta name="theme-color" content="#ffffff" />
|
|
10
14
|
<link rel="stylesheet" src="{{basename}}/index.css"></link>
|
|
11
15
|
<link
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import gzip
|
|
3
|
+
import zlib
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
from google.protobuf.message import DecodeError
|
|
7
|
+
from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import (
|
|
8
|
+
ExportTraceServiceRequest,
|
|
9
|
+
)
|
|
10
|
+
from opentelemetry.proto.trace.v1.trace_pb2 import Span
|
|
11
|
+
from starlette.endpoints import HTTPEndpoint
|
|
12
|
+
from starlette.requests import Request
|
|
13
|
+
from starlette.responses import Response
|
|
14
|
+
from starlette.status import HTTP_415_UNSUPPORTED_MEDIA_TYPE, HTTP_422_UNPROCESSABLE_ENTITY
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SupportsPutSpan(Protocol):
    """Structural type for any sink that accepts spans through `put`."""

    def put(self, span: Span) -> None:
        """Hand a single OTLP span to the sink."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TraceHandler(HTTPEndpoint):
    """HTTP endpoint that receives OTLP trace export requests via POST.

    The request body must be a protobuf-serialized
    `ExportTraceServiceRequest`, optionally compressed with gzip or deflate.
    Every span found in the request is forwarded to `queue`.
    """

    # Destination for incoming spans. Only annotated here; no value is
    # assigned in this class, so it has to be supplied from outside.
    queue: SupportsPutSpan

    async def post(self, request: Request) -> Response:
        """Decode an OTLP export request and enqueue each span it contains.

        Returns:
            - 415 if the content type is not `application/x-protobuf`, or the
              content encoding is something other than gzip/deflate.
            - 422 if the body cannot be decompressed or cannot be parsed as
              an `ExportTraceServiceRequest`.
            - An empty default `Response` once all spans have been enqueued.
        """
        content_type = request.headers.get("content-type")
        if content_type != "application/x-protobuf":
            return Response(
                content=f"Unsupported content type: {content_type}",
                status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            )
        content_encoding = request.headers.get("content-encoding")
        if content_encoding and content_encoding not in ("gzip", "deflate"):
            return Response(
                content=f"Unsupported content encoding: {content_encoding}",
                status_code=HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            )
        body = await request.body()
        try:
            if content_encoding == "gzip":
                body = gzip.decompress(body)
            elif content_encoding == "deflate":
                body = zlib.decompress(body)
        except (OSError, EOFError, zlib.error):
            # A corrupt or truncated compressed payload is a client error;
            # report it the same way as an undecodable protobuf body instead
            # of letting the exception escape the handler.
            return Response(
                content=f"Request body is not valid {content_encoding} data",
                status_code=HTTP_422_UNPROCESSABLE_ENTITY,
            )
        req = ExportTraceServiceRequest()
        try:
            req.ParseFromString(body)
        except DecodeError:
            return Response(
                content="Request body is invalid ExportTraceServiceRequest",
                status_code=HTTP_422_UNPROCESSABLE_ENTITY,
            )
        for resource_spans in req.resource_spans:
            for scope_span in resource_spans.scope_spans:
                for span in scope_span.spans:
                    self.queue.put(span)
                    # Yield to the event loop between spans so a large
                    # export does not monopolize it.
                    await asyncio.sleep(0)
        return Response()
|
phoenix/session/session.py
CHANGED
|
@@ -31,6 +31,7 @@ from phoenix.server.thread_server import ThreadServer
|
|
|
31
31
|
from phoenix.services import AppService
|
|
32
32
|
from phoenix.trace.dsl import SpanFilter
|
|
33
33
|
from phoenix.trace.dsl.query import SpanQuery
|
|
34
|
+
from phoenix.trace.otel import encode
|
|
34
35
|
from phoenix.trace.span_json_encoder import span_to_json
|
|
35
36
|
from phoenix.trace.trace_dataset import TraceDataset
|
|
36
37
|
|
|
@@ -117,7 +118,7 @@ class Session(ABC):
|
|
|
117
118
|
self.traces = Traces()
|
|
118
119
|
if trace_dataset:
|
|
119
120
|
for span in trace_dataset.to_spans():
|
|
120
|
-
self.traces.put(span)
|
|
121
|
+
self.traces.put(encode(span))
|
|
121
122
|
|
|
122
123
|
self.evals: Evals = Evals()
|
|
123
124
|
|
phoenix/trace/exporter.py
CHANGED
|
@@ -6,21 +6,22 @@ from threading import Thread
|
|
|
6
6
|
from types import MethodType
|
|
7
7
|
from typing import Any, Optional, Union
|
|
8
8
|
|
|
9
|
+
import opentelemetry.proto.trace.v1.trace_pb2 as otlp
|
|
9
10
|
import requests
|
|
10
11
|
from requests import Session
|
|
11
12
|
from typing_extensions import TypeAlias, assert_never
|
|
12
13
|
|
|
13
14
|
import phoenix.trace.v1 as pb
|
|
14
15
|
from phoenix.config import get_env_collector_endpoint, get_env_host, get_env_port
|
|
16
|
+
from phoenix.trace.otel import encode
|
|
15
17
|
from phoenix.trace.schemas import Span
|
|
16
|
-
from phoenix.trace.v1.utils import encode
|
|
17
18
|
|
|
18
19
|
logger = logging.getLogger(__name__)
|
|
19
20
|
logger.addHandler(logging.NullHandler())
|
|
20
21
|
|
|
21
22
|
END_OF_QUEUE = None # sentinel value for queue termination
|
|
22
23
|
|
|
23
|
-
Message: TypeAlias = Union[
|
|
24
|
+
Message: TypeAlias = Union[otlp.Span, pb.Evaluation]
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
class NoOpExporter:
|
|
@@ -102,7 +103,7 @@ class HttpExporter:
|
|
|
102
103
|
logger.exception(e)
|
|
103
104
|
|
|
104
105
|
def _url(self, message: Message) -> str:
|
|
105
|
-
if isinstance(message,
|
|
106
|
+
if isinstance(message, otlp.Span):
|
|
106
107
|
return f"{self._base_url}/v1/spans"
|
|
107
108
|
if isinstance(message, pb.Evaluation):
|
|
108
109
|
return f"{self._base_url}/v1/evaluations"
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
from copy import deepcopy
|
|
4
|
-
from datetime import datetime
|
|
5
|
-
from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple, cast
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
from langchain.callbacks.tracers.base import BaseTracer
|
|
@@ -356,13 +356,19 @@ class OpenInferenceTracer(Tracer, BaseTracer): # type: ignore
|
|
|
356
356
|
if "agent" in run["name"].lower()
|
|
357
357
|
else _langchain_run_type_to_span_kind(run["run_type"])
|
|
358
358
|
)
|
|
359
|
+
start_time = cast(datetime, run["start_time"])
|
|
360
|
+
end_time = cast(Optional[datetime], run.get("end_time"))
|
|
361
|
+
if _is_tz_naive(start_time):
|
|
362
|
+
start_time = start_time.replace(tzinfo=timezone.utc)
|
|
363
|
+
if end_time and _is_tz_naive(end_time):
|
|
364
|
+
end_time = end_time.replace(tzinfo=timezone.utc)
|
|
359
365
|
span = self.create_span(
|
|
360
366
|
name=run["name"],
|
|
361
367
|
span_kind=span_kind,
|
|
362
368
|
parent_id=None if parent is None else parent.context.span_id,
|
|
363
369
|
trace_id=None if parent is None else parent.context.trace_id,
|
|
364
|
-
start_time=
|
|
365
|
-
end_time=
|
|
370
|
+
start_time=start_time,
|
|
371
|
+
end_time=end_time,
|
|
366
372
|
status_code=status_code,
|
|
367
373
|
attributes=attributes,
|
|
368
374
|
events=events,
|
|
@@ -420,3 +426,7 @@ class OpenInferenceTracer(Tracer, BaseTracer): # type: ignore
|
|
|
420
426
|
name=name or "",
|
|
421
427
|
)
|
|
422
428
|
self._start_trace(run)
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def _is_tz_naive(dt: datetime) -> bool:
|
|
432
|
+
return dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None
|
phoenix/trace/otel.py
ADDED
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
from types import MappingProxyType
|
|
3
|
+
from typing import (
|
|
4
|
+
Any,
|
|
5
|
+
DefaultDict,
|
|
6
|
+
Dict,
|
|
7
|
+
Iterable,
|
|
8
|
+
Iterator,
|
|
9
|
+
List,
|
|
10
|
+
Mapping,
|
|
11
|
+
Optional,
|
|
12
|
+
Sequence,
|
|
13
|
+
Set,
|
|
14
|
+
Tuple,
|
|
15
|
+
Union,
|
|
16
|
+
cast,
|
|
17
|
+
)
|
|
18
|
+
from uuid import UUID
|
|
19
|
+
|
|
20
|
+
import opentelemetry.proto.trace.v1.trace_pb2 as otlp
|
|
21
|
+
from opentelemetry.proto.common.v1.common_pb2 import AnyValue, ArrayValue, KeyValue
|
|
22
|
+
from opentelemetry.util.types import Attributes, AttributeValue
|
|
23
|
+
from typing_extensions import TypeAlias, assert_never
|
|
24
|
+
|
|
25
|
+
import phoenix.trace.semantic_conventions as sem_conv
|
|
26
|
+
from phoenix.trace.schemas import (
|
|
27
|
+
MimeType,
|
|
28
|
+
Span,
|
|
29
|
+
SpanContext,
|
|
30
|
+
SpanEvent,
|
|
31
|
+
SpanException,
|
|
32
|
+
SpanID,
|
|
33
|
+
SpanKind,
|
|
34
|
+
SpanStatusCode,
|
|
35
|
+
TraceID,
|
|
36
|
+
)
|
|
37
|
+
from phoenix.trace.semantic_conventions import (
|
|
38
|
+
EXCEPTION_ESCAPED,
|
|
39
|
+
EXCEPTION_MESSAGE,
|
|
40
|
+
EXCEPTION_STACKTRACE,
|
|
41
|
+
EXCEPTION_TYPE,
|
|
42
|
+
INPUT_MIME_TYPE,
|
|
43
|
+
OPENINFERENCE_SPAN_KIND,
|
|
44
|
+
OUTPUT_MIME_TYPE,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def decode(otlp_span: otlp.Span) -> Span:
    """Convert an OTLP protobuf span into a Phoenix `Span`.

    Flattened OTLP attributes are re-nested via `_unflatten`. The
    OpenInference span-kind attribute is removed from the attributes and
    becomes `span_kind`; input/output MIME-type attributes, when present, are
    converted to `MimeType`. `end_time` is `None` when the OTLP end timestamp
    is zero, and `conversation` is always `None`.
    """
    attributes = dict(_unflatten(_decode_key_values(otlp_span.attributes)))
    span_kind = SpanKind(attributes.pop(OPENINFERENCE_SPAN_KIND, None))
    for mime_type_key in (INPUT_MIME_TYPE, OUTPUT_MIME_TYPE):
        if mime_type_key in attributes:
            attributes[mime_type_key] = MimeType(attributes[mime_type_key])

    status_code, status_message = _decode_status(otlp_span.status)

    end_time = None
    if otlp_span.end_time_unix_nano:
        end_time = _decode_unix_nano(otlp_span.end_time_unix_nano)

    context = SpanContext(
        trace_id=cast(TraceID, _decode_identifier(otlp_span.trace_id)),
        span_id=cast(SpanID, _decode_identifier(otlp_span.span_id)),
    )

    return Span(
        name=otlp_span.name,
        context=context,
        parent_id=_decode_identifier(otlp_span.parent_span_id),
        start_time=_decode_unix_nano(otlp_span.start_time_unix_nano),
        end_time=end_time,
        attributes=attributes,
        span_kind=span_kind,
        status_code=status_code,
        status_message=status_message,
        events=[_decode_event(otlp_event) for otlp_event in otlp_span.events],
        conversation=None,
    )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _decode_identifier(identifier: bytes) -> Optional[UUID]:
|
|
87
|
+
# This is a stopgap solution until we move away from UUIDs.
|
|
88
|
+
# The goal is to convert bytes to UUID in a deterministic way.
|
|
89
|
+
if not identifier:
|
|
90
|
+
return None
|
|
91
|
+
try:
|
|
92
|
+
# OTEL trace_id is 16 bytes, so it matches UUID's length, but
|
|
93
|
+
# OTEL span_id is 8 bytes, so we double up by concatenating.
|
|
94
|
+
return UUID(bytes=identifier[:8] + identifier[-8:])
|
|
95
|
+
except ValueError:
|
|
96
|
+
# Fallback to a seeding a UUID from the bytes.
|
|
97
|
+
return UUID(int=int.from_bytes(identifier, byteorder="big"))
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _decode_event(otlp_event: otlp.Span.Event) -> SpanEvent:
    """Convert an OTLP span event into a `SpanEvent`.

    Events named "exception" become a `SpanException` populated from the
    exception attributes (the message defaults to an empty string). Every
    other event keeps its name and decoded attributes.
    """
    attributes = dict(_decode_key_values(otlp_event.attributes))
    timestamp = _decode_unix_nano(otlp_event.time_unix_nano)
    event_name = otlp_event.name
    if event_name != "exception":
        return SpanEvent(
            name=event_name,
            timestamp=timestamp,
            attributes=attributes,
        )
    return SpanException(
        timestamp=timestamp,
        message=attributes.get(EXCEPTION_MESSAGE) or "",
        exception_type=attributes.get(EXCEPTION_TYPE),
        exception_escaped=attributes.get(EXCEPTION_ESCAPED),
        exception_stacktrace=attributes.get(EXCEPTION_STACKTRACE),
    )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _decode_unix_nano(time_unix_nano: int) -> datetime:
|
|
120
|
+
# floating point rounding error can cause the timestamp to be slightly different from expected
|
|
121
|
+
return datetime.fromtimestamp(time_unix_nano / 1e9, tz=timezone.utc)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _decode_key_values(
    key_values: Iterable[KeyValue],
) -> Iterator[Tuple[str, Any]]:
    """Lazily turn OTLP `KeyValue` messages into `(key, decoded value)` pairs."""

    def _as_pair(key_value: KeyValue) -> Tuple[str, Any]:
        return key_value.key, _decode_value(key_value.value)

    return map(_as_pair, key_values)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _decode_value(any_value: AnyValue) -> Any:
    """Unwrap an OTLP `AnyValue` into the corresponding plain Python value.

    Scalars (string, bool, int, double, bytes) are returned as-is, arrays
    become lists and key-value lists become dicts, both decoded recursively.
    An `AnyValue` with no field set decodes to `None`.
    """
    which = any_value.WhichOneof("value")
    if which is None:
        return None
    if which in ("string_value", "bool_value", "int_value", "double_value", "bytes_value"):
        # Scalar fields map directly onto Python values.
        return getattr(any_value, which)
    if which == "array_value":
        return [_decode_value(element) for element in any_value.array_value.values]
    if which == "kvlist_value":
        return dict(_decode_key_values(any_value.kvlist_value.values))
    assert_never(which)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
StatusMessage: TypeAlias = str

# Read-only lookup from OTLP status codes to Phoenix span status codes.
_STATUS_DECODING = MappingProxyType(
    {
        otlp.Status.StatusCode.STATUS_CODE_UNSET: SpanStatusCode.UNSET,
        otlp.Status.StatusCode.STATUS_CODE_OK: SpanStatusCode.OK,
        otlp.Status.StatusCode.STATUS_CODE_ERROR: SpanStatusCode.ERROR,
    }
)


def _decode_status(otlp_status: otlp.Status) -> Tuple[SpanStatusCode, StatusMessage]:
    """Return the Phoenix status code and message for an OTLP status.

    Codes missing from `_STATUS_DECODING` are treated as `UNSET`.
    """
    decoded_code = _STATUS_DECODING.get(otlp_status.code, SpanStatusCode.UNSET)
    message = otlp_status.message
    return decoded_code, message
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Values of every all-uppercase name in the semantic conventions module.
# Reverse lexicographic order puts a convention that extends another one
# ahead of the shorter one, so the first match found is the longest.
_SEMANTIC_CONVENTIONS: List[str] = [
    getattr(sem_conv, name) for name in dir(sem_conv) if name.isupper()
]
_SEMANTIC_CONVENTIONS.sort(reverse=True)


def _semantic_convention_prefix_partition(key: str, separator: str = ".") -> Tuple[str, str, str]:
    """Split `key` at its longest semantic-convention prefix.

    Mirrors `str.partition`: the result is `(prefix, separator, suffix)`.
    For example, "retrieval.documents.2.document.score" is split into
    ("retrieval.documents", ".", "2.document.score"). A key that is exactly a
    convention yields `(key, "", "")`, and a key with no convention prefix
    yields three empty strings.
    """
    for convention in _SEMANTIC_CONVENTIONS:
        if not key.startswith(convention):
            continue
        remainder = key[len(convention) :]
        if not remainder:
            # The key is the convention itself.
            return key, "", ""
        if remainder.startswith(separator):
            return convention, separator, remainder[len(separator) :]
    return "", "", ""
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class _Trie(DefaultDict[Union[str, int], "_Trie"]):
|
|
188
|
+
"""Prefix Tree with special handling for indices (i.e. all-digit keys)."""
|
|
189
|
+
|
|
190
|
+
def __init__(self) -> None:
|
|
191
|
+
super().__init__(_Trie)
|
|
192
|
+
self.value: Any = None
|
|
193
|
+
self.indices: Set[int] = set()
|
|
194
|
+
self.branches: Set[Union[str, int]] = set()
|
|
195
|
+
|
|
196
|
+
def set_value(self, value: Any) -> None:
|
|
197
|
+
self.value = value
|
|
198
|
+
# value and indices must not coexist
|
|
199
|
+
self.branches.update(self.indices)
|
|
200
|
+
self.indices.clear()
|
|
201
|
+
|
|
202
|
+
def add_index(self, index: int) -> "_Trie":
|
|
203
|
+
if self.value is not None:
|
|
204
|
+
self.branches.add(index)
|
|
205
|
+
elif index not in self.branches:
|
|
206
|
+
self.indices.add(index)
|
|
207
|
+
return self[index]
|
|
208
|
+
|
|
209
|
+
def add_branch(self, branch: Union[str, int]) -> "_Trie":
|
|
210
|
+
if branch in self.indices:
|
|
211
|
+
self.indices.discard(cast(int, branch))
|
|
212
|
+
self.branches.add(branch)
|
|
213
|
+
return self[branch]
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
# FIXME: Ideally we should not need something so complicated as a Trie, but it's useful here
|
|
217
|
+
# for backward compatibility reasons regarding some deeply nested objects such as TOOL_PARAMETERS.
|
|
218
|
+
# In the future, we should `json_dumps` them and not let things get too deeply nested.
|
|
219
|
+
def _build_trie(
    key_value_pairs: Iterable[Tuple[str, Any]],
    separator: str = ".",
) -> _Trie:
    """Build a prefix tree from flattened `(key, value)` pairs.

    Each key is consumed left to right. At every step the longest
    semantic-convention prefix is taken as a single branch if there is one;
    otherwise the key is split at the first `separator`. All-digit parts are
    converted to integers: they become indices when more of the key follows,
    and plain branches when they are the final part. Pairs whose value is
    `None` are skipped.
    """
    root = _Trie()
    for key, value in key_value_pairs:
        if value is None:
            continue
        node = root
        remaining = key
        while True:
            head, _, tail = _semantic_convention_prefix_partition(remaining, separator)
            if head:
                node = node.add_branch(head)
            else:
                head, _, tail = remaining.partition(separator)
                if not head.isdigit():
                    node = node.add_branch(head)
                elif tail:
                    node = node.add_index(int(head))
                else:
                    node = node.add_branch(int(head))
            if not tail:
                break
            remaining = tail
        node.set_value(value)
    return root
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _walk(trie: _Trie, prefix: str = "") -> Iterator[Tuple[str, Any]]:
    """Yield `(key, value)` pairs that rebuild nested data from `trie`.

    For the node reached under `prefix`:
      - a node with a value yields `(prefix, value)`;
      - a node with indices and a prefix yields one list of dicts, ordered
        by index;
      - a node with indices and no prefix walks each indexed child using the
        index as its prefix;
      - any other node with a prefix yields a dict of its subtree and stops.
    Except in that last case, every branch child is then walked with its
    key appended to the prefix.
    """
    if trie.value is not None:
        yield prefix, trie.value
    elif trie.indices:
        if prefix:
            yield prefix, [dict(_walk(trie[position])) for position in sorted(trie.indices)]
        else:
            for position in trie.indices:
                yield from _walk(trie[position], prefix=str(position))
    elif prefix:
        yield prefix, dict(_walk(trie))
        return
    for branch in trie.branches:
        child_prefix = f"{prefix}.{branch}" if prefix else f"{branch}"
        yield from _walk(trie[branch], child_prefix)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _unflatten(
    key_value_pairs: Iterable[Tuple[str, Any]],
    separator: str = ".",
) -> Iterator[Tuple[str, Any]]:
    """Re-nest flattened `(key, value)` pairs by building a trie and walking it."""
    yield from _walk(_build_trie(key_value_pairs, separator))
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
_BILLION = 1_000_000_000  # for converting seconds to nanoseconds


def encode(span: Span) -> otlp.Span:
    """Convert a Phoenix `Span` into an OTLP protobuf span.

    Attributes are copied and then prepared for OTLP: MIME-type values are
    replaced by their `.value`, `None` values are dropped, and mappings (and
    non-string sequences containing a mapping) are flattened into prefixed
    keys. The span kind is stored under the OpenInference span-kind
    attribute. An absent end time is encoded as 0 and an absent parent id as
    empty bytes.
    """
    attributes: Dict[str, Any] = span.attributes.copy()

    for mime_type_key in (INPUT_MIME_TYPE, OUTPUT_MIME_TYPE):
        if mime_type_key in attributes:
            attributes[mime_type_key] = attributes[mime_type_key].value

    # Iterate the original attributes because the copy is mutated below.
    for key, value in span.attributes.items():
        flattened: Iterable[Tuple[str, Any]]
        if value is None:
            # None can't be transmitted by OTLP
            flattened = ()
        elif isinstance(value, Mapping):
            flattened = _flatten_mapping(value, key)
        elif not isinstance(value, str) and isinstance(value, Sequence) and _has_mapping(value):
            flattened = _flatten_sequence(value, key)
        else:
            continue
        attributes.pop(key, None)
        attributes.update(flattened)

    attributes[OPENINFERENCE_SPAN_KIND] = span.span_kind.value

    # floating point rounding error can cause the timestamp to be slightly different from expected
    start_nanos: int = int(span.start_time.timestamp() * _BILLION)
    end_nanos: int = 0
    if span.end_time:
        end_nanos = int(span.end_time.timestamp() * _BILLION)

    parent_bytes: bytes = b""
    if span.parent_id:
        parent_bytes = _span_id_to_bytes(span.parent_id)

    return otlp.Span(
        name=span.name,
        trace_id=span.context.trace_id.bytes,
        span_id=_span_id_to_bytes(span.context.span_id),
        parent_span_id=parent_bytes,
        start_time_unix_nano=start_nanos,
        end_time_unix_nano=end_nanos,
        attributes=_encode_attributes(cast(Attributes, attributes)),
        events=(_encode_event(event) for event in span.events),
        status=_encode_status(span.status_code, span.status_message),
    )
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
# Read-only lookup from Phoenix span status codes to OTLP status codes.
_STATUS_ENCODING = MappingProxyType(
    {
        SpanStatusCode.UNSET: otlp.Status.StatusCode.STATUS_CODE_UNSET,
        SpanStatusCode.OK: otlp.Status.StatusCode.STATUS_CODE_OK,
        SpanStatusCode.ERROR: otlp.Status.StatusCode.STATUS_CODE_ERROR,
    }
)


def _encode_status(span_status_code: SpanStatusCode, status_message: str) -> otlp.Status:
    """Build an OTLP `Status` from a Phoenix status code and message.

    Codes missing from `_STATUS_ENCODING` are encoded as `STATUS_CODE_UNSET`.
    """
    otlp_code = _STATUS_ENCODING.get(
        span_status_code,
        otlp.Status.StatusCode.STATUS_CODE_UNSET,
    )
    return otlp.Status(code=otlp_code, message=status_message)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def _span_id_to_bytes(span_id: SpanID) -> bytes:
    """Return the raw bytes of `span_id`.

    Note that this is not compliant with the OTEL spec, which uses 8-byte
    span IDs. It is a stopgap for backward compatibility until the code base
    moves away from UUIDs.
    """
    raw: bytes = span_id.bytes
    return raw
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _has_mapping(sequence: Sequence[Any]) -> bool:
|
|
343
|
+
for item in sequence:
|
|
344
|
+
if isinstance(item, Mapping):
|
|
345
|
+
return True
|
|
346
|
+
return False
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _flatten_mapping(
    mapping: Mapping[str, Any],
    prefix: str,
) -> Iterator[Tuple[str, Any]]:
    """Yield `(dotted key, value)` pairs for the contents of `mapping`.

    Each key is prefixed with `prefix` and a dot. Nested mappings are
    flattened recursively, sequences (including strings) are delegated to
    `_flatten_sequence`, and `None` values are skipped.
    """
    for key, value in mapping.items():
        if value is None:
            continue
        dotted_key = f"{prefix}.{key}"
        if isinstance(value, Mapping):
            yield from _flatten_mapping(value, dotted_key)
        elif isinstance(value, Sequence):
            yield from _flatten_sequence(value, dotted_key)
        else:
            yield dotted_key, value
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def _flatten_sequence(
    sequence: Sequence[Any],
    prefix: str,
) -> Iterator[Tuple[str, Any]]:
    """Yield `(dotted key, value)` pairs for `sequence`.

    A string, or a sequence with no mapping among its elements, is yielded
    whole under `prefix`. Otherwise each mapping element is flattened under
    `"{prefix}.{index}"`, and elements that are not mappings are skipped.
    """
    if isinstance(sequence, str) or not _has_mapping(sequence):
        yield prefix, sequence
        # Nothing below can match (there are no mapping elements), so stop
        # here instead of scanning every element a second time.
        return
    for idx, obj in enumerate(sequence):
        if isinstance(obj, Mapping):
            yield from _flatten_mapping(obj, f"{prefix}.{idx}")
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _encode_event(event: SpanEvent) -> otlp.Span.Event:
    """Convert a Phoenix `SpanEvent` into an OTLP span event."""
    # Same float-based seconds-to-nanoseconds conversion that `encode` uses.
    time_unix_nano = int(event.timestamp.timestamp() * _BILLION)
    encoded_attributes = _encode_attributes(cast(Attributes, event.attributes))
    return otlp.Span.Event(
        name=event.name,
        time_unix_nano=time_unix_nano,
        attributes=encoded_attributes,
    )
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _encode_attributes(attributes: Attributes) -> Iterator[KeyValue]:
    """Yield one OTLP `KeyValue` per attribute; nothing for `None` or empty input."""
    for key, value in (attributes or {}).items():
        yield KeyValue(key=key, value=_encode_value(value))
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def _encode_value(value: AttributeValue) -> AnyValue:
    """Wrap a Python attribute value in an OTLP `AnyValue`.

    Strings, bools, ints, floats and bytes map to the matching scalar field;
    any other sequence becomes an array whose elements are encoded
    recursively.

    Raises:
        ValueError: if `value` is none of the supported types.
    """
    # Order matters: str and bytes are sequences and bool is a subclass of
    # int, so the more specific types are tested first.
    scalar_fields = (
        (str, "string_value"),
        (bool, "bool_value"),
        (int, "int_value"),
        (float, "double_value"),
        (bytes, "bytes_value"),
    )
    for python_type, field_name in scalar_fields:
        if isinstance(value, python_type):
            return AnyValue(**{field_name: value})
    if isinstance(value, Sequence):
        encoded_items = (_encode_value(item) for item in value)
        return AnyValue(array_value=ArrayValue(values=encoded_items))
    raise ValueError(f"Unexpected attribute value {value} with type {type(value)}.")
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
__all__ = [
|
|
407
|
+
"encode",
|
|
408
|
+
"decode",
|
|
409
|
+
]
|
phoenix/trace/v1/__init__.py
CHANGED
phoenix/version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "2.2.0rc0"
|
phoenix/trace/v1/trace_pb2.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
-
# source: trace/v1/trace.proto
|
|
4
|
-
"""Generated protocol buffer code."""
|
|
5
|
-
from google.protobuf.internal import builder as _builder
|
|
6
|
-
from google.protobuf import descriptor as _descriptor
|
|
7
|
-
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
8
|
-
from google.protobuf import symbol_database as _symbol_database
|
|
9
|
-
# @@protoc_insertion_point(imports)
|
|
10
|
-
|
|
11
|
-
_sym_db = _symbol_database.Default()
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
|
|
15
|
-
from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
|
|
16
|
-
from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14trace/v1/trace.proto\x12\x16phoenix.proto.trace.v1\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1egoogle/protobuf/wrappers.proto\"\xc5\x0b\n\x04Span\x12+\n\nattributes\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x35\n\x07\x63ontext\x18\x02 \x01(\x0b\x32$.phoenix.proto.trace.v1.Span.Context\x12\x34\n\x0eparent_span_id\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.StringValue\x12\x0c\n\x04name\x18\x04 \x01(\t\x12.\n\nstart_time\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12,\n\x08\x65nd_time\x18\x06 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x33\n\x06status\x18\x07 \x01(\x0b\x32#.phoenix.proto.trace.v1.Span.Status\x12\x32\n\x06\x65vents\x18\x08 \x03(\x0b\x32\".phoenix.proto.trace.v1.Span.Event\x12:\n\nexceptions\x18\t \x03(\x0b\x32&.phoenix.proto.trace.v1.Span.Exception\x12\x33\n\x05input\x18\n \x01(\x0b\x32$.phoenix.proto.trace.v1.Span.IOValue\x12\x34\n\x06output\x18\x0b \x01(\x0b\x32$.phoenix.proto.trace.v1.Span.IOValue\x12\x0c\n\x04kind\x18\x0c \x01(\t\x12\x34\n\tretrieval\x18\r \x01(\x0b\x32!.phoenix.proto.trace.v1.Retrieval\x12\x34\n\tembedding\x18\x0e \x01(\x0b\x32!.phoenix.proto.trace.v1.Embedding\x12(\n\x03llm\x18\x0f \x01(\x0b\x32\x1b.phoenix.proto.trace.v1.LLM\x12*\n\x04tool\x18\x10 \x01(\x0b\x32\x1c.phoenix.proto.trace.v1.Tool\x1a,\n\x07\x43ontext\x12\x10\n\x08trace_id\x18\x01 \x01(\t\x12\x0f\n\x07span_id\x18\x02 \x01(\t\x1a\x95\x01\n\x06Status\x12\x36\n\x04\x63ode\x18\x01 \x01(\x0e\x32(.phoenix.proto.trace.v1.Span.Status.Code\x12-\n\x07message\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.StringValue\"$\n\x04\x43ode\x12\t\n\x05UNSET\x10\x00\x12\x06\n\x02OK\x10\x01\x12\t\n\x05\x45RROR\x10\x02\x1aq\n\x05\x45vent\x12+\n\nattributes\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\ttimestamp\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x1a\xa1\x02\n\tException\x12+\n\nattributes\x18\x01 
\x01(\x0b\x32\x17.google.protobuf.Struct\x12-\n\x07message\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.StringValue\x12*\n\x04type\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.StringValue\x12+\n\x07\x65scaped\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x30\n\nstacktrace\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.StringValue\x12-\n\ttimestamp\x18\x06 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x1az\n\x07IOValue\x12\r\n\x05value\x18\x01 \x01(\t\x12@\n\tmime_type\x18\x02 \x01(\x0e\x32-.phoenix.proto.trace.v1.Span.IOValue.MimeType\"\x1e\n\x08MimeType\x12\x08\n\x04TEXT\x10\x00\x12\x08\n\x04JSON\x10\x01\"\xdf\x02\n\tRetrieval\x12=\n\tdocuments\x18\x01 \x03(\x0b\x32*.phoenix.proto.trace.v1.Retrieval.Document\x1a\x92\x02\n\x08\x44ocument\x12+\n\nattributes\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12(\n\x02id\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.StringValue\x12*\n\x05score\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.FloatValue\x12-\n\x07\x63ontent\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.StringValue\x12)\n\x08metadata\x18\x05 \x01(\x0b\x32\x17.google.protobuf.Struct\x12)\n\x04hash\x18\x06 \x01(\x0b\x32\x1b.google.protobuf.BytesValue\"\xf4\x01\n\tEmbedding\x12?\n\nembeddings\x18\x01 \x03(\x0b\x32+.phoenix.proto.trace.v1.Embedding.Embedding\x12\x30\n\nmodel_name\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.StringValue\x1at\n\tEmbedding\x12+\n\nattributes\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x0e\n\x06vector\x18\x02 \x03(\x02\x12*\n\x04text\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.StringValue\"2\n\x03LLM\x12+\n\nattributes\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\"3\n\x04Tool\x12+\n\nattributes\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Structb\x06proto3')
|
|
20
|
-
|
|
21
|
-
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
|
|
22
|
-
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'trace.v1.trace_pb2', globals())
|
|
23
|
-
if _descriptor._USE_C_DESCRIPTORS == False:
|
|
24
|
-
|
|
25
|
-
DESCRIPTOR._options = None
|
|
26
|
-
_SPAN._serialized_start=144
|
|
27
|
-
_SPAN._serialized_end=1621
|
|
28
|
-
_SPAN_CONTEXT._serialized_start=894
|
|
29
|
-
_SPAN_CONTEXT._serialized_end=938
|
|
30
|
-
_SPAN_STATUS._serialized_start=941
|
|
31
|
-
_SPAN_STATUS._serialized_end=1090
|
|
32
|
-
_SPAN_STATUS_CODE._serialized_start=1054
|
|
33
|
-
_SPAN_STATUS_CODE._serialized_end=1090
|
|
34
|
-
_SPAN_EVENT._serialized_start=1092
|
|
35
|
-
_SPAN_EVENT._serialized_end=1205
|
|
36
|
-
_SPAN_EXCEPTION._serialized_start=1208
|
|
37
|
-
_SPAN_EXCEPTION._serialized_end=1497
|
|
38
|
-
_SPAN_IOVALUE._serialized_start=1499
|
|
39
|
-
_SPAN_IOVALUE._serialized_end=1621
|
|
40
|
-
_SPAN_IOVALUE_MIMETYPE._serialized_start=1591
|
|
41
|
-
_SPAN_IOVALUE_MIMETYPE._serialized_end=1621
|
|
42
|
-
_RETRIEVAL._serialized_start=1624
|
|
43
|
-
_RETRIEVAL._serialized_end=1975
|
|
44
|
-
_RETRIEVAL_DOCUMENT._serialized_start=1701
|
|
45
|
-
_RETRIEVAL_DOCUMENT._serialized_end=1975
|
|
46
|
-
_EMBEDDING._serialized_start=1978
|
|
47
|
-
_EMBEDDING._serialized_end=2222
|
|
48
|
-
_EMBEDDING_EMBEDDING._serialized_start=2106
|
|
49
|
-
_EMBEDDING_EMBEDDING._serialized_end=2222
|
|
50
|
-
_LLM._serialized_start=2224
|
|
51
|
-
_LLM._serialized_end=2274
|
|
52
|
-
_TOOL._serialized_start=2276
|
|
53
|
-
_TOOL._serialized_end=2327
|
|
54
|
-
# @@protoc_insertion_point(module_scope)
|