arize-phoenix 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl
This diff compares the published contents of two versions of the package as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/METADATA +11 -5
- {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/RECORD +69 -40
- phoenix/__init__.py +3 -1
- phoenix/config.py +23 -1
- phoenix/core/model_schema.py +14 -37
- phoenix/core/model_schema_adapter.py +0 -1
- phoenix/core/traces.py +285 -0
- phoenix/datasets/dataset.py +14 -21
- phoenix/datasets/errors.py +4 -1
- phoenix/datasets/schema.py +1 -1
- phoenix/datetime_utils.py +87 -0
- phoenix/experimental/callbacks/__init__.py +0 -0
- phoenix/experimental/callbacks/langchain_tracer.py +228 -0
- phoenix/experimental/callbacks/llama_index_trace_callback_handler.py +364 -0
- phoenix/experimental/evals/__init__.py +33 -0
- phoenix/experimental/evals/functions/__init__.py +4 -0
- phoenix/experimental/evals/functions/binary.py +156 -0
- phoenix/experimental/evals/functions/common.py +31 -0
- phoenix/experimental/evals/functions/generate.py +50 -0
- phoenix/experimental/evals/models/__init__.py +4 -0
- phoenix/experimental/evals/models/base.py +130 -0
- phoenix/experimental/evals/models/openai.py +128 -0
- phoenix/experimental/evals/retrievals.py +2 -2
- phoenix/experimental/evals/templates/__init__.py +24 -0
- phoenix/experimental/evals/templates/default_templates.py +126 -0
- phoenix/experimental/evals/templates/template.py +107 -0
- phoenix/experimental/evals/utils/__init__.py +0 -0
- phoenix/experimental/evals/utils/downloads.py +33 -0
- phoenix/experimental/evals/utils/threads.py +27 -0
- phoenix/experimental/evals/utils/types.py +9 -0
- phoenix/experimental/evals/utils.py +33 -0
- phoenix/metrics/binning.py +0 -1
- phoenix/metrics/timeseries.py +2 -3
- phoenix/server/api/context.py +2 -0
- phoenix/server/api/input_types/SpanSort.py +60 -0
- phoenix/server/api/schema.py +85 -4
- phoenix/server/api/types/DataQualityMetric.py +10 -1
- phoenix/server/api/types/Dataset.py +2 -4
- phoenix/server/api/types/DatasetInfo.py +10 -0
- phoenix/server/api/types/ExportEventsMutation.py +4 -1
- phoenix/server/api/types/Functionality.py +15 -0
- phoenix/server/api/types/MimeType.py +16 -0
- phoenix/server/api/types/Model.py +3 -5
- phoenix/server/api/types/SortDir.py +13 -0
- phoenix/server/api/types/Span.py +229 -0
- phoenix/server/api/types/TimeSeries.py +9 -2
- phoenix/server/api/types/pagination.py +2 -0
- phoenix/server/app.py +24 -4
- phoenix/server/main.py +60 -24
- phoenix/server/span_handler.py +39 -0
- phoenix/server/static/index.js +956 -479
- phoenix/server/thread_server.py +10 -2
- phoenix/services.py +39 -16
- phoenix/session/session.py +99 -27
- phoenix/trace/exporter.py +71 -0
- phoenix/trace/filter.py +181 -0
- phoenix/trace/fixtures.py +23 -8
- phoenix/trace/schemas.py +59 -6
- phoenix/trace/semantic_conventions.py +141 -1
- phoenix/trace/span_json_decoder.py +60 -6
- phoenix/trace/span_json_encoder.py +1 -9
- phoenix/trace/trace_dataset.py +100 -8
- phoenix/trace/tracer.py +26 -3
- phoenix/trace/v1/__init__.py +522 -0
- phoenix/trace/v1/trace_pb2.py +52 -0
- phoenix/trace/v1/trace_pb2.pyi +351 -0
- phoenix/core/dimension_data_type.py +0 -6
- phoenix/core/dimension_type.py +0 -9
- {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/WHEEL +0 -0
- {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/licenses/LICENSE +0 -0
phoenix/trace/fixtures.py
CHANGED

```diff
@@ -1,9 +1,8 @@
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import List, Optional, cast
 from urllib import request
 
-
-
+from phoenix.trace.trace_dataset import TraceDataset
 from phoenix.trace.utils import json_lines_to_df
 
 
@@ -14,13 +13,29 @@ class TracesFixture:
     file_name: str
 
 
+llama_index_rag_fixture = TracesFixture(
+    name="llama_index_rag",
+    description="Traces from running the llama_index on a RAG use case.",
+    file_name="llama_index_rag_v5.jsonl",
+)
+
+langchain_rag_stuff_document_chain_fixture = TracesFixture(
+    name="langchain_rag_stuff_document_chain",
+    description="LangChain RAG data",
+    file_name="langchain_rag.jsonl",
+)
+
 random_fixture = TracesFixture(
     name="random",
     description="Randomly generated traces",
    file_name="random.jsonl",
 )
 
-TRACES_FIXTURES: List[TracesFixture] = [
+TRACES_FIXTURES: List[TracesFixture] = [
+    llama_index_rag_fixture,
+    langchain_rag_stuff_document_chain_fixture,
+    random_fixture,
+]
 
 NAME_TO_TRACES_FIXTURE = {fixture.name: fixture for fixture in TRACES_FIXTURES}
 
@@ -45,20 +60,20 @@ def _download_traces_fixture(
     host: Optional[str] = "https://storage.googleapis.com/",
     bucket: Optional[str] = "arize-assets",
     prefix: Optional[str] = "phoenix/traces/",
-) ->
+) -> List[str]:
     """
     Downloads the traces fixture from the phoenix bucket.
     """
     url = f"{host}{bucket}/{prefix}{fixture.file_name}"
     with request.urlopen(url) as f:
-        return
+        return cast(List[str], f.readlines())
 
 
-def load_example_traces(use_case: str) ->
+def load_example_traces(use_case: str) -> TraceDataset:
     """
     Loads a trace dataframe by name.
 
     NB: this functionality is under active construction.
     """
    fixture = _get_trace_fixture_by_name(use_case)
-    return _download_traces_fixture(fixture)
+    return TraceDataset(json_lines_to_df(_download_traces_fixture(fixture)))
```
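With this change, `load_example_traces` wraps the downloaded JSON lines in a `TraceDataset` instead of returning raw data. A minimal sketch of the new call path, using the fixture names registered above:

```python
from phoenix.trace.fixtures import load_example_traces

# "llama_index_rag" and "langchain_rag_stuff_document_chain" are the fixture
# names registered in NAME_TO_TRACES_FIXTURE in the diff above.
ds = load_example_traces("llama_index_rag")
print(ds.dataframe.head())  # one row per span
```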
phoenix/trace/schemas.py
CHANGED

```diff
@@ -4,14 +4,29 @@ from enum import Enum
 from typing import Any, Dict, List, Optional, Union
 from uuid import UUID
 
+from phoenix.trace.semantic_conventions import (
+    EXCEPTION_ESCAPED,
+    EXCEPTION_MESSAGE,
+    EXCEPTION_STACKTRACE,
+    EXCEPTION_TYPE,
+)
+
 
 class SpanStatusCode(Enum):
     UNSET = "UNSET"
     OK = "OK"
     ERROR = "ERROR"
 
+    def __str__(self) -> str:
+        return self.value
+
+    @classmethod
+    def _missing_(cls, v: Any) -> Optional["SpanStatusCode"]:
+        if v and isinstance(v, str) and not v.isupper():
+            return cls(v.upper())
+        return None if v else cls.UNSET
+
 
-@dataclass(frozen=True)
 class SpanKind(Enum):
     """
     SpanKind is loosely inspired by OpenTelemetry's SpanKind
@@ -25,8 +40,20 @@ class SpanKind(Enum):
     LLM = "LLM"
     RETRIEVER = "RETRIEVER"
     EMBEDDING = "EMBEDDING"
+    AGENT = "AGENT"
+    UNKNOWN = "UNKNOWN"
+
+    def __str__(self) -> str:
+        return self.value
 
+    @classmethod
+    def _missing_(cls, v: Any) -> Optional["SpanKind"]:
+        if v and isinstance(v, str) and not v.isupper():
+            return cls(v.upper())
+        return None if v else cls.UNKNOWN
 
+
+TraceID = UUID
 SpanID = UUID
 AttributePrimitiveValue = Union[str, bool, float, int]
 AttributeValue = Union[AttributePrimitiveValue, List[AttributePrimitiveValue]]
```
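The new `_missing_` hooks make enum construction tolerant of values coming off the wire: lowercase strings are upper-cased and retried, and falsy inputs fall back to a default member (`UNSET` for status codes, `UNKNOWN` for kinds). A quick sketch of the resulting behavior:

```python
from phoenix.trace.schemas import SpanKind, SpanStatusCode

assert SpanKind("llm") is SpanKind.LLM            # "llm" -> retried as "LLM"
assert SpanKind(None) is SpanKind.UNKNOWN         # falsy -> default member
assert SpanStatusCode("ok") is SpanStatusCode.OK  # "ok" -> retried as "OK"
```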
```diff
@@ -37,7 +64,7 @@ SpanAttributes = Dict[str, AttributeValue]
 class SpanContext:
     """Context propagation for a span"""
 
-    trace_id:
+    trace_id: TraceID
     span_id: SpanID
 
 
@@ -58,10 +85,11 @@ class SpanEvent(Dict[str, Any]):
     """
 
     name: str
-    message: str
     timestamp: datetime
+    attributes: SpanAttributes
 
 
+@dataclass(frozen=True)
 class SpanException(SpanEvent):
     """
     A Span Exception is a special type of Span Event that denotes an error
@@ -73,8 +101,28 @@ class SpanException(SpanEvent):
     https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/exceptions.md
     """
 
-    def __init__(
-
+    def __init__(
+        self,
+        timestamp: datetime,
+        message: str,
+        exception_type: Optional[str] = None,
+        exception_escaped: Optional[bool] = None,
+        exception_stacktrace: Optional[str] = None,
+    ):
+        super().__init__(
+            name="exception",
+            timestamp=timestamp,
+            attributes={
+                k: v
+                for k, v in {
+                    EXCEPTION_TYPE: exception_type,
+                    EXCEPTION_MESSAGE: message,
+                    EXCEPTION_ESCAPED: exception_escaped,
+                    EXCEPTION_STACKTRACE: exception_stacktrace,
+                }.items()
+                if v is not None
+            },
+        )
 
 
 @dataclass(frozen=True)
```
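The rewritten `SpanException.__init__` packs its arguments into the event's `attributes` dict under the OpenTelemetry exception keys, dropping any that are `None`. A sketch of what a caller would get:

```python
from datetime import datetime, timezone
from phoenix.trace.schemas import SpanException

exc = SpanException(
    timestamp=datetime.now(timezone.utc),
    message="division by zero",
    exception_type="ZeroDivisionError",  # escaped/stacktrace omitted -> filtered out
)
# exc.attributes now holds only the non-None fields:
# {"exception.type": "ZeroDivisionError", "exception.message": "division by zero"}
```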
```diff
@@ -95,7 +143,7 @@ class Span:
     "If the parent_id is None, this is the root span"
     parent_id: Optional[SpanID]
     start_time: datetime
-    end_time: datetime
+    end_time: Optional[datetime]
     status_code: SpanStatusCode
     status_message: str
     """
@@ -131,3 +179,8 @@
         conversation_id
     """
     conversation: Optional[SpanConversationAttributes]
+
+
+ATTRIBUTE_PREFIX = "attributes."
+CONTEXT_PREFIX = "context."
+COMPUTED_PREFIX = "__computed__."
```
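These module-level prefixes name the column namespaces used when spans are flattened into a dataframe (see `TraceDataset.to_spans` further below): span attributes become `attributes.*` columns and span-context fields become `context.*` columns. For illustration (the specific column names here are hypothetical):

```python
from phoenix.trace.schemas import ATTRIBUTE_PREFIX, CONTEXT_PREFIX

# A flattened span row might carry columns such as:
#   context.trace_id, context.span_id, attributes.llm.model_name
print(ATTRIBUTE_PREFIX + "llm.model_name")  # -> "attributes.llm.model_name"
print(CONTEXT_PREFIX + "span_id")           # -> "context.span_id"
```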
phoenix/trace/semantic_conventions.py
CHANGED

```diff
@@ -5,7 +5,8 @@ Inspiration from OpenTelemetry:
 https://opentelemetry.io/docs/specs/otel/trace/semantic_conventions/span-general/
 """
 from dataclasses import dataclass
-from
+from enum import Enum
+from typing import Any, Dict, Optional
 
 
 @dataclass(frozen=True)
@@ -35,3 +36,142 @@ class DeploymentAttributes(AttributeGroup):
         type="string",
     ),
 }
+
+
+EXCEPTION_TYPE = "exception.type"
+EXCEPTION_MESSAGE = "exception.message"
+EXCEPTION_ESCAPED = "exception.escaped"
+EXCEPTION_STACKTRACE = "exception.stacktrace"
+
+
+OUTPUT_VALUE = "output.value"
+OUTPUT_MIME_TYPE = "output.mime_type"
+"""
+The type of output.value. If unspecified, the type is plain text by default.
+If type is JSON, the value is a string representing a JSON object.
+"""
+INPUT_VALUE = "input.value"
+INPUT_MIME_TYPE = "input.mime_type"
+"""
+The type of input.value. If unspecified, the type is plain text by default.
+If type is JSON, the value is a string representing a JSON object.
+"""
+
+
+class MimeType(Enum):
+    TEXT = "text/plain"
+    JSON = "application/json"
+
+    @classmethod
+    def _missing_(cls, v: Any) -> Optional["MimeType"]:
+        return None if v else cls.TEXT
+
+
+EMBEDDING_EMBEDDINGS = "embedding.embeddings"
+"""
+A list of objects containing embedding data, including the vector and represented piece of text.
+"""
+EMBEDDING_MODEL_NAME = "embedding.model_name"
+"""
+The name of the embedding model.
+"""
+EMBEDDING_TEXT = "embedding.text"
+"""
+The text represented by the embedding.
+"""
+EMBEDDING_VECTOR = "embedding.vector"
+"""
+The embedding vector.
+"""
+
+MESSAGE_ROLE = "message.role"
+"""
+The role of the message, such as "user", "agent", "function".
+"""
+MESSAGE_NAME = "message.name"
+"""
+The name of the message, often used to identify the function
+that was used to generate the message.
+"""
+MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name"
+"""
+The function name that is a part of the message list.
+This is populated for role 'function' or 'agent' as a mechanism to identify
+the function that was called during the execution of a tool
+"""
+MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json"
+"""
+The JSON string representing the arguments passed to the function
+during a function call
+"""
+MESSAGE_CONTENT = "message.content"
+"""
+The content of the message to the llm
+"""
+LLM_FUNCTION_CALL = "llm.function_call"
+"""
+For models and APIs that support function calling. Records attributes such as the function name and
+arguments to the called function.
+"""
+LLM_INVOCATION_PARAMETERS = "llm.invocation_parameters"
+"""
+Invocation parameters passed to the LLM or API, such as the model name, temperature, etc.
+"""
+LLM_MESSAGES = "llm.messages"
+"""
+Messages provided to a chat API.
+"""
+LLM_MODEL_NAME = "llm.model_name"
+"""
+The name of the model being used.
+"""
+LLM_PROMPT = "llm.prompt"
+"""
+Messages provided to a completions API.
+"""
+LLM_PROMPT_TEMPLATE = "llm.prompt_template.template"
+"""
+The prompt template as a Python f-string.
+"""
+LLM_PROMPT_TEMPLATE_VARIABLES = "llm.prompt_template.variables"
+"""
+A list of input variables to the prompt template.
+"""
+LLM_PROMPT_TEMPLATE_VERSION = "llm.prompt_template.version"
+"""
+The version of the prompt template being used.
+"""
+LLM_TOKEN_COUNT_PROMPT = "llm.token_count.prompt"
+"""
+Number of tokens in the prompt.
+"""
+LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion"
+"""
+Number of tokens in the completion.
+"""
+LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total"
+"""
+Total number of tokens, including both prompt and completion.
+"""
+
+TOOL_NAME = "tool.name"
+"""
+Name of the tool being used.
+"""
+TOOL_DESCRIPTION = "tool.description"
+"""
+Description of the tool's purpose, typically used to select the tool.
+"""
+TOOL_PARAMETERS = "tool.parameters"
+"""
+Parameters of the tool, e.g. see https://platform.openai.com/docs/guides/gpt/function-calling
+"""
+
+RETRIEVAL_DOCUMENTS = "retrieval.documents"
+DOCUMENT_ID = "document.id"
+DOCUMENT_SCORE = "document.score"
+DOCUMENT_CONTENT = "document.content"
+DOCUMENT_METADATA = "document.metadata"
+"""
+Document metadata as a string representing a JSON object
+"""
```
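These flat string keys are meant to be used directly as span attribute names, and `MimeType._missing_` gives the same lenient construction as the enums in `schemas.py`, defaulting falsy values to `TEXT`. A sketch (the attribute values are hypothetical):

```python
from phoenix.trace.semantic_conventions import INPUT_VALUE, LLM_MODEL_NAME, MimeType

attributes = {
    INPUT_VALUE: "What is RAG?",  # key "input.value"
    LLM_MODEL_NAME: "gpt-4",      # key "llm.model_name"
}
assert MimeType(None) is MimeType.TEXT  # unspecified mime type -> plain text
```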
phoenix/trace/span_json_decoder.py
CHANGED

```diff
@@ -1,22 +1,52 @@
 import json
 from datetime import datetime
-from typing import Any, Dict
+from typing import Any, Dict, Optional
+from uuid import UUID
 
 from phoenix.trace.schemas import (
     Span,
     SpanContext,
     SpanConversationAttributes,
     SpanEvent,
+    SpanException,
     SpanKind,
     SpanStatusCode,
 )
+from phoenix.trace.semantic_conventions import (
+    DOCUMENT_METADATA,
+    EXCEPTION_MESSAGE,
+    INPUT_MIME_TYPE,
+    OUTPUT_MIME_TYPE,
+    RETRIEVAL_DOCUMENTS,
+    MimeType,
+)
+
+
+def json_to_document(obj: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+    if obj is None:
+        return {}
+    if document_metadata := obj.get(DOCUMENT_METADATA):
+        obj[DOCUMENT_METADATA] = json.loads(document_metadata)
+    return obj
+
+
+def json_to_attributes(obj: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+    if obj is None:
+        return {}
+    if not isinstance(obj, dict):
+        raise ValueError(f"attributes should be dict, but attributes={obj}")
+    if mime_type := obj.get(INPUT_MIME_TYPE):
+        obj[INPUT_MIME_TYPE] = MimeType(mime_type)
+    if mime_type := obj.get(OUTPUT_MIME_TYPE):
+        obj[OUTPUT_MIME_TYPE] = MimeType(mime_type)
+    if documents := obj.get(RETRIEVAL_DOCUMENTS):
+        obj[RETRIEVAL_DOCUMENTS] = [json_to_document(document) for document in documents]
+    return obj
 
 
 def json_to_span(data: Dict[str, Any]) -> Any:
     """
     A hook for json.loads to convert a dict to a Span object.
-
-    NB: this function is mainly used for testing purposes. Consider swapping this out for pydantic.
     """
     # Check if the dict can be interpreted as a Span
     if set(data.keys()) == {
```
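The two new helpers rehydrate the typed values that the encoder flattens to strings: mime types are coerced back to `MimeType` members and each retrieval document's metadata is parsed from its JSON string. A sketch of the effect:

```python
from phoenix.trace.span_json_decoder import json_to_attributes

attrs = json_to_attributes(
    {
        "input.mime_type": "application/json",
        "retrieval.documents": [{"document.metadata": '{"source": "wiki"}'}],
    }
)
# attrs["input.mime_type"] is now MimeType.JSON, and the document metadata
# has been parsed into a dict: {"source": "wiki"}
```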
```diff
@@ -32,12 +62,36 @@ def json_to_span(data: Dict[str, Any]) -> Any:
         "events",
         "conversation",
     }:
-
+        context = data["context"]
+        if not isinstance(context, dict):
+            raise ValueError(f"context should be dict, but context={context}")
+        data["context"] = SpanContext(
+            trace_id=UUID(context["trace_id"]),
+            span_id=UUID(context["span_id"]),
+        )
+        parent_id = data.get("parent_id")
+        data["parent_id"] = UUID(parent_id) if parent_id else None
+        attributes = data.get("attributes")
+        data["attributes"] = json_to_attributes(attributes)
         data["start_time"] = datetime.fromisoformat(data["start_time"])
-        data["end_time"] =
+        data["end_time"] = (
+            datetime.fromisoformat(end_time) if (end_time := data.get("end_time")) else None
+        )
         data["span_kind"] = SpanKind(data["span_kind"])
         data["status_code"] = SpanStatusCode(data["status_code"])
-        data["events"] = [
+        data["events"] = [
+            SpanException(
+                message=(event.get("attributes") or {}).get(EXCEPTION_MESSAGE) or "",
+                timestamp=datetime.fromisoformat(event["timestamp"]),
+            )
+            if event["name"] == "exception"
+            else SpanEvent(
+                name=event["name"],
+                attributes=event.get("attributes") or {},
+                timestamp=datetime.fromisoformat(event["timestamp"]),
+            )
+            for event in data["events"]
+        ]
         data["conversation"] = (
             SpanConversationAttributes(**data["conversation"])
             if data["conversation"] is not None
```
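`json_to_span` is written in the style of a `json.loads` object hook: any decoded dict whose keys match the Span schema is rebuilt into a `Span`, with UUIDs, timestamps, enums, and events reconstructed along the way. A sketch of decoding one serialized span, assuming `span_json` holds the output of `span_to_json` from the encoder below:

```python
import json
from phoenix.trace.span_json_decoder import json_to_span

# span_json: a JSON string produced by phoenix.trace.span_json_encoder.span_to_json
span = json.loads(span_json, object_hook=json_to_span)
print(span.context.trace_id, span.status_code)
```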
phoenix/trace/span_json_encoder.py
CHANGED

```diff
@@ -10,7 +10,6 @@ from .schemas import (
     SpanContext,
     SpanConversationAttributes,
     SpanEvent,
-    SpanException,
 )
 
 
@@ -27,14 +26,7 @@ class SpanJSONEncoder(json.JSONEncoder):
         elif isinstance(obj, SpanEvent):
             return {
                 "name": obj.name,
-                "
-                "timestamp": obj.timestamp.isoformat(),
-            }
-        elif isinstance(obj, SpanException):
-            # TODO: add stacktrace etc.
-            return {
-                "name": obj.name,
-                "message": obj.message,
+                "attributes": obj.attributes,
                 "timestamp": obj.timestamp.isoformat(),
             }
         elif isinstance(obj, Span):
```
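With `SpanException` now carrying its payload in `attributes`, the encoder no longer needs a dedicated branch; every `SpanEvent` serializes uniformly. A sketch of round-tripping a minimal span through `span_to_json` (the field values are placeholders; the field names mirror the `Span` dataclass in `schemas.py`):

```python
from datetime import datetime, timezone
from uuid import uuid4
from phoenix.trace.schemas import Span, SpanContext, SpanKind, SpanStatusCode
from phoenix.trace.span_json_encoder import span_to_json

span = Span(
    name="query",
    context=SpanContext(trace_id=uuid4(), span_id=uuid4()),
    span_kind=SpanKind.LLM,
    parent_id=None,
    start_time=datetime.now(timezone.utc),
    end_time=None,  # end_time is now Optional
    status_code=SpanStatusCode.UNSET,
    status_message="",
    attributes={},
    events=[],
    conversation=None,
)
print(span_to_json(span))
```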
phoenix/trace/trace_dataset.py
CHANGED

```diff
@@ -1,10 +1,21 @@
+import json
+import uuid
+from datetime import datetime
+from typing import Iterator, List, Optional, cast
+
 import pandas as pd
-from pandas import DataFrame
+from pandas import DataFrame, read_parquet
+
+from phoenix.datetime_utils import normalize_timestamps
+
+from ..config import DATASET_DIR, GENERATED_DATASET_NAME_PREFIX
+from .schemas import ATTRIBUTE_PREFIX, CONTEXT_PREFIX, Span
+from .span_json_decoder import json_to_span
+from .span_json_encoder import span_to_json
 
 # A set of columns that is required
 REQUIRED_COLUMNS = [
     "name",
-    "message",
     "span_kind",
     "parent_id",
     "start_time",
@@ -16,6 +27,15 @@ REQUIRED_COLUMNS = [
 ]
 
 
+def normalize_dataframe(dataframe: DataFrame) -> "DataFrame":
+    """Makes the dataframe have appropriate data types"""
+
+    # Convert the start and end times to datetime
+    dataframe["start_time"] = normalize_timestamps(dataframe["start_time"])
+    dataframe["end_time"] = normalize_timestamps(dataframe["end_time"])
+    return dataframe
+
+
 class TraceDataset:
     """
     A TraceDataset is a wrapper around a dataframe which is a flattened representation
@@ -27,12 +47,84 @@ class TraceDataset:
         the pandas dataframe containing the tracing data. Each row represents a span.
     """
 
+    name: str
     dataframe: pd.DataFrame
+    _data_file_name: str = "data.parquet"
 
-    def __init__(self, dataframe: DataFrame):
+    def __init__(self, dataframe: DataFrame, name: Optional[str] = None):
         # Validate the the dataframe has required fields
-
-
-
-
-        self.dataframe = dataframe
+        if missing_columns := set(REQUIRED_COLUMNS) - set(dataframe.columns):
+            raise ValueError(
+                f"The dataframe is missing some required columns: {', '.join(missing_columns)}"
+            )
+        self.dataframe = normalize_dataframe(dataframe)
+        self.name = name or f"{GENERATED_DATASET_NAME_PREFIX}{str(uuid.uuid4())}"
+
+    @classmethod
+    def from_spans(cls, spans: List[Span]) -> "TraceDataset":
+        """Creates a TraceDataset from a list of spans.
+
+        Args:
+            spans (List[Span]): A list of spans.
+
+        Returns:
+            TraceDataset: A TraceDataset containing the spans.
+        """
+        return cls(pd.json_normalize(map(json.loads, map(span_to_json, spans))))  # type: ignore
+
+    def to_spans(self) -> Iterator[Span]:
+        for _, row in self.dataframe.iterrows():
+            is_attribute = row.index.str.startswith(ATTRIBUTE_PREFIX)
+            attribute_keys = row.index[is_attribute]
+            attributes = (
+                row.loc[is_attribute]
+                .rename(
+                    {key: key[len(ATTRIBUTE_PREFIX) :] for key in attribute_keys},
+                )
+                .dropna()
+                .to_dict()
+            )
+            is_context = row.index.str.startswith(CONTEXT_PREFIX)
+            context_keys = row.index[is_context]
+            context = (
+                row.loc[is_context]
+                .rename(
+                    {key: key[len(CONTEXT_PREFIX) :] for key in context_keys},
+                )
+                .to_dict()
+            )
+            end_time: Optional[datetime] = cast(datetime, row.get("end_time"))
+            if end_time is pd.NaT:
+                end_time = None
+            yield json_to_span(
+                {
+                    "name": row["name"],
+                    "context": context,
+                    "span_kind": row["span_kind"],
+                    "parent_id": row.get("parent_id"),
+                    "start_time": cast(datetime, row["start_time"]).isoformat(),
+                    "end_time": end_time.isoformat() if end_time else None,
+                    "status_code": row["status_code"],
+                    "status_message": row.get("status_message") or "",
+                    "attributes": attributes,
+                    "events": row.get("events") or [],
+                    "conversation": row.get("conversation"),
+                }
+            )
+
+    @classmethod
+    def from_name(cls, name: str) -> "TraceDataset":
+        """Retrieves a dataset by name from the file system"""
+        directory = DATASET_DIR / name
+        df = read_parquet(directory / cls._data_file_name)
+        return cls(df, name)
+
+    def to_disc(self) -> None:
+        """writes the data to disc"""
+        directory = DATASET_DIR / self.name
+        directory.mkdir(parents=True, exist_ok=True)
+        self.dataframe.to_parquet(
+            directory / self._data_file_name,
+            allow_truncated_timestamps=True,
+            coerce_timestamps="ms",
+        )
```
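Taken together, the new methods give `TraceDataset` a full round trip: build it from spans, persist it under `DATASET_DIR`, and load it back by name. A sketch, where `spans` is assumed to be a list of `Span` objects (e.g. collected from a tracer):

```python
from phoenix.trace.trace_dataset import TraceDataset

ds = TraceDataset.from_spans(spans)       # flatten spans into a dataframe
ds.to_disc()                              # writes DATASET_DIR/<name>/data.parquet
same_ds = TraceDataset.from_name(ds.name)
round_tripped = list(same_ds.to_spans())  # back to Span objects
```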
phoenix/trace/tracer.py
CHANGED

```diff
@@ -1,5 +1,6 @@
+import logging
 from datetime import datetime
-from typing import Callable, List, Optional
+from typing import Any, Callable, Iterator, List, Optional, Protocol
 from uuid import UUID, uuid4
 
 from .schemas import (
@@ -13,6 +14,14 @@ from .schemas import (
     SpanStatusCode,
 )
 
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+
+
+class SpanExporter(Protocol):
+    def export(self, span: Span) -> None:
+        ...
+
 
 class Tracer:
     """
@@ -28,7 +37,10 @@ class Tracer:
 
     def __init__(
         self,
+        exporter: Optional[SpanExporter] = None,
         on_append: Optional[Callable[[List[Span]], None]] = None,
+        *args: Any,
+        **kwargs: Any,
     ):
         """
         Create a new Tracer. A Tracer's main purpose is to create spans.
@@ -42,14 +54,16 @@
         """
         self.span_buffer = []
         self.on_append = on_append
+        self._exporter: Optional[SpanExporter] = exporter
+        super().__init__(*args, **kwargs)
 
     def create_span(
         self,
         name: str,
         span_kind: SpanKind,
         start_time: datetime,
-        end_time: datetime,
-        status_code: SpanStatusCode,
+        end_time: Optional[datetime] = None,
+        status_code: SpanStatusCode = SpanStatusCode.UNSET,
         status_message: Optional[str] = "",
         parent_id: Optional[SpanID] = None,
         trace_id: Optional[UUID] = None,
@@ -86,8 +100,17 @@
             conversation=conversation,
         )
 
+        if self._exporter:
+            self._exporter.export(span)
         self.span_buffer.append(span)
 
         if self.on_append is not None:
             self.on_append(self.span_buffer)
         return span
+
+    def get_spans(self) -> Iterator[Span]:
+        """
+        Returns the spans stored in the tracer. This is useful if you are running
+        in a notebook environment and you want to inspect the spans.
+        """
+        yield from self.span_buffer
```
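The `SpanExporter` protocol only requires an `export(span)` method, so any object with that shape can receive spans as they are created. A sketch with a hypothetical stdout exporter, assuming the `create_span` parameters not shown in the hunk keep default values:

```python
from datetime import datetime, timezone
from phoenix.trace.schemas import Span, SpanKind
from phoenix.trace.tracer import Tracer

class PrintExporter:
    """Hypothetical exporter; satisfies SpanExporter structurally."""
    def export(self, span: Span) -> None:
        print(span.name, span.span_kind)

tracer = Tracer(exporter=PrintExporter())
span = tracer.create_span(
    name="llm-call",
    span_kind=SpanKind.LLM,
    start_time=datetime.now(timezone.utc),  # end_time/status_code now default
)
spans = list(tracer.get_spans())  # inspect buffered spans, e.g. in a notebook
```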