arize-phoenix 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.

Note: this release of arize-phoenix has been flagged as potentially problematic.

Files changed (71)
  1. {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/METADATA +11 -5
  2. {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/RECORD +69 -40
  3. phoenix/__init__.py +3 -1
  4. phoenix/config.py +23 -1
  5. phoenix/core/model_schema.py +14 -37
  6. phoenix/core/model_schema_adapter.py +0 -1
  7. phoenix/core/traces.py +285 -0
  8. phoenix/datasets/dataset.py +14 -21
  9. phoenix/datasets/errors.py +4 -1
  10. phoenix/datasets/schema.py +1 -1
  11. phoenix/datetime_utils.py +87 -0
  12. phoenix/experimental/callbacks/__init__.py +0 -0
  13. phoenix/experimental/callbacks/langchain_tracer.py +228 -0
  14. phoenix/experimental/callbacks/llama_index_trace_callback_handler.py +364 -0
  15. phoenix/experimental/evals/__init__.py +33 -0
  16. phoenix/experimental/evals/functions/__init__.py +4 -0
  17. phoenix/experimental/evals/functions/binary.py +156 -0
  18. phoenix/experimental/evals/functions/common.py +31 -0
  19. phoenix/experimental/evals/functions/generate.py +50 -0
  20. phoenix/experimental/evals/models/__init__.py +4 -0
  21. phoenix/experimental/evals/models/base.py +130 -0
  22. phoenix/experimental/evals/models/openai.py +128 -0
  23. phoenix/experimental/evals/retrievals.py +2 -2
  24. phoenix/experimental/evals/templates/__init__.py +24 -0
  25. phoenix/experimental/evals/templates/default_templates.py +126 -0
  26. phoenix/experimental/evals/templates/template.py +107 -0
  27. phoenix/experimental/evals/utils/__init__.py +0 -0
  28. phoenix/experimental/evals/utils/downloads.py +33 -0
  29. phoenix/experimental/evals/utils/threads.py +27 -0
  30. phoenix/experimental/evals/utils/types.py +9 -0
  31. phoenix/experimental/evals/utils.py +33 -0
  32. phoenix/metrics/binning.py +0 -1
  33. phoenix/metrics/timeseries.py +2 -3
  34. phoenix/server/api/context.py +2 -0
  35. phoenix/server/api/input_types/SpanSort.py +60 -0
  36. phoenix/server/api/schema.py +85 -4
  37. phoenix/server/api/types/DataQualityMetric.py +10 -1
  38. phoenix/server/api/types/Dataset.py +2 -4
  39. phoenix/server/api/types/DatasetInfo.py +10 -0
  40. phoenix/server/api/types/ExportEventsMutation.py +4 -1
  41. phoenix/server/api/types/Functionality.py +15 -0
  42. phoenix/server/api/types/MimeType.py +16 -0
  43. phoenix/server/api/types/Model.py +3 -5
  44. phoenix/server/api/types/SortDir.py +13 -0
  45. phoenix/server/api/types/Span.py +229 -0
  46. phoenix/server/api/types/TimeSeries.py +9 -2
  47. phoenix/server/api/types/pagination.py +2 -0
  48. phoenix/server/app.py +24 -4
  49. phoenix/server/main.py +60 -24
  50. phoenix/server/span_handler.py +39 -0
  51. phoenix/server/static/index.js +956 -479
  52. phoenix/server/thread_server.py +10 -2
  53. phoenix/services.py +39 -16
  54. phoenix/session/session.py +99 -27
  55. phoenix/trace/exporter.py +71 -0
  56. phoenix/trace/filter.py +181 -0
  57. phoenix/trace/fixtures.py +23 -8
  58. phoenix/trace/schemas.py +59 -6
  59. phoenix/trace/semantic_conventions.py +141 -1
  60. phoenix/trace/span_json_decoder.py +60 -6
  61. phoenix/trace/span_json_encoder.py +1 -9
  62. phoenix/trace/trace_dataset.py +100 -8
  63. phoenix/trace/tracer.py +26 -3
  64. phoenix/trace/v1/__init__.py +522 -0
  65. phoenix/trace/v1/trace_pb2.py +52 -0
  66. phoenix/trace/v1/trace_pb2.pyi +351 -0
  67. phoenix/core/dimension_data_type.py +0 -6
  68. phoenix/core/dimension_type.py +0 -9
  69. {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/WHEEL +0 -0
  70. {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/licenses/IP_NOTICE +0 -0
  71. {arize_phoenix-0.0.32.dist-info → arize_phoenix-0.0.33.dist-info}/licenses/LICENSE +0 -0
phoenix/trace/fixtures.py CHANGED
@@ -1,9 +1,8 @@
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import List, Optional, cast
 from urllib import request

-import pandas as pd
-
+from phoenix.trace.trace_dataset import TraceDataset
 from phoenix.trace.utils import json_lines_to_df


@@ -14,13 +13,29 @@ class TracesFixture:
     file_name: str


+llama_index_rag_fixture = TracesFixture(
+    name="llama_index_rag",
+    description="Traces from running the llama_index on a RAG use case.",
+    file_name="llama_index_rag_v5.jsonl",
+)
+
+langchain_rag_stuff_document_chain_fixture = TracesFixture(
+    name="langchain_rag_stuff_document_chain",
+    description="LangChain RAG data",
+    file_name="langchain_rag.jsonl",
+)
+
 random_fixture = TracesFixture(
     name="random",
     description="Randomly generated traces",
     file_name="random.jsonl",
 )

-TRACES_FIXTURES: List[TracesFixture] = [random_fixture]
+TRACES_FIXTURES: List[TracesFixture] = [
+    llama_index_rag_fixture,
+    langchain_rag_stuff_document_chain_fixture,
+    random_fixture,
+]

 NAME_TO_TRACES_FIXTURE = {fixture.name: fixture for fixture in TRACES_FIXTURES}

@@ -45,20 +60,20 @@ def _download_traces_fixture(
     host: Optional[str] = "https://storage.googleapis.com/",
     bucket: Optional[str] = "arize-assets",
     prefix: Optional[str] = "phoenix/traces/",
-) -> pd.DataFrame:
+) -> List[str]:
     """
     Downloads the traces fixture from the phoenix bucket.
     """
     url = f"{host}{bucket}/{prefix}{fixture.file_name}"
     with request.urlopen(url) as f:
-        return json_lines_to_df(f.readlines())
+        return cast(List[str], f.readlines())


-def load_example_traces(use_case: str) -> pd.DataFrame:
+def load_example_traces(use_case: str) -> TraceDataset:
     """
     Loads a trace dataframe by name.

     NB: this functionality is under active construction.
     """
     fixture = _get_trace_fixture_by_name(use_case)
-    return _download_traces_fixture(fixture)
+    return TraceDataset(json_lines_to_df(_download_traces_fixture(fixture)))
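With this change, load_example_traces returns a TraceDataset rather than a raw dataframe. A minimal usage sketch based on the fixture names registered above (network access to the fixture bucket is assumed):

from phoenix.trace.fixtures import load_example_traces

# "llama_index_rag" is one of the fixture names registered in this diff
ds = load_example_traces("llama_index_rag")
print(ds.name)            # a generated name unless one is supplied
print(len(ds.dataframe))  # one row per span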
phoenix/trace/schemas.py CHANGED
@@ -4,14 +4,29 @@ from enum import Enum
 from typing import Any, Dict, List, Optional, Union
 from uuid import UUID

+from phoenix.trace.semantic_conventions import (
+    EXCEPTION_ESCAPED,
+    EXCEPTION_MESSAGE,
+    EXCEPTION_STACKTRACE,
+    EXCEPTION_TYPE,
+)
+

 class SpanStatusCode(Enum):
     UNSET = "UNSET"
     OK = "OK"
     ERROR = "ERROR"

+    def __str__(self) -> str:
+        return self.value
+
+    @classmethod
+    def _missing_(cls, v: Any) -> Optional["SpanStatusCode"]:
+        if v and isinstance(v, str) and not v.isupper():
+            return cls(v.upper())
+        return None if v else cls.UNSET
+

-@dataclass(frozen=True)
 class SpanKind(Enum):
     """
     SpanKind is loosely inspired by OpenTelemetry's SpanKind
@@ -25,8 +40,20 @@ class SpanKind(Enum):
     LLM = "LLM"
     RETRIEVER = "RETRIEVER"
     EMBEDDING = "EMBEDDING"
+    AGENT = "AGENT"
+    UNKNOWN = "UNKNOWN"
+
+    def __str__(self) -> str:
+        return self.value

+    @classmethod
+    def _missing_(cls, v: Any) -> Optional["SpanKind"]:
+        if v and isinstance(v, str) and not v.isupper():
+            return cls(v.upper())
+        return None if v else cls.UNKNOWN

+
+TraceID = UUID
 SpanID = UUID
 AttributePrimitiveValue = Union[str, bool, float, int]
 AttributeValue = Union[AttributePrimitiveValue, List[AttributePrimitiveValue]]
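The _missing_ hooks added to SpanStatusCode and SpanKind make enum construction tolerant of lowercase values and default falsy input to UNSET/UNKNOWN. A self-contained sketch of the pattern (mirroring the diff rather than importing from phoenix):

from enum import Enum
from typing import Any, Optional


class SpanStatusCode(Enum):
    UNSET = "UNSET"
    OK = "OK"
    ERROR = "ERROR"

    @classmethod
    def _missing_(cls, v: Any) -> Optional["SpanStatusCode"]:
        # Retry the lookup with the uppercased value; fall back to
        # UNSET for falsy input such as None or "".
        if v and isinstance(v, str) and not v.isupper():
            return cls(v.upper())
        return None if v else cls.UNSET


assert SpanStatusCode("ok") is SpanStatusCode.OK
assert SpanStatusCode(None) is SpanStatusCode.UNSET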
@@ -37,7 +64,7 @@ SpanAttributes = Dict[str, AttributeValue]
 class SpanContext:
     """Context propagation for a span"""

-    trace_id: UUID
+    trace_id: TraceID
     span_id: SpanID


@@ -58,10 +85,11 @@ class SpanEvent(Dict[str, Any]):
     """

     name: str
-    message: str
     timestamp: datetime
+    attributes: SpanAttributes


+@dataclass(frozen=True)
 class SpanException(SpanEvent):
     """
     A Span Exception is a special type of Span Event that denotes an error
@@ -73,8 +101,28 @@ class SpanException(SpanEvent):
     https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/exceptions.md
     """

-    def __init__(self, timestamp: datetime, message: str):
-        super().__init__(name="exception", message=message, timestamp=timestamp)
+    def __init__(
+        self,
+        timestamp: datetime,
+        message: str,
+        exception_type: Optional[str] = None,
+        exception_escaped: Optional[bool] = None,
+        exception_stacktrace: Optional[str] = None,
+    ):
+        super().__init__(
+            name="exception",
+            timestamp=timestamp,
+            attributes={
+                k: v
+                for k, v in {
+                    EXCEPTION_TYPE: exception_type,
+                    EXCEPTION_MESSAGE: message,
+                    EXCEPTION_ESCAPED: exception_escaped,
+                    EXCEPTION_STACKTRACE: exception_stacktrace,
+                }.items()
+                if v is not None
+            },
+        )


 @dataclass(frozen=True)
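The rewritten SpanException.__init__ folds its arguments into OpenTelemetry-style exception.* attributes and drops any that are None. An illustrative construction (reading the event via .attributes matches how the encoder later in this diff serializes events):

from datetime import datetime, timezone
from phoenix.trace.schemas import SpanException

exc = SpanException(
    timestamp=datetime.now(timezone.utc),
    message="division by zero",
    exception_type="ZeroDivisionError",
    # exception_escaped and exception_stacktrace are omitted: both are
    # None and therefore filtered out of the attributes dict
)
# exc.attributes == {
#     "exception.type": "ZeroDivisionError",
#     "exception.message": "division by zero",
# }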
@@ -95,7 +143,7 @@ class Span:
     "If the parent_id is None, this is the root span"
     parent_id: Optional[SpanID]
     start_time: datetime
-    end_time: datetime
+    end_time: Optional[datetime]
     status_code: SpanStatusCode
     status_message: str
     """
@@ -131,3 +179,8 @@ class Span:
     conversation_id
     """
     conversation: Optional[SpanConversationAttributes]
+
+
+ATTRIBUTE_PREFIX = "attributes."
+CONTEXT_PREFIX = "context."
+COMPUTED_PREFIX = "__computed__."
phoenix/trace/semantic_conventions.py CHANGED
@@ -5,7 +5,8 @@ Inspiration from OpenTelemetry:
 https://opentelemetry.io/docs/specs/otel/trace/semantic_conventions/span-general/
 """
 from dataclasses import dataclass
-from typing import Any, Dict
+from enum import Enum
+from typing import Any, Dict, Optional


 @dataclass(frozen=True)
@@ -35,3 +36,142 @@ class DeploymentAttributes(AttributeGroup):
         type="string",
     ),
 }
+
+
+EXCEPTION_TYPE = "exception.type"
+EXCEPTION_MESSAGE = "exception.message"
+EXCEPTION_ESCAPED = "exception.escaped"
+EXCEPTION_STACKTRACE = "exception.stacktrace"
+
+
+OUTPUT_VALUE = "output.value"
+OUTPUT_MIME_TYPE = "output.mime_type"
+"""
+The type of output.value. If unspecified, the type is plain text by default.
+If type is JSON, the value is a string representing a JSON object.
+"""
+INPUT_VALUE = "input.value"
+INPUT_MIME_TYPE = "input.mime_type"
+"""
+The type of input.value. If unspecified, the type is plain text by default.
+If type is JSON, the value is a string representing a JSON object.
+"""
+
+
+class MimeType(Enum):
+    TEXT = "text/plain"
+    JSON = "application/json"
+
+    @classmethod
+    def _missing_(cls, v: Any) -> Optional["MimeType"]:
+        return None if v else cls.TEXT
+
+
+EMBEDDING_EMBEDDINGS = "embedding.embeddings"
+"""
+A list of objects containing embedding data, including the vector and represented piece of text.
+"""
+EMBEDDING_MODEL_NAME = "embedding.model_name"
+"""
+The name of the embedding model.
+"""
+EMBEDDING_TEXT = "embedding.text"
+"""
+The text represented by the embedding.
+"""
+EMBEDDING_VECTOR = "embedding.vector"
+"""
+The embedding vector.
+"""
+
+MESSAGE_ROLE = "message.role"
+"""
+The role of the message, such as "user", "agent", "function".
+"""
+MESSAGE_NAME = "message.name"
+"""
+The name of the message, often used to identify the function
+that was used to generate the message.
+"""
+MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name"
+"""
+The function name that is a part of the message list.
+This is populated for role 'function' or 'agent' as a mechanism to identify
+the function that was called during the execution of a tool
+"""
+MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json"
+"""
+The JSON string representing the arguments passed to the function
+during a function call
+"""
+MESSAGE_CONTENT = "message.content"
+"""
+The content of the message to the llm
+"""
+LLM_FUNCTION_CALL = "llm.function_call"
+"""
+For models and APIs that support function calling. Records attributes such as the function name and
+arguments to the called function.
+"""
+LLM_INVOCATION_PARAMETERS = "llm.invocation_parameters"
+"""
+Invocation parameters passed to the LLM or API, such as the model name, temperature, etc.
+"""
+LLM_MESSAGES = "llm.messages"
+"""
+Messages provided to a chat API.
+"""
+LLM_MODEL_NAME = "llm.model_name"
+"""
+The name of the model being used.
+"""
+LLM_PROMPT = "llm.prompt"
+"""
+Messages provided to a completions API.
+"""
+LLM_PROMPT_TEMPLATE = "llm.prompt_template.template"
+"""
+The prompt template as a Python f-string.
+"""
+LLM_PROMPT_TEMPLATE_VARIABLES = "llm.prompt_template.variables"
+"""
+A list of input variables to the prompt template.
+"""
+LLM_PROMPT_TEMPLATE_VERSION = "llm.prompt_template.version"
+"""
+The version of the prompt template being used.
+"""
+LLM_TOKEN_COUNT_PROMPT = "llm.token_count.prompt"
+"""
+Number of tokens in the prompt.
+"""
+LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion"
+"""
+Number of tokens in the completion.
+"""
+LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total"
+"""
+Total number of tokens, including both prompt and completion.
+"""
+
+TOOL_NAME = "tool.name"
+"""
+Name of the tool being used.
+"""
+TOOL_DESCRIPTION = "tool.description"
+"""
+Description of the tool's purpose, typically used to select the tool.
+"""
+TOOL_PARAMETERS = "tool.parameters"
+"""
+Parameters of the tool, e.g. see https://platform.openai.com/docs/guides/gpt/function-calling
+"""
+
+RETRIEVAL_DOCUMENTS = "retrieval.documents"
+DOCUMENT_ID = "document.id"
+DOCUMENT_SCORE = "document.score"
+DOCUMENT_CONTENT = "document.content"
+DOCUMENT_METADATA = "document.metadata"
+"""
+Document metadata as a string representing a JSON object
+"""
phoenix/trace/span_json_decoder.py CHANGED
@@ -1,22 +1,52 @@
 import json
 from datetime import datetime
-from typing import Any, Dict
+from typing import Any, Dict, Optional
+from uuid import UUID

 from phoenix.trace.schemas import (
     Span,
     SpanContext,
     SpanConversationAttributes,
     SpanEvent,
+    SpanException,
     SpanKind,
     SpanStatusCode,
 )
+from phoenix.trace.semantic_conventions import (
+    DOCUMENT_METADATA,
+    EXCEPTION_MESSAGE,
+    INPUT_MIME_TYPE,
+    OUTPUT_MIME_TYPE,
+    RETRIEVAL_DOCUMENTS,
+    MimeType,
+)
+
+
+def json_to_document(obj: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+    if obj is None:
+        return {}
+    if document_metadata := obj.get(DOCUMENT_METADATA):
+        obj[DOCUMENT_METADATA] = json.loads(document_metadata)
+    return obj
+
+
+def json_to_attributes(obj: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+    if obj is None:
+        return {}
+    if not isinstance(obj, dict):
+        raise ValueError(f"attributes should be dict, but attributes={obj}")
+    if mime_type := obj.get(INPUT_MIME_TYPE):
+        obj[INPUT_MIME_TYPE] = MimeType(mime_type)
+    if mime_type := obj.get(OUTPUT_MIME_TYPE):
+        obj[OUTPUT_MIME_TYPE] = MimeType(mime_type)
+    if documents := obj.get(RETRIEVAL_DOCUMENTS):
+        obj[RETRIEVAL_DOCUMENTS] = [json_to_document(document) for document in documents]
+    return obj


 def json_to_span(data: Dict[str, Any]) -> Any:
     """
     A hook for json.loads to convert a dict to a Span object.
-
-    NB: this function is mainly used for testing purposes. Consider swapping this out for pydantic.
     """
     # Check if the dict can be interpreted as a Span
     if set(data.keys()) == {
@@ -32,12 +62,36 @@ def json_to_span(data: Dict[str, Any]) -> Any:
         "events",
         "conversation",
     }:
-        data["context"] = SpanContext(**data["context"])
+        context = data["context"]
+        if not isinstance(context, dict):
+            raise ValueError(f"context should be dict, but context={context}")
+        data["context"] = SpanContext(
+            trace_id=UUID(context["trace_id"]),
+            span_id=UUID(context["span_id"]),
+        )
+        parent_id = data.get("parent_id")
+        data["parent_id"] = UUID(parent_id) if parent_id else None
+        attributes = data.get("attributes")
+        data["attributes"] = json_to_attributes(attributes)
         data["start_time"] = datetime.fromisoformat(data["start_time"])
-        data["end_time"] = datetime.fromisoformat(data["end_time"])
+        data["end_time"] = (
+            datetime.fromisoformat(end_time) if (end_time := data.get("end_time")) else None
+        )
         data["span_kind"] = SpanKind(data["span_kind"])
         data["status_code"] = SpanStatusCode(data["status_code"])
-        data["events"] = [SpanEvent(**event) for event in data["events"]]  # Build SpanEvent objects
+        data["events"] = [
+            SpanException(
+                message=(event.get("attributes") or {}).get(EXCEPTION_MESSAGE) or "",
+                timestamp=datetime.fromisoformat(event["timestamp"]),
+            )
+            if event["name"] == "exception"
+            else SpanEvent(
+                name=event["name"],
+                attributes=event.get("attributes") or {},
+                timestamp=datetime.fromisoformat(event["timestamp"]),
+            )
+            for event in data["events"]
+        ]
         data["conversation"] = (
             SpanConversationAttributes(**data["conversation"])
             if data["conversation"] is not None
phoenix/trace/span_json_encoder.py CHANGED
@@ -10,7 +10,6 @@ from .schemas import (
     SpanContext,
     SpanConversationAttributes,
     SpanEvent,
-    SpanException,
 )


@@ -27,14 +26,7 @@ class SpanJSONEncoder(json.JSONEncoder):
         elif isinstance(obj, SpanEvent):
             return {
                 "name": obj.name,
-                "message": obj.message,
-                "timestamp": obj.timestamp.isoformat(),
-            }
-        elif isinstance(obj, SpanException):
-            # TODO: add stacktrace etc.
-            return {
-                "name": obj.name,
-                "message": obj.message,
+                "attributes": obj.attributes,
                 "timestamp": obj.timestamp.isoformat(),
             }
         elif isinstance(obj, Span):
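On the encoding side, SpanEvent now serializes an attributes dict in place of the old flat message field, so exceptions and plain events share one code path. A sketch, assuming SpanJSONEncoder (named in the hunk header above) is the module's public encoder:

import json
from phoenix.trace.span_json_encoder import SpanJSONEncoder

serialized = json.dumps(span, cls=SpanJSONEncoder)  # `span` from the decoder sketch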
phoenix/trace/trace_dataset.py CHANGED
@@ -1,10 +1,21 @@
+import json
+import uuid
+from datetime import datetime
+from typing import Iterator, List, Optional, cast
+
 import pandas as pd
-from pandas import DataFrame
+from pandas import DataFrame, read_parquet
+
+from phoenix.datetime_utils import normalize_timestamps
+
+from ..config import DATASET_DIR, GENERATED_DATASET_NAME_PREFIX
+from .schemas import ATTRIBUTE_PREFIX, CONTEXT_PREFIX, Span
+from .span_json_decoder import json_to_span
+from .span_json_encoder import span_to_json

 # A set of columns that is required
 REQUIRED_COLUMNS = [
     "name",
-    "message",
     "span_kind",
     "parent_id",
     "start_time",
@@ -16,6 +27,15 @@ REQUIRED_COLUMNS = [
 ]


+def normalize_dataframe(dataframe: DataFrame) -> "DataFrame":
+    """Makes the dataframe have appropriate data types"""
+
+    # Convert the start and end times to datetime
+    dataframe["start_time"] = normalize_timestamps(dataframe["start_time"])
+    dataframe["end_time"] = normalize_timestamps(dataframe["end_time"])
+    return dataframe
+
+
 class TraceDataset:
     """
     A TraceDataset is a wrapper around a dataframe which is a flattened representation
@@ -27,12 +47,84 @@ class TraceDataset:
     the pandas dataframe containing the tracing data. Each row represents a span.
     """

+    name: str
     dataframe: pd.DataFrame
+    _data_file_name: str = "data.parquet"

-    def __init__(self, dataframe: DataFrame):
+    def __init__(self, dataframe: DataFrame, name: Optional[str] = None):
         # Validate the the dataframe has required fields
-        columns = dataframe.columns.values
-        missing_columns = [column for column in REQUIRED_COLUMNS if column not in columns]
-        if missing_columns:
-            raise ValueError(f"The dataframe is missing some required columns: {missing_columns}")
-        self.dataframe = dataframe
+        if missing_columns := set(REQUIRED_COLUMNS) - set(dataframe.columns):
+            raise ValueError(
+                f"The dataframe is missing some required columns: {', '.join(missing_columns)}"
+            )
+        self.dataframe = normalize_dataframe(dataframe)
+        self.name = name or f"{GENERATED_DATASET_NAME_PREFIX}{str(uuid.uuid4())}"
+
+    @classmethod
+    def from_spans(cls, spans: List[Span]) -> "TraceDataset":
+        """Creates a TraceDataset from a list of spans.
+
+        Args:
+            spans (List[Span]): A list of spans.
+
+        Returns:
+            TraceDataset: A TraceDataset containing the spans.
+        """
+        return cls(pd.json_normalize(map(json.loads, map(span_to_json, spans))))  # type: ignore
+
+    def to_spans(self) -> Iterator[Span]:
+        for _, row in self.dataframe.iterrows():
+            is_attribute = row.index.str.startswith(ATTRIBUTE_PREFIX)
+            attribute_keys = row.index[is_attribute]
+            attributes = (
+                row.loc[is_attribute]
+                .rename(
+                    {key: key[len(ATTRIBUTE_PREFIX) :] for key in attribute_keys},
+                )
+                .dropna()
+                .to_dict()
+            )
+            is_context = row.index.str.startswith(CONTEXT_PREFIX)
+            context_keys = row.index[is_context]
+            context = (
+                row.loc[is_context]
+                .rename(
+                    {key: key[len(CONTEXT_PREFIX) :] for key in context_keys},
+                )
+                .to_dict()
+            )
+            end_time: Optional[datetime] = cast(datetime, row.get("end_time"))
+            if end_time is pd.NaT:
+                end_time = None
+            yield json_to_span(
+                {
+                    "name": row["name"],
+                    "context": context,
+                    "span_kind": row["span_kind"],
+                    "parent_id": row.get("parent_id"),
+                    "start_time": cast(datetime, row["start_time"]).isoformat(),
+                    "end_time": end_time.isoformat() if end_time else None,
+                    "status_code": row["status_code"],
+                    "status_message": row.get("status_message") or "",
+                    "attributes": attributes,
+                    "events": row.get("events") or [],
+                    "conversation": row.get("conversation"),
+                }
+            )
+
+    @classmethod
+    def from_name(cls, name: str) -> "TraceDataset":
+        """Retrieves a dataset by name from the file system"""
+        directory = DATASET_DIR / name
+        df = read_parquet(directory / cls._data_file_name)
+        return cls(df, name)
+
+    def to_disc(self) -> None:
+        """writes the data to disc"""
+        directory = DATASET_DIR / self.name
+        directory.mkdir(parents=True, exist_ok=True)
+        self.dataframe.to_parquet(
+            directory / self._data_file_name,
+            allow_truncated_timestamps=True,
+            coerce_timestamps="ms",
+        )
phoenix/trace/tracer.py CHANGED
@@ -1,5 +1,6 @@
+import logging
 from datetime import datetime
-from typing import Callable, List, Optional
+from typing import Any, Callable, Iterator, List, Optional, Protocol
 from uuid import UUID, uuid4

 from .schemas import (
@@ -13,6 +14,14 @@ from .schemas import (
     SpanStatusCode,
 )

+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+
+
+class SpanExporter(Protocol):
+    def export(self, span: Span) -> None:
+        ...
+

 class Tracer:
     """
@@ -28,7 +37,10 @@ class Tracer:

     def __init__(
         self,
+        exporter: Optional[SpanExporter] = None,
         on_append: Optional[Callable[[List[Span]], None]] = None,
+        *args: Any,
+        **kwargs: Any,
     ):
         """
         Create a new Tracer. A Tracer's main purpose is to create spans.
@@ -42,14 +54,16 @@ class Tracer:
         """
         self.span_buffer = []
         self.on_append = on_append
+        self._exporter: Optional[SpanExporter] = exporter
+        super().__init__(*args, **kwargs)

     def create_span(
         self,
         name: str,
         span_kind: SpanKind,
         start_time: datetime,
-        end_time: datetime,
-        status_code: SpanStatusCode,
+        end_time: Optional[datetime] = None,
+        status_code: SpanStatusCode = SpanStatusCode.UNSET,
         status_message: Optional[str] = "",
         parent_id: Optional[SpanID] = None,
         trace_id: Optional[UUID] = None,
@@ -86,8 +100,17 @@ class Tracer:
             conversation=conversation,
         )

+        if self._exporter:
+            self._exporter.export(span)
         self.span_buffer.append(span)

         if self.on_append is not None:
             self.on_append(self.span_buffer)
         return span
+
+    def get_spans(self) -> Iterator[Span]:
+        """
+        Returns the spans stored in the tracer. This is useful if you are running
+        in a notebook environment and you want to inspect the spans.
+        """
+        yield from self.span_buffer
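Because SpanExporter is a Protocol, any object with a matching export method can be passed to the Tracer; no subclassing is required. A minimal sketch (the create_span parameters not shown in this hunk, such as attributes and events, are assumed to have defaults):

from datetime import datetime, timezone

from phoenix.trace.schemas import Span, SpanKind
from phoenix.trace.tracer import Tracer


class PrintExporter:
    def export(self, span: Span) -> None:
        print(span.context.span_id, span.name)


tracer = Tracer(exporter=PrintExporter())  # exports each span as it is created
tracer.create_span(
    name="query",
    span_kind=SpanKind.LLM,
    start_time=datetime.now(timezone.utc),
    # end_time and status_code now default to None / SpanStatusCode.UNSET
)
spans = list(tracer.get_spans())  # inspect buffered spans, e.g. in a notebook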