arize-phoenix 2.7.0__py3-none-any.whl → 2.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -533,7 +533,8 @@ def _is_databricks() -> bool:
         import IPython  # type: ignore
     except ImportError:
         return False
-    shell = IPython.get_ipython()
+    if (shell := IPython.get_ipython()) is None:
+        return False
     try:
         dbutils = shell.user_ns["dbutils"]
     except KeyError:
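
The walrus-operator guard matters because IPython.get_ipython() returns None whenever IPython is importable but no interactive shell is active; the old code would then raise AttributeError on shell.user_ns. A minimal sketch of the guarded pattern (the helper name is illustrative, not part of the release):

def _shell_has_dbutils() -> bool:
    # Illustrative mirror of the patched check above.
    try:
        import IPython  # type: ignore
    except ImportError:
        return False
    # get_ipython() returns None outside an interactive shell, so guard before use.
    if (shell := IPython.get_ipython()) is None:
        return False
    return "dbutils" in shell.user_ns  # Databricks injects `dbutils` into the user namespace
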
phoenix/trace/exporter.py CHANGED
@@ -5,6 +5,7 @@ from queue import SimpleQueue
 from threading import Thread
 from types import MethodType
 from typing import Any, Optional, Union
+from urllib.parse import urljoin
 
 import opentelemetry.proto.trace.v1.trace_pb2 as otlp
 import requests
@@ -42,22 +43,25 @@ class HttpExporter:
         Parameters
         ----------
         endpoint: Optional[str]
-            The endpoint of the Phoenix server (collector). This should be set if the Phoenix
-            server is running on a remote instance. It can also be set using environment
-            variable `PHOENIX_COLLECTOR_ENDPOINT`, otherwise it defaults to `http://127.0.0.1:6006`
-            Note, this parameter supersedes `host` and `port`.
+            The endpoint of the Phoenix server (collector). This should be set
+            if the Phoenix server is running on a remote instance. It can also
+            be set using environment variable `PHOENIX_COLLECTOR_ENDPOINT`,
+            otherwise it defaults to `http://<host>:<port>`. Note, this
+            parameter supersedes `host` and `port`.
         host: Optional[str]
             The host of the Phoenix server. It can also be set using environment
-            variable `PHOENIX_HOST`, otherwise it defaults to `127.0.0.1`.
+            variable `PHOENIX_HOST`, otherwise it defaults to `0.0.0.0`.
         port: Optional[int]
             The port of the Phoenix server. It can also be set using environment
             variable `PHOENIX_PORT`, otherwise it defaults to `6006`.
         """
         self._host = host or get_env_host()
         self._port = port or get_env_port()
-        endpoint = endpoint or get_env_collector_endpoint() or f"http://{self._host}:{self._port}"
-        # Make sure the url does not end with a slash
-        self._base_url = endpoint.rstrip("/")
+        self._base_url = (
+            endpoint
+            or get_env_collector_endpoint()
+            or f"http://{'127.0.0.1' if self._host == '0.0.0.0' else self._host}:{self._port}"
+        )
         self._warn_if_phoenix_is_not_running()
         self._session = Session()
         weakref.finalize(self, self._session.close)
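
The rewritten constructor keeps 0.0.0.0 usable as the server's bind address while giving clients a routable default. A stdlib-only sketch of the resolution order, assuming none of the PHOENIX_* variables are set unless noted (resolve_base_url is a hypothetical stand-in for the constructor logic):

import os

def resolve_base_url(endpoint=None, host=None, port=None):
    # Precedence: explicit endpoint, then PHOENIX_COLLECTOR_ENDPOINT, then host/port.
    host = host or os.getenv("PHOENIX_HOST", "0.0.0.0")
    port = port or int(os.getenv("PHOENIX_PORT", "6006"))
    collector = endpoint or os.getenv("PHOENIX_COLLECTOR_ENDPOINT")
    # 0.0.0.0 is a bind address, not a destination, so clients fall back to loopback.
    return collector or f"http://{'127.0.0.1' if host == '0.0.0.0' else host}:{port}"

print(resolve_base_url())                          # http://127.0.0.1:6006
print(resolve_base_url(host="phoenix.internal"))   # http://phoenix.internal:6006
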
@@ -104,15 +108,15 @@ class HttpExporter:
 
     def _url(self, message: Message) -> str:
         if isinstance(message, otlp.Span):
-            return f"{self._base_url}/v1/spans"
+            return urljoin(self._base_url, "v1/spans")
         if isinstance(message, pb.Evaluation):
-            return f"{self._base_url}/v1/evaluations"
+            return urljoin(self._base_url, "v1/evaluations")
         logger.exception(f"unrecognized message type: {type(message)}")
         assert_never(message)
 
     def _warn_if_phoenix_is_not_running(self) -> None:
         try:
-            requests.get(f"{self._base_url}/arize_phoenix_version").raise_for_status()
+            requests.get(urljoin(self._base_url, "arize_phoenix_version")).raise_for_status()
         except Exception:
             logger.warning(
                 f"Arize Phoenix is not running on {self._base_url}. Launch Phoenix "
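
Switching from string concatenation to urljoin also changes how a collector endpoint with a path component is treated: urljoin resolves the relative path against the last slash-terminated segment of the base URL. A small stdlib illustration (the proxy URLs are hypothetical):

from urllib.parse import urljoin

# No path in the base: behaves like plain concatenation.
print(urljoin("http://127.0.0.1:6006", "v1/spans"))              # http://127.0.0.1:6006/v1/spans
# A base with a path but no trailing slash loses its last segment...
print(urljoin("http://proxy.example.com/phoenix", "v1/spans"))   # http://proxy.example.com/v1/spans
# ...while a trailing slash preserves it.
print(urljoin("http://proxy.example.com/phoenix/", "v1/spans"))  # http://proxy.example.com/phoenix/v1/spans
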
phoenix/trace/fixtures.py CHANGED
@@ -164,6 +164,11 @@ def _read_eval_fixture(eval_fixture: EvaluationFixture) -> Iterator[pb.Evaluatio
         )
         if isinstance(eval_fixture, DocumentEvaluationFixture):
             span_id, document_position = cast(Tuple[str, int], index)
+            # Legacy fixture files contain UUID strings for span_ids. The hyphens in these
+            # strings need to be removed because we are also removing the hyphens from the
+            # span_ids of their corresponding traces. In general, hyphen is not an allowed
+            # character in the string representation of span_ids.
+            span_id = span_id.replace("-", "")
             subject_id = pb.Evaluation.SubjectId(
                 document_retrieval_id=pb.Evaluation.SubjectId.DocumentRetrievalId(
                     document_position=document_position,
@@ -172,6 +177,11 @@ def _read_eval_fixture(eval_fixture: EvaluationFixture) -> Iterator[pb.Evaluatio
             )
         else:
             span_id = cast(str, index)
+            # Legacy fixture files contain UUID strings for span_ids. The hyphens in these
+            # strings need to be removed because we are also removing the hyphens from the
+            # span_ids of their corresponding traces. In general, hyphen is not an allowed
+            # character in the string representation of span_ids.
+            span_id = span_id.replace("-", "")
             subject_id = pb.Evaluation.SubjectId(span_id=span_id)
         yield pb.Evaluation(
             name=eval_fixture.evaluation_name,
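
A quick illustration of the normalization described in the new comments: stripping the hyphens from a legacy UUID string yields the 32-character hex form used for identifiers elsewhere in this release (the sample UUID is arbitrary):

from uuid import UUID

legacy_span_id = "c0a8a1b2-3c4d-5e6f-7a8b-9c0d1e2f3a4b"  # arbitrary legacy fixture value
normalized = legacy_span_id.replace("-", "")
print(normalized)        # c0a8a1b23c4d5e6f7a8b9c0d1e2f3a4b
print(len(normalized))   # 32 hex characters, i.e. 16 bytes
print(UUID(legacy_span_id).hex == normalized)  # True: same as the UUID's canonical hex form
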
@@ -7,7 +7,7 @@ It enables production LLMapp servers to seamlessly integrate with LLM
 observability solutions such as Arize and Phoenix.
 
 For more information on the specification, see
-https://github.com/Arize-ai/open-inference-spec
+https://github.com/Arize-ai/openinference
 """
 import json
 import logging
@@ -27,7 +27,7 @@ from typing import (
     Union,
     cast,
 )
-from uuid import UUID, uuid4
+from uuid import uuid4
 
 import llama_index
 from llama_index.callbacks.base_handler import BaseCallbackHandler
@@ -241,7 +241,7 @@ class OpenInferenceTraceCallbackHandler(BaseCallbackHandler):
     with LLM observability solutions such as Arize and Phoenix.
 
     For more information on the specification, see
-    https://github.com/Arize-ai/open-inference-spec
+    https://github.com/Arize-ai/openinference
     """
 
     def __init__(
@@ -291,7 +291,7 @@ class OpenInferenceTraceCallbackHandler(BaseCallbackHandler):
         if parent_data := self._event_id_to_event_data.get(parent_id):
             trace_id = parent_data.trace_id
         else:
-            trace_id = uuid4()
+            trace_id = TraceID(uuid4())
         event_data = self._event_id_to_event_data[event_id]
         event_data.name = event_type.value
         event_data.event_type = event_type
@@ -432,7 +432,7 @@ def _add_spans_to_tracer(
         attributes=attributes,
         events=sorted(span_exceptions, key=lambda event: event.timestamp) or None,
         conversation=None,
-        span_id=UUID(event_data.span_id),
+        span_id=SpanID(event_data.span_id),
     )
     new_parent_span_id = span.context.span_id
     for new_child_event_id in trace_map.get(event_id, []):
@@ -1,11 +1,10 @@
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Generator, List
-from uuid import UUID
 
 from llama_index.callbacks.schema import TIMESTAMP_FORMAT
 from llama_index.response.schema import StreamingResponse
 
-from phoenix.trace.schemas import SpanKind, SpanStatusCode
+from phoenix.trace.schemas import SpanID, SpanKind, SpanStatusCode
 from phoenix.trace.semantic_conventions import OUTPUT_VALUE
 from phoenix.trace.tracer import Tracer
 
@@ -60,11 +59,11 @@ class TokenGenInstrumentor:
             end_time=datetime.now(timezone.utc),
             status_code=SpanStatusCode.OK,
             status_message="",
-            parent_id=UUID(parent_id) if parent_id else None,
+            parent_id=SpanID(parent_id) if parent_id else None,
             attributes=self._event_data.attributes,
             events=[],
             conversation=None,
-            span_id=UUID(self._event_data.span_id),
+            span_id=SpanID(self._event_data.span_id),
         )
 
 
phoenix/trace/otel.py CHANGED
@@ -1,3 +1,5 @@
+import json
+from binascii import hexlify, unhexlify
 from datetime import datetime, timezone
 from types import MappingProxyType
 from typing import (
@@ -16,7 +18,6 @@ from typing import (
     Union,
     cast,
 )
-from uuid import UUID
 
 import opentelemetry.proto.trace.v1.trace_pb2 as otlp
 from opentelemetry.proto.common.v1.common_pb2 import AnyValue, ArrayValue, KeyValue
@@ -36,13 +37,16 @@ from phoenix.trace.schemas import (
     TraceID,
 )
 from phoenix.trace.semantic_conventions import (
+    DOCUMENT_METADATA,
     EXCEPTION_ESCAPED,
     EXCEPTION_MESSAGE,
     EXCEPTION_STACKTRACE,
     EXCEPTION_TYPE,
     INPUT_MIME_TYPE,
+    LLM_PROMPT_TEMPLATE_VARIABLES,
     OPENINFERENCE_SPAN_KIND,
     OUTPUT_MIME_TYPE,
+    TOOL_PARAMETERS,
 )
 
 
@@ -56,7 +60,7 @@ def decode(otlp_span: otlp.Span) -> Span:
         _decode_unix_nano(otlp_span.end_time_unix_nano) if otlp_span.end_time_unix_nano else None
     )
 
-    attributes = dict(_unflatten(_decode_key_values(otlp_span.attributes)))
+    attributes = dict(_unflatten(_load_json_strings(_decode_key_values(otlp_span.attributes))))
     span_kind = SpanKind(attributes.pop(OPENINFERENCE_SPAN_KIND, None))
 
     for mime_type in (INPUT_MIME_TYPE, OUTPUT_MIME_TYPE):
@@ -84,18 +88,12 @@ def decode(otlp_span: otlp.Span) -> Span:
     )
 
 
-def _decode_identifier(identifier: bytes) -> Optional[UUID]:
-    # This is a stopgap solution until we move away from UUIDs.
-    # The goal is to convert bytes to UUID in a deterministic way.
+def _decode_identifier(identifier: bytes) -> Optional[str]:
     if not identifier:
         return None
-    try:
-        # OTEL trace_id is 16 bytes, so it matches UUID's length, but
-        # OTEL span_id is 8 bytes, so we double up by concatenating.
-        return UUID(bytes=identifier[:8] + identifier[-8:])
-    except ValueError:
-        # Fallback to a seeding a UUID from the bytes.
-        return UUID(int=int.from_bytes(identifier, byteorder="big"))
+    # Hex encoding is used for trace and span identifiers in OTLP.
+    # See e.g. https://github.com/open-telemetry/opentelemetry-go/blob/ce3faf1488b72921921f9589048835dddfe97f33/trace/trace.go#L33 # noqa: E501
+    return hexlify(identifier).decode()
 
 
 def _decode_event(otlp_event: otlp.Span.Event) -> SpanEvent:
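
For reference, the new decoder simply hex-encodes the raw OTLP identifier bytes. A minimal sketch of the expected output, using the example identifiers from the W3C trace-context spec:

from binascii import hexlify

otlp_span_id = bytes.fromhex("00f067aa0ba902b7")                    # 8 bytes per the OTLP spec
otlp_trace_id = bytes.fromhex("4bf92f3577b34da6a3ce929d0e0e4736")   # 16 bytes

print(hexlify(otlp_span_id).decode())   # 00f067aa0ba902b7
print(hexlify(otlp_trace_id).decode())  # 4bf92f3577b34da6a3ce929d0e0e4736
# An empty identifier decodes to None in the function above.
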
@@ -149,6 +147,27 @@ def _decode_value(any_value: AnyValue) -> Any:
     assert_never(which)
 
 
+_JSON_STRING_ATTRIBUTES = (
+    DOCUMENT_METADATA,
+    LLM_PROMPT_TEMPLATE_VARIABLES,
+    TOOL_PARAMETERS,
+)
+
+
+def _load_json_strings(key_values: Iterable[Tuple[str, Any]]) -> Iterator[Tuple[str, Any]]:
+    for key, value in key_values:
+        if key.endswith(_JSON_STRING_ATTRIBUTES):
+            try:
+                dict_value = json.loads(value)
+            except Exception:
+                yield key, value
+            else:
+                if dict_value:
+                    yield key, dict_value
+        else:
+            yield key, value
+
+
 StatusMessage: TypeAlias = str
 
 _STATUS_DECODING = MappingProxyType(
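
The intent of _load_json_strings, sketched with plain stdlib calls: attributes whose keys end in one of the JSON-string conventions come back as parsed mappings, unparsable values pass through unchanged, and everything else is yielded as-is. The suffixes and attribute keys below are stand-ins for the imported semantic-convention constants:

import json

JSON_STRING_SUFFIXES = ("metadata", "template_variables", "parameters")  # stand-in suffixes

def load_json_strings(key_values):
    # Stdlib-only mirror of the decoding step added above.
    for key, value in key_values:
        if key.endswith(JSON_STRING_SUFFIXES):
            try:
                parsed = json.loads(value)
            except Exception:
                yield key, value       # not valid JSON: pass the string through
            else:
                if parsed:
                    yield key, parsed  # replace the JSON string with the parsed mapping
        else:
            yield key, value

pairs = [("document.metadata", '{"source": "a.txt"}'), ("llm.model_name", "gpt-4")]
print(dict(load_json_strings(pairs)))
# {'document.metadata': {'source': 'a.txt'}, 'llm.model_name': 'gpt-4'}
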
@@ -277,9 +296,9 @@ _BILLION = 1_000_000_000 # for converting seconds to nanoseconds
 
 
 def encode(span: Span) -> otlp.Span:
-    trace_id: bytes = span.context.trace_id.bytes
-    span_id: bytes = _span_id_to_bytes(span.context.span_id)
-    parent_span_id: bytes = _span_id_to_bytes(span.parent_id) if span.parent_id else bytes()
+    trace_id: bytes = _encode_identifier(span.context.trace_id)
+    span_id: bytes = _encode_identifier(span.context.span_id)
+    parent_span_id: bytes = _encode_identifier(span.parent_id)
 
     # floating point rounding error can cause the timestamp to be slightly different from expected
     start_time_unix_nano: int = int(span.start_time.timestamp() * _BILLION)
@@ -297,7 +316,10 @@ def encode(span: Span) -> otlp.Span:
             attributes.pop(key, None)
         elif isinstance(value, Mapping):
             attributes.pop(key, None)
-            attributes.update(_flatten_mapping(value, key))
+            if key.endswith(_JSON_STRING_ATTRIBUTES):
+                attributes[key] = json.dumps(value)
+            else:
+                attributes.update(_flatten_mapping(value, key))
         elif not isinstance(value, str) and isinstance(value, Sequence) and _has_mapping(value):
             attributes.pop(key, None)
             attributes.update(_flatten_sequence(value, key))
@@ -334,10 +356,13 @@ def _encode_status(span_status_code: SpanStatusCode, status_message: str) -> otl
     return otlp.Status(code=code, message=status_message)
 
 
-def _span_id_to_bytes(span_id: SpanID) -> bytes:
-    # Note that this is not compliant with the OTEL spec, which uses 8-byte span IDs.
-    # This is a stopgap solution for backward compatibility until we move away from UUIDs.
-    return span_id.bytes
+def _encode_identifier(identifier: Optional[str]) -> bytes:
+    if not identifier:
+        return bytes()
+    # For legacy JSONL files containing UUID strings we
+    # need to remove the hyphen.
+    identifier = identifier.replace("-", "")
+    return unhexlify(identifier)
 
 
 def _has_mapping(sequence: Sequence[Any]) -> bool:
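
The encode side is the mirror image: identifiers are now hex strings, so unhexlify turns them back into the raw bytes OTLP expects, with a hyphen-stripping pass so legacy UUID-style IDs from old JSONL files still round-trip (sample values are arbitrary):

from binascii import hexlify, unhexlify

def encode_identifier(identifier):
    # Stdlib-only mirror of the helper added above.
    if not identifier:
        return bytes()
    return unhexlify(identifier.replace("-", ""))

print(encode_identifier("00f067aa0ba902b7"))                            # b'\x00\xf0g\xaa\x0b\xa9\x02\xb7'
print(encode_identifier("c0a8a1b2-3c4d-5e6f-7a8b-9c0d1e2f3a4b").hex())  # c0a8a1b23c4d5e6f7a8b9c0d1e2f3a4b
print(hexlify(encode_identifier("00f067aa0ba902b7")).decode())          # round-trips to the original string
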
@@ -354,7 +379,10 @@ def _flatten_mapping(
     for key, value in mapping.items():
         prefixed_key = f"{prefix}.{key}"
         if isinstance(value, Mapping):
-            yield from _flatten_mapping(value, prefixed_key)
+            if key.endswith(_JSON_STRING_ATTRIBUTES):
+                yield prefixed_key, json.dumps(value)
+            else:
+                yield from _flatten_mapping(value, prefixed_key)
         elif isinstance(value, Sequence):
             yield from _flatten_sequence(value, prefixed_key)
         elif value is not None:
phoenix/trace/schemas.py CHANGED
@@ -54,8 +54,8 @@ class SpanKind(Enum):
         return None if v else cls.UNKNOWN
 
 
-TraceID = UUID
-SpanID = UUID
+TraceID = str
+SpanID = str
 AttributePrimitiveValue = Union[str, bool, float, int]
 AttributeValue = Union[AttributePrimitiveValue, List[AttributePrimitiveValue]]
 SpanAttributes = Dict[str, AttributeValue]
@@ -1,7 +1,6 @@
 import json
 from datetime import datetime
 from typing import Any, Dict, Optional
-from uuid import UUID
 
 from phoenix.trace.schemas import (
     MimeType,
@@ -10,8 +9,10 @@ from phoenix.trace.schemas import (
     SpanConversationAttributes,
     SpanEvent,
     SpanException,
+    SpanID,
     SpanKind,
     SpanStatusCode,
+    TraceID,
 )
 from phoenix.trace.semantic_conventions import (
     EXCEPTION_MESSAGE,
@@ -54,11 +55,11 @@ def json_to_span(data: Dict[str, Any]) -> Any:
     if not isinstance(context, dict):
         raise ValueError(f"context should be dict, but context={context}")
     data["context"] = SpanContext(
-        trace_id=UUID(context["trace_id"]),
-        span_id=UUID(context["span_id"]),
+        trace_id=TraceID(context["trace_id"]),
+        span_id=SpanID(context["span_id"]),
     )
     parent_id = data.get("parent_id")
-    data["parent_id"] = UUID(parent_id) if parent_id else None
+    data["parent_id"] = parent_id
     attributes = data.get("attributes")
     data["attributes"] = json_to_attributes(attributes)
     data["start_time"] = datetime.fromisoformat(data["start_time"])
phoenix/trace/tracer.py CHANGED
@@ -2,7 +2,7 @@ import logging
 from datetime import datetime
 from threading import RLock
 from typing import Any, Callable, Iterator, List, Optional, Protocol
-from uuid import UUID, uuid4
+from uuid import uuid4
 
 from .schemas import (
     Span,
@@ -13,6 +13,7 @@ from .schemas import (
     SpanID,
     SpanKind,
     SpanStatusCode,
+    TraceID,
 )
 
 logger = logging.getLogger(__name__)
@@ -68,18 +69,18 @@ class Tracer:
         status_code: SpanStatusCode = SpanStatusCode.UNSET,
         status_message: Optional[str] = "",
         parent_id: Optional[SpanID] = None,
-        trace_id: Optional[UUID] = None,
+        trace_id: Optional[TraceID] = None,
         attributes: Optional[SpanAttributes] = None,
         events: Optional[List[SpanEvent]] = None,
         conversation: Optional[SpanConversationAttributes] = None,
-        span_id: Optional[UUID] = None,
+        span_id: Optional[SpanID] = None,
     ) -> Span:
         """
         create_span creates a new span with the given name and options.
         """
         # If no trace_id is provided, generate a new one
         if trace_id is None:
-            trace_id = uuid4()
+            trace_id = TraceID(uuid4())
 
         # If no attributes are provided, create an empty dict
         if attributes is None:
@@ -91,7 +92,7 @@ class Tracer:
 
         span = Span(
             name=name,
-            context=SpanContext(trace_id=trace_id, span_id=span_id or uuid4()),
+            context=SpanContext(trace_id=trace_id, span_id=span_id or SpanID(uuid4())),
             span_kind=span_kind,
             parent_id=parent_id,
             start_time=start_time,
phoenix/version.py CHANGED
@@ -1 +1 @@
-__version__ = "2.7.0"
+__version__ = "2.8.0"