arize-phoenix 3.25.0__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of arize-phoenix might be problematic.

Files changed (113)
  1. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/METADATA +26 -4
  2. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/RECORD +80 -75
  3. phoenix/__init__.py +9 -5
  4. phoenix/config.py +109 -53
  5. phoenix/datetime_utils.py +18 -1
  6. phoenix/db/README.md +25 -0
  7. phoenix/db/__init__.py +4 -0
  8. phoenix/db/alembic.ini +119 -0
  9. phoenix/db/bulk_inserter.py +206 -0
  10. phoenix/db/engines.py +152 -0
  11. phoenix/db/helpers.py +47 -0
  12. phoenix/db/insertion/evaluation.py +209 -0
  13. phoenix/db/insertion/helpers.py +51 -0
  14. phoenix/db/insertion/span.py +142 -0
  15. phoenix/db/migrate.py +71 -0
  16. phoenix/db/migrations/env.py +121 -0
  17. phoenix/db/migrations/script.py.mako +26 -0
  18. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  19. phoenix/db/models.py +371 -0
  20. phoenix/exceptions.py +5 -1
  21. phoenix/server/api/context.py +40 -3
  22. phoenix/server/api/dataloaders/__init__.py +97 -0
  23. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  24. phoenix/server/api/dataloaders/cache/two_tier_cache.py +67 -0
  25. phoenix/server/api/dataloaders/document_evaluation_summaries.py +152 -0
  26. phoenix/server/api/dataloaders/document_evaluations.py +37 -0
  27. phoenix/server/api/dataloaders/document_retrieval_metrics.py +98 -0
  28. phoenix/server/api/dataloaders/evaluation_summaries.py +151 -0
  29. phoenix/server/api/dataloaders/latency_ms_quantile.py +198 -0
  30. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +93 -0
  31. phoenix/server/api/dataloaders/record_counts.py +125 -0
  32. phoenix/server/api/dataloaders/span_descendants.py +64 -0
  33. phoenix/server/api/dataloaders/span_evaluations.py +37 -0
  34. phoenix/server/api/dataloaders/token_counts.py +138 -0
  35. phoenix/server/api/dataloaders/trace_evaluations.py +37 -0
  36. phoenix/server/api/input_types/SpanSort.py +138 -68
  37. phoenix/server/api/routers/v1/__init__.py +11 -0
  38. phoenix/server/api/routers/v1/evaluations.py +275 -0
  39. phoenix/server/api/routers/v1/spans.py +126 -0
  40. phoenix/server/api/routers/v1/traces.py +82 -0
  41. phoenix/server/api/schema.py +112 -48
  42. phoenix/server/api/types/DocumentEvaluationSummary.py +1 -1
  43. phoenix/server/api/types/Evaluation.py +29 -12
  44. phoenix/server/api/types/EvaluationSummary.py +29 -44
  45. phoenix/server/api/types/MimeType.py +2 -2
  46. phoenix/server/api/types/Model.py +9 -9
  47. phoenix/server/api/types/Project.py +240 -171
  48. phoenix/server/api/types/Span.py +87 -131
  49. phoenix/server/api/types/Trace.py +29 -20
  50. phoenix/server/api/types/pagination.py +151 -10
  51. phoenix/server/app.py +263 -35
  52. phoenix/server/grpc_server.py +93 -0
  53. phoenix/server/main.py +75 -60
  54. phoenix/server/openapi/docs.py +218 -0
  55. phoenix/server/prometheus.py +23 -7
  56. phoenix/server/static/index.js +662 -643
  57. phoenix/server/telemetry.py +68 -0
  58. phoenix/services.py +4 -0
  59. phoenix/session/client.py +34 -30
  60. phoenix/session/data_extractor.py +8 -3
  61. phoenix/session/session.py +176 -155
  62. phoenix/settings.py +13 -0
  63. phoenix/trace/attributes.py +349 -0
  64. phoenix/trace/dsl/README.md +116 -0
  65. phoenix/trace/dsl/filter.py +660 -192
  66. phoenix/trace/dsl/helpers.py +24 -5
  67. phoenix/trace/dsl/query.py +562 -185
  68. phoenix/trace/fixtures.py +69 -7
  69. phoenix/trace/otel.py +44 -200
  70. phoenix/trace/schemas.py +14 -8
  71. phoenix/trace/span_evaluations.py +5 -2
  72. phoenix/utilities/__init__.py +0 -26
  73. phoenix/utilities/span_store.py +0 -23
  74. phoenix/version.py +1 -1
  75. phoenix/core/project.py +0 -773
  76. phoenix/core/traces.py +0 -96
  77. phoenix/datasets/dataset.py +0 -214
  78. phoenix/datasets/fixtures.py +0 -24
  79. phoenix/datasets/schema.py +0 -31
  80. phoenix/experimental/evals/__init__.py +0 -73
  81. phoenix/experimental/evals/evaluators.py +0 -413
  82. phoenix/experimental/evals/functions/__init__.py +0 -4
  83. phoenix/experimental/evals/functions/classify.py +0 -453
  84. phoenix/experimental/evals/functions/executor.py +0 -353
  85. phoenix/experimental/evals/functions/generate.py +0 -138
  86. phoenix/experimental/evals/functions/processing.py +0 -76
  87. phoenix/experimental/evals/models/__init__.py +0 -14
  88. phoenix/experimental/evals/models/anthropic.py +0 -175
  89. phoenix/experimental/evals/models/base.py +0 -170
  90. phoenix/experimental/evals/models/bedrock.py +0 -221
  91. phoenix/experimental/evals/models/litellm.py +0 -134
  92. phoenix/experimental/evals/models/openai.py +0 -453
  93. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  94. phoenix/experimental/evals/models/vertex.py +0 -173
  95. phoenix/experimental/evals/models/vertexai.py +0 -186
  96. phoenix/experimental/evals/retrievals.py +0 -96
  97. phoenix/experimental/evals/templates/__init__.py +0 -50
  98. phoenix/experimental/evals/templates/default_templates.py +0 -472
  99. phoenix/experimental/evals/templates/template.py +0 -195
  100. phoenix/experimental/evals/utils/__init__.py +0 -172
  101. phoenix/experimental/evals/utils/threads.py +0 -27
  102. phoenix/server/api/routers/evaluation_handler.py +0 -110
  103. phoenix/server/api/routers/span_handler.py +0 -70
  104. phoenix/server/api/routers/trace_handler.py +0 -60
  105. phoenix/storage/span_store/__init__.py +0 -23
  106. phoenix/storage/span_store/text_file.py +0 -85
  107. phoenix/trace/dsl/missing.py +0 -60
  108. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/WHEEL +0 -0
  109. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/IP_NOTICE +0 -0
  110. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/LICENSE +0 -0
  111. /phoenix/{datasets → db/insertion}/__init__.py +0 -0
  112. /phoenix/{experimental → db/migrations}/__init__.py +0 -0
  113. /phoenix/{storage → server/openapi}/__init__.py +0 -0
phoenix/trace/fixtures.py CHANGED
@@ -1,11 +1,15 @@
-from dataclasses import dataclass, field
-from typing import Iterable, Iterator, List, NamedTuple, Optional, Tuple, cast
+from binascii import hexlify
+from dataclasses import dataclass, field, replace
+from datetime import datetime, timezone
+from random import getrandbits
+from typing import Dict, Iterable, Iterator, List, NamedTuple, Optional, Tuple, cast
 from urllib import request
 
 import pandas as pd
 from google.protobuf.wrappers_pb2 import DoubleValue, StringValue
 
 import phoenix.trace.v1 as pb
+from phoenix.trace.schemas import Span
 from phoenix.trace.trace_dataset import TraceDataset
 from phoenix.trace.utils import json_lines_to_df
 
@@ -105,7 +109,7 @@ TRACES_FIXTURES: List[TracesFixture] = [
 NAME_TO_TRACES_FIXTURE = {fixture.name: fixture for fixture in TRACES_FIXTURES}
 
 
-def _get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
+def get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
     """
     Returns the fixture whose name matches the input name.
 
@@ -120,7 +124,7 @@ def _get_trace_fixture_by_name(fixture_name: str) -> TracesFixture:
     return NAME_TO_TRACES_FIXTURE[fixture_name]
 
 
-def _download_traces_fixture(
+def download_traces_fixture(
     fixture: TracesFixture,
     host: Optional[str] = "https://storage.googleapis.com/",
     bucket: Optional[str] = "arize-assets",
@@ -138,12 +142,12 @@ def load_example_traces(use_case: str) -> TraceDataset:
     """
     Loads a trace dataframe by name.
     """
-    fixture = _get_trace_fixture_by_name(use_case)
-    return TraceDataset(json_lines_to_df(_download_traces_fixture(fixture)))
+    fixture = get_trace_fixture_by_name(use_case)
+    return TraceDataset(json_lines_to_df(download_traces_fixture(fixture)))
 
 
 def get_evals_from_fixture(use_case: str) -> Iterator[pb.Evaluation]:
-    fixture = _get_trace_fixture_by_name(use_case)
+    fixture = get_trace_fixture_by_name(use_case)
     for eval_fixture in fixture.evaluation_fixtures:
         yield from _read_eval_fixture(eval_fixture)
 
@@ -195,3 +199,61 @@ def _url(
     prefix: Optional[str] = "phoenix/traces/",
 ) -> str:
     return f"{host}{bucket}/{prefix}{file_name}"
+
+
+def reset_fixture_span_ids_and_timestamps(
+    spans: Iterable[Span],
+    evals: Iterable[pb.Evaluation] = (),
+) -> Tuple[List[Span], List[pb.Evaluation]]:
+    old_spans, old_evals = list(spans), list(evals)
+    new_trace_ids: Dict[str, str] = {}
+    new_span_ids: Dict[str, str] = {}
+    for old_span in old_spans:
+        new_trace_ids[old_span.context.trace_id] = _new_trace_id()
+        new_span_ids[old_span.context.span_id] = _new_span_id()
+        if old_span.parent_id:
+            new_span_ids[old_span.parent_id] = _new_span_id()
+    for old_eval in old_evals:
+        subject_id = old_eval.subject_id
+        if trace_id := subject_id.trace_id:
+            new_trace_ids[trace_id] = _new_trace_id()
+        elif span_id := subject_id.span_id:
+            new_span_ids[span_id] = _new_span_id()
+        elif span_id := subject_id.document_retrieval_id.span_id:
+            new_span_ids[span_id] = _new_span_id()
+    max_end_time = max(old_span.end_time for old_span in old_spans)
+    time_diff = datetime.now(timezone.utc) - max_end_time
+    new_spans: List[Span] = []
+    new_evals: List[pb.Evaluation] = []
+    for old_span in old_spans:
+        new_trace_id = new_trace_ids[old_span.context.trace_id]
+        new_span_id = new_span_ids[old_span.context.span_id]
+        new_parent_id = new_span_ids[old_span.parent_id] if old_span.parent_id else None
+        new_span = replace(
+            old_span,
+            context=replace(old_span.context, trace_id=new_trace_id, span_id=new_span_id),
+            parent_id=new_parent_id,
+            start_time=old_span.start_time + time_diff,
+            end_time=old_span.end_time + time_diff,
+        )
+        new_spans.append(new_span)
+    for old_eval in old_evals:
+        new_eval = pb.Evaluation()
+        new_eval.CopyFrom(old_eval)
+        subject_id = new_eval.subject_id
+        if trace_id := subject_id.trace_id:
+            subject_id.trace_id = new_trace_ids[trace_id]
+        elif span_id := subject_id.span_id:
+            subject_id.span_id = new_span_ids[span_id]
+        elif span_id := subject_id.document_retrieval_id.span_id:
+            subject_id.document_retrieval_id.span_id = new_span_ids[span_id]
+        new_evals.append(new_eval)
+    return new_spans, new_evals
+
+
+def _new_trace_id() -> str:
+    return hexlify(getrandbits(128).to_bytes(16, "big")).decode()
+
+
+def _new_span_id() -> str:
+    return hexlify(getrandbits(64).to_bytes(8, "big")).decode()
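The helpers `get_trace_fixture_by_name` and `download_traces_fixture` lose their leading underscores and become public, and the new `reset_fixture_span_ids_and_timestamps` mints fresh IDs and shifts timestamps so a canned fixture can be re-ingested as if it were live data. A minimal sketch of how these pieces compose; the fixture name and the `TraceDataset.to_spans()` accessor are assumptions, not taken from this diff:

```python
from phoenix.trace.fixtures import (
    download_traces_fixture,
    get_evals_from_fixture,
    get_trace_fixture_by_name,
    reset_fixture_span_ids_and_timestamps,
)
from phoenix.trace.trace_dataset import TraceDataset
from phoenix.trace.utils import json_lines_to_df

# Download a bundled fixture by name ("llama_index_rag" is a guess at a valid name).
fixture = get_trace_fixture_by_name("llama_index_rag")
dataset = TraceDataset(json_lines_to_df(download_traces_fixture(fixture)))

# Mint fresh 128-bit trace IDs / 64-bit span IDs and shift every timestamp forward
# so the most recent span ends "now" (see time_diff in the function above).
spans, evals = reset_fixture_span_ids_and_timestamps(
    dataset.to_spans(),  # assumed accessor yielding an Iterable[Span]
    get_evals_from_fixture("llama_index_rag"),
)
```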
phoenix/trace/otel.py CHANGED
@@ -1,39 +1,44 @@
-import inspect
 import json
 from binascii import hexlify, unhexlify
 from datetime import datetime, timezone
 from types import MappingProxyType
 from typing import (
     Any,
-    DefaultDict,
     Dict,
     Iterable,
     Iterator,
-    List,
     Mapping,
     Optional,
     Sequence,
-    Set,
     SupportsFloat,
     Tuple,
-    Union,
     cast,
 )
 
 import numpy as np
 import opentelemetry.proto.trace.v1.trace_pb2 as otlp
-from openinference.semconv import trace
-from openinference.semconv.trace import DocumentAttributes, SpanAttributes
+from openinference.semconv.trace import (
+    DocumentAttributes,
+    OpenInferenceMimeTypeValues,
+    SpanAttributes,
+)
 from opentelemetry.proto.common.v1.common_pb2 import AnyValue, ArrayValue, KeyValue
 from opentelemetry.util.types import Attributes, AttributeValue
 from typing_extensions import TypeAlias, assert_never
 
+from phoenix.trace.attributes import (
+    JSON_STRING_ATTRIBUTES,
+    flatten,
+    get_attribute_value,
+    has_mapping,
+    load_json_strings,
+    unflatten,
+)
 from phoenix.trace.schemas import (
     EXCEPTION_ESCAPED,
     EXCEPTION_MESSAGE,
     EXCEPTION_STACKTRACE,
     EXCEPTION_TYPE,
-    MimeType,
     Span,
     SpanContext,
     SpanEvent,
@@ -61,20 +66,20 @@ def decode_otlp_span(otlp_span: otlp.Span) -> Span:
     parent_id = _decode_identifier(otlp_span.parent_span_id)
 
     start_time = _decode_unix_nano(otlp_span.start_time_unix_nano)
-    end_time = (
-        _decode_unix_nano(otlp_span.end_time_unix_nano) if otlp_span.end_time_unix_nano else None
-    )
-
-    attributes = dict(_unflatten(_load_json_strings(_decode_key_values(otlp_span.attributes))))
-    span_kind = SpanKind(attributes.pop(OPENINFERENCE_SPAN_KIND, None))
+    end_time = _decode_unix_nano(otlp_span.end_time_unix_nano)
 
-    for mime_type in (INPUT_MIME_TYPE, OUTPUT_MIME_TYPE):
-        if mime_type in attributes:
-            attributes[mime_type] = MimeType(attributes[mime_type])
+    attributes = unflatten(load_json_strings(_decode_key_values(otlp_span.attributes)))
+    span_kind = SpanKind(get_attribute_value(attributes, OPENINFERENCE_SPAN_KIND))
 
     status_code, status_message = _decode_status(otlp_span.status)
     events = [_decode_event(event) for event in otlp_span.events]
 
+    if (input_value := get_attribute_value(attributes, INPUT_VALUE)) and not isinstance(
+        input_value, str
+    ):
+        attributes["input"]["value"] = json.dumps(input_value)
+        attributes["input"]["mime_type"] = OpenInferenceMimeTypeValues.JSON.value
+
     return Span(
         name=otlp_span.name,
         context=SpanContext(
@@ -152,28 +157,6 @@ def _decode_value(any_value: AnyValue) -> Any:
     assert_never(which)
 
 
-_JSON_STRING_ATTRIBUTES = (
-    DOCUMENT_METADATA,
-    LLM_PROMPT_TEMPLATE_VARIABLES,
-    METADATA,
-    TOOL_PARAMETERS,
-)
-
-
-def _load_json_strings(key_values: Iterable[Tuple[str, Any]]) -> Iterator[Tuple[str, Any]]:
-    for key, value in key_values:
-        if key.endswith(_JSON_STRING_ATTRIBUTES):
-            try:
-                dict_value = json.loads(value)
-            except Exception:
-                yield key, value
-            else:
-                if dict_value:
-                    yield key, dict_value
-                else:
-                    yield key, value
-
-
 StatusMessage: TypeAlias = str
 
 _STATUS_DECODING = MappingProxyType(
@@ -190,120 +173,6 @@ def _decode_status(otlp_status: otlp.Status) -> Tuple[SpanStatusCode, StatusMessage]:
     return status_code, otlp_status.message
 
 
-_SEMANTIC_CONVENTIONS: List[str] = sorted(
-    (
-        getattr(klass, attr)
-        for name in dir(trace)
-        if name.endswith("Attributes") and inspect.isclass(klass := getattr(trace, name))
-        for attr in dir(klass)
-        if attr.isupper()
-    ),
-    reverse=True,
-)  # sorted so the longer strings go first
-
-
-def _semantic_convention_prefix_partition(key: str, separator: str = ".") -> Tuple[str, str, str]:
-    """Return the longest prefix of `key` that is a semantic convention, and the remaining suffix
-    separated by `.`. For example, if `key` is "retrieval.documents.2.document.score", return
-    ("retrieval.documents", ".", "2.document.score"). The return signature is based on Python's
-    `.partition` method for strings.
-    """
-    for prefix in _SEMANTIC_CONVENTIONS:
-        if key == prefix:
-            return key, "", ""
-        if key.startswith(prefix) and key[len(prefix) :].startswith(separator):
-            return prefix, separator, key[len(prefix) + len(separator) :]
-    return "", "", ""
-
-
-class _Trie(DefaultDict[Union[str, int], "_Trie"]):
-    """Prefix Tree with special handling for indices (i.e. all-digit keys)."""
-
-    def __init__(self) -> None:
-        super().__init__(_Trie)
-        self.value: Any = None
-        self.indices: Set[int] = set()
-        self.branches: Set[Union[str, int]] = set()
-
-    def set_value(self, value: Any) -> None:
-        self.value = value
-        # value and indices must not coexist
-        self.branches.update(self.indices)
-        self.indices.clear()
-
-    def add_index(self, index: int) -> "_Trie":
-        if self.value is not None:
-            self.branches.add(index)
-        elif index not in self.branches:
-            self.indices.add(index)
-        return self[index]
-
-    def add_branch(self, branch: Union[str, int]) -> "_Trie":
-        if branch in self.indices:
-            self.indices.discard(cast(int, branch))
-        self.branches.add(branch)
-        return self[branch]
-
-
-# FIXME: Ideally we should not need something so complicated as a Trie, but it's useful here
-# for backward compatibility reasons regarding some deeply nested objects such as TOOL_PARAMETERS.
-# In the future, we should `json_dumps` them and not let things get too deeply nested.
-def _build_trie(
-    key_value_pairs: Iterable[Tuple[str, Any]],
-    separator: str = ".",
-) -> _Trie:
-    """Build a Trie (a.k.a. prefix tree) from `key_value_pairs`, by partitioning the keys by
-    separator. Each partition is a branch in the Trie. Special handling is done for partitions
-    that are all digits, e.g. "0", "12", etc., which are converted to integers and collected
-    as indices.
-    """
-    trie = _Trie()
-    for key, value in key_value_pairs:
-        if value is None:
-            continue
-        t = trie
-        while True:
-            prefix, _, suffix = _semantic_convention_prefix_partition(key, separator)
-            if prefix:
-                t = t.add_branch(prefix)
-            else:
-                prefix, _, suffix = key.partition(separator)
-                if prefix.isdigit():
-                    index = int(prefix)
-                    t = t.add_index(index) if suffix else t.add_branch(index)
-                else:
-                    t = t.add_branch(prefix)
-            if not suffix:
-                break
-            key = suffix
-        t.set_value(value)
-    return trie
-
-
-def _walk(trie: _Trie, prefix: str = "") -> Iterator[Tuple[str, Any]]:
-    if trie.value is not None:
-        yield prefix, trie.value
-    elif prefix and trie.indices:
-        yield prefix, [dict(_walk(trie[index])) for index in sorted(trie.indices)]
-    elif trie.indices:
-        for index in trie.indices:
-            yield from _walk(trie[index], prefix=f"{index}")
-    elif prefix:
-        yield prefix, dict(_walk(trie))
-        return
-    for branch in trie.branches:
-        new_prefix = f"{prefix}.{branch}" if prefix else f"{branch}"
-        yield from _walk(trie[branch], new_prefix)
-
-
-def _unflatten(
-    key_value_pairs: Iterable[Tuple[str, Any]],
-    separator: str = ".",
-) -> Iterator[Tuple[str, Any]]:
-    trie = _build_trie(key_value_pairs, separator)
-    yield from _walk(trie)
-
-
 _BILLION = 1_000_000_000  # for converting seconds to nanoseconds
 
 
@@ -316,11 +185,7 @@ def encode_span_to_otlp(span: Span) -> otlp.Span:
     start_time_unix_nano: int = int(span.start_time.timestamp() * _BILLION)
     end_time_unix_nano: int = int(span.end_time.timestamp() * _BILLION) if span.end_time else 0
 
-    attributes: Dict[str, Any] = span.attributes.copy()
-
-    for mime_type in (INPUT_MIME_TYPE, OUTPUT_MIME_TYPE):
-        if mime_type in attributes:
-            attributes[mime_type] = attributes[mime_type].value
+    attributes: Dict[str, Any] = dict(span.attributes)
 
     for key, value in span.attributes.items():
         if value is None:
@@ -328,19 +193,34 @@ def encode_span_to_otlp(span: Span) -> otlp.Span:
             attributes.pop(key, None)
         elif isinstance(value, Mapping):
             attributes.pop(key, None)
-            if key.endswith(_JSON_STRING_ATTRIBUTES):
+            if key.endswith(JSON_STRING_ATTRIBUTES):
                 attributes[key] = json.dumps(value)
             else:
-                attributes.update(_flatten_mapping(value, key))
+                attributes.update(
+                    flatten(
+                        value,
+                        prefix=key,
+                        recurse_on_sequence=True,
+                        json_string_attributes=JSON_STRING_ATTRIBUTES,
+                    )
+                )
         elif (
             not isinstance(value, str)
            and (isinstance(value, Sequence) or isinstance(value, np.ndarray))
-            and _has_mapping(value)
+            and has_mapping(value)
         ):
             attributes.pop(key, None)
-            attributes.update(_flatten_sequence(value, key))
-
-    attributes[OPENINFERENCE_SPAN_KIND] = span.span_kind.value
+            attributes.update(
+                flatten(
+                    value,
+                    prefix=key,
+                    recurse_on_sequence=True,
+                    json_string_attributes=JSON_STRING_ATTRIBUTES,
+                )
+            )
+
+    if OPENINFERENCE_SPAN_KIND not in attributes:
+        attributes[OPENINFERENCE_SPAN_KIND] = span.span_kind.value
 
     status = _encode_status(span.status_code, span.status_message)
     events = map(_encode_event, span.events)
@@ -381,42 +261,6 @@ def _encode_identifier(identifier: Optional[str]) -> bytes:
     return unhexlify(identifier)
 
 
-def _has_mapping(sequence: Sequence[Any]) -> bool:
-    for item in sequence:
-        if isinstance(item, Mapping):
-            return True
-    return False
-
-
-def _flatten_mapping(
-    mapping: Mapping[str, Any],
-    prefix: str,
-) -> Iterator[Tuple[str, Any]]:
-    for key, value in mapping.items():
-        prefixed_key = f"{prefix}.{key}"
-        if isinstance(value, Mapping):
-            if key.endswith(_JSON_STRING_ATTRIBUTES):
-                yield prefixed_key, json.dumps(value)
-            else:
-                yield from _flatten_mapping(value, prefixed_key)
-        elif isinstance(value, Sequence):
-            yield from _flatten_sequence(value, prefixed_key)
-        elif value is not None:
-            yield prefixed_key, value
-
-
-def _flatten_sequence(
-    sequence: Sequence[Any],
-    prefix: str,
-) -> Iterator[Tuple[str, Any]]:
-    if isinstance(sequence, str) or not _has_mapping(sequence):
-        yield prefix, sequence
-    for idx, obj in enumerate(sequence):
-        if not isinstance(obj, Mapping):
-            continue
-        yield from _flatten_mapping(obj, f"{prefix}.{idx}")
-
-
 def _encode_event(event: SpanEvent) -> otlp.Span.Event:
     return otlp.Span.Event(
         name=event.name,
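The Trie-based `_unflatten`, the `_flatten_*` helpers, and `_load_json_strings` move out of this module into `phoenix.trace.attributes` as `unflatten`, `flatten`, and `load_json_strings`. A sketch of the contract those names imply, based only on how they are called in this diff; the example pairs are illustrative, `unflatten` is assumed to return a nested mapping (since `decode_otlp_span` indexes into its result), and calling `flatten` without a prefix is assumed to be allowed:

```python
from phoenix.trace.attributes import flatten, get_attribute_value, unflatten

# Dotted keys unflatten into a nested mapping; all-digit segments become list indices,
# e.g. "retrieval.documents.0.document.score" -> attributes["retrieval"]["documents"][0].
pairs = [
    ("input.value", "What is Phoenix?"),
    ("retrieval.documents.0.document.score", 0.91),
    ("retrieval.documents.1.document.score", 0.42),
]
attributes = unflatten(pairs)

# Dotted-path lookup against the nested result, as used above for
# OPENINFERENCE_SPAN_KIND and INPUT_VALUE in decode_otlp_span.
assert get_attribute_value(attributes, "input.value") == "What is Phoenix?"

# flatten() is the inverse direction used by encode_span_to_otlp;
# recurse_on_sequence=True descends into lists of mappings. It is assumed to
# yield (key, value) pairs, since the encoder passes it to attributes.update().
flat = dict(flatten(attributes, recurse_on_sequence=True))
```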
phoenix/trace/schemas.py CHANGED
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, List, Mapping, NamedTuple, Optional
 from uuid import UUID
 
 EXCEPTION_TYPE = "exception.type"
@@ -47,16 +47,14 @@ class SpanKind(Enum):
 
     @classmethod
     def _missing_(cls, v: Any) -> Optional["SpanKind"]:
-        if v and isinstance(v, str) and not v.isupper():
+        if v and isinstance(v, str) and v.isascii() and not v.isupper():
             return cls(v.upper())
-        return None if v else cls.UNKNOWN
+        return cls.UNKNOWN
 
 
 TraceID = str
 SpanID = str
-AttributePrimitiveValue = Union[str, bool, float, int]
-AttributeValue = Union[AttributePrimitiveValue, List[AttributePrimitiveValue]]
-SpanAttributes = Dict[str, AttributeValue]
+SpanAttributes = Mapping[str, Any]
 
 
 @dataclass(frozen=True)
@@ -73,7 +71,7 @@ class SpanConversationAttributes:
 
 
 @dataclass(frozen=True)
-class SpanEvent(Dict[str, Any]):
+class SpanEvent:
     """
     A Span Event can be thought of as a structured log message (or annotation)
     on a Span, typically used to denote a meaningful, singular point in time
@@ -142,7 +140,7 @@ class Span:
     "If the parent_id is None, this is the root span"
     parent_id: Optional[SpanID]
     start_time: datetime
-    end_time: Optional[datetime]
+    end_time: datetime
     status_code: SpanStatusCode
     status_message: str
     """
@@ -202,3 +200,11 @@ class ComputedAttributes(Enum):
     CUMULATIVE_LLM_TOKEN_COUNT_COMPLETION = "cumulative_token_count.completion"
     ERROR_COUNT = "error_count"
     CUMULATIVE_ERROR_COUNT = "cumulative_error_count"
+
+
+class ComputedValues(NamedTuple):
+    latency_ms: float
+    cumulative_error_count: int
+    cumulative_llm_token_count_prompt: int
+    cumulative_llm_token_count_completion: int
+    cumulative_llm_token_count_total: int
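Besides making `end_time` required and widening `SpanAttributes` to `Mapping[str, Any]`, the diff changes how `SpanKind` coerces unrecognized values: `_missing_` now guards on `v.isascii()` and always falls back to `UNKNOWN`, where the old code returned `None` for truthy non-matching strings, which made the enum constructor raise. A sketch of the resulting behavior, assuming the unchanged enum members (e.g. `LLM`, `UNKNOWN`) from the rest of the file:

```python
from phoenix.trace.schemas import SpanKind

assert SpanKind("llm") is SpanKind.LLM        # lowercase ASCII is upper-cased and retried
assert SpanKind(None) is SpanKind.UNKNOWN     # falsy values still coerce to UNKNOWN
assert SpanKind("bogus") is SpanKind.UNKNOWN  # previously raised ValueError via `return None`
```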
phoenix/trace/span_evaluations.py CHANGED
@@ -12,6 +12,7 @@ from pandas.api.types import is_integer_dtype, is_numeric_dtype, is_string_dtype
 from pyarrow import RecordBatchStreamReader, Schema, Table, parquet
 
 from phoenix.config import TRACE_DATASET_DIR
+from phoenix.exceptions import PhoenixEvaluationNameIsMissing
 from phoenix.trace.errors import InvalidParquetMetadataError
 
 EVAL_NAME_COLUMN_PREFIX = "eval."
@@ -335,8 +336,10 @@ def _parse_schema_metadata(schema: Schema) -> Tuple[UUID, str, Type[Evaluations]]:
         arize_metadata = json.loads(metadata[b"arize"])
         eval_classes = {subclass.__name__: subclass for subclass in Evaluations.__subclasses__()}
         eval_id = UUID(arize_metadata["eval_id"])
-        if not isinstance((eval_name := arize_metadata["eval_name"]), str):
-            raise ValueError('Arize metadata must contain a string value for key "eval_name"')
+        if not isinstance((eval_name := arize_metadata["eval_name"]), str) or not eval_name.strip():
+            raise PhoenixEvaluationNameIsMissing(
+                'Arize metadata must contain a non-empty string value for key "eval_name"'
+            )
         evaluations_cls = eval_classes[arize_metadata["eval_type"]]
         return eval_id, eval_name, evaluations_cls
     except Exception as err:
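The metadata check is tightened: an `eval_name` that is non-string or whitespace-only now raises the dedicated `PhoenixEvaluationNameIsMissing` instead of a bare `ValueError`. A self-contained sketch of the same validation logic; the `_check_eval_name` helper is illustrative, not part of the package:

```python
from phoenix.exceptions import PhoenixEvaluationNameIsMissing  # per the import added above


def _check_eval_name(arize_metadata: dict) -> str:
    # Mirrors the validation in _parse_schema_metadata: the value must be a
    # non-empty, non-whitespace string.
    if not isinstance((eval_name := arize_metadata["eval_name"]), str) or not eval_name.strip():
        raise PhoenixEvaluationNameIsMissing(
            'Arize metadata must contain a non-empty string value for key "eval_name"'
        )
    return eval_name


print(_check_eval_name({"eval_name": "hallucination"}))  # prints "hallucination"
# _check_eval_name({"eval_name": "   "})  # raises PhoenixEvaluationNameIsMissing
```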
@@ -1,26 +0,0 @@
1
- from datetime import datetime
2
- from typing import List, Optional
3
-
4
- import pandas as pd
5
-
6
- from phoenix.core.project import Project
7
- from phoenix.trace.dsl import SpanQuery
8
-
9
-
10
- def query_spans(
11
- project: Optional[Project],
12
- *queries: SpanQuery,
13
- start_time: Optional[datetime] = None,
14
- stop_time: Optional[datetime] = None,
15
- root_spans_only: Optional[bool] = None,
16
- ) -> List[pd.DataFrame]:
17
- if not queries or not project:
18
- return []
19
- spans = tuple(
20
- project.get_spans(
21
- start_time=start_time,
22
- stop_time=stop_time,
23
- root_spans_only=root_spans_only,
24
- )
25
- )
26
- return [query(spans) for query in queries]
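With the in-memory `phoenix.core.project.Project` store removed (see file 75 in the list above), this module-level `query_spans` helper goes with it. In 4.x, span queries are expected to go through the HTTP client instead; a sketch assuming the `Client.query_spans` API in `phoenix.session.client`, which this diff touches but does not show, and whose exact `SpanQuery` usage here is an assumption:

```python
import phoenix as px
from phoenix.trace.dsl import SpanQuery

# Connects to a running Phoenix server (endpoint resolved from env/defaults).
client = px.Client()
df = client.query_spans(SpanQuery().select("name", "span_kind"))
```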
@@ -1,23 +0,0 @@
1
- from typing import Optional
2
-
3
- from phoenix.config import get_env_span_storage_type, get_storage_dir
4
- from phoenix.core.traces import Traces
5
- from phoenix.storage.span_store import SPAN_STORE_FACTORIES, SpanStore
6
- from phoenix.trace.otel import decode_otlp_span
7
- from phoenix.utilities.project import get_project_name
8
-
9
-
10
- def get_span_store() -> Optional[SpanStore]:
11
- if span_store_type := get_env_span_storage_type():
12
- span_store_factory = SPAN_STORE_FACTORIES[span_store_type]
13
- return span_store_factory(get_storage_dir())
14
- return None
15
-
16
-
17
- def load_traces_data_from_store(traces: Traces, span_store: SpanStore) -> None:
18
- for traces_data in span_store.load():
19
- for resource_spans in traces_data.resource_spans:
20
- project_name = get_project_name(resource_spans.resource.attributes)
21
- for scope_span in resource_spans.scope_spans:
22
- for span in scope_span.spans:
23
- traces.put(decode_otlp_span(span), project_name=project_name)
phoenix/version.py CHANGED
@@ -1 +1 @@
-__version__ = "3.25.0"
+__version__ = "4.0.1"