arize-phoenix 8.31.0__py3-none-any.whl → 8.32.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. See the package's registry page for more details.

phoenix/session/client.py CHANGED
@@ -122,6 +122,7 @@ class Client(TraceDataExtractor):
122
122
  # Deprecated
123
123
  stop_time: Optional[datetime] = None,
124
124
  timeout: Optional[int] = DEFAULT_TIMEOUT_IN_SECONDS,
125
+ orphan_span_as_root_span: bool = True,
125
126
  ) -> Optional[Union[pd.DataFrame, list[pd.DataFrame]]]:
126
127
  """
127
128
  Queries spans from the Phoenix server or active session based on specified criteria.
@@ -131,9 +132,12 @@ class Client(TraceDataExtractor):
131
132
  start_time (datetime, optional): The start time for the query range. Default None.
132
133
  end_time (datetime, optional): The end time for the query range. Default None.
133
134
  root_spans_only (bool, optional): If True, only root spans are returned. Default None.
135
+ orphan_span_as_root_span (bool): If True, orphan spans are treated as root spans. An
136
+ orphan span has a non-null `parent_id`, but no span with that ID currently exists
137
+ in the database. Default True.
134
138
  project_name (str, optional): The project name to query spans for. This can be set
135
139
  using environment variables. If not provided, falls back to the default project.
136
- timeout (int, optional): The number of seconds to wait for the server to respond.
140
+ timeout (int, optional): The number of seconds to wait for the server to respond.
137
141
 
138
142
  Returns:
139
143
  Union[pd.DataFrame, list[pd.DataFrame]]:
@@ -163,6 +167,7 @@ class Client(TraceDataExtractor):
163
167
  "end_time": _to_iso_format(normalize_datetime(end_time)),
164
168
  "limit": limit,
165
169
  "root_spans_only": root_spans_only,
170
+ "orphan_span_as_root_span": orphan_span_as_root_span,
166
171
  },
167
172
  timeout=timeout,
168
173
  )
@@ -5,13 +5,13 @@ from dataclasses import dataclass, field, replace
5
5
  from datetime import datetime
6
6
  from functools import cached_property
7
7
  from itertools import chain
8
- from random import randint, random
8
+ from secrets import token_hex
9
9
  from types import MappingProxyType
10
10
  from typing import Any, Optional, cast
11
11
 
12
12
  import pandas as pd
13
13
  from openinference.semconv.trace import SpanAttributes
14
- from sqlalchemy import JSON, Column, Label, Select, SQLColumnExpression, and_, func, select
14
+ from sqlalchemy import JSON, Column, Label, Select, SQLColumnExpression, and_, func, or_, select
15
15
  from sqlalchemy.dialects.postgresql import aggregate_order_by
16
16
  from sqlalchemy.orm import Session
17
17
  from typing_extensions import assert_never
@@ -94,7 +94,7 @@ class _HasTmpSuffix(_Base):
94
94
 
95
95
  def __post_init__(self) -> None:
96
96
  super().__post_init__()
97
- object.__setattr__(self, "_tmp_suffix", f"{randint(0, 10**6):06d}")
97
+ object.__setattr__(self, "_tmp_suffix", _randomize())
98
98
 
99
99
  def _remove_tmp_suffix(self, name: str) -> str:
100
100
  if name.endswith(self._tmp_suffix):
@@ -126,7 +126,7 @@ class Explosion(_HasTmpSuffix, Projection):
126
126
  position_prefix = _PRESCRIBED_POSITION_PREFIXES.get(self.key, "")
127
127
  object.__setattr__(self, "_position_prefix", position_prefix)
128
128
  object.__setattr__(self, "_primary_index", Projection(self.primary_index_key))
129
- object.__setattr__(self, "_array_tmp_col_label", f"__array_tmp_col_{random()}")
129
+ object.__setattr__(self, "_array_tmp_col_label", _randomize("__array_tmp_col"))
130
130
 
131
131
  @cached_property
132
132
  def index_keys(self) -> list[str]:
@@ -290,7 +290,7 @@ class Concatenation(_HasTmpSuffix, Projection):
290
290
 
291
291
  def __post_init__(self) -> None:
292
292
  super().__post_init__()
293
- object.__setattr__(self, "_array_tmp_col_label", f"__array_tmp_col_{random()}")
293
+ object.__setattr__(self, "_array_tmp_col_label", _randomize("__array_tmp_col"))
294
294
 
295
295
  def with_separator(self, separator: str = "\n\n") -> "Concatenation":
296
296
  return replace(self, separator=separator)
@@ -440,7 +440,7 @@ class SpanQuery(_HasTmpSuffix):
440
440
 
441
441
  def __post_init__(self) -> None:
442
442
  super().__post_init__()
443
- object.__setattr__(self, "_pk_tmp_col_label", f"__pk_tmp_col_{random()}")
443
+ object.__setattr__(self, "_pk_tmp_col_label", _randomize("__pk_tmp_col"))
444
444
 
445
445
  def __bool__(self) -> bool:
446
446
  return bool(self._select) or bool(self._filter) or bool(self._explode) or bool(self._concat)
@@ -511,6 +511,38 @@ class SpanQuery(_HasTmpSuffix):
511
511
  *,
512
512
  orphan_span_as_root_span: bool = True,
513
513
  ) -> pd.DataFrame:
514
+ """Execute the span query and return results as a pandas DataFrame.
515
+
516
+ This method executes the configured span query against the database and returns
517
+ the results as a pandas DataFrame. The query can include projections, filters,
518
+ explosions, and concatenations of span data.
519
+
520
+ Args:
521
+ session (Session): The SQLAlchemy database session to use for the query.
522
+ project_name (str, optional): The name of the project to query spans for.
523
+ If not provided, uses the default project name. Default None.
524
+ start_time (datetime, optional): The start time for the query range. Default None.
525
+ end_time (datetime, optional): The end time for the query range. Default None.
526
+ limit (int, optional): Maximum number of spans to return. Defaults to DEFAULT_SPAN_LIMIT.
527
+ root_spans_only (bool, optional): If True, only root spans are returned. Default None.
528
+ stop_time (datetime, optional): Deprecated. Use end_time instead. Default None.
529
+ orphan_span_as_root_span (bool): If True, orphan spans are treated as root spans. An
530
+ orphan span has a non-null `parent_id`, but no span with that ID currently exists
531
+ in the database. Default True.
532
+
533
+ Returns:
534
+ pd.DataFrame: A DataFrame containing the query results. The structure of the DataFrame
535
+ depends on the query configuration:
536
+ - If no projections are specified, returns all span fields
537
+ - If projections are specified, returns only the requested fields
538
+ - If explosion is configured, returns exploded array fields as separate rows
539
+ - If concatenation is configured, returns concatenated array fields as strings
540
+ - The index is set to the configured index field (default: context.span_id)
541
+
542
+ Note:
543
+ The query execution is optimized based on the database dialect (SQLite or PostgreSQL).
544
+ Some operations may be performed in pandas after fetching the data from SQLite.
545
+ """ # noqa: E501
514
546
  if not project_name:
515
547
  project_name = DEFAULT_PROJECT_NAME
516
548
  if stop_time:
@@ -529,6 +561,7 @@ class SpanQuery(_HasTmpSuffix):
529
561
  end_time=end_time,
530
562
  limit=limit,
531
563
  root_spans_only=root_spans_only,
564
+ orphan_span_as_root_span=orphan_span_as_root_span,
532
565
  )
533
566
  assert session.bind is not None
534
567
  dialect = SupportedSQLDialect(session.bind.dialect.name)
@@ -699,10 +732,47 @@ def _get_spans_dataframe(
699
732
  end_time: Optional[datetime] = None,
700
733
  limit: Optional[int] = DEFAULT_SPAN_LIMIT,
701
734
  root_spans_only: Optional[bool] = None,
735
+ orphan_span_as_root_span: bool = True,
702
736
  # Deprecated
703
737
  stop_time: Optional[datetime] = None,
704
- orphan_span_as_root_span: bool = True,
705
738
  ) -> pd.DataFrame:
739
+ """Retrieve spans from the database and return them as a pandas DataFrame.
740
+
741
+ This function queries the database for spans matching the specified criteria and returns
742
+ them as a pandas DataFrame. The spans are joined with their associated traces and projects,
743
+ and their attributes are flattened into columns.
744
+
745
+ Args:
746
+ session (Session): The SQLAlchemy database session to use for the query.
747
+ project_name (str): The name of the project to query spans for.
748
+ span_filter (SpanFilter, optional): A filter to apply to the spans query. Default None.
749
+ start_time (datetime, optional): The start time for the query range. Default None.
750
+ end_time (datetime, optional): The end time for the query range. Default None.
751
+ limit (int, optional): Maximum number of spans to return. Defaults to DEFAULT_SPAN_LIMIT.
752
+ root_spans_only (bool, optional): If True, only root spans are returned. Default None.
753
+ orphan_span_as_root_span (bool): If True, orphan spans are treated as root spans. An
754
+ orphan span has a non-null `parent_id`, but no span with that ID currently exists
755
+ in the database. Default True.
756
+ stop_time (datetime, optional): Deprecated. Use end_time instead. Default None.
757
+
758
+ Returns:
759
+ pd.DataFrame: A DataFrame containing the spans data with the following columns:
760
+ - name: The span name
761
+ - span_kind: The kind of span
762
+ - parent_id: The ID of the parent span
763
+ - start_time: When the span started
764
+ - end_time: When the span ended
765
+ - status_code: The status code of the span
766
+ - status_message: The status message of the span
767
+ - events: The events associated with the span
768
+ - context.span_id: The span ID
769
+ - context.trace_id: The trace ID
770
+ - attributes.*: Flattened span attributes
771
+
772
+ Note:
773
+ The function flattens semantic conventions in the span attributes and adds them as
774
+ prefixed columns to the DataFrame. Custom attributes are preserved as is.
775
+ """ # noqa: E501
706
776
  # use legacy labels for backward-compatibility
707
777
  span_id_label = "context.span_id"
708
778
  trace_id_label = "context.trace_id"
@@ -743,13 +813,16 @@ def _get_spans_dataframe(
743
813
  # A root span is either a span with no parent_id or an orphan span
744
814
  # (a span whose parent_id references a span that doesn't exist in the database)
745
815
  if orphan_span_as_root_span:
746
- # Include both types of root spans:
816
+ # Include both types of root spans
747
817
  parent_spans = select(models.Span.span_id).alias("parent_spans")
748
- stmt = stmt.where(
749
- ~select(1).where(models.Span.parent_id == parent_spans.c.span_id).exists(),
750
- # Note: We avoid using an OR clause with Span.parent_id.is_(None) here
751
- # because it significantly degraded PostgreSQL performance (>10x worse)
752
- # during testing.
818
+ candidate_spans = stmt.cte("candidate_spans")
819
+ stmt = select(candidate_spans).where(
820
+ or_(
821
+ candidate_spans.c.parent_id.is_(None),
822
+ ~select(1)
823
+ .where(candidate_spans.c.parent_id == parent_spans.c.span_id)
824
+ .exists(),
825
+ ),
753
826
  )
754
827
  else:
755
828
  # Only include explicit root spans (spans with parent_id = NULL)
@@ -797,3 +870,10 @@ def _flatten_semantic_conventions(attributes: Mapping[str, Any]) -> dict[str, An
797
870
  prefix_exclusions=SEMANTIC_CONVENTIONS,
798
871
  )
799
872
  return ans
873
+
874
+
875
+ def _randomize(name: str = "") -> str:
876
+ """Append a short random suffix to a column name to avoid name collisions. The suffix
877
+ should be short because PostgreSQL limits identifiers such as column names to 63 bytes.
878
+ """ # noqa: E501
879
+ return f"{name}_{token_hex(3)}"
phoenix/trace/fixtures.py CHANGED
@@ -139,6 +139,29 @@ demo_llama_index_rag_fixture = TracesFixture(
139
139
  ),
140
140
  )
141
141
 
142
+ demo_toolcalling_fixture = TracesFixture(
143
+ name="demo_toolcalling",
144
+ project_name="demo_agent",
145
+ description="Tool calling traces",
146
+ file_name="agents-toolcalling-tracesv2.parquet",
147
+ dataset_fixtures=(
148
+ DatasetFixture(
149
+ file_name="questions.csv.gz",
150
+ input_keys=("query",),
151
+ output_keys=("responses",),
152
+ name="Valid Queries",
153
+ description="Valid queries for the demo agent",
154
+ ),
155
+ DatasetFixture(
156
+ file_name="invalid_questions.csv.gz",
157
+ input_keys=("query",),
158
+ output_keys=("responses",),
159
+ name="Invalid Queries",
160
+ description="Invalid queries for the demo agent",
161
+ ),
162
+ ),
163
+ )
164
+
142
165
  demo_code_based_agent_fixture = TracesFixture(
143
166
  name="demo_code_based_agent",
144
167
  project_name="demo_agents",
@@ -298,6 +321,7 @@ TRACES_FIXTURES: list[TracesFixture] = [
298
321
  vision_fixture,
299
322
  anthropic_tools_fixture,
300
323
  project_sessions_llama_index_rag_arize_docs_fixture,
324
+ demo_toolcalling_fixture,
301
325
  ]
302
326
 
303
327
  NAME_TO_TRACES_FIXTURE: dict[str, TracesFixture] = {
phoenix/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "8.31.0"
1
+ __version__ = "8.32.1"