arize-phoenix 3.25.0__py3-none-any.whl → 4.0.1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic.
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/METADATA +26 -4
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/RECORD +80 -75
- phoenix/__init__.py +9 -5
- phoenix/config.py +109 -53
- phoenix/datetime_utils.py +18 -1
- phoenix/db/README.md +25 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +119 -0
- phoenix/db/bulk_inserter.py +206 -0
- phoenix/db/engines.py +152 -0
- phoenix/db/helpers.py +47 -0
- phoenix/db/insertion/evaluation.py +209 -0
- phoenix/db/insertion/helpers.py +51 -0
- phoenix/db/insertion/span.py +142 -0
- phoenix/db/migrate.py +71 -0
- phoenix/db/migrations/env.py +121 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +371 -0
- phoenix/exceptions.py +5 -1
- phoenix/server/api/context.py +40 -3
- phoenix/server/api/dataloaders/__init__.py +97 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +67 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +152 -0
- phoenix/server/api/dataloaders/document_evaluations.py +37 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +98 -0
- phoenix/server/api/dataloaders/evaluation_summaries.py +151 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +198 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +93 -0
- phoenix/server/api/dataloaders/record_counts.py +125 -0
- phoenix/server/api/dataloaders/span_descendants.py +64 -0
- phoenix/server/api/dataloaders/span_evaluations.py +37 -0
- phoenix/server/api/dataloaders/token_counts.py +138 -0
- phoenix/server/api/dataloaders/trace_evaluations.py +37 -0
- phoenix/server/api/input_types/SpanSort.py +138 -68
- phoenix/server/api/routers/v1/__init__.py +11 -0
- phoenix/server/api/routers/v1/evaluations.py +275 -0
- phoenix/server/api/routers/v1/spans.py +126 -0
- phoenix/server/api/routers/v1/traces.py +82 -0
- phoenix/server/api/schema.py +112 -48
- phoenix/server/api/types/DocumentEvaluationSummary.py +1 -1
- phoenix/server/api/types/Evaluation.py +29 -12
- phoenix/server/api/types/EvaluationSummary.py +29 -44
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +9 -9
- phoenix/server/api/types/Project.py +240 -171
- phoenix/server/api/types/Span.py +87 -131
- phoenix/server/api/types/Trace.py +29 -20
- phoenix/server/api/types/pagination.py +151 -10
- phoenix/server/app.py +263 -35
- phoenix/server/grpc_server.py +93 -0
- phoenix/server/main.py +75 -60
- phoenix/server/openapi/docs.py +218 -0
- phoenix/server/prometheus.py +23 -7
- phoenix/server/static/index.js +662 -643
- phoenix/server/telemetry.py +68 -0
- phoenix/services.py +4 -0
- phoenix/session/client.py +34 -30
- phoenix/session/data_extractor.py +8 -3
- phoenix/session/session.py +176 -155
- phoenix/settings.py +13 -0
- phoenix/trace/attributes.py +349 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +660 -192
- phoenix/trace/dsl/helpers.py +24 -5
- phoenix/trace/dsl/query.py +562 -185
- phoenix/trace/fixtures.py +69 -7
- phoenix/trace/otel.py +44 -200
- phoenix/trace/schemas.py +14 -8
- phoenix/trace/span_evaluations.py +5 -2
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/span_store.py +0 -23
- phoenix/version.py +1 -1
- phoenix/core/project.py +0 -773
- phoenix/core/traces.py +0 -96
- phoenix/datasets/dataset.py +0 -214
- phoenix/datasets/fixtures.py +0 -24
- phoenix/datasets/schema.py +0 -31
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -453
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/routers/evaluation_handler.py +0 -110
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/WHEEL +0 -0
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → server/openapi}/__init__.py +0 -0
phoenix/trace/dsl/query.py
CHANGED
@@ -1,31 +1,46 @@
-import
+import warnings
 from collections import defaultdict
-from dataclasses import dataclass, field,
-from
+from dataclasses import dataclass, field, replace
+from datetime import datetime
+from functools import cached_property
+from itertools import chain
+from random import randint, random
 from types import MappingProxyType
 from typing import (
     Any,
-
-    ClassVar,
+    DefaultDict,
     Dict,
     Iterable,
-    Iterator,
     List,
     Mapping,
     Optional,
     Sequence,
-    Sized,
-    Tuple,
     cast,
 )

 import pandas as pd
 from openinference.semconv.trace import SpanAttributes
-
+from sqlalchemy import JSON, Column, Label, Select, SQLColumnExpression, and_, func, select
+from sqlalchemy.dialects.postgresql import aggregate_order_by
+from sqlalchemy.orm import Session, aliased
+from typing_extensions import assert_never
+
+from phoenix.config import DEFAULT_PROJECT_NAME
+from phoenix.db import models
+from phoenix.db.helpers import SupportedSQLDialect
+from phoenix.trace.attributes import (
+    JSON_STRING_ATTRIBUTES,
+    SEMANTIC_CONVENTIONS,
+    flatten,
+    get_attribute_value,
+    load_json_strings,
+    unflatten,
+)
 from phoenix.trace.dsl import SpanFilter
-from phoenix.trace.dsl.filter import
-from phoenix.trace.schemas import ATTRIBUTE_PREFIX
-
+from phoenix.trace.dsl.filter import Projector
+from phoenix.trace.schemas import ATTRIBUTE_PREFIX
+
+DEFAULT_SPAN_LIMIT = 1000

 RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS

@@ -39,127 +54,208 @@ _ALIASES = {
     "trace_id": "context.trace_id",
 }

-# Because span_kind is an enum, it needs to be converted to string,
-# so it's serializable by pyarrow.
-_CONVERT_TO_STRING = ("span_kind",)
-

 def _unalias(key: str) -> str:
     return _ALIASES.get(key, key)


 @dataclass(frozen=True)
-class
-
-    value: Callable[[Span], Any] = field(init=False, repr=False)
-    span_fields: ClassVar[Tuple[str, ...]] = tuple(f.name for f in fields(Span))
-
-    def __bool__(self) -> bool:
-        return bool(self.key)
+class _Base:
+    """The sole purpose of this class is for `super().__post_init__()` to work"""

     def __post_init__(self) -> None:
-
-        object.__setattr__(self, "key", key)
-        if key.startswith(CONTEXT_PREFIX):
-            key = key[len(CONTEXT_PREFIX) :]
-            value = partial(self._from_context, key=key)
-        elif key.startswith(ATTRIBUTE_PREFIX):
-            key = self.key[len(ATTRIBUTE_PREFIX) :]
-            value = partial(self._from_attributes, key=key)
-        elif key in self.span_fields:
-            value = partial(self._from_span, key=key)
-        else:
-            value = partial(self._from_attributes, key=key)
-        if self.key in _CONVERT_TO_STRING:
-            object.__setattr__(
-                self,
-                "value",
-                lambda span: None if (v := value(span)) is None else str(v),
-            )
-        else:
-            object.__setattr__(self, "value", value)
+        pass

-    def __call__(self, span: Span) -> Any:
-        return self.value(span)

-
-
-
+@dataclass(frozen=True)
+class Projection(_Base):
+    key: str = ""
+    _projector: Projector = field(init=False, repr=False)

-
-
-
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "key", _unalias(self.key))
+        object.__setattr__(self, "_projector", Projector(self.key))

-
-
-
+    def __bool__(self) -> bool:
+        return bool(self.key)
+
+    def __call__(self) -> SQLColumnExpression[Any]:
+        return self._projector()

     def to_dict(self) -> Dict[str, Any]:
         return {"key": self.key}

     @classmethod
     def from_dict(cls, obj: Mapping[str, Any]) -> "Projection":
-        return cls(
-
-
+        return cls(**({"key": cast(str, key)} if (key := obj.get("key")) else {}))
+
+
+@dataclass(frozen=True)
+class _HasTmpSuffix(_Base):
+    _tmp_suffix: str = field(init=False, repr=False)
+    """Ideally every column label should get a temporary random suffix that will
+    be removed at the end. This is necessary during query construction because
+    sqlalchemy is not always foolproof, e.g. we have seen `group_by` clauses that
+    were incorrect or ambiguous. We should actively avoid name collisions, which
+    is increasingly likely as queries get more complex.
+    """
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "_tmp_suffix", f"{randint(0, 10**6):06d}")
+
+    def _remove_tmp_suffix(self, name: str) -> str:
+        if name.endswith(self._tmp_suffix):
+            return name[: -len(self._tmp_suffix)]
+        return name
+
+    def _add_tmp_suffix(self, name: str) -> str:
+        if name.endswith(self._tmp_suffix):
+            return name
+        return name + self._tmp_suffix


 @dataclass(frozen=True)
-class Explosion(Projection):
+class Explosion(_HasTmpSuffix, Projection):
     kwargs: Mapping[str, str] = field(default_factory=lambda: MappingProxyType({}))
     primary_index_key: str = "context.span_id"

-
-
+    _position_prefix: str = field(init=False, repr=False)
+    _primary_index: Projection = field(init=False, repr=False)
+    _array_tmp_col_label: str = field(init=False, repr=False)
+    """For sqlite we need to store the array in a temporary column to be able
+    to explode it later in pandas. `_array_tmp_col_label` is the name of this
+    temporary column. The temporary column will have a unique name
+    per instance.
+    """

     def __post_init__(self) -> None:
         super().__post_init__()
         position_prefix = _PRESCRIBED_POSITION_PREFIXES.get(self.key, "")
-        object.__setattr__(self, "
-        object.__setattr__(self, "
+        object.__setattr__(self, "_position_prefix", position_prefix)
+        object.__setattr__(self, "_primary_index", Projection(self.primary_index_key))
+        object.__setattr__(self, "_array_tmp_col_label", f"__array_tmp_col_{random()}")

     @cached_property
-    def index_keys(self) ->
-        return
-
-    def with_primary_index_key(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        }
-        return
-        for i, item in enumerate(seq):
-            if not isinstance(item, Mapping):
-                continue
-            record = (
-                {name: item.get(key) for name, key in self.kwargs.items()}
-                if self.kwargs
-                else dict(item)
+    def index_keys(self) -> List[str]:
+        return [self._primary_index.key, f"{self._position_prefix}position"]
+
+    def with_primary_index_key(self, _: str) -> "Explosion":
+        print("`.with_primary_index_key(...)` is deprecated and will be removed in the future.")
+        return self
+
+    def update_sql(
+        self,
+        stmt: Select[Any],
+        dialect: SupportedSQLDialect,
+    ) -> Select[Any]:
+        array = self()
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because sqlite doesn't support `WITH ORDINALITY`, the order of
+            # the returned (table) values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            stmt = stmt.where(
+                func.json_type(array) == "array",
+            ).add_columns(
+                array.label(self._array_tmp_col_label),
             )
-
-
-
+            return stmt
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            element = (
+                func.jsonb_array_elements(array)
+                .table_valued(
+                    Column("obj", JSON),
+                    with_ordinality="position",
+                    joins_implicitly=True,
+                )
+                .render_derived()
+            )
+            obj, position = element.c.obj, element.c.position
+            # Use zero-based indexing for backward-compatibility.
+            position_label = (position - 1).label(f"{self._position_prefix}position")
+            if self.kwargs:
+                columns: Iterable[Label[Any]] = (
+                    obj[key.split(".")].label(self._add_tmp_suffix(name))
+                    for name, key in self.kwargs.items()
+                )
             else:
-
-
-
-
-
-
+                columns = (obj.label(self._array_tmp_col_label),)
+            stmt = (
+                stmt.where(func.jsonb_typeof(array) == "array")
+                .where(func.jsonb_typeof(obj) == "object")
+                .add_columns(position_label, *columns)
+            )
+            return stmt
+        else:
+            assert_never(dialect)
+
+    def update_df(
+        self,
+        df: pd.DataFrame,
+        dialect: SupportedSQLDialect,
+    ) -> pd.DataFrame:
+        df = df.rename(self._remove_tmp_suffix, axis=1)
+        if df.empty:
+            columns = list(
+                set(
+                    chain(
+                        self.index_keys,
+                        df.drop(self._array_tmp_col_label, axis=1, errors="ignore").columns,
+                        self.kwargs.keys(),
+                    )
+                )
+            )
+            df = pd.DataFrame(columns=columns).set_index(self.index_keys)
+            return df
+        if dialect != SupportedSQLDialect.SQLITE and self.kwargs:
+            df = df.set_index(self.index_keys)
+            return df
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because sqlite doesn't support `WITH ORDINALITY`, the order of
+            # the returned (table) values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            def _extract_values(array: List[Any]) -> List[Dict[str, Any]]:
+                if not isinstance(array, Iterable):
+                    return []
+                if not self.kwargs:
+                    return [
+                        {
+                            **dict(flatten(obj)),
+                            f"{self._position_prefix}position": i,
+                        }
+                        for i, obj in enumerate(array)
+                        if isinstance(obj, Mapping)
+                    ]
+                res: List[Dict[str, Any]] = []
+                for i, obj in enumerate(array):
+                    if not isinstance(obj, Mapping):
+                        continue
+                    values: Dict[str, Any] = {f"{self._position_prefix}position": i}
+                    for name, key in self.kwargs.items():
+                        if (value := get_attribute_value(obj, key)) is not None:
+                            values[name] = value
+                    res.append(values)
+                return res
+
+            records = df.loc[:, self._array_tmp_col_label].dropna().map(_extract_values).explode()
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            records = df.loc[:, self._array_tmp_col_label].dropna().map(flatten).map(dict)
+        else:
+            assert_never(dialect)
+        df = df.drop(self._array_tmp_col_label, axis=1)
+        if records.empty:
+            df = df.set_index(self.index_keys[0])
+            return df
+        df_explode = pd.DataFrame.from_records(records.to_list(), index=records.index)
+        if dialect is SupportedSQLDialect.SQLITE:
+            df = _outer_join(df, df_explode)
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            df = pd.concat([df, df_explode], axis=1)
+        else:
+            assert_never(dialect)
+        df = df.set_index(self.index_keys)
+        return df

     def to_dict(self) -> Dict[str, Any]:
         return {
@@ -186,27 +282,126 @@ class Explosion(Projection):


 @dataclass(frozen=True)
-class Concatenation(Projection):
+class Concatenation(_HasTmpSuffix, Projection):
     kwargs: Mapping[str, str] = field(default_factory=lambda: MappingProxyType({}))
     separator: str = "\n\n"

+    _array_tmp_col_label: str = field(init=False, repr=False)
+    """For SQLite we need to store the array in a temporary column to be able
+    to concatenate it later in pandas. `_array_tmp_col_label` is the name of
+    this temporary column. The temporary column will have a unique name
+    per instance.
+    """
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "_array_tmp_col_label", f"__array_tmp_col_{random()}")
+
     def with_separator(self, separator: str = "\n\n") -> "Concatenation":
         return replace(self, separator=separator)

-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def update_sql(
+        self,
+        stmt: Select[Any],
+        dialect: SupportedSQLDialect,
+    ) -> Select[Any]:
+        array = self()
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because SQLite doesn't support `WITH ORDINALITY`, the order of
+            # the returned table-values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            stmt = stmt.where(
+                func.json_type(array) == "array",
+            ).add_columns(
+                array.label(self._array_tmp_col_label),
+            )
+            return stmt
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            element = (
+                (
+                    func.jsonb_array_elements(array)
+                    if self.kwargs
+                    else func.jsonb_array_elements_text(array)
+                )
+                .table_valued(
+                    Column("obj", JSON),
+                    with_ordinality="position",
+                    joins_implicitly=True,
+                )
+                .render_derived()
+            )
+            obj, position = element.c.obj, element.c.position
+            if self.kwargs:
+                columns: Iterable[Label[Any]] = (
+                    func.string_agg(
+                        obj[key.split(".")].as_string(),
+                        aggregate_order_by(self.separator, position),  # type: ignore
+                    ).label(self._add_tmp_suffix(label))
+                    for label, key in self.kwargs.items()
+                )
+            else:
+                columns = (
+                    func.string_agg(
+                        obj,
+                        aggregate_order_by(self.separator, position),  # type: ignore
+                    ).label(self.key),
+                )
+            stmt = (
+                stmt.where(
+                    and_(
+                        func.jsonb_typeof(array) == "array",
+                        *((func.jsonb_typeof(obj) == "object",) if self.kwargs else ()),
+                    )
+                )
+                .add_columns(*columns)
+                .group_by(*stmt.columns.keys())
+            )
+            return stmt
+        else:
+            assert_never(dialect)
+
+    def update_df(
+        self,
+        df: pd.DataFrame,
+        dialect: SupportedSQLDialect,
+    ) -> pd.DataFrame:
+        df = df.rename(self._remove_tmp_suffix, axis=1)
+        if df.empty:
+            columns = list(
+                set(
+                    chain(
+                        df.drop(self._array_tmp_col_label, axis=1, errors="ignore").columns,
+                        self.kwargs.keys(),
+                    )
+                )
+            )
+            return pd.DataFrame(columns=columns, index=df.index)
+        if dialect is SupportedSQLDialect.SQLITE:
+            # Because SQLite doesn't support `WITH ORDINALITY`, the order of
+            # the returned table-values is not guaranteed. So we resort to
+            # post hoc processing using pandas.
+            def _concat_values(array: List[Any]) -> Dict[str, Any]:
+                if not isinstance(array, Iterable):
+                    return {}
+                if not self.kwargs:
+                    return {self.key: self.separator.join(str(obj) for obj in array)}
+                values: DefaultDict[str, List[str]] = defaultdict(list)
+                for i, obj in enumerate(array):
+                    if not isinstance(obj, Mapping):
+                        continue
+                    for label, key in self.kwargs.items():
+                        if (value := get_attribute_value(obj, key)) is not None:
+                            values[label].append(str(value))
+                return {label: self.separator.join(vs) for label, vs in values.items()}
+
+            records = df.loc[:, self._array_tmp_col_label].map(_concat_values)
+            df_concat = pd.DataFrame.from_records(records.to_list(), index=records.index)
+            return df.drop(self._array_tmp_col_label, axis=1).join(df_concat, how="outer")
+        elif dialect is SupportedSQLDialect.POSTGRESQL:
+            pass
+        else:
+            assert_never(dialect)
+        return df

     def to_dict(self) -> Dict[str, Any]:
         return {
@@ -233,13 +428,24 @@ class Concatenation(Projection):


 @dataclass(frozen=True)
-class SpanQuery:
+class SpanQuery(_HasTmpSuffix):
     _select: Mapping[str, Projection] = field(default_factory=lambda: MappingProxyType({}))
-    _concat: Concatenation = field(
-    _explode: Explosion = field(
-    _filter: SpanFilter = field(
+    _concat: Optional[Concatenation] = field(default=None)
+    _explode: Optional[Explosion] = field(default=None)
+    _filter: Optional[SpanFilter] = field(default=None)
     _rename: Mapping[str, str] = field(default_factory=lambda: MappingProxyType({}))
     _index: Projection = field(default_factory=lambda: Projection("context.span_id"))
+    _concat_separator: str = field(default="\n\n", repr=False)
+    _pk_tmp_col_label: str = field(init=False, repr=False)
+    """We use `_pk_tmp_col_label` as a temporary column for storing
+    the row id, i.e. the primary key, of the spans table. This will help
+    us with joins without the risk of naming conflicts. The temporary
+    column will have a unique name per instance.
+    """
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        object.__setattr__(self, "_pk_tmp_col_label", f"__pk_tmp_col_{random()}")

     def __bool__(self) -> bool:
         return bool(self._select) or bool(self._filter) or bool(self._explode) or bool(self._concat)
@@ -255,11 +461,21 @@ class SpanQuery:
         return replace(self, _filter=_filter)

     def explode(self, key: str, **kwargs: str) -> "SpanQuery":
+        assert (
+            isinstance(key, str) and key
+        ), "The field name for explosion must be a non-empty string."
         _explode = Explosion(key=key, kwargs=kwargs, primary_index_key=self._index.key)
         return replace(self, _explode=_explode)

     def concat(self, key: str, **kwargs: str) -> "SpanQuery":
-
+        assert (
+            isinstance(key, str) and key
+        ), "The field name for concatenation must be a non-empty string."
+        _concat = (
+            Concatenation(key=key, kwargs=kwargs, separator=self._concat.separator)
+            if self._concat
+            else Concatenation(key=key, kwargs=kwargs, separator=self._concat_separator)
+        )
         return replace(self, _concat=_concat)

     def rename(self, **kwargs: str) -> "SpanQuery":
@@ -268,75 +484,136 @@ class SpanQuery:

     def with_index(self, key: str = "context.span_id") -> "SpanQuery":
         _index = Projection(key=key)
-        return
+        return (
+            replace(self, _index=_index, _explode=replace(self._explode, primary_index_key=key))
+            if self._explode
+            else replace(self, _index=_index)
+        )

     def with_concat_separator(self, separator: str = "\n\n") -> "SpanQuery":
+        if not self._concat:
+            return replace(self, _concat_separator=separator)
         _concat = self._concat.with_separator(separator)
         return replace(self, _concat=_concat)

-    def with_explode_primary_index_key(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def with_explode_primary_index_key(self, _: str) -> "SpanQuery":
+        print(
+            "`.with_explode_primary_index_key(...)` is deprecated and will be "
+            "removed in the future. Use `.with_index(...)` instead."
+        )
+        return self
+
+    def __call__(
+        self,
+        session: Session,
+        project_name: Optional[str] = None,
+        start_time: Optional[datetime] = None,
+        end_time: Optional[datetime] = None,
+        limit: Optional[int] = DEFAULT_SPAN_LIMIT,
+        root_spans_only: Optional[bool] = None,
+        # Deprecated
+        stop_time: Optional[datetime] = None,
+    ) -> pd.DataFrame:
+        if not project_name:
+            project_name = DEFAULT_PROJECT_NAME
+        if stop_time:
+            # Deprecated. Raise a warning
+            warnings.warn(
+                "stop_time is deprecated. Use end_time instead.",
+                DeprecationWarning,
             )
+            end_time = end_time or stop_time
         if not (self._select or self._explode or self._concat):
-
-
-
-
-
-
+            return _get_spans_dataframe(
+                session,
+                project_name,
+                span_filter=self._filter,
+                start_time=start_time,
+                end_time=end_time,
+                limit=limit,
+                root_spans_only=root_spans_only,
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        assert session.bind is not None
+        dialect = SupportedSQLDialect(session.bind.dialect.name)
+        row_id = models.Span.id.label(self._pk_tmp_col_label)
+        stmt: Select[Any] = (
+            # We do not allow `group_by` anything other than `row_id` because otherwise
+            # it's too complex for the post hoc processing step in pandas.
+            select(row_id)
+            .join(models.Trace)
+            .join(models.Project)
+            .where(models.Project.name == project_name)
+        )
+        if start_time:
+            stmt = stmt.where(start_time <= models.Span.start_time)
+        if end_time:
+            stmt = stmt.where(models.Span.start_time < end_time)
+        if limit is not None:
+            stmt = stmt.limit(limit)
+        if root_spans_only:
+            parent = aliased(models.Span)
+            stmt = stmt.outerjoin(
+                parent,
+                models.Span.parent_id == parent.span_id,
+            ).where(parent.span_id == None)  # noqa E711
+        stmt0_orig: Select[Any] = stmt
+        stmt1_filter: Optional[Select[Any]] = None
+        if self._filter:
+            stmt = stmt1_filter = self._filter(stmt)
+        stmt2_select: Optional[Select[Any]] = None
+        if self._select:
+            columns: Iterable[Label[Any]] = (
+                proj().label(self._add_tmp_suffix(label)) for label, proj in self._select.items()
+            )
+            stmt = stmt2_select = stmt.add_columns(*columns)
+        stmt3_explode: Optional[Select[Any]] = None
         if self._explode:
-
-
+            stmt = stmt3_explode = self._explode.update_sql(stmt, dialect)
+        index: Label[Any] = self._index().label(self._add_tmp_suffix(self._index.key))
+        df: Optional[pd.DataFrame] = None
+        # `concat` is done separately because it has `group_by` but we can't
+        # always join to it as a subquery because it may require post hoc
+        # processing in pandas. It's kept separate for simplicity.
+        df_concat: Optional[pd.DataFrame] = None
+        conn = session.connection()
+        if self._explode or not self._concat:
+            if index.name not in stmt.selected_columns.keys():
+                stmt = stmt.add_columns(index)
+            df = pd.read_sql_query(stmt, conn, self._pk_tmp_col_label)
+        if self._concat:
+            if df is not None:
+                assert stmt3_explode is not None
+                # We can't include stmt3_explode because it may be trying to
+                # explode the same column that we're trying to concatenate,
+                # resulting in duplicated joins.
+                stmt_no_explode = (
+                    stmt2_select
+                    if stmt2_select is not None
+                    else (stmt1_filter if stmt1_filter is not None else stmt0_orig)
+                )
+                stmt4_concat = stmt_no_explode.with_only_columns(row_id)
             else:
-
-
-            if not
-
-
-
+                assert stmt3_explode is None
+                stmt4_concat = stmt
+            if (df is None or df.empty) and index.name not in stmt4_concat.selected_columns.keys():
+                stmt4_concat = stmt4_concat.add_columns(index)
+            stmt4_concat = self._concat.update_sql(stmt4_concat, dialect)
+            df_concat = pd.read_sql_query(stmt4_concat, conn, self._pk_tmp_col_label)
+            df_concat = self._concat.update_df(df_concat, dialect)
+        assert df is not None or df_concat is not None
+        if df is None:
+            df = df_concat
+        elif df_concat is not None:
+            df = _outer_join(df, df_concat)
+        assert df is not None and self._pk_tmp_col_label not in df.columns
+        df = df.rename(self._remove_tmp_suffix, axis=1)
+        if self._explode:
+            df = self._explode.update_df(df, dialect)
+        else:
+            df = df.set_index(self._index.key)
+        df = df.rename(_ALIASES, axis=1, errors="ignore")
+        df = df.rename(self._rename, axis=1, errors="ignore")
+        return df

     def to_dict(self) -> Dict[str, Any]:
         return {
@@ -345,9 +622,9 @@ class SpanQuery:
                 if self._select
                 else {}
             ),
-            "filter": self._filter.to_dict(),
-            "explode": self._explode.to_dict(),
-            "concat": self._concat.to_dict(),
+            **({"filter": self._filter.to_dict()} if self._filter else {}),
+            **({"explode": self._explode.to_dict()} if self._explode else {}),
+            **({"concat": self._concat.to_dict()} if self._concat else {}),
             **({"rename": dict(self._rename)} if self._rename else {}),
             "index": self._index.to_dict(),
         }
@@ -356,7 +633,6 @@ class SpanQuery:
     def from_dict(
         cls,
         obj: Mapping[str, Any],
-        evals: Optional[SupportsGetSpanEvaluation] = None,
         valid_eval_names: Optional[Sequence[str]] = None,
     ) -> "SpanQuery":
         return cls(
@@ -376,7 +652,6 @@ class SpanQuery:
                 {
                     "_filter": SpanFilter.from_dict(
                         cast(Mapping[str, Any], filter),
-                        evals=evals,
                        valid_eval_names=valid_eval_names,
                    )
                }  # type: ignore
@@ -386,11 +661,13 @@ class SpanQuery:
             **(
                 {"_explode": Explosion.from_dict(cast(Mapping[str, Any], explode))}  # type: ignore
                 if (explode := obj.get("explode"))
+                and explode.get("key")  # check `key` for backward-compatible truthiness
                 else {}
             ),
             **(
                 {"_concat": Concatenation.from_dict(cast(Mapping[str, Any], concat))}  # type: ignore
                 if (concat := obj.get("concat"))
+                and concat.get("key")  # check `key` for backward-compatible truthiness
                 else {}
             ),
             **(
@@ -404,3 +681,103 @@ class SpanQuery:
                 else {}
             ),
         )
+
+
+def _get_spans_dataframe(
+    session: Session,
+    project_name: str,
+    /,
+    *,
+    span_filter: Optional[SpanFilter] = None,
+    start_time: Optional[datetime] = None,
+    end_time: Optional[datetime] = None,
+    limit: Optional[int] = DEFAULT_SPAN_LIMIT,
+    root_spans_only: Optional[bool] = None,
+    # Deprecated
+    stop_time: Optional[datetime] = None,
+) -> pd.DataFrame:
+    # use legacy labels for backward-compatibility
+    span_id_label = "context.span_id"
+    trace_id_label = "context.trace_id"
+    if stop_time:
+        # Deprecated. Raise a warning
+        warnings.warn(
+            "stop_time is deprecated. Use end_time instead.",
+            DeprecationWarning,
+        )
+        end_time = end_time or stop_time
+    stmt: Select[Any] = (
+        select(
+            models.Span.name,
+            models.Span.span_kind,
+            models.Span.parent_id,
+            models.Span.start_time,
+            models.Span.end_time,
+            models.Span.status_code,
+            models.Span.status_message,
+            models.Span.events,
+            models.Span.span_id.label(span_id_label),
+            models.Trace.trace_id.label(trace_id_label),
+            models.Span.attributes,
+        )
+        .join(models.Trace)
+        .join(models.Project)
+        .where(models.Project.name == project_name)
+    )
+    if span_filter:
+        stmt = span_filter(stmt)
+    if start_time:
+        stmt = stmt.where(start_time <= models.Span.start_time)
+    if end_time:
+        stmt = stmt.where(models.Span.start_time < end_time)
+    if limit is not None:
+        stmt = stmt.limit(limit)
+    if root_spans_only:
+        parent = aliased(models.Span)
+        stmt = stmt.outerjoin(
+            parent,
+            models.Span.parent_id == parent.span_id,
+        ).where(parent.span_id == None)  # noqa E711
+    conn = session.connection()
+    # set `drop=False` for backward-compatibility
+    df = pd.read_sql_query(stmt, conn).set_index(span_id_label, drop=False)
+    if df.empty:
+        return df.drop("attributes", axis=1)
+    df_attributes = pd.DataFrame.from_records(
+        df.attributes.map(_flatten_semantic_conventions),
+    ).set_axis(df.index, axis=0)
+    df = pd.concat(
+        [
+            df.drop("attributes", axis=1),
+            df_attributes.add_prefix("attributes" + "."),
+        ],
+        axis=1,
+    )
+    return df
+
+
+def _outer_join(left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame:
+    if (columns_intersection := left.columns.intersection(right.columns)).empty:
+        df = left.join(right, how="outer")
+    else:
+        df = left.join(right, how="outer", lsuffix="_L", rsuffix="_R")
+        for col in columns_intersection:
+            df.loc[:, col] = df.loc[:, f"{col}_L"].fillna(df.loc[:, f"{col}_R"])
+            df = df.drop([f"{col}_L", f"{col}_R"], axis=1)
+    return df
+
+
+def _flatten_semantic_conventions(attributes: Mapping[str, Any]) -> Dict[str, Any]:
+    # This may be inefficient, but is needed to preserve backward-compatibility.
+    # For example, custom attributes do not get flattened.
+    ans = unflatten(
+        load_json_strings(
+            flatten(
+                attributes,
+                recurse_on_sequence=True,
+                json_string_attributes=JSON_STRING_ATTRIBUTES,
+            ),
+        ),
+        prefix_exclusions=SEMANTIC_CONVENTIONS,
+    )
+    return ans