arize-phoenix 3.25.0__py3-none-any.whl → 4.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/METADATA +26 -4
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/RECORD +80 -75
- phoenix/__init__.py +9 -5
- phoenix/config.py +109 -53
- phoenix/datetime_utils.py +18 -1
- phoenix/db/README.md +25 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +119 -0
- phoenix/db/bulk_inserter.py +206 -0
- phoenix/db/engines.py +152 -0
- phoenix/db/helpers.py +47 -0
- phoenix/db/insertion/evaluation.py +209 -0
- phoenix/db/insertion/helpers.py +51 -0
- phoenix/db/insertion/span.py +142 -0
- phoenix/db/migrate.py +71 -0
- phoenix/db/migrations/env.py +121 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +371 -0
- phoenix/exceptions.py +5 -1
- phoenix/server/api/context.py +40 -3
- phoenix/server/api/dataloaders/__init__.py +97 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +67 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +152 -0
- phoenix/server/api/dataloaders/document_evaluations.py +37 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +98 -0
- phoenix/server/api/dataloaders/evaluation_summaries.py +151 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +198 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +93 -0
- phoenix/server/api/dataloaders/record_counts.py +125 -0
- phoenix/server/api/dataloaders/span_descendants.py +64 -0
- phoenix/server/api/dataloaders/span_evaluations.py +37 -0
- phoenix/server/api/dataloaders/token_counts.py +138 -0
- phoenix/server/api/dataloaders/trace_evaluations.py +37 -0
- phoenix/server/api/input_types/SpanSort.py +138 -68
- phoenix/server/api/routers/v1/__init__.py +11 -0
- phoenix/server/api/routers/v1/evaluations.py +275 -0
- phoenix/server/api/routers/v1/spans.py +126 -0
- phoenix/server/api/routers/v1/traces.py +82 -0
- phoenix/server/api/schema.py +112 -48
- phoenix/server/api/types/DocumentEvaluationSummary.py +1 -1
- phoenix/server/api/types/Evaluation.py +29 -12
- phoenix/server/api/types/EvaluationSummary.py +29 -44
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +9 -9
- phoenix/server/api/types/Project.py +240 -171
- phoenix/server/api/types/Span.py +87 -131
- phoenix/server/api/types/Trace.py +29 -20
- phoenix/server/api/types/pagination.py +151 -10
- phoenix/server/app.py +263 -35
- phoenix/server/grpc_server.py +93 -0
- phoenix/server/main.py +75 -60
- phoenix/server/openapi/docs.py +218 -0
- phoenix/server/prometheus.py +23 -7
- phoenix/server/static/index.js +662 -643
- phoenix/server/telemetry.py +68 -0
- phoenix/services.py +4 -0
- phoenix/session/client.py +34 -30
- phoenix/session/data_extractor.py +8 -3
- phoenix/session/session.py +176 -155
- phoenix/settings.py +13 -0
- phoenix/trace/attributes.py +349 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +660 -192
- phoenix/trace/dsl/helpers.py +24 -5
- phoenix/trace/dsl/query.py +562 -185
- phoenix/trace/fixtures.py +69 -7
- phoenix/trace/otel.py +44 -200
- phoenix/trace/schemas.py +14 -8
- phoenix/trace/span_evaluations.py +5 -2
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/span_store.py +0 -23
- phoenix/version.py +1 -1
- phoenix/core/project.py +0 -773
- phoenix/core/traces.py +0 -96
- phoenix/datasets/dataset.py +0 -214
- phoenix/datasets/fixtures.py +0 -24
- phoenix/datasets/schema.py +0 -31
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -453
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/routers/evaluation_handler.py +0 -110
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/WHEEL +0 -0
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → server/openapi}/__init__.py +0 -0
phoenix/trace/dsl/filter.py
CHANGED
|
@@ -1,199 +1,549 @@
|
|
|
1
1
|
import ast
|
|
2
|
-
import
|
|
2
|
+
import re
|
|
3
3
|
import sys
|
|
4
|
+
import typing
|
|
4
5
|
from dataclasses import dataclass, field
|
|
5
6
|
from difflib import SequenceMatcher
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Iterable,
|
|
10
|
-
Iterator,
|
|
11
|
-
Mapping,
|
|
12
|
-
Optional,
|
|
13
|
-
Protocol,
|
|
14
|
-
Sequence,
|
|
15
|
-
Tuple,
|
|
16
|
-
cast,
|
|
17
|
-
)
|
|
7
|
+
from itertools import chain
|
|
8
|
+
from random import randint
|
|
9
|
+
from types import MappingProxyType
|
|
18
10
|
|
|
19
|
-
|
|
20
|
-
from
|
|
11
|
+
import sqlalchemy
|
|
12
|
+
from sqlalchemy.orm import Mapped, aliased
|
|
13
|
+
from sqlalchemy.orm.util import AliasedClass
|
|
14
|
+
from sqlalchemy.sql.expression import Select
|
|
15
|
+
from typing_extensions import TypeAlias, TypeGuard, assert_never
|
|
21
16
|
|
|
22
17
|
import phoenix.trace.v1 as pb
|
|
23
|
-
from phoenix.
|
|
24
|
-
from phoenix.trace.schemas import ComputedAttributes, Span, SpanID
|
|
18
|
+
from phoenix.db import models
|
|
25
19
|
|
|
26
|
-
_VALID_EVAL_ATTRIBUTES: Tuple[str, ...] = tuple(
|
|
20
|
+
_VALID_EVAL_ATTRIBUTES: typing.Tuple[str, ...] = tuple(
|
|
27
21
|
field.name for field in pb.Evaluation.Result.DESCRIPTOR.fields
|
|
28
22
|
)
|
|
29
23
|
|
|
30
24
|
|
|
31
|
-
|
|
32
|
-
|
|
25
|
+
EvalAttribute: TypeAlias = typing.Literal["label", "score"]
|
|
26
|
+
EvalExpression: TypeAlias = str
|
|
27
|
+
EvalName: TypeAlias = str
|
|
28
|
+
|
|
29
|
+
EVAL_EXPRESSION_PATTERN = re.compile(r"""\b(evals\[(".*?"|'.*?')\][.](label|score))\b""")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
|
|
33
|
+
class AliasedAnnotationRelation:
|
|
34
|
+
"""
|
|
35
|
+
Represents an aliased `span_annotation` relation (i.e., SQL table). Used to
|
|
36
|
+
perform joins on span evaluations during filtering. An alias is required
|
|
37
|
+
because the `span_annotation` may be joined multiple times for different
|
|
38
|
+
evaluation names.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
index: int
|
|
42
|
+
name: str
|
|
43
|
+
table: AliasedClass[models.SpanAnnotation] = field(init=False, repr=False)
|
|
44
|
+
_label_attribute_alias: str = field(init=False, repr=False)
|
|
45
|
+
_score_attribute_alias: str = field(init=False, repr=False)
|
|
46
|
+
|
|
47
|
+
def __post_init__(self) -> None:
|
|
48
|
+
table_alias = f"span_annotation_{self.index}"
|
|
49
|
+
alias_id = f"{randint(0, 10**6):06d}" # prevent conflicts with user-defined attributes
|
|
50
|
+
label_attribute_alias = f"{table_alias}_label_{alias_id}"
|
|
51
|
+
score_attribute_alias = f"{table_alias}_score_{alias_id}"
|
|
52
|
+
table = aliased(models.SpanAnnotation, name=table_alias)
|
|
53
|
+
object.__setattr__(self, "_label_attribute_alias", label_attribute_alias)
|
|
54
|
+
object.__setattr__(self, "_score_attribute_alias", score_attribute_alias)
|
|
55
|
+
object.__setattr__(
|
|
56
|
+
self,
|
|
57
|
+
"table",
|
|
58
|
+
table,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def attributes(self) -> typing.Iterator[typing.Tuple[str, Mapped[typing.Any]]]:
|
|
63
|
+
"""
|
|
64
|
+
Alias names and attributes (i.e., columns) of the `span_annotation`
|
|
65
|
+
relation.
|
|
66
|
+
"""
|
|
67
|
+
yield self._label_attribute_alias, self.table.label
|
|
68
|
+
yield self._score_attribute_alias, self.table.score
|
|
69
|
+
|
|
70
|
+
def attribute_alias(self, attribute: EvalAttribute) -> str:
|
|
71
|
+
"""
|
|
72
|
+
Returns an alias for the given attribute (i.e., column).
|
|
73
|
+
"""
|
|
74
|
+
if attribute == "label":
|
|
75
|
+
return self._label_attribute_alias
|
|
76
|
+
if attribute == "score":
|
|
77
|
+
return self._score_attribute_alias
|
|
78
|
+
assert_never(attribute)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Because postgresql is strongly typed, we cast JSON values to string
|
|
82
|
+
# by default unless it's hinted otherwise as done here.
|
|
83
|
+
_FLOAT_ATTRIBUTES: typing.FrozenSet[str] = frozenset(
|
|
84
|
+
{
|
|
85
|
+
"llm.token_count.completion",
|
|
86
|
+
"llm.token_count.prompt",
|
|
87
|
+
"llm.token_count.total",
|
|
88
|
+
}
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
_STRING_NAMES: typing.Mapping[str, sqlalchemy.SQLColumnExpression[typing.Any]] = MappingProxyType(
|
|
92
|
+
{
|
|
93
|
+
"span_id": models.Span.span_id,
|
|
94
|
+
"trace_id": models.Trace.trace_id,
|
|
95
|
+
"context.span_id": models.Span.span_id,
|
|
96
|
+
"context.trace_id": models.Trace.trace_id,
|
|
97
|
+
"parent_id": models.Span.parent_id,
|
|
98
|
+
"span_kind": models.Span.span_kind,
|
|
99
|
+
"name": models.Span.name,
|
|
100
|
+
"status_code": models.Span.status_code,
|
|
101
|
+
"status_message": models.Span.status_message,
|
|
102
|
+
}
|
|
103
|
+
)
|
|
104
|
+
_FLOAT_NAMES: typing.Mapping[str, sqlalchemy.SQLColumnExpression[typing.Any]] = MappingProxyType(
|
|
105
|
+
{
|
|
106
|
+
"latency_ms": models.Span.latency_ms,
|
|
107
|
+
"cumulative_llm_token_count_completion": models.Span.cumulative_llm_token_count_completion,
|
|
108
|
+
"cumulative_llm_token_count_prompt": models.Span.cumulative_llm_token_count_prompt,
|
|
109
|
+
"cumulative_llm_token_count_total": models.Span.cumulative_llm_token_count_total,
|
|
110
|
+
}
|
|
111
|
+
)
|
|
112
|
+
_NAMES: typing.Mapping[str, sqlalchemy.SQLColumnExpression[typing.Any]] = MappingProxyType(
|
|
113
|
+
{
|
|
114
|
+
**_STRING_NAMES,
|
|
115
|
+
**_FLOAT_NAMES,
|
|
116
|
+
"attributes": models.Span.attributes,
|
|
117
|
+
"events": models.Span.events,
|
|
118
|
+
}
|
|
119
|
+
)
|
|
120
|
+
_BACKWARD_COMPATIBILITY_REPLACEMENTS: typing.Mapping[str, str] = MappingProxyType(
|
|
121
|
+
{
|
|
122
|
+
# for backward-compatibility
|
|
123
|
+
"context.span_id": "span_id",
|
|
124
|
+
"context.trace_id": "trace_id",
|
|
125
|
+
"cumulative_token_count.completion": "cumulative_llm_token_count_completion",
|
|
126
|
+
"cumulative_token_count.prompt": "cumulative_llm_token_count_prompt",
|
|
127
|
+
"cumulative_token_count.total": "cumulative_llm_token_count_total",
|
|
128
|
+
}
|
|
129
|
+
)
|
|
33
130
|
|
|
34
131
|
|
|
35
132
|
@dataclass(frozen=True)
|
|
36
133
|
class SpanFilter:
|
|
37
134
|
condition: str = ""
|
|
38
|
-
|
|
39
|
-
valid_eval_names: Optional[Sequence[str]] = None
|
|
135
|
+
valid_eval_names: typing.Optional[typing.Sequence[str]] = None
|
|
40
136
|
translated: ast.Expression = field(init=False, repr=False)
|
|
41
|
-
compiled: Any = field(init=False, repr=False)
|
|
137
|
+
compiled: typing.Any = field(init=False, repr=False)
|
|
138
|
+
_aliased_annotation_relations: typing.Tuple[AliasedAnnotationRelation] = field(
|
|
139
|
+
init=False, repr=False
|
|
140
|
+
)
|
|
141
|
+
_aliased_annotation_attributes: typing.Dict[str, Mapped[typing.Any]] = field(
|
|
142
|
+
init=False, repr=False
|
|
143
|
+
)
|
|
42
144
|
|
|
43
145
|
def __bool__(self) -> bool:
|
|
44
146
|
return bool(self.condition)
|
|
45
147
|
|
|
46
148
|
def __post_init__(self) -> None:
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
149
|
+
if not (source := self.condition):
|
|
150
|
+
return
|
|
151
|
+
root = ast.parse(source, mode="eval")
|
|
152
|
+
_validate_expression(root, source, valid_eval_names=self.valid_eval_names)
|
|
153
|
+
source, aliased_annotation_relations = _apply_eval_aliasing(source)
|
|
154
|
+
root = ast.parse(source, mode="eval")
|
|
155
|
+
translated = _FilterTranslator(
|
|
156
|
+
source=source,
|
|
157
|
+
reserved_keywords=(
|
|
158
|
+
alias
|
|
159
|
+
for aliased_annotation in aliased_annotation_relations
|
|
160
|
+
for alias, _ in aliased_annotation.attributes
|
|
161
|
+
),
|
|
162
|
+
).visit(root)
|
|
52
163
|
ast.fix_missing_locations(translated)
|
|
53
164
|
compiled = compile(translated, filename="", mode="eval")
|
|
165
|
+
aliased_annotation_attributes = {
|
|
166
|
+
alias: attribute
|
|
167
|
+
for aliased_annotation in aliased_annotation_relations
|
|
168
|
+
for alias, attribute in aliased_annotation.attributes
|
|
169
|
+
}
|
|
54
170
|
object.__setattr__(self, "translated", translated)
|
|
55
171
|
object.__setattr__(self, "compiled", compiled)
|
|
56
|
-
object.__setattr__(self, "
|
|
172
|
+
object.__setattr__(self, "_aliased_annotation_relations", aliased_annotation_relations)
|
|
173
|
+
object.__setattr__(self, "_aliased_annotation_attributes", aliased_annotation_attributes)
|
|
57
174
|
|
|
58
|
-
def __call__(self,
|
|
59
|
-
|
|
60
|
-
|
|
175
|
+
def __call__(self, select: Select[typing.Any]) -> Select[typing.Any]:
|
|
176
|
+
if not self.condition:
|
|
177
|
+
return select
|
|
178
|
+
return self._join_aliased_relations(select).where(
|
|
61
179
|
eval(
|
|
62
180
|
self.compiled,
|
|
63
|
-
{
|
|
64
|
-
|
|
181
|
+
{
|
|
182
|
+
**_NAMES,
|
|
183
|
+
**self._aliased_annotation_attributes,
|
|
184
|
+
"not_": sqlalchemy.not_,
|
|
185
|
+
"and_": sqlalchemy.and_,
|
|
186
|
+
"or_": sqlalchemy.or_,
|
|
187
|
+
"cast": sqlalchemy.cast,
|
|
188
|
+
"Float": sqlalchemy.Float,
|
|
189
|
+
"String": sqlalchemy.String,
|
|
190
|
+
"TextContains": models.TextContains,
|
|
191
|
+
},
|
|
192
|
+
)
|
|
65
193
|
)
|
|
66
194
|
|
|
67
|
-
def to_dict(self) -> Dict[str, Any]:
|
|
195
|
+
def to_dict(self) -> typing.Dict[str, typing.Any]:
|
|
68
196
|
return {"condition": self.condition}
|
|
69
197
|
|
|
70
198
|
@classmethod
|
|
71
199
|
def from_dict(
|
|
72
200
|
cls,
|
|
73
|
-
obj: Mapping[str, Any],
|
|
74
|
-
|
|
75
|
-
valid_eval_names: Optional[Sequence[str]] = None,
|
|
201
|
+
obj: typing.Mapping[str, typing.Any],
|
|
202
|
+
valid_eval_names: typing.Optional[typing.Sequence[str]] = None,
|
|
76
203
|
) -> "SpanFilter":
|
|
77
204
|
return cls(
|
|
78
205
|
condition=obj.get("condition") or "",
|
|
79
|
-
evals=evals,
|
|
80
206
|
valid_eval_names=valid_eval_names,
|
|
81
207
|
)
|
|
82
208
|
|
|
209
|
+
def _join_aliased_relations(self, stmt: Select[typing.Any]) -> Select[typing.Any]:
|
|
210
|
+
"""
|
|
211
|
+
Joins the aliased relations to the given statement. E.g., for the filter condition:
|
|
212
|
+
|
|
213
|
+
```
|
|
214
|
+
evals["Hallucination"].score > 0.5
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
an alias (e.g., `A`) is generated for the `span_annotations` relation. An input statement
|
|
218
|
+
`select(Span)` is transformed to:
|
|
219
|
+
|
|
220
|
+
```
|
|
221
|
+
A = aliased(SpanAnnotation)
|
|
222
|
+
select(Span).join(A, onclause=(and_(Span.id == A.span_rowid, A.name == "Hallucination")))
|
|
223
|
+
```
|
|
224
|
+
"""
|
|
225
|
+
for eval_alias in self._aliased_annotation_relations:
|
|
226
|
+
eval_name = eval_alias.name
|
|
227
|
+
AliasedSpanAnnotation = eval_alias.table
|
|
228
|
+
stmt = stmt.join(
|
|
229
|
+
AliasedSpanAnnotation,
|
|
230
|
+
onclause=(
|
|
231
|
+
sqlalchemy.and_(
|
|
232
|
+
AliasedSpanAnnotation.span_rowid == models.Span.id,
|
|
233
|
+
AliasedSpanAnnotation.name == eval_name,
|
|
234
|
+
)
|
|
235
|
+
),
|
|
236
|
+
)
|
|
237
|
+
return stmt
|
|
83
238
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
239
|
+
|
|
240
|
+
@dataclass(frozen=True)
|
|
241
|
+
class Projector:
|
|
242
|
+
expression: str
|
|
243
|
+
translated: ast.Expression = field(init=False, repr=False)
|
|
244
|
+
compiled: typing.Any = field(init=False, repr=False)
|
|
245
|
+
|
|
246
|
+
def __post_init__(self) -> None:
|
|
247
|
+
if not (source := self.expression):
|
|
248
|
+
raise ValueError("missing expression")
|
|
249
|
+
root = ast.parse(source, mode="eval")
|
|
250
|
+
translated = _ProjectionTranslator(source).visit(root)
|
|
251
|
+
ast.fix_missing_locations(translated)
|
|
252
|
+
compiled = compile(translated, filename="", mode="eval")
|
|
253
|
+
object.__setattr__(self, "translated", translated)
|
|
254
|
+
object.__setattr__(self, "compiled", compiled)
|
|
255
|
+
|
|
256
|
+
def __call__(self) -> sqlalchemy.SQLColumnExpression[typing.Any]:
|
|
257
|
+
return typing.cast(
|
|
258
|
+
sqlalchemy.SQLColumnExpression[typing.Any],
|
|
259
|
+
eval(self.compiled, {**_NAMES}),
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _is_string_constant(node: typing.Any) -> TypeGuard[ast.Constant]:
|
|
264
|
+
return isinstance(node, ast.Constant) and isinstance(node.value, str)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _is_float_constant(node: typing.Any) -> TypeGuard[ast.Constant]:
|
|
268
|
+
return isinstance(node, ast.Constant) and isinstance(node.value, typing.SupportsFloat)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _is_string_attribute(node: typing.Any) -> TypeGuard[ast.Call]:
|
|
272
|
+
return (
|
|
273
|
+
isinstance(node, ast.Call)
|
|
274
|
+
and isinstance(func := node.func, ast.Attribute)
|
|
275
|
+
and func.attr == "as_string"
|
|
276
|
+
and isinstance(value := func.value, ast.Subscript)
|
|
277
|
+
and isinstance(name := value.value, ast.Name)
|
|
278
|
+
and name.id == "attributes"
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _is_float_attribute(node: typing.Any) -> TypeGuard[ast.Call]:
|
|
283
|
+
return (
|
|
284
|
+
isinstance(node, ast.Call)
|
|
285
|
+
and isinstance(func := node.func, ast.Attribute)
|
|
286
|
+
and func.attr == "as_float"
|
|
287
|
+
and isinstance(value := func.value, ast.Subscript)
|
|
288
|
+
and isinstance(name := value.value, ast.Name)
|
|
289
|
+
and name.id == "attributes"
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _as_string_attribute(node: typing.Union[ast.Subscript, ast.Call]) -> ast.Call:
|
|
294
|
+
if isinstance(node, ast.Call):
|
|
295
|
+
value = typing.cast(ast.Attribute, node.func).value
|
|
296
|
+
elif isinstance(node, ast.Subscript):
|
|
297
|
+
value = node
|
|
298
|
+
else:
|
|
299
|
+
assert_never(node)
|
|
300
|
+
return ast.Call(
|
|
301
|
+
func=ast.Attribute(
|
|
302
|
+
value=value,
|
|
303
|
+
attr="as_string",
|
|
304
|
+
ctx=ast.Load(),
|
|
99
305
|
),
|
|
100
|
-
|
|
101
|
-
|
|
306
|
+
args=[],
|
|
307
|
+
keywords=[],
|
|
102
308
|
)
|
|
103
309
|
|
|
104
310
|
|
|
105
|
-
def
|
|
106
|
-
|
|
107
|
-
|
|
311
|
+
def _as_float_attribute(node: typing.Union[ast.Subscript, ast.Call]) -> ast.Call:
|
|
312
|
+
if isinstance(node, ast.Call):
|
|
313
|
+
value = typing.cast(ast.Attribute, node.func).value
|
|
314
|
+
elif isinstance(node, ast.Subscript):
|
|
315
|
+
value = node
|
|
316
|
+
else:
|
|
317
|
+
assert_never(node)
|
|
318
|
+
return ast.Call(
|
|
319
|
+
func=ast.Attribute(
|
|
320
|
+
value=value,
|
|
321
|
+
attr="as_float",
|
|
322
|
+
ctx=ast.Load(),
|
|
323
|
+
),
|
|
324
|
+
args=[],
|
|
325
|
+
keywords=[],
|
|
326
|
+
)
|
|
108
327
|
|
|
109
328
|
|
|
110
|
-
def
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
329
|
+
def _is_cast(
|
|
330
|
+
node: typing.Any,
|
|
331
|
+
type_: typing.Optional[typing.Literal["Float", "String"]] = None,
|
|
332
|
+
) -> TypeGuard[ast.Call]:
|
|
333
|
+
return (
|
|
334
|
+
isinstance(node, ast.Call)
|
|
335
|
+
and isinstance(func := node.func, ast.Name)
|
|
336
|
+
and func.id == "cast"
|
|
337
|
+
and len(node.args) == 2
|
|
338
|
+
and isinstance(name := node.args[1], ast.Name)
|
|
339
|
+
and (not type_ or name.id == type_)
|
|
117
340
|
)
|
|
118
|
-
return _replace_none_with_missing(ast.parse(expression, mode="eval").body, as_str)
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def _allowed_replacements() -> Iterator[Tuple[str, ast.expr]]:
|
|
122
|
-
for source_segment, ast_replacement in {
|
|
123
|
-
"name": _ast_replacement("span.name"),
|
|
124
|
-
"status_code": _ast_replacement("span.status_code"),
|
|
125
|
-
"span_kind": _ast_replacement("span.span_kind"),
|
|
126
|
-
"parent_id": _ast_replacement("span.parent_id"),
|
|
127
|
-
}.items():
|
|
128
|
-
yield source_segment, ast_replacement
|
|
129
|
-
yield "span." + source_segment, ast_replacement
|
|
130
|
-
|
|
131
|
-
for source_segment, ast_replacement in {
|
|
132
|
-
"span_id": _ast_replacement("span.context.span_id"),
|
|
133
|
-
"trace_id": _ast_replacement("span.context.trace_id"),
|
|
134
|
-
}.items():
|
|
135
|
-
yield source_segment, ast_replacement
|
|
136
|
-
yield "context." + source_segment, ast_replacement
|
|
137
|
-
yield "span.context." + source_segment, ast_replacement
|
|
138
|
-
|
|
139
|
-
for field_name in (
|
|
140
|
-
getattr(klass, attr)
|
|
141
|
-
for name in dir(trace)
|
|
142
|
-
if name.endswith("Attributes") and inspect.isclass(klass := getattr(trace, name))
|
|
143
|
-
for attr in dir(klass)
|
|
144
|
-
if attr.isupper()
|
|
145
|
-
):
|
|
146
|
-
source_segment = field_name
|
|
147
|
-
ast_replacement = _ast_replacement(f"span.attributes.get('{field_name}')")
|
|
148
|
-
yield source_segment, ast_replacement
|
|
149
|
-
yield "attributes." + source_segment, ast_replacement
|
|
150
|
-
yield "span.attributes." + source_segment, ast_replacement
|
|
151
341
|
|
|
152
|
-
for computed_attribute in ComputedAttributes:
|
|
153
|
-
source_segment = computed_attribute.value
|
|
154
|
-
ast_replacement = _ast_replacement(f"span.get_computed_value('{source_segment}')")
|
|
155
|
-
yield source_segment, ast_replacement
|
|
156
342
|
|
|
343
|
+
def _remove_cast(node: typing.Any) -> typing.Any:
|
|
344
|
+
return node.args[0] if _is_cast(node) else node
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def _cast_as(
|
|
348
|
+
type_: typing.Literal["Float", "String"],
|
|
349
|
+
node: typing.Any,
|
|
350
|
+
) -> ast.Call:
|
|
351
|
+
if type_ == "Float" and (_is_subscript(node, "attributes") or _is_string_attribute(node)):
|
|
352
|
+
return _as_float_attribute(node)
|
|
353
|
+
if type_ == "String" and (_is_subscript(node, "attributes") or _is_float_attribute(node)):
|
|
354
|
+
return _as_string_attribute(node)
|
|
355
|
+
return ast.Call(
|
|
356
|
+
func=ast.Name(id="cast", ctx=ast.Load()),
|
|
357
|
+
args=[
|
|
358
|
+
_remove_cast(node),
|
|
359
|
+
ast.Name(id=type_, ctx=ast.Load()),
|
|
360
|
+
],
|
|
361
|
+
keywords=[],
|
|
362
|
+
)
|
|
157
363
|
|
|
158
|
-
class _Translator(ast.NodeTransformer):
|
|
159
|
-
_allowed_fields: Mapping[str, ast.expr] = dict(_allowed_replacements())
|
|
160
364
|
|
|
161
|
-
|
|
365
|
+
def _is_string(node: typing.Any) -> TypeGuard[ast.Call]:
|
|
366
|
+
return (
|
|
367
|
+
isinstance(node, ast.Name)
|
|
368
|
+
and node.id in _STRING_NAMES
|
|
369
|
+
or _is_cast(node, "String")
|
|
370
|
+
or _is_string_constant(node)
|
|
371
|
+
or _is_string_attribute(node)
|
|
372
|
+
or isinstance(node, (ast.List, ast.Tuple))
|
|
373
|
+
and len(node.elts) > 0
|
|
374
|
+
and _is_string(node.elts[0])
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def _is_float(node: typing.Any) -> TypeGuard[ast.Call]:
|
|
379
|
+
return (
|
|
380
|
+
isinstance(node, ast.Name)
|
|
381
|
+
and node.id in _FLOAT_NAMES
|
|
382
|
+
or _is_cast(node, "Float")
|
|
383
|
+
or _is_float_constant(node)
|
|
384
|
+
or _is_float_attribute(node)
|
|
385
|
+
or isinstance(node, (ast.List, ast.Tuple))
|
|
386
|
+
and len(node.elts) > 0
|
|
387
|
+
and _is_float(node.elts[0])
|
|
388
|
+
or isinstance(node, ast.BinOp)
|
|
389
|
+
and (not isinstance(node.op, ast.Add) or (_is_float(node.left) or _is_float(node.right)))
|
|
390
|
+
or isinstance(node, ast.UnaryOp)
|
|
391
|
+
and isinstance(node.op, (ast.USub, ast.UAdd))
|
|
392
|
+
)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
class _ProjectionTranslator(ast.NodeTransformer):
|
|
396
|
+
def __init__(self, source: str, reserved_keywords: typing.Iterable[str] = ()) -> None:
|
|
162
397
|
# Regarding the need for `source: str` for getting source segments:
|
|
163
398
|
# In Python 3.8, we have to use `ast.get_source_segment(source, node)`.
|
|
164
399
|
# In Python 3.9+, we can use `ast.unparse(node)` (no need for `source`).
|
|
165
400
|
self._source = source
|
|
401
|
+
self._reserved_keywords = frozenset(
|
|
402
|
+
chain(
|
|
403
|
+
reserved_keywords,
|
|
404
|
+
_STRING_NAMES.keys(),
|
|
405
|
+
_FLOAT_NAMES.keys(),
|
|
406
|
+
)
|
|
407
|
+
)
|
|
166
408
|
|
|
167
|
-
def
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
def visit_Attribute(self, node: ast.Attribute) -> Any:
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
return
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
source_segment
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
def
|
|
189
|
-
|
|
409
|
+
def visit_generic(self, node: ast.AST) -> typing.Any:
|
|
410
|
+
raise SyntaxError(f"invalid expression: {ast.get_source_segment(self._source, node)}")
|
|
411
|
+
|
|
412
|
+
def visit_Expression(self, node: ast.Expression) -> typing.Any:
|
|
413
|
+
return ast.Expression(body=self.visit(node.body))
|
|
414
|
+
|
|
415
|
+
def visit_Attribute(self, node: ast.Attribute) -> typing.Any:
|
|
416
|
+
source_segment = typing.cast(str, ast.get_source_segment(self._source, node))
|
|
417
|
+
if replacement := _BACKWARD_COMPATIBILITY_REPLACEMENTS.get(source_segment):
|
|
418
|
+
return ast.Name(id=replacement, ctx=ast.Load())
|
|
419
|
+
if (keys := _get_attribute_keys_list(node)) is not None:
|
|
420
|
+
return _as_attribute(keys)
|
|
421
|
+
raise SyntaxError(f"invalid expression: {source_segment}")
|
|
422
|
+
|
|
423
|
+
def visit_Name(self, node: ast.Name) -> typing.Any:
|
|
424
|
+
source_segment = typing.cast(str, ast.get_source_segment(self._source, node))
|
|
425
|
+
if source_segment in self._reserved_keywords:
|
|
426
|
+
return node
|
|
427
|
+
name = source_segment
|
|
428
|
+
return _as_attribute([ast.Constant(value=name, kind=None)])
|
|
429
|
+
|
|
430
|
+
def visit_Subscript(self, node: ast.Subscript) -> typing.Any:
|
|
431
|
+
if (keys := _get_attribute_keys_list(node)) is not None:
|
|
432
|
+
return _as_attribute(keys)
|
|
433
|
+
raise SyntaxError(f"invalid expression: {ast.get_source_segment(self._source, node)}")
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
class _FilterTranslator(_ProjectionTranslator):
|
|
437
|
+
def visit_Compare(self, node: ast.Compare) -> typing.Any:
|
|
438
|
+
if len(node.comparators) > 1:
|
|
439
|
+
args: typing.List[typing.Any] = []
|
|
440
|
+
left = node.left
|
|
441
|
+
for i, (op, comparator) in enumerate(zip(node.ops, node.comparators)):
|
|
442
|
+
args.append(self.visit(ast.Compare(left=left, ops=[op], comparators=[comparator])))
|
|
443
|
+
left = comparator
|
|
444
|
+
return ast.Call(func=ast.Name(id="and_", ctx=ast.Load()), args=args, keywords=[])
|
|
445
|
+
left, op, right = self.visit(node.left), node.ops[0], self.visit(node.comparators[0])
|
|
446
|
+
if _is_subscript(left, "attributes"):
|
|
447
|
+
left = _cast_as("String", left)
|
|
448
|
+
if _is_subscript(right, "attributes"):
|
|
449
|
+
right = _cast_as("String", right)
|
|
450
|
+
if _is_float(left) and not _is_float(right):
|
|
451
|
+
right = _cast_as("Float", right)
|
|
452
|
+
elif not _is_float(left) and _is_float(right):
|
|
453
|
+
left = _cast_as("Float", left)
|
|
454
|
+
if isinstance(op, (ast.In, ast.NotIn)):
|
|
455
|
+
if (
|
|
456
|
+
_is_string_attribute(right)
|
|
457
|
+
or (typing.cast(str, ast.get_source_segment(self._source, right))) in _NAMES
|
|
458
|
+
):
|
|
459
|
+
call = ast.Call(
|
|
460
|
+
func=ast.Name(id="TextContains", ctx=ast.Load()),
|
|
461
|
+
args=[right, left],
|
|
462
|
+
keywords=[],
|
|
463
|
+
)
|
|
464
|
+
if isinstance(op, ast.NotIn):
|
|
465
|
+
call = ast.Call(
|
|
466
|
+
func=ast.Name(id="not_", ctx=ast.Load()), args=[call], keywords=[]
|
|
467
|
+
)
|
|
468
|
+
return call
|
|
469
|
+
elif isinstance(right, (ast.List, ast.Tuple)):
|
|
470
|
+
attr = "in_" if isinstance(op, ast.In) else "not_in"
|
|
471
|
+
return ast.Call(
|
|
472
|
+
func=ast.Attribute(value=left, attr=attr, ctx=ast.Load()),
|
|
473
|
+
args=[right],
|
|
474
|
+
keywords=[],
|
|
475
|
+
)
|
|
476
|
+
else:
|
|
477
|
+
raise SyntaxError(f"invalid expression: {ast.get_source_segment(self._source, op)}")
|
|
478
|
+
if isinstance(op, ast.Is):
|
|
479
|
+
op = ast.Eq()
|
|
480
|
+
elif isinstance(op, ast.IsNot):
|
|
481
|
+
op = ast.NotEq()
|
|
482
|
+
return ast.Compare(left=left, ops=[op], comparators=[right])
|
|
483
|
+
|
|
484
|
+
def visit_BoolOp(self, node: ast.BoolOp) -> typing.Any:
|
|
485
|
+
if isinstance(node.op, ast.And):
|
|
486
|
+
func = ast.Name(id="and_", ctx=ast.Load())
|
|
487
|
+
elif isinstance(node.op, ast.Or):
|
|
488
|
+
func = ast.Name(id="or_", ctx=ast.Load())
|
|
489
|
+
else:
|
|
490
|
+
raise SyntaxError(f"invalid expression: {ast.get_source_segment(self._source, node)}")
|
|
491
|
+
args = [self.visit(value) for value in node.values]
|
|
492
|
+
return ast.Call(func=func, args=args, keywords=[])
|
|
493
|
+
|
|
494
|
+
def visit_UnaryOp(self, node: ast.UnaryOp) -> typing.Any:
|
|
495
|
+
operand = self.visit(node.operand)
|
|
496
|
+
if isinstance(node.op, ast.Not):
|
|
497
|
+
return ast.Call(
|
|
498
|
+
func=ast.Name(id="not_", ctx=ast.Load()),
|
|
499
|
+
args=[operand],
|
|
500
|
+
keywords=[],
|
|
501
|
+
)
|
|
502
|
+
node = ast.UnaryOp(op=node.op, operand=operand)
|
|
503
|
+
if isinstance(node.op, (ast.USub, ast.UAdd)):
|
|
504
|
+
if not _is_float(node.operand):
|
|
505
|
+
operand = _cast_as("Float", node.operand)
|
|
506
|
+
return ast.UnaryOp(op=ast.USub(), operand=operand)
|
|
507
|
+
return node
|
|
508
|
+
return node
|
|
509
|
+
|
|
510
|
+
def visit_BinOp(self, node: ast.BinOp) -> typing.Any:
|
|
511
|
+
left, op, right = self.visit(node.left), node.op, self.visit(node.right)
|
|
512
|
+
if _is_subscript(left, "attributes"):
|
|
513
|
+
left = _cast_as("String", left)
|
|
514
|
+
if _is_subscript(right, "attributes"):
|
|
515
|
+
right = _cast_as("String", right)
|
|
516
|
+
type_: typing.Literal["Float", "String"] = "String"
|
|
517
|
+
if not isinstance(op, ast.Add) or _is_float(left) or _is_float(right):
|
|
518
|
+
type_ = "Float"
|
|
519
|
+
if not _is_float(left):
|
|
520
|
+
left = _cast_as(type_, left)
|
|
521
|
+
if not _is_float(right):
|
|
522
|
+
right = _cast_as(type_, right)
|
|
523
|
+
return ast.BinOp(left=left, op=op, right=right)
|
|
524
|
+
return _cast_as(type_, ast.BinOp(left=left, op=op, right=right))
|
|
525
|
+
|
|
526
|
+
def visit_Call(self, node: ast.Call) -> typing.Any:
    """Allow only single-argument ``str``/``float``/``int`` casts.

    The call is replaced by an explicit String or Float cast of its visited
    argument (or by the argument itself when it already has that type).
    Anything else raises SyntaxError.
    """
    segment = typing.cast(str, ast.get_source_segment(self._source, node))
    if len(node.args) != 1:
        raise SyntaxError(f"invalid expression: {segment}")
    func = node.func
    if not (isinstance(func, ast.Name) and func.id in ("str", "float", "int")):
        raise SyntaxError(
            f"invalid expression: {ast.get_source_segment(self._source, node.func)}"
        )
    visited_arg = self.visit(node.args[0])
    if func.id in ("float", "int") and not _is_float(visited_arg):
        return _cast_as("Float", visited_arg)
    if func.id == "str" and not _is_string(visited_arg):
        return _cast_as("String", visited_arg)
    return visited_arg
|
|
190
540
|
|
|
191
541
|
|
|
192
542
|
def _validate_expression(
|
|
193
543
|
expression: ast.Expression,
|
|
194
544
|
source: str,
|
|
195
|
-
valid_eval_names: Optional[Sequence[str]] = None,
|
|
196
|
-
valid_eval_attributes: Tuple[str, ...] = _VALID_EVAL_ATTRIBUTES,
|
|
545
|
+
valid_eval_names: typing.Optional[typing.Sequence[str]] = None,
|
|
546
|
+
valid_eval_attributes: typing.Tuple[str, ...] = _VALID_EVAL_ATTRIBUTES,
|
|
197
547
|
) -> None:
|
|
198
548
|
"""
|
|
199
549
|
Validate primarily the structural (i.e. not semantic) characteristics of an
|
|
@@ -208,19 +558,25 @@ def _validate_expression(
|
|
|
208
558
|
# In Python 3.8, we have to use `ast.get_source_segment(source, node)`.
|
|
209
559
|
# In Python 3.9+, we can use `ast.unparse(node)` (no need for `source`).
|
|
210
560
|
if not isinstance(expression, ast.Expression):
|
|
211
|
-
raise SyntaxError(f"invalid expression: {source}")
|
|
561
|
+
raise SyntaxError(f"invalid expression: {source}")
|
|
212
562
|
for i, node in enumerate(ast.walk(expression.body)):
|
|
213
563
|
if i == 0:
|
|
214
|
-
if
|
|
564
|
+
if (
|
|
565
|
+
isinstance(node, (ast.BoolOp, ast.Compare))
|
|
566
|
+
or isinstance(node, ast.UnaryOp)
|
|
567
|
+
and isinstance(node.op, ast.Not)
|
|
568
|
+
):
|
|
215
569
|
continue
|
|
216
|
-
elif
|
|
570
|
+
elif (
|
|
571
|
+
_is_subscript(node, "metadata") or _is_subscript(node, "attributes")
|
|
572
|
+
) and _get_attribute_keys_list(node) is not None:
|
|
217
573
|
continue
|
|
218
|
-
elif _is_eval(node):
|
|
574
|
+
elif _is_eval(node) and _get_subscript_key(node) is not None:
|
|
219
575
|
# e.g. `evals["name"]`
|
|
220
576
|
if not (eval_name := _get_subscript_key(node)) or (
|
|
221
577
|
valid_eval_names is not None and eval_name not in valid_eval_names
|
|
222
578
|
):
|
|
223
|
-
source_segment = cast(str, ast.get_source_segment(source, node))
|
|
579
|
+
source_segment = typing.cast(str, ast.get_source_segment(source, node))
|
|
224
580
|
if eval_name and valid_eval_names:
|
|
225
581
|
# suggest a valid eval name most similar to the one given
|
|
226
582
|
choice, score = _find_best_match(eval_name, valid_eval_names)
|
|
@@ -240,7 +596,7 @@ def _validate_expression(
|
|
|
240
596
|
elif isinstance(node, ast.Attribute) and _is_eval(node.value):
|
|
241
597
|
# e.g. `evals["name"].score`
|
|
242
598
|
if (attr := node.attr) not in valid_eval_attributes:
|
|
243
|
-
source_segment = cast(str, ast.get_source_segment(source, node))
|
|
599
|
+
source_segment = typing.cast(str, ast.get_source_segment(source, node))
|
|
244
600
|
# suggest a valid attribute most similar to the one given
|
|
245
601
|
choice, score = _find_best_match(attr, valid_eval_attributes)
|
|
246
602
|
if choice and score > 0.75: # arbitrary threshold
|
|
@@ -256,6 +612,13 @@ def _validate_expression(
|
|
|
256
612
|
else ""
|
|
257
613
|
)
|
|
258
614
|
continue
|
|
615
|
+
elif (
|
|
616
|
+
isinstance(node, ast.Call)
|
|
617
|
+
and isinstance(node.func, ast.Name)
|
|
618
|
+
and node.func.id in ("str", "float", "int")
|
|
619
|
+
):
|
|
620
|
+
# allow type casting functions
|
|
621
|
+
continue
|
|
259
622
|
elif isinstance(
|
|
260
623
|
node,
|
|
261
624
|
(
|
|
@@ -281,41 +644,24 @@ def _validate_expression(
|
|
|
281
644
|
),
|
|
282
645
|
):
|
|
283
646
|
continue
|
|
284
|
-
source_segment = cast(str, ast.get_source_segment(source, node))
|
|
285
|
-
raise SyntaxError(f"invalid expression: {source_segment}")
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
def
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
f" ).result"
|
|
299
|
-
f").HasField('{attr}') "
|
|
300
|
-
f"else _MISSING"
|
|
301
|
-
)
|
|
302
|
-
return ast.parse(source, mode="eval").body
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
def _ast_metadata_subscript(key: str) -> ast.expr:
|
|
306
|
-
source = (
|
|
307
|
-
f"_MISSING if ("
|
|
308
|
-
f" _MD := span.attributes.get('metadata')"
|
|
309
|
-
f") is None else ("
|
|
310
|
-
f" _MISSING if not hasattr(_MD, 'get') or ("
|
|
311
|
-
f" _VALUE := _MD.get('{key}')"
|
|
312
|
-
f" ) is None else _VALUE"
|
|
313
|
-
f")"
|
|
647
|
+
source_segment = typing.cast(str, ast.get_source_segment(source, node))
|
|
648
|
+
raise SyntaxError(f"invalid expression: {source_segment}")
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def _as_attribute(
|
|
652
|
+
keys: typing.List[ast.Constant],
|
|
653
|
+
# as_float: typing.Optional[bool] = None,
|
|
654
|
+
) -> ast.Subscript:
|
|
655
|
+
return ast.Subscript(
|
|
656
|
+
value=ast.Name(id="attributes", ctx=ast.Load()),
|
|
657
|
+
slice=ast.List(elts=keys, ctx=ast.Load())
|
|
658
|
+
if sys.version_info >= (3, 9)
|
|
659
|
+
else ast.Index(value=ast.List(elts=keys, ctx=ast.Load())),
|
|
660
|
+
ctx=ast.Load(),
|
|
314
661
|
)
|
|
315
|
-
return ast.parse(source, mode="eval").body
|
|
316
662
|
|
|
317
663
|
|
|
318
|
-
def _is_eval(node: Any) -> TypeGuard[ast.Subscript]:
|
|
664
|
+
def _is_eval(node: typing.Any) -> TypeGuard[ast.Subscript]:
|
|
319
665
|
# e.g. `evals["name"]`
|
|
320
666
|
return (
|
|
321
667
|
isinstance(node, ast.Subscript)
|
|
@@ -324,35 +670,98 @@ def _is_eval(node: Any) -> TypeGuard[ast.Subscript]:
|
|
|
324
670
|
)
|
|
325
671
|
|
|
326
672
|
|
|
327
|
-
def
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
673
|
+
def _is_subscript(
|
|
674
|
+
node: typing.Any,
|
|
675
|
+
id_: typing.Literal["attributes", "metadata"],
|
|
676
|
+
) -> TypeGuard[ast.Subscript]:
|
|
677
|
+
# e.g. `attributes["key"]`
|
|
678
|
+
# e.g. `attributes[["a", "b.c", "d"]]`
|
|
679
|
+
# e.g. `attributes["a"]["b.c"]["d"]`
|
|
680
|
+
while isinstance(node, ast.Subscript):
|
|
681
|
+
node = node.value
|
|
682
|
+
if isinstance(node, ast.Name) and node.id == id_:
|
|
683
|
+
return True
|
|
684
|
+
return False
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
def _get_attribute_keys_list(
    node: typing.Any,
) -> typing.Optional[typing.List[ast.Constant]]:
    """Flatten an attribute/metadata lookup chain into its key constants.

    e.g. ``attributes["key"]`` -> ``["key"]``;
    ``attributes["a"]["b.c"][["d"]]`` -> ``["a", "b.c", "d"]``;
    ``metadata["key"]`` -> ``["metadata", "key"]`` (the ``metadata`` root is
    prepended as an extra key).  Returns None when the node is not a
    recognizable key chain (e.g. a non-constant subscript, or a chain whose
    first key is not a string).
    """
    collected: typing.List[ast.Constant] = []
    if isinstance(node, ast.Attribute):
        # Dotted access: gather each attribute name, then the root name.
        while isinstance(node, ast.Attribute):
            collected.append(ast.Constant(value=node.attr, kind=None))
            node = node.value
        if isinstance(node, ast.Name):
            collected.append(ast.Constant(value=node.id, kind=None))
            return list(reversed(collected))
    elif isinstance(node, ast.Subscript):
        # Subscript access: gather the keys of each subscript, innermost last.
        while isinstance(node, ast.Subscript):
            sub_keys = _get_subscript_keys_list(node)
            if not sub_keys:
                return None
            collected.extend(reversed(sub_keys))
            node = node.value
        if isinstance(node, ast.Name):
            # The first (outermost-left) key must be a string.
            if not isinstance(collected[-1].value, str):
                return None
            if node.id == "metadata":
                collected.append(ast.Constant(value="metadata", kind=None))
            return list(reversed(collected))
    return None
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
def _get_subscript_keys_list(
|
|
720
|
+
node: ast.Subscript,
|
|
721
|
+
) -> typing.Optional[typing.List[ast.Constant]]:
|
|
722
|
+
if sys.version_info < (3, 9):
|
|
723
|
+
# Note that `ast.Index` is deprecated in Python 3.9+, but is necessary
|
|
724
|
+
# for Python 3.8 as part of `ast.Subscript`.
|
|
725
|
+
if not isinstance(node.slice, ast.Index):
|
|
726
|
+
return None
|
|
727
|
+
child = node.slice.value
|
|
728
|
+
else:
|
|
729
|
+
child = node.slice
|
|
730
|
+
if isinstance(child, ast.Constant):
|
|
731
|
+
if not isinstance(child.value, (str, int)) or isinstance(child.value, bool):
|
|
732
|
+
return None
|
|
733
|
+
return [child]
|
|
734
|
+
if not (
|
|
735
|
+
isinstance(child, ast.List)
|
|
736
|
+
and (elts := child.elts)
|
|
737
|
+
and all(
|
|
738
|
+
isinstance(elt, ast.Constant)
|
|
739
|
+
and isinstance(elt.value, (str, int))
|
|
740
|
+
and not isinstance(elt.value, bool)
|
|
741
|
+
for elt in elts
|
|
742
|
+
)
|
|
743
|
+
):
|
|
744
|
+
return None
|
|
745
|
+
return [typing.cast(ast.Constant, elt) for elt in elts]
|
|
334
746
|
|
|
335
747
|
|
|
336
|
-
def _get_subscript_key(
|
|
748
|
+
def _get_subscript_key(
|
|
749
|
+
node: ast.Subscript,
|
|
750
|
+
) -> typing.Optional[str]:
|
|
337
751
|
if sys.version_info < (3, 9):
|
|
338
752
|
# Note that `ast.Index` is deprecated in Python 3.9+, but is necessary
|
|
339
753
|
# for Python 3.8 as part of `ast.Subscript`.
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
return
|
|
348
|
-
eval_name
|
|
349
|
-
if isinstance(node_slice := node.slice, ast.Constant)
|
|
350
|
-
and isinstance(eval_name := node_slice.value, str)
|
|
351
|
-
else None
|
|
352
|
-
)
|
|
754
|
+
if not isinstance(node.slice, ast.Index):
|
|
755
|
+
return None
|
|
756
|
+
child = node.slice.value
|
|
757
|
+
else:
|
|
758
|
+
child = node.slice
|
|
759
|
+
if not (isinstance(child, ast.Constant) and isinstance(child.value, str)):
|
|
760
|
+
return None
|
|
761
|
+
return child.value
|
|
353
762
|
|
|
354
763
|
|
|
355
|
-
def _disjunction(choices: Sequence[str]) -> str:
|
|
764
|
+
def _disjunction(choices: typing.Sequence[str]) -> str:
|
|
356
765
|
"""
|
|
357
766
|
E.g. `["a", "b", "c"]` becomes `"one of a, b, or c"`
|
|
358
767
|
"""
|
|
@@ -365,10 +774,69 @@ def _disjunction(choices: Sequence[str]) -> str:
|
|
|
365
774
|
return f"one of {', '.join(choices[:-1])}, or {choices[-1]}"
|
|
366
775
|
|
|
367
776
|
|
|
368
|
-
def _find_best_match(
|
|
777
|
+
def _find_best_match(
|
|
778
|
+
source: str, choices: typing.Iterable[str]
|
|
779
|
+
) -> typing.Tuple[typing.Optional[str], float]:
|
|
369
780
|
best_choice, best_score = None, 0.0
|
|
370
781
|
for choice in choices:
|
|
371
782
|
score = SequenceMatcher(None, source, choice).ratio()
|
|
372
783
|
if score > best_score:
|
|
373
784
|
best_choice, best_score = choice, score
|
|
374
785
|
return best_choice, best_score
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
def _apply_eval_aliasing(
    source: str,
) -> typing.Tuple[
    str,
    typing.Tuple[AliasedAnnotationRelation, ...],
]:
    """Replace each ``evals[<eval-name>].<attribute>`` with an alias name and
    return the rewritten source along with the aliased relations (one per
    distinct eval name, in order of first appearance).

    Example:

    input:

    ```
    evals['Hallucination'].label == 'correct' or evals['Hallucination'].score < 0.5
    ```

    output:

    ```
    span_annotation_0_label_123 == 'correct' or span_annotation_0_score_456 < 0.5
    ```
    """
    aliases: typing.Dict[EvalName, AliasedAnnotationRelation] = {}
    for expression, name, attribute in _parse_eval_expressions_and_names(source):
        relation = aliases.get(name)
        if relation is None:
            # First sighting of this eval name: allocate the next index.
            relation = AliasedAnnotationRelation(index=len(aliases), name=name)
            aliases[name] = relation
        source = source.replace(expression, relation.attribute_alias(attribute))
    return source, tuple(aliases.values())
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
def _parse_eval_expressions_and_names(
    source: str,
) -> typing.Iterator[typing.Tuple[EvalExpression, EvalName, EvalAttribute]]:
    """Yield every evaluation expression found in a filter condition.

    Scans ``source`` with ``EVAL_EXPRESSION_PATTERN`` for expressions of the
    form:

    ```
    evals["<eval-name>"].<attribute>
    ```

    and yields ``(full expression, unquoted eval name, attribute)`` tuples.
    """
    for match in EVAL_EXPRESSION_PATTERN.finditer(source):
        expression, quoted_name, attribute_name = match.groups()
        # Strip the surrounding quote characters from the captured name.
        yield (
            expression,
            quoted_name[1:-1],
            typing.cast(EvalAttribute, attribute_name),
        )
|