arize-phoenix 3.25.0__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic; see the registry's advisory page for more details.

Files changed (113)
  1. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/METADATA +26 -4
  2. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/RECORD +80 -75
  3. phoenix/__init__.py +9 -5
  4. phoenix/config.py +109 -53
  5. phoenix/datetime_utils.py +18 -1
  6. phoenix/db/README.md +25 -0
  7. phoenix/db/__init__.py +4 -0
  8. phoenix/db/alembic.ini +119 -0
  9. phoenix/db/bulk_inserter.py +206 -0
  10. phoenix/db/engines.py +152 -0
  11. phoenix/db/helpers.py +47 -0
  12. phoenix/db/insertion/evaluation.py +209 -0
  13. phoenix/db/insertion/helpers.py +51 -0
  14. phoenix/db/insertion/span.py +142 -0
  15. phoenix/db/migrate.py +71 -0
  16. phoenix/db/migrations/env.py +121 -0
  17. phoenix/db/migrations/script.py.mako +26 -0
  18. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  19. phoenix/db/models.py +371 -0
  20. phoenix/exceptions.py +5 -1
  21. phoenix/server/api/context.py +40 -3
  22. phoenix/server/api/dataloaders/__init__.py +97 -0
  23. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  24. phoenix/server/api/dataloaders/cache/two_tier_cache.py +67 -0
  25. phoenix/server/api/dataloaders/document_evaluation_summaries.py +152 -0
  26. phoenix/server/api/dataloaders/document_evaluations.py +37 -0
  27. phoenix/server/api/dataloaders/document_retrieval_metrics.py +98 -0
  28. phoenix/server/api/dataloaders/evaluation_summaries.py +151 -0
  29. phoenix/server/api/dataloaders/latency_ms_quantile.py +198 -0
  30. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +93 -0
  31. phoenix/server/api/dataloaders/record_counts.py +125 -0
  32. phoenix/server/api/dataloaders/span_descendants.py +64 -0
  33. phoenix/server/api/dataloaders/span_evaluations.py +37 -0
  34. phoenix/server/api/dataloaders/token_counts.py +138 -0
  35. phoenix/server/api/dataloaders/trace_evaluations.py +37 -0
  36. phoenix/server/api/input_types/SpanSort.py +138 -68
  37. phoenix/server/api/routers/v1/__init__.py +11 -0
  38. phoenix/server/api/routers/v1/evaluations.py +275 -0
  39. phoenix/server/api/routers/v1/spans.py +126 -0
  40. phoenix/server/api/routers/v1/traces.py +82 -0
  41. phoenix/server/api/schema.py +112 -48
  42. phoenix/server/api/types/DocumentEvaluationSummary.py +1 -1
  43. phoenix/server/api/types/Evaluation.py +29 -12
  44. phoenix/server/api/types/EvaluationSummary.py +29 -44
  45. phoenix/server/api/types/MimeType.py +2 -2
  46. phoenix/server/api/types/Model.py +9 -9
  47. phoenix/server/api/types/Project.py +240 -171
  48. phoenix/server/api/types/Span.py +87 -131
  49. phoenix/server/api/types/Trace.py +29 -20
  50. phoenix/server/api/types/pagination.py +151 -10
  51. phoenix/server/app.py +263 -35
  52. phoenix/server/grpc_server.py +93 -0
  53. phoenix/server/main.py +75 -60
  54. phoenix/server/openapi/docs.py +218 -0
  55. phoenix/server/prometheus.py +23 -7
  56. phoenix/server/static/index.js +662 -643
  57. phoenix/server/telemetry.py +68 -0
  58. phoenix/services.py +4 -0
  59. phoenix/session/client.py +34 -30
  60. phoenix/session/data_extractor.py +8 -3
  61. phoenix/session/session.py +176 -155
  62. phoenix/settings.py +13 -0
  63. phoenix/trace/attributes.py +349 -0
  64. phoenix/trace/dsl/README.md +116 -0
  65. phoenix/trace/dsl/filter.py +660 -192
  66. phoenix/trace/dsl/helpers.py +24 -5
  67. phoenix/trace/dsl/query.py +562 -185
  68. phoenix/trace/fixtures.py +69 -7
  69. phoenix/trace/otel.py +44 -200
  70. phoenix/trace/schemas.py +14 -8
  71. phoenix/trace/span_evaluations.py +5 -2
  72. phoenix/utilities/__init__.py +0 -26
  73. phoenix/utilities/span_store.py +0 -23
  74. phoenix/version.py +1 -1
  75. phoenix/core/project.py +0 -773
  76. phoenix/core/traces.py +0 -96
  77. phoenix/datasets/dataset.py +0 -214
  78. phoenix/datasets/fixtures.py +0 -24
  79. phoenix/datasets/schema.py +0 -31
  80. phoenix/experimental/evals/__init__.py +0 -73
  81. phoenix/experimental/evals/evaluators.py +0 -413
  82. phoenix/experimental/evals/functions/__init__.py +0 -4
  83. phoenix/experimental/evals/functions/classify.py +0 -453
  84. phoenix/experimental/evals/functions/executor.py +0 -353
  85. phoenix/experimental/evals/functions/generate.py +0 -138
  86. phoenix/experimental/evals/functions/processing.py +0 -76
  87. phoenix/experimental/evals/models/__init__.py +0 -14
  88. phoenix/experimental/evals/models/anthropic.py +0 -175
  89. phoenix/experimental/evals/models/base.py +0 -170
  90. phoenix/experimental/evals/models/bedrock.py +0 -221
  91. phoenix/experimental/evals/models/litellm.py +0 -134
  92. phoenix/experimental/evals/models/openai.py +0 -453
  93. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  94. phoenix/experimental/evals/models/vertex.py +0 -173
  95. phoenix/experimental/evals/models/vertexai.py +0 -186
  96. phoenix/experimental/evals/retrievals.py +0 -96
  97. phoenix/experimental/evals/templates/__init__.py +0 -50
  98. phoenix/experimental/evals/templates/default_templates.py +0 -472
  99. phoenix/experimental/evals/templates/template.py +0 -195
  100. phoenix/experimental/evals/utils/__init__.py +0 -172
  101. phoenix/experimental/evals/utils/threads.py +0 -27
  102. phoenix/server/api/routers/evaluation_handler.py +0 -110
  103. phoenix/server/api/routers/span_handler.py +0 -70
  104. phoenix/server/api/routers/trace_handler.py +0 -60
  105. phoenix/storage/span_store/__init__.py +0 -23
  106. phoenix/storage/span_store/text_file.py +0 -85
  107. phoenix/trace/dsl/missing.py +0 -60
  108. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/WHEEL +0 -0
  109. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/IP_NOTICE +0 -0
  110. {arize_phoenix-3.25.0.dist-info → arize_phoenix-4.0.1.dist-info}/licenses/LICENSE +0 -0
  111. /phoenix/{datasets → db/insertion}/__init__.py +0 -0
  112. /phoenix/{experimental → db/migrations}/__init__.py +0 -0
  113. /phoenix/{storage → server/openapi}/__init__.py +0 -0
@@ -1,199 +1,549 @@
1
1
  import ast
2
- import inspect
2
+ import re
3
3
  import sys
4
+ import typing
4
5
  from dataclasses import dataclass, field
5
6
  from difflib import SequenceMatcher
6
- from typing import (
7
- Any,
8
- Dict,
9
- Iterable,
10
- Iterator,
11
- Mapping,
12
- Optional,
13
- Protocol,
14
- Sequence,
15
- Tuple,
16
- cast,
17
- )
7
+ from itertools import chain
8
+ from random import randint
9
+ from types import MappingProxyType
18
10
 
19
- from openinference.semconv import trace
20
- from typing_extensions import TypeGuard
11
+ import sqlalchemy
12
+ from sqlalchemy.orm import Mapped, aliased
13
+ from sqlalchemy.orm.util import AliasedClass
14
+ from sqlalchemy.sql.expression import Select
15
+ from typing_extensions import TypeAlias, TypeGuard, assert_never
21
16
 
22
17
  import phoenix.trace.v1 as pb
23
- from phoenix.trace.dsl.missing import MISSING
24
- from phoenix.trace.schemas import ComputedAttributes, Span, SpanID
18
+ from phoenix.db import models
25
19
 
26
- _VALID_EVAL_ATTRIBUTES: Tuple[str, ...] = tuple(
20
+ _VALID_EVAL_ATTRIBUTES: typing.Tuple[str, ...] = tuple(
27
21
  field.name for field in pb.Evaluation.Result.DESCRIPTOR.fields
28
22
  )
29
23
 
30
24
 
31
- class SupportsGetSpanEvaluation(Protocol):
32
- def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]: ...
25
+ EvalAttribute: TypeAlias = typing.Literal["label", "score"]
26
+ EvalExpression: TypeAlias = str
27
+ EvalName: TypeAlias = str
28
+
29
+ EVAL_EXPRESSION_PATTERN = re.compile(r"""\b(evals\[(".*?"|'.*?')\][.](label|score))\b""")
30
+
31
+
32
+ @dataclass(frozen=True)
33
+ class AliasedAnnotationRelation:
34
+ """
35
+ Represents an aliased `span_annotation` relation (i.e., SQL table). Used to
36
+ perform joins on span evaluations during filtering. An alias is required
37
+ because the `span_annotation` may be joined multiple times for different
38
+ evaluation names.
39
+ """
40
+
41
+ index: int
42
+ name: str
43
+ table: AliasedClass[models.SpanAnnotation] = field(init=False, repr=False)
44
+ _label_attribute_alias: str = field(init=False, repr=False)
45
+ _score_attribute_alias: str = field(init=False, repr=False)
46
+
47
+ def __post_init__(self) -> None:
48
+ table_alias = f"span_annotation_{self.index}"
49
+ alias_id = f"{randint(0, 10**6):06d}" # prevent conflicts with user-defined attributes
50
+ label_attribute_alias = f"{table_alias}_label_{alias_id}"
51
+ score_attribute_alias = f"{table_alias}_score_{alias_id}"
52
+ table = aliased(models.SpanAnnotation, name=table_alias)
53
+ object.__setattr__(self, "_label_attribute_alias", label_attribute_alias)
54
+ object.__setattr__(self, "_score_attribute_alias", score_attribute_alias)
55
+ object.__setattr__(
56
+ self,
57
+ "table",
58
+ table,
59
+ )
60
+
61
+ @property
62
+ def attributes(self) -> typing.Iterator[typing.Tuple[str, Mapped[typing.Any]]]:
63
+ """
64
+ Alias names and attributes (i.e., columns) of the `span_annotation`
65
+ relation.
66
+ """
67
+ yield self._label_attribute_alias, self.table.label
68
+ yield self._score_attribute_alias, self.table.score
69
+
70
+ def attribute_alias(self, attribute: EvalAttribute) -> str:
71
+ """
72
+ Returns an alias for the given attribute (i.e., column).
73
+ """
74
+ if attribute == "label":
75
+ return self._label_attribute_alias
76
+ if attribute == "score":
77
+ return self._score_attribute_alias
78
+ assert_never(attribute)
79
+
80
+
81
+ # Because postgresql is strongly typed, we cast JSON values to string
82
+ # by default unless it's hinted otherwise as done here.
83
+ _FLOAT_ATTRIBUTES: typing.FrozenSet[str] = frozenset(
84
+ {
85
+ "llm.token_count.completion",
86
+ "llm.token_count.prompt",
87
+ "llm.token_count.total",
88
+ }
89
+ )
90
+
91
+ _STRING_NAMES: typing.Mapping[str, sqlalchemy.SQLColumnExpression[typing.Any]] = MappingProxyType(
92
+ {
93
+ "span_id": models.Span.span_id,
94
+ "trace_id": models.Trace.trace_id,
95
+ "context.span_id": models.Span.span_id,
96
+ "context.trace_id": models.Trace.trace_id,
97
+ "parent_id": models.Span.parent_id,
98
+ "span_kind": models.Span.span_kind,
99
+ "name": models.Span.name,
100
+ "status_code": models.Span.status_code,
101
+ "status_message": models.Span.status_message,
102
+ }
103
+ )
104
+ _FLOAT_NAMES: typing.Mapping[str, sqlalchemy.SQLColumnExpression[typing.Any]] = MappingProxyType(
105
+ {
106
+ "latency_ms": models.Span.latency_ms,
107
+ "cumulative_llm_token_count_completion": models.Span.cumulative_llm_token_count_completion,
108
+ "cumulative_llm_token_count_prompt": models.Span.cumulative_llm_token_count_prompt,
109
+ "cumulative_llm_token_count_total": models.Span.cumulative_llm_token_count_total,
110
+ }
111
+ )
112
+ _NAMES: typing.Mapping[str, sqlalchemy.SQLColumnExpression[typing.Any]] = MappingProxyType(
113
+ {
114
+ **_STRING_NAMES,
115
+ **_FLOAT_NAMES,
116
+ "attributes": models.Span.attributes,
117
+ "events": models.Span.events,
118
+ }
119
+ )
120
+ _BACKWARD_COMPATIBILITY_REPLACEMENTS: typing.Mapping[str, str] = MappingProxyType(
121
+ {
122
+ # for backward-compatibility
123
+ "context.span_id": "span_id",
124
+ "context.trace_id": "trace_id",
125
+ "cumulative_token_count.completion": "cumulative_llm_token_count_completion",
126
+ "cumulative_token_count.prompt": "cumulative_llm_token_count_prompt",
127
+ "cumulative_token_count.total": "cumulative_llm_token_count_total",
128
+ }
129
+ )
33
130
 
34
131
 
35
132
  @dataclass(frozen=True)
36
133
  class SpanFilter:
37
134
  condition: str = ""
38
- evals: Optional[SupportsGetSpanEvaluation] = None
39
- valid_eval_names: Optional[Sequence[str]] = None
135
+ valid_eval_names: typing.Optional[typing.Sequence[str]] = None
40
136
  translated: ast.Expression = field(init=False, repr=False)
41
- compiled: Any = field(init=False, repr=False)
137
+ compiled: typing.Any = field(init=False, repr=False)
138
+ _aliased_annotation_relations: typing.Tuple[AliasedAnnotationRelation] = field(
139
+ init=False, repr=False
140
+ )
141
+ _aliased_annotation_attributes: typing.Dict[str, Mapped[typing.Any]] = field(
142
+ init=False, repr=False
143
+ )
42
144
 
43
145
  def __bool__(self) -> bool:
44
146
  return bool(self.condition)
45
147
 
46
148
  def __post_init__(self) -> None:
47
- condition = self.condition or "True" # default to no op
48
- root = ast.parse(condition, mode="eval")
49
- if self.condition:
50
- _validate_expression(root, condition, valid_eval_names=self.valid_eval_names)
51
- translated = _Translator(condition).visit(root)
149
+ if not (source := self.condition):
150
+ return
151
+ root = ast.parse(source, mode="eval")
152
+ _validate_expression(root, source, valid_eval_names=self.valid_eval_names)
153
+ source, aliased_annotation_relations = _apply_eval_aliasing(source)
154
+ root = ast.parse(source, mode="eval")
155
+ translated = _FilterTranslator(
156
+ source=source,
157
+ reserved_keywords=(
158
+ alias
159
+ for aliased_annotation in aliased_annotation_relations
160
+ for alias, _ in aliased_annotation.attributes
161
+ ),
162
+ ).visit(root)
52
163
  ast.fix_missing_locations(translated)
53
164
  compiled = compile(translated, filename="", mode="eval")
165
+ aliased_annotation_attributes = {
166
+ alias: attribute
167
+ for aliased_annotation in aliased_annotation_relations
168
+ for alias, attribute in aliased_annotation.attributes
169
+ }
54
170
  object.__setattr__(self, "translated", translated)
55
171
  object.__setattr__(self, "compiled", compiled)
56
- object.__setattr__(self, "evals", self.evals or MISSING)
172
+ object.__setattr__(self, "_aliased_annotation_relations", aliased_annotation_relations)
173
+ object.__setattr__(self, "_aliased_annotation_attributes", aliased_annotation_attributes)
57
174
 
58
- def __call__(self, span: Span) -> bool:
59
- return cast(
60
- bool,
175
+ def __call__(self, select: Select[typing.Any]) -> Select[typing.Any]:
176
+ if not self.condition:
177
+ return select
178
+ return self._join_aliased_relations(select).where(
61
179
  eval(
62
180
  self.compiled,
63
- {"span": span, "_MISSING": MISSING, "evals": self.evals},
64
- ),
181
+ {
182
+ **_NAMES,
183
+ **self._aliased_annotation_attributes,
184
+ "not_": sqlalchemy.not_,
185
+ "and_": sqlalchemy.and_,
186
+ "or_": sqlalchemy.or_,
187
+ "cast": sqlalchemy.cast,
188
+ "Float": sqlalchemy.Float,
189
+ "String": sqlalchemy.String,
190
+ "TextContains": models.TextContains,
191
+ },
192
+ )
65
193
  )
66
194
 
67
- def to_dict(self) -> Dict[str, Any]:
195
+ def to_dict(self) -> typing.Dict[str, typing.Any]:
68
196
  return {"condition": self.condition}
69
197
 
70
198
  @classmethod
71
199
  def from_dict(
72
200
  cls,
73
- obj: Mapping[str, Any],
74
- evals: Optional[SupportsGetSpanEvaluation] = None,
75
- valid_eval_names: Optional[Sequence[str]] = None,
201
+ obj: typing.Mapping[str, typing.Any],
202
+ valid_eval_names: typing.Optional[typing.Sequence[str]] = None,
76
203
  ) -> "SpanFilter":
77
204
  return cls(
78
205
  condition=obj.get("condition") or "",
79
- evals=evals,
80
206
  valid_eval_names=valid_eval_names,
81
207
  )
82
208
 
209
+ def _join_aliased_relations(self, stmt: Select[typing.Any]) -> Select[typing.Any]:
210
+ """
211
+ Joins the aliased relations to the given statement. E.g., for the filter condition:
212
+
213
+ ```
214
+ evals["Hallucination"].score > 0.5
215
+ ```
216
+
217
+ an alias (e.g., `A`) is generated for the `span_annotations` relation. An input statement
218
+ `select(Span)` is transformed to:
219
+
220
+ ```
221
+ A = aliased(SpanAnnotation)
222
+ select(Span).join(A, onclause=(and_(Span.id == A.span_rowid, A.name == "Hallucination")))
223
+ ```
224
+ """
225
+ for eval_alias in self._aliased_annotation_relations:
226
+ eval_name = eval_alias.name
227
+ AliasedSpanAnnotation = eval_alias.table
228
+ stmt = stmt.join(
229
+ AliasedSpanAnnotation,
230
+ onclause=(
231
+ sqlalchemy.and_(
232
+ AliasedSpanAnnotation.span_rowid == models.Span.id,
233
+ AliasedSpanAnnotation.name == eval_name,
234
+ )
235
+ ),
236
+ )
237
+ return stmt
83
238
 
84
- def _replace_none_with_missing(
85
- value: ast.expr,
86
- as_str: bool = False,
87
- ) -> ast.IfExp:
88
- """
89
- E.g. `value` becomes
90
- `_MISSING if (_VALUE := value) is None else _VALUE`
91
- """
92
- _store_VALUE = ast.Name(id="_VALUE", ctx=ast.Store())
93
- _load_VALUE = ast.Name(id="_VALUE", ctx=ast.Load())
94
- return ast.IfExp(
95
- test=ast.Compare(
96
- left=ast.NamedExpr(target=_store_VALUE, value=value),
97
- ops=[ast.Is()],
98
- comparators=[ast.Constant(value=None)],
239
+
240
+ @dataclass(frozen=True)
241
+ class Projector:
242
+ expression: str
243
+ translated: ast.Expression = field(init=False, repr=False)
244
+ compiled: typing.Any = field(init=False, repr=False)
245
+
246
+ def __post_init__(self) -> None:
247
+ if not (source := self.expression):
248
+ raise ValueError("missing expression")
249
+ root = ast.parse(source, mode="eval")
250
+ translated = _ProjectionTranslator(source).visit(root)
251
+ ast.fix_missing_locations(translated)
252
+ compiled = compile(translated, filename="", mode="eval")
253
+ object.__setattr__(self, "translated", translated)
254
+ object.__setattr__(self, "compiled", compiled)
255
+
256
+ def __call__(self) -> sqlalchemy.SQLColumnExpression[typing.Any]:
257
+ return typing.cast(
258
+ sqlalchemy.SQLColumnExpression[typing.Any],
259
+ eval(self.compiled, {**_NAMES}),
260
+ )
261
+
262
+
263
+ def _is_string_constant(node: typing.Any) -> TypeGuard[ast.Constant]:
264
+ return isinstance(node, ast.Constant) and isinstance(node.value, str)
265
+
266
+
267
+ def _is_float_constant(node: typing.Any) -> TypeGuard[ast.Constant]:
268
+ return isinstance(node, ast.Constant) and isinstance(node.value, typing.SupportsFloat)
269
+
270
+
271
+ def _is_string_attribute(node: typing.Any) -> TypeGuard[ast.Call]:
272
+ return (
273
+ isinstance(node, ast.Call)
274
+ and isinstance(func := node.func, ast.Attribute)
275
+ and func.attr == "as_string"
276
+ and isinstance(value := func.value, ast.Subscript)
277
+ and isinstance(name := value.value, ast.Name)
278
+ and name.id == "attributes"
279
+ )
280
+
281
+
282
+ def _is_float_attribute(node: typing.Any) -> TypeGuard[ast.Call]:
283
+ return (
284
+ isinstance(node, ast.Call)
285
+ and isinstance(func := node.func, ast.Attribute)
286
+ and func.attr == "as_float"
287
+ and isinstance(value := func.value, ast.Subscript)
288
+ and isinstance(name := value.value, ast.Name)
289
+ and name.id == "attributes"
290
+ )
291
+
292
+
293
+ def _as_string_attribute(node: typing.Union[ast.Subscript, ast.Call]) -> ast.Call:
294
+ if isinstance(node, ast.Call):
295
+ value = typing.cast(ast.Attribute, node.func).value
296
+ elif isinstance(node, ast.Subscript):
297
+ value = node
298
+ else:
299
+ assert_never(node)
300
+ return ast.Call(
301
+ func=ast.Attribute(
302
+ value=value,
303
+ attr="as_string",
304
+ ctx=ast.Load(),
99
305
  ),
100
- body=ast.Name(id="_MISSING", ctx=ast.Load()),
101
- orelse=_as_str(_load_VALUE) if as_str else _load_VALUE,
306
+ args=[],
307
+ keywords=[],
102
308
  )
103
309
 
104
310
 
105
- def _as_str(value: ast.expr) -> ast.Call:
106
- """E.g. `value` becomes `str(value)`"""
107
- return ast.Call(func=ast.Name(id="str", ctx=ast.Load()), args=[value], keywords=[])
311
+ def _as_float_attribute(node: typing.Union[ast.Subscript, ast.Call]) -> ast.Call:
312
+ if isinstance(node, ast.Call):
313
+ value = typing.cast(ast.Attribute, node.func).value
314
+ elif isinstance(node, ast.Subscript):
315
+ value = node
316
+ else:
317
+ assert_never(node)
318
+ return ast.Call(
319
+ func=ast.Attribute(
320
+ value=value,
321
+ attr="as_float",
322
+ ctx=ast.Load(),
323
+ ),
324
+ args=[],
325
+ keywords=[],
326
+ )
108
327
 
109
328
 
110
- def _ast_replacement(expression: str) -> ast.expr:
111
- as_str = expression in (
112
- "span.status_code",
113
- "span.span_kind",
114
- "span.parent_id",
115
- "span.context.span_id",
116
- "span.context.trace_id",
329
+ def _is_cast(
330
+ node: typing.Any,
331
+ type_: typing.Optional[typing.Literal["Float", "String"]] = None,
332
+ ) -> TypeGuard[ast.Call]:
333
+ return (
334
+ isinstance(node, ast.Call)
335
+ and isinstance(func := node.func, ast.Name)
336
+ and func.id == "cast"
337
+ and len(node.args) == 2
338
+ and isinstance(name := node.args[1], ast.Name)
339
+ and (not type_ or name.id == type_)
117
340
  )
118
- return _replace_none_with_missing(ast.parse(expression, mode="eval").body, as_str)
119
-
120
-
121
- def _allowed_replacements() -> Iterator[Tuple[str, ast.expr]]:
122
- for source_segment, ast_replacement in {
123
- "name": _ast_replacement("span.name"),
124
- "status_code": _ast_replacement("span.status_code"),
125
- "span_kind": _ast_replacement("span.span_kind"),
126
- "parent_id": _ast_replacement("span.parent_id"),
127
- }.items():
128
- yield source_segment, ast_replacement
129
- yield "span." + source_segment, ast_replacement
130
-
131
- for source_segment, ast_replacement in {
132
- "span_id": _ast_replacement("span.context.span_id"),
133
- "trace_id": _ast_replacement("span.context.trace_id"),
134
- }.items():
135
- yield source_segment, ast_replacement
136
- yield "context." + source_segment, ast_replacement
137
- yield "span.context." + source_segment, ast_replacement
138
-
139
- for field_name in (
140
- getattr(klass, attr)
141
- for name in dir(trace)
142
- if name.endswith("Attributes") and inspect.isclass(klass := getattr(trace, name))
143
- for attr in dir(klass)
144
- if attr.isupper()
145
- ):
146
- source_segment = field_name
147
- ast_replacement = _ast_replacement(f"span.attributes.get('{field_name}')")
148
- yield source_segment, ast_replacement
149
- yield "attributes." + source_segment, ast_replacement
150
- yield "span.attributes." + source_segment, ast_replacement
151
341
 
152
- for computed_attribute in ComputedAttributes:
153
- source_segment = computed_attribute.value
154
- ast_replacement = _ast_replacement(f"span.get_computed_value('{source_segment}')")
155
- yield source_segment, ast_replacement
156
342
 
343
+ def _remove_cast(node: typing.Any) -> typing.Any:
344
+ return node.args[0] if _is_cast(node) else node
345
+
346
+
347
+ def _cast_as(
348
+ type_: typing.Literal["Float", "String"],
349
+ node: typing.Any,
350
+ ) -> ast.Call:
351
+ if type_ == "Float" and (_is_subscript(node, "attributes") or _is_string_attribute(node)):
352
+ return _as_float_attribute(node)
353
+ if type_ == "String" and (_is_subscript(node, "attributes") or _is_float_attribute(node)):
354
+ return _as_string_attribute(node)
355
+ return ast.Call(
356
+ func=ast.Name(id="cast", ctx=ast.Load()),
357
+ args=[
358
+ _remove_cast(node),
359
+ ast.Name(id=type_, ctx=ast.Load()),
360
+ ],
361
+ keywords=[],
362
+ )
157
363
 
158
- class _Translator(ast.NodeTransformer):
159
- _allowed_fields: Mapping[str, ast.expr] = dict(_allowed_replacements())
160
364
 
161
- def __init__(self, source: str) -> None:
365
+ def _is_string(node: typing.Any) -> TypeGuard[ast.Call]:
366
+ return (
367
+ isinstance(node, ast.Name)
368
+ and node.id in _STRING_NAMES
369
+ or _is_cast(node, "String")
370
+ or _is_string_constant(node)
371
+ or _is_string_attribute(node)
372
+ or isinstance(node, (ast.List, ast.Tuple))
373
+ and len(node.elts) > 0
374
+ and _is_string(node.elts[0])
375
+ )
376
+
377
+
378
+ def _is_float(node: typing.Any) -> TypeGuard[ast.Call]:
379
+ return (
380
+ isinstance(node, ast.Name)
381
+ and node.id in _FLOAT_NAMES
382
+ or _is_cast(node, "Float")
383
+ or _is_float_constant(node)
384
+ or _is_float_attribute(node)
385
+ or isinstance(node, (ast.List, ast.Tuple))
386
+ and len(node.elts) > 0
387
+ and _is_float(node.elts[0])
388
+ or isinstance(node, ast.BinOp)
389
+ and (not isinstance(node.op, ast.Add) or (_is_float(node.left) or _is_float(node.right)))
390
+ or isinstance(node, ast.UnaryOp)
391
+ and isinstance(node.op, (ast.USub, ast.UAdd))
392
+ )
393
+
394
+
395
+ class _ProjectionTranslator(ast.NodeTransformer):
396
+ def __init__(self, source: str, reserved_keywords: typing.Iterable[str] = ()) -> None:
162
397
  # Regarding the need for `source: str` for getting source segments:
163
398
  # In Python 3.8, we have to use `ast.get_source_segment(source, node)`.
164
399
  # In Python 3.9+, we can use `ast.unparse(node)` (no need for `source`).
165
400
  self._source = source
401
+ self._reserved_keywords = frozenset(
402
+ chain(
403
+ reserved_keywords,
404
+ _STRING_NAMES.keys(),
405
+ _FLOAT_NAMES.keys(),
406
+ )
407
+ )
166
408
 
167
- def visit_Subscript(self, node: ast.Subscript) -> Any:
168
- if _is_metadata(node) and (key := _get_subscript_key(node)):
169
- return _ast_metadata_subscript(key)
170
- source_segment: str = cast(str, ast.get_source_segment(self._source, node))
171
- raise SyntaxError(f"invalid expression: {source_segment}") # TODO: add details
172
-
173
- def visit_Attribute(self, node: ast.Attribute) -> Any:
174
- if _is_eval(node.value) and (eval_name := _get_subscript_key(node.value)):
175
- # e.g. `evals["name"].score`
176
- return _ast_evaluation_result_value(eval_name, node.attr)
177
- source_segment: str = cast(str, ast.get_source_segment(self._source, node))
178
- if replacement := self._allowed_fields.get(source_segment):
179
- return replacement
180
- raise SyntaxError(f"invalid expression: {source_segment}") # TODO: add details
181
-
182
- def visit_Name(self, node: ast.Name) -> Any:
183
- source_segment: str = cast(str, ast.get_source_segment(self._source, node))
184
- if replacement := self._allowed_fields.get(source_segment):
185
- return replacement
186
- raise SyntaxError(f"invalid expression: {source_segment}") # TODO: add details
187
-
188
- def visit_Constant(self, node: ast.Constant) -> Any:
189
- return ast.Name(id="_MISSING", ctx=ast.Load()) if node.value is None else node
409
+ def visit_generic(self, node: ast.AST) -> typing.Any:
410
+ raise SyntaxError(f"invalid expression: {ast.get_source_segment(self._source, node)}")
411
+
412
+ def visit_Expression(self, node: ast.Expression) -> typing.Any:
413
+ return ast.Expression(body=self.visit(node.body))
414
+
415
+ def visit_Attribute(self, node: ast.Attribute) -> typing.Any:
416
+ source_segment = typing.cast(str, ast.get_source_segment(self._source, node))
417
+ if replacement := _BACKWARD_COMPATIBILITY_REPLACEMENTS.get(source_segment):
418
+ return ast.Name(id=replacement, ctx=ast.Load())
419
+ if (keys := _get_attribute_keys_list(node)) is not None:
420
+ return _as_attribute(keys)
421
+ raise SyntaxError(f"invalid expression: {source_segment}")
422
+
423
+ def visit_Name(self, node: ast.Name) -> typing.Any:
424
+ source_segment = typing.cast(str, ast.get_source_segment(self._source, node))
425
+ if source_segment in self._reserved_keywords:
426
+ return node
427
+ name = source_segment
428
+ return _as_attribute([ast.Constant(value=name, kind=None)])
429
+
430
+ def visit_Subscript(self, node: ast.Subscript) -> typing.Any:
431
+ if (keys := _get_attribute_keys_list(node)) is not None:
432
+ return _as_attribute(keys)
433
+ raise SyntaxError(f"invalid expression: {ast.get_source_segment(self._source, node)}")
434
+
435
+
436
+ class _FilterTranslator(_ProjectionTranslator):
437
+ def visit_Compare(self, node: ast.Compare) -> typing.Any:
438
+ if len(node.comparators) > 1:
439
+ args: typing.List[typing.Any] = []
440
+ left = node.left
441
+ for i, (op, comparator) in enumerate(zip(node.ops, node.comparators)):
442
+ args.append(self.visit(ast.Compare(left=left, ops=[op], comparators=[comparator])))
443
+ left = comparator
444
+ return ast.Call(func=ast.Name(id="and_", ctx=ast.Load()), args=args, keywords=[])
445
+ left, op, right = self.visit(node.left), node.ops[0], self.visit(node.comparators[0])
446
+ if _is_subscript(left, "attributes"):
447
+ left = _cast_as("String", left)
448
+ if _is_subscript(right, "attributes"):
449
+ right = _cast_as("String", right)
450
+ if _is_float(left) and not _is_float(right):
451
+ right = _cast_as("Float", right)
452
+ elif not _is_float(left) and _is_float(right):
453
+ left = _cast_as("Float", left)
454
+ if isinstance(op, (ast.In, ast.NotIn)):
455
+ if (
456
+ _is_string_attribute(right)
457
+ or (typing.cast(str, ast.get_source_segment(self._source, right))) in _NAMES
458
+ ):
459
+ call = ast.Call(
460
+ func=ast.Name(id="TextContains", ctx=ast.Load()),
461
+ args=[right, left],
462
+ keywords=[],
463
+ )
464
+ if isinstance(op, ast.NotIn):
465
+ call = ast.Call(
466
+ func=ast.Name(id="not_", ctx=ast.Load()), args=[call], keywords=[]
467
+ )
468
+ return call
469
+ elif isinstance(right, (ast.List, ast.Tuple)):
470
+ attr = "in_" if isinstance(op, ast.In) else "not_in"
471
+ return ast.Call(
472
+ func=ast.Attribute(value=left, attr=attr, ctx=ast.Load()),
473
+ args=[right],
474
+ keywords=[],
475
+ )
476
+ else:
477
+ raise SyntaxError(f"invalid expression: {ast.get_source_segment(self._source, op)}")
478
+ if isinstance(op, ast.Is):
479
+ op = ast.Eq()
480
+ elif isinstance(op, ast.IsNot):
481
+ op = ast.NotEq()
482
+ return ast.Compare(left=left, ops=[op], comparators=[right])
483
+
484
+ def visit_BoolOp(self, node: ast.BoolOp) -> typing.Any:
485
+ if isinstance(node.op, ast.And):
486
+ func = ast.Name(id="and_", ctx=ast.Load())
487
+ elif isinstance(node.op, ast.Or):
488
+ func = ast.Name(id="or_", ctx=ast.Load())
489
+ else:
490
+ raise SyntaxError(f"invalid expression: {ast.get_source_segment(self._source, node)}")
491
+ args = [self.visit(value) for value in node.values]
492
+ return ast.Call(func=func, args=args, keywords=[])
493
+
494
+ def visit_UnaryOp(self, node: ast.UnaryOp) -> typing.Any:
495
+ operand = self.visit(node.operand)
496
+ if isinstance(node.op, ast.Not):
497
+ return ast.Call(
498
+ func=ast.Name(id="not_", ctx=ast.Load()),
499
+ args=[operand],
500
+ keywords=[],
501
+ )
502
+ node = ast.UnaryOp(op=node.op, operand=operand)
503
+ if isinstance(node.op, (ast.USub, ast.UAdd)):
504
+ if not _is_float(node.operand):
505
+ operand = _cast_as("Float", node.operand)
506
+ return ast.UnaryOp(op=ast.USub(), operand=operand)
507
+ return node
508
+ return node
509
+
510
+ def visit_BinOp(self, node: ast.BinOp) -> typing.Any:
511
+ left, op, right = self.visit(node.left), node.op, self.visit(node.right)
512
+ if _is_subscript(left, "attributes"):
513
+ left = _cast_as("String", left)
514
+ if _is_subscript(right, "attributes"):
515
+ right = _cast_as("String", right)
516
+ type_: typing.Literal["Float", "String"] = "String"
517
+ if not isinstance(op, ast.Add) or _is_float(left) or _is_float(right):
518
+ type_ = "Float"
519
+ if not _is_float(left):
520
+ left = _cast_as(type_, left)
521
+ if not _is_float(right):
522
+ right = _cast_as(type_, right)
523
+ return ast.BinOp(left=left, op=op, right=right)
524
+ return _cast_as(type_, ast.BinOp(left=left, op=op, right=right))
525
+
526
+ def visit_Call(self, node: ast.Call) -> typing.Any:
527
+ source_segment = typing.cast(str, ast.get_source_segment(self._source, node))
528
+ if len(node.args) != 1:
529
+ raise SyntaxError(f"invalid expression: {source_segment}")
530
+ if not isinstance(node.func, ast.Name) or node.func.id not in ("str", "float", "int"):
531
+ raise SyntaxError(
532
+ f"invalid expression: {ast.get_source_segment(self._source, node.func)}"
533
+ )
534
+ arg = self.visit(node.args[0])
535
+ if node.func.id in ("float", "int") and not _is_float(arg):
536
+ return _cast_as("Float", arg)
537
+ if node.func.id in ("str",) and not _is_string(arg):
538
+ return _cast_as("String", arg)
539
+ return arg
190
540
 
191
541
 
192
542
  def _validate_expression(
193
543
  expression: ast.Expression,
194
544
  source: str,
195
- valid_eval_names: Optional[Sequence[str]] = None,
196
- valid_eval_attributes: Tuple[str, ...] = _VALID_EVAL_ATTRIBUTES,
545
+ valid_eval_names: typing.Optional[typing.Sequence[str]] = None,
546
+ valid_eval_attributes: typing.Tuple[str, ...] = _VALID_EVAL_ATTRIBUTES,
197
547
  ) -> None:
198
548
  """
199
549
  Validate primarily the structural (i.e. not semantic) characteristics of an
@@ -208,19 +558,25 @@ def _validate_expression(
208
558
  # In Python 3.8, we have to use `ast.get_source_segment(source, node)`.
209
559
  # In Python 3.9+, we can use `ast.unparse(node)` (no need for `source`).
210
560
  if not isinstance(expression, ast.Expression):
211
- raise SyntaxError(f"invalid expression: {source}") # TODO: add details
561
+ raise SyntaxError(f"invalid expression: {source}")
212
562
  for i, node in enumerate(ast.walk(expression.body)):
213
563
  if i == 0:
214
- if isinstance(node, (ast.BoolOp, ast.Compare)):
564
+ if (
565
+ isinstance(node, (ast.BoolOp, ast.Compare))
566
+ or isinstance(node, ast.UnaryOp)
567
+ and isinstance(node.op, ast.Not)
568
+ ):
215
569
  continue
216
- elif _is_metadata(node):
570
+ elif (
571
+ _is_subscript(node, "metadata") or _is_subscript(node, "attributes")
572
+ ) and _get_attribute_keys_list(node) is not None:
217
573
  continue
218
- elif _is_eval(node):
574
+ elif _is_eval(node) and _get_subscript_key(node) is not None:
219
575
  # e.g. `evals["name"]`
220
576
  if not (eval_name := _get_subscript_key(node)) or (
221
577
  valid_eval_names is not None and eval_name not in valid_eval_names
222
578
  ):
223
- source_segment = cast(str, ast.get_source_segment(source, node))
579
+ source_segment = typing.cast(str, ast.get_source_segment(source, node))
224
580
  if eval_name and valid_eval_names:
225
581
  # suggest a valid eval name most similar to the one given
226
582
  choice, score = _find_best_match(eval_name, valid_eval_names)
@@ -240,7 +596,7 @@ def _validate_expression(
240
596
  elif isinstance(node, ast.Attribute) and _is_eval(node.value):
241
597
  # e.g. `evals["name"].score`
242
598
  if (attr := node.attr) not in valid_eval_attributes:
243
- source_segment = cast(str, ast.get_source_segment(source, node))
599
+ source_segment = typing.cast(str, ast.get_source_segment(source, node))
244
600
  # suggest a valid attribute most similar to the one given
245
601
  choice, score = _find_best_match(attr, valid_eval_attributes)
246
602
  if choice and score > 0.75: # arbitrary threshold
@@ -256,6 +612,13 @@ def _validate_expression(
256
612
  else ""
257
613
  )
258
614
  continue
615
+ elif (
616
+ isinstance(node, ast.Call)
617
+ and isinstance(node.func, ast.Name)
618
+ and node.func.id in ("str", "float", "int")
619
+ ):
620
+ # allow type casting functions
621
+ continue
259
622
  elif isinstance(
260
623
  node,
261
624
  (
@@ -281,41 +644,24 @@ def _validate_expression(
281
644
  ),
282
645
  ):
283
646
  continue
284
- source_segment = cast(str, ast.get_source_segment(source, node))
285
- raise SyntaxError(f"invalid expression: {source_segment}") # TODO: add details
286
-
287
-
288
- def _ast_evaluation_result_value(name: str, attr: str) -> ast.expr:
289
- source = (
290
- f"_RESULT.{attr}.value if ("
291
- f" _RESULT := ("
292
- f" _MISSING if ("
293
- f" _VALUE := evals.get_span_evaluation("
294
- f" span.context.span_id, '{name}'"
295
- f" )"
296
- f" ) is None "
297
- f" else _VALUE"
298
- f" ).result"
299
- f").HasField('{attr}') "
300
- f"else _MISSING"
301
- )
302
- return ast.parse(source, mode="eval").body
303
-
304
-
305
- def _ast_metadata_subscript(key: str) -> ast.expr:
306
- source = (
307
- f"_MISSING if ("
308
- f" _MD := span.attributes.get('metadata')"
309
- f") is None else ("
310
- f" _MISSING if not hasattr(_MD, 'get') or ("
311
- f" _VALUE := _MD.get('{key}')"
312
- f" ) is None else _VALUE"
313
- f")"
647
+ source_segment = typing.cast(str, ast.get_source_segment(source, node))
648
+ raise SyntaxError(f"invalid expression: {source_segment}")
649
+
650
+
651
+ def _as_attribute(
652
+ keys: typing.List[ast.Constant],
653
+ # as_float: typing.Optional[bool] = None,
654
+ ) -> ast.Subscript:
655
+ return ast.Subscript(
656
+ value=ast.Name(id="attributes", ctx=ast.Load()),
657
+ slice=ast.List(elts=keys, ctx=ast.Load())
658
+ if sys.version_info >= (3, 9)
659
+ else ast.Index(value=ast.List(elts=keys, ctx=ast.Load())),
660
+ ctx=ast.Load(),
314
661
  )
315
- return ast.parse(source, mode="eval").body
316
662
 
317
663
 
318
- def _is_eval(node: Any) -> TypeGuard[ast.Subscript]:
664
+ def _is_eval(node: typing.Any) -> TypeGuard[ast.Subscript]:
319
665
  # e.g. `evals["name"]`
320
666
  return (
321
667
  isinstance(node, ast.Subscript)
@@ -324,35 +670,98 @@ def _is_eval(node: Any) -> TypeGuard[ast.Subscript]:
324
670
  )
325
671
 
326
672
 
327
- def _is_metadata(node: Any) -> TypeGuard[ast.Subscript]:
328
- # e.g. `metadata["name"]`
329
- return (
330
- isinstance(node, ast.Subscript)
331
- and isinstance(value := node.value, ast.Name)
332
- and value.id == "metadata"
333
- )
673
+ def _is_subscript(
674
+ node: typing.Any,
675
+ id_: typing.Literal["attributes", "metadata"],
676
+ ) -> TypeGuard[ast.Subscript]:
677
+ # e.g. `attributes["key"]`
678
+ # e.g. `attributes[["a", "b.c", "d"]]`
679
+ # e.g. `attributes["a"]["b.c"]["d"]`
680
+ while isinstance(node, ast.Subscript):
681
+ node = node.value
682
+ if isinstance(node, ast.Name) and node.id == id_:
683
+ return True
684
+ return False
685
+
686
+
687
+ def _get_attribute_keys_list(
688
+ node: typing.Any,
689
+ ) -> typing.Optional[typing.List[ast.Constant]]:
690
+ # e.g. `attributes["key"]` -> `["key"]`
691
+ # e.g. `attributes["a"]["b.c"][["d"]]` -> `["a", "b.c", "d"]`
692
+ # e.g. `attributes["a"][["b.c", "d"]]` -> `["a", "b.c", "d"]`
693
+ # e.g. `metadata["key"]` -> `["metadata", "key"]`
694
+ # e.g. `metadata["a"]["b.c"][["d"]]` -> `["metadata", "a", "b.c", "d"]`
695
+ # e.g. `metadata["a"][["b.c", "d"]]` -> `["metadata", "a", "b.c", "d"]`
696
+ keys: typing.List[ast.Constant] = []
697
+ if isinstance(node, ast.Attribute):
698
+ while isinstance(node, ast.Attribute):
699
+ keys.append(ast.Constant(value=node.attr, kind=None))
700
+ node = node.value
701
+ if isinstance(node, ast.Name):
702
+ keys.append(ast.Constant(value=node.id, kind=None))
703
+ return keys[::-1]
704
+ elif isinstance(node, ast.Subscript):
705
+ while isinstance(node, ast.Subscript):
706
+ if not (sub_keys := _get_subscript_keys_list(node)):
707
+ return None
708
+ keys.extend(reversed(sub_keys))
709
+ node = node.value
710
+ if isinstance(node, ast.Name):
711
+ if not isinstance(keys[-1].value, str):
712
+ return None
713
+ if node.id == "metadata":
714
+ keys.append(ast.Constant(value="metadata", kind=None))
715
+ return keys[::-1]
716
+ return None
717
+
718
+
719
+ def _get_subscript_keys_list(
720
+ node: ast.Subscript,
721
+ ) -> typing.Optional[typing.List[ast.Constant]]:
722
+ if sys.version_info < (3, 9):
723
+ # Note that `ast.Index` is deprecated in Python 3.9+, but is necessary
724
+ # for Python 3.8 as part of `ast.Subscript`.
725
+ if not isinstance(node.slice, ast.Index):
726
+ return None
727
+ child = node.slice.value
728
+ else:
729
+ child = node.slice
730
+ if isinstance(child, ast.Constant):
731
+ if not isinstance(child.value, (str, int)) or isinstance(child.value, bool):
732
+ return None
733
+ return [child]
734
+ if not (
735
+ isinstance(child, ast.List)
736
+ and (elts := child.elts)
737
+ and all(
738
+ isinstance(elt, ast.Constant)
739
+ and isinstance(elt.value, (str, int))
740
+ and not isinstance(elt.value, bool)
741
+ for elt in elts
742
+ )
743
+ ):
744
+ return None
745
+ return [typing.cast(ast.Constant, elt) for elt in elts]
334
746
 
335
747
 
336
- def _get_subscript_key(node: ast.Subscript) -> Optional[str]:
748
+ def _get_subscript_key(
749
+ node: ast.Subscript,
750
+ ) -> typing.Optional[str]:
337
751
  if sys.version_info < (3, 9):
338
752
  # Note that `ast.Index` is deprecated in Python 3.9+, but is necessary
339
753
  # for Python 3.8 as part of `ast.Subscript`.
340
- return (
341
- eval_name
342
- if isinstance(node_slice := node.slice, ast.Index)
343
- and isinstance(slice_value := node_slice.value, ast.Constant)
344
- and isinstance(eval_name := slice_value.value, str)
345
- else None
346
- )
347
- return (
348
- eval_name
349
- if isinstance(node_slice := node.slice, ast.Constant)
350
- and isinstance(eval_name := node_slice.value, str)
351
- else None
352
- )
754
+ if not isinstance(node.slice, ast.Index):
755
+ return None
756
+ child = node.slice.value
757
+ else:
758
+ child = node.slice
759
+ if not (isinstance(child, ast.Constant) and isinstance(child.value, str)):
760
+ return None
761
+ return child.value
353
762
 
354
763
 
355
- def _disjunction(choices: Sequence[str]) -> str:
764
+ def _disjunction(choices: typing.Sequence[str]) -> str:
356
765
  """
357
766
  E.g. `["a", "b", "c"]` becomes `"one of a, b, or c"`
358
767
  """
@@ -365,10 +774,69 @@ def _disjunction(choices: Sequence[str]) -> str:
365
774
  return f"one of {', '.join(choices[:-1])}, or {choices[-1]}"
366
775
 
367
776
 
368
- def _find_best_match(source: str, choices: Iterable[str]) -> Tuple[Optional[str], float]:
777
+ def _find_best_match(
778
+ source: str, choices: typing.Iterable[str]
779
+ ) -> typing.Tuple[typing.Optional[str], float]:
369
780
  best_choice, best_score = None, 0.0
370
781
  for choice in choices:
371
782
  score = SequenceMatcher(None, source, choice).ratio()
372
783
  if score > best_score:
373
784
  best_choice, best_score = choice, score
374
785
  return best_choice, best_score
786
+
787
+
788
+ def _apply_eval_aliasing(
789
+ source: str,
790
+ ) -> typing.Tuple[
791
+ str,
792
+ typing.Tuple[AliasedAnnotationRelation, ...],
793
+ ]:
794
+ """
795
+ Substitutes `evals[<eval-name>].<attribute>` with aliases. Returns the
796
+ updated source code in addition to the aliased relations.
797
+
798
+ Example:
799
+
800
+ input:
801
+
802
+ ```
803
+ evals['Hallucination'].label == 'correct' or evals['Hallucination'].score < 0.5
804
+ ```
805
+
806
+ output:
807
+
808
+ ```
809
+ span_annotation_0_label_123 == 'correct' or span_annotation_0_score_456 < 0.5
810
+ ```
811
+ """
812
+ eval_aliases: typing.Dict[EvalName, AliasedAnnotationRelation] = {}
813
+ for eval_expression, eval_name, eval_attribute in _parse_eval_expressions_and_names(source):
814
+ if (eval_alias := eval_aliases.get(eval_name)) is None:
815
+ eval_alias = AliasedAnnotationRelation(index=len(eval_aliases), name=eval_name)
816
+ eval_aliases[eval_name] = eval_alias
817
+ alias_name = eval_alias.attribute_alias(eval_attribute)
818
+ source = source.replace(eval_expression, alias_name)
819
+ return source, tuple(eval_aliases.values())
820
+
821
+
822
+ def _parse_eval_expressions_and_names(
823
+ source: str,
824
+ ) -> typing.Iterator[typing.Tuple[EvalExpression, EvalName, EvalAttribute]]:
825
+ """
826
+ Parses filter conditions for evaluation expressions of the form:
827
+
828
+ ```
829
+ evals["<eval-name>"].<attribute>
830
+ ```
831
+ """
832
+ for match in EVAL_EXPRESSION_PATTERN.finditer(source):
833
+ (
834
+ eval_expression,
835
+ quoted_eval_name,
836
+ evaluation_attribute_name,
837
+ ) = match.groups()
838
+ yield (
839
+ eval_expression,
840
+ quoted_eval_name[1:-1],
841
+ typing.cast(EvalAttribute, evaluation_attribute_name),
842
+ )