arize-phoenix 3.16.0__py3-none-any.whl → 7.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arize-phoenix might be problematic.

Files changed (338)
  1. arize_phoenix-7.7.0.dist-info/METADATA +261 -0
  2. arize_phoenix-7.7.0.dist-info/RECORD +345 -0
  3. {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
  4. arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
  5. phoenix/__init__.py +86 -14
  6. phoenix/auth.py +309 -0
  7. phoenix/config.py +675 -45
  8. phoenix/core/model.py +32 -30
  9. phoenix/core/model_schema.py +102 -109
  10. phoenix/core/model_schema_adapter.py +48 -45
  11. phoenix/datetime_utils.py +24 -3
  12. phoenix/db/README.md +54 -0
  13. phoenix/db/__init__.py +4 -0
  14. phoenix/db/alembic.ini +85 -0
  15. phoenix/db/bulk_inserter.py +294 -0
  16. phoenix/db/engines.py +208 -0
  17. phoenix/db/enums.py +20 -0
  18. phoenix/db/facilitator.py +113 -0
  19. phoenix/db/helpers.py +159 -0
  20. phoenix/db/insertion/constants.py +2 -0
  21. phoenix/db/insertion/dataset.py +227 -0
  22. phoenix/db/insertion/document_annotation.py +171 -0
  23. phoenix/db/insertion/evaluation.py +191 -0
  24. phoenix/db/insertion/helpers.py +98 -0
  25. phoenix/db/insertion/span.py +193 -0
  26. phoenix/db/insertion/span_annotation.py +158 -0
  27. phoenix/db/insertion/trace_annotation.py +158 -0
  28. phoenix/db/insertion/types.py +256 -0
  29. phoenix/db/migrate.py +86 -0
  30. phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
  31. phoenix/db/migrations/env.py +114 -0
  32. phoenix/db/migrations/script.py.mako +26 -0
  33. phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
  34. phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
  35. phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
  36. phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
  37. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
  38. phoenix/db/models.py +807 -0
  39. phoenix/exceptions.py +5 -1
  40. phoenix/experiments/__init__.py +6 -0
  41. phoenix/experiments/evaluators/__init__.py +29 -0
  42. phoenix/experiments/evaluators/base.py +158 -0
  43. phoenix/experiments/evaluators/code_evaluators.py +184 -0
  44. phoenix/experiments/evaluators/llm_evaluators.py +473 -0
  45. phoenix/experiments/evaluators/utils.py +236 -0
  46. phoenix/experiments/functions.py +772 -0
  47. phoenix/experiments/tracing.py +86 -0
  48. phoenix/experiments/types.py +726 -0
  49. phoenix/experiments/utils.py +25 -0
  50. phoenix/inferences/__init__.py +0 -0
  51. phoenix/{datasets → inferences}/errors.py +6 -5
  52. phoenix/{datasets → inferences}/fixtures.py +49 -42
  53. phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
  54. phoenix/{datasets → inferences}/schema.py +11 -11
  55. phoenix/{datasets → inferences}/validation.py +13 -14
  56. phoenix/logging/__init__.py +3 -0
  57. phoenix/logging/_config.py +90 -0
  58. phoenix/logging/_filter.py +6 -0
  59. phoenix/logging/_formatter.py +69 -0
  60. phoenix/metrics/__init__.py +5 -4
  61. phoenix/metrics/binning.py +4 -3
  62. phoenix/metrics/metrics.py +2 -1
  63. phoenix/metrics/mixins.py +7 -6
  64. phoenix/metrics/retrieval_metrics.py +2 -1
  65. phoenix/metrics/timeseries.py +5 -4
  66. phoenix/metrics/wrappers.py +9 -3
  67. phoenix/pointcloud/clustering.py +5 -5
  68. phoenix/pointcloud/pointcloud.py +7 -5
  69. phoenix/pointcloud/projectors.py +5 -6
  70. phoenix/pointcloud/umap_parameters.py +53 -52
  71. phoenix/server/api/README.md +28 -0
  72. phoenix/server/api/auth.py +44 -0
  73. phoenix/server/api/context.py +152 -9
  74. phoenix/server/api/dataloaders/__init__.py +91 -0
  75. phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
  76. phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
  77. phoenix/server/api/dataloaders/cache/__init__.py +3 -0
  78. phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
  79. phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
  80. phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
  81. phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
  82. phoenix/server/api/dataloaders/document_evaluations.py +31 -0
  83. phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
  84. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
  85. phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
  86. phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
  87. phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
  88. phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
  89. phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
  90. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
  91. phoenix/server/api/dataloaders/project_by_name.py +31 -0
  92. phoenix/server/api/dataloaders/record_counts.py +116 -0
  93. phoenix/server/api/dataloaders/session_io.py +79 -0
  94. phoenix/server/api/dataloaders/session_num_traces.py +30 -0
  95. phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
  96. phoenix/server/api/dataloaders/session_token_usages.py +41 -0
  97. phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
  98. phoenix/server/api/dataloaders/span_annotations.py +26 -0
  99. phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
  100. phoenix/server/api/dataloaders/span_descendants.py +57 -0
  101. phoenix/server/api/dataloaders/span_projects.py +33 -0
  102. phoenix/server/api/dataloaders/token_counts.py +124 -0
  103. phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
  104. phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
  105. phoenix/server/api/dataloaders/user_roles.py +30 -0
  106. phoenix/server/api/dataloaders/users.py +33 -0
  107. phoenix/server/api/exceptions.py +48 -0
  108. phoenix/server/api/helpers/__init__.py +12 -0
  109. phoenix/server/api/helpers/dataset_helpers.py +217 -0
  110. phoenix/server/api/helpers/experiment_run_filters.py +763 -0
  111. phoenix/server/api/helpers/playground_clients.py +948 -0
  112. phoenix/server/api/helpers/playground_registry.py +70 -0
  113. phoenix/server/api/helpers/playground_spans.py +455 -0
  114. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
  115. phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
  116. phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
  117. phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
  118. phoenix/server/api/input_types/ClearProjectInput.py +15 -0
  119. phoenix/server/api/input_types/ClusterInput.py +2 -2
  120. phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
  121. phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
  122. phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
  123. phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
  124. phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
  125. phoenix/server/api/input_types/DatasetSort.py +17 -0
  126. phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
  127. phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
  128. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
  129. phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
  130. phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
  131. phoenix/server/api/input_types/DimensionFilter.py +4 -4
  132. phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
  133. phoenix/server/api/input_types/Granularity.py +1 -1
  134. phoenix/server/api/input_types/InvocationParameters.py +162 -0
  135. phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
  136. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
  137. phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
  138. phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
  139. phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
  140. phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
  141. phoenix/server/api/input_types/SpanSort.py +134 -69
  142. phoenix/server/api/input_types/TemplateOptions.py +10 -0
  143. phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
  144. phoenix/server/api/input_types/UserRoleInput.py +9 -0
  145. phoenix/server/api/mutations/__init__.py +28 -0
  146. phoenix/server/api/mutations/api_key_mutations.py +167 -0
  147. phoenix/server/api/mutations/chat_mutations.py +593 -0
  148. phoenix/server/api/mutations/dataset_mutations.py +591 -0
  149. phoenix/server/api/mutations/experiment_mutations.py +75 -0
  150. phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
  151. phoenix/server/api/mutations/project_mutations.py +57 -0
  152. phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
  153. phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
  154. phoenix/server/api/mutations/user_mutations.py +329 -0
  155. phoenix/server/api/openapi/__init__.py +0 -0
  156. phoenix/server/api/openapi/main.py +17 -0
  157. phoenix/server/api/openapi/schema.py +16 -0
  158. phoenix/server/api/queries.py +738 -0
  159. phoenix/server/api/routers/__init__.py +11 -0
  160. phoenix/server/api/routers/auth.py +284 -0
  161. phoenix/server/api/routers/embeddings.py +26 -0
  162. phoenix/server/api/routers/oauth2.py +488 -0
  163. phoenix/server/api/routers/v1/__init__.py +64 -0
  164. phoenix/server/api/routers/v1/datasets.py +1017 -0
  165. phoenix/server/api/routers/v1/evaluations.py +362 -0
  166. phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
  167. phoenix/server/api/routers/v1/experiment_runs.py +167 -0
  168. phoenix/server/api/routers/v1/experiments.py +308 -0
  169. phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
  170. phoenix/server/api/routers/v1/spans.py +267 -0
  171. phoenix/server/api/routers/v1/traces.py +208 -0
  172. phoenix/server/api/routers/v1/utils.py +95 -0
  173. phoenix/server/api/schema.py +44 -247
  174. phoenix/server/api/subscriptions.py +597 -0
  175. phoenix/server/api/types/Annotation.py +21 -0
  176. phoenix/server/api/types/AnnotationSummary.py +55 -0
  177. phoenix/server/api/types/AnnotatorKind.py +16 -0
  178. phoenix/server/api/types/ApiKey.py +27 -0
  179. phoenix/server/api/types/AuthMethod.py +9 -0
  180. phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
  181. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
  182. phoenix/server/api/types/Cluster.py +25 -24
  183. phoenix/server/api/types/CreateDatasetPayload.py +8 -0
  184. phoenix/server/api/types/DataQualityMetric.py +31 -13
  185. phoenix/server/api/types/Dataset.py +288 -63
  186. phoenix/server/api/types/DatasetExample.py +85 -0
  187. phoenix/server/api/types/DatasetExampleRevision.py +34 -0
  188. phoenix/server/api/types/DatasetVersion.py +14 -0
  189. phoenix/server/api/types/Dimension.py +32 -31
  190. phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
  191. phoenix/server/api/types/EmbeddingDimension.py +56 -49
  192. phoenix/server/api/types/Evaluation.py +25 -31
  193. phoenix/server/api/types/EvaluationSummary.py +30 -50
  194. phoenix/server/api/types/Event.py +20 -20
  195. phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
  196. phoenix/server/api/types/Experiment.py +152 -0
  197. phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
  198. phoenix/server/api/types/ExperimentComparison.py +17 -0
  199. phoenix/server/api/types/ExperimentRun.py +119 -0
  200. phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
  201. phoenix/server/api/types/GenerativeModel.py +9 -0
  202. phoenix/server/api/types/GenerativeProvider.py +85 -0
  203. phoenix/server/api/types/Inferences.py +80 -0
  204. phoenix/server/api/types/InferencesRole.py +23 -0
  205. phoenix/server/api/types/LabelFraction.py +7 -0
  206. phoenix/server/api/types/MimeType.py +2 -2
  207. phoenix/server/api/types/Model.py +54 -54
  208. phoenix/server/api/types/PerformanceMetric.py +8 -5
  209. phoenix/server/api/types/Project.py +407 -142
  210. phoenix/server/api/types/ProjectSession.py +139 -0
  211. phoenix/server/api/types/Segments.py +4 -4
  212. phoenix/server/api/types/Span.py +221 -176
  213. phoenix/server/api/types/SpanAnnotation.py +43 -0
  214. phoenix/server/api/types/SpanIOValue.py +15 -0
  215. phoenix/server/api/types/SystemApiKey.py +9 -0
  216. phoenix/server/api/types/TemplateLanguage.py +10 -0
  217. phoenix/server/api/types/TimeSeries.py +19 -15
  218. phoenix/server/api/types/TokenUsage.py +11 -0
  219. phoenix/server/api/types/Trace.py +154 -0
  220. phoenix/server/api/types/TraceAnnotation.py +45 -0
  221. phoenix/server/api/types/UMAPPoints.py +7 -7
  222. phoenix/server/api/types/User.py +60 -0
  223. phoenix/server/api/types/UserApiKey.py +45 -0
  224. phoenix/server/api/types/UserRole.py +15 -0
  225. phoenix/server/api/types/node.py +13 -107
  226. phoenix/server/api/types/pagination.py +156 -57
  227. phoenix/server/api/utils.py +34 -0
  228. phoenix/server/app.py +864 -115
  229. phoenix/server/bearer_auth.py +163 -0
  230. phoenix/server/dml_event.py +136 -0
  231. phoenix/server/dml_event_handler.py +256 -0
  232. phoenix/server/email/__init__.py +0 -0
  233. phoenix/server/email/sender.py +97 -0
  234. phoenix/server/email/templates/__init__.py +0 -0
  235. phoenix/server/email/templates/password_reset.html +19 -0
  236. phoenix/server/email/types.py +11 -0
  237. phoenix/server/grpc_server.py +102 -0
  238. phoenix/server/jwt_store.py +505 -0
  239. phoenix/server/main.py +305 -116
  240. phoenix/server/oauth2.py +52 -0
  241. phoenix/server/openapi/__init__.py +0 -0
  242. phoenix/server/prometheus.py +111 -0
  243. phoenix/server/rate_limiters.py +188 -0
  244. phoenix/server/static/.vite/manifest.json +87 -0
  245. phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
  246. phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
  247. phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
  248. phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
  249. phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
  250. phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
  251. phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
  252. phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
  253. phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
  254. phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
  255. phoenix/server/telemetry.py +68 -0
  256. phoenix/server/templates/index.html +82 -23
  257. phoenix/server/thread_server.py +3 -3
  258. phoenix/server/types.py +275 -0
  259. phoenix/services.py +27 -18
  260. phoenix/session/client.py +743 -68
  261. phoenix/session/data_extractor.py +31 -7
  262. phoenix/session/evaluation.py +3 -9
  263. phoenix/session/session.py +263 -219
  264. phoenix/settings.py +22 -0
  265. phoenix/trace/__init__.py +2 -22
  266. phoenix/trace/attributes.py +338 -0
  267. phoenix/trace/dsl/README.md +116 -0
  268. phoenix/trace/dsl/filter.py +663 -213
  269. phoenix/trace/dsl/helpers.py +73 -21
  270. phoenix/trace/dsl/query.py +574 -201
  271. phoenix/trace/exporter.py +24 -19
  272. phoenix/trace/fixtures.py +368 -32
  273. phoenix/trace/otel.py +71 -219
  274. phoenix/trace/projects.py +3 -2
  275. phoenix/trace/schemas.py +33 -11
  276. phoenix/trace/span_evaluations.py +21 -16
  277. phoenix/trace/span_json_decoder.py +6 -4
  278. phoenix/trace/span_json_encoder.py +2 -2
  279. phoenix/trace/trace_dataset.py +47 -32
  280. phoenix/trace/utils.py +21 -4
  281. phoenix/utilities/__init__.py +0 -26
  282. phoenix/utilities/client.py +132 -0
  283. phoenix/utilities/deprecation.py +31 -0
  284. phoenix/utilities/error_handling.py +3 -2
  285. phoenix/utilities/json.py +109 -0
  286. phoenix/utilities/logging.py +8 -0
  287. phoenix/utilities/project.py +2 -2
  288. phoenix/utilities/re.py +49 -0
  289. phoenix/utilities/span_store.py +0 -23
  290. phoenix/utilities/template_formatters.py +99 -0
  291. phoenix/version.py +1 -1
  292. arize_phoenix-3.16.0.dist-info/METADATA +0 -495
  293. arize_phoenix-3.16.0.dist-info/RECORD +0 -178
  294. phoenix/core/project.py +0 -617
  295. phoenix/core/traces.py +0 -100
  296. phoenix/experimental/evals/__init__.py +0 -73
  297. phoenix/experimental/evals/evaluators.py +0 -413
  298. phoenix/experimental/evals/functions/__init__.py +0 -4
  299. phoenix/experimental/evals/functions/classify.py +0 -453
  300. phoenix/experimental/evals/functions/executor.py +0 -353
  301. phoenix/experimental/evals/functions/generate.py +0 -138
  302. phoenix/experimental/evals/functions/processing.py +0 -76
  303. phoenix/experimental/evals/models/__init__.py +0 -14
  304. phoenix/experimental/evals/models/anthropic.py +0 -175
  305. phoenix/experimental/evals/models/base.py +0 -170
  306. phoenix/experimental/evals/models/bedrock.py +0 -221
  307. phoenix/experimental/evals/models/litellm.py +0 -134
  308. phoenix/experimental/evals/models/openai.py +0 -448
  309. phoenix/experimental/evals/models/rate_limiters.py +0 -246
  310. phoenix/experimental/evals/models/vertex.py +0 -173
  311. phoenix/experimental/evals/models/vertexai.py +0 -186
  312. phoenix/experimental/evals/retrievals.py +0 -96
  313. phoenix/experimental/evals/templates/__init__.py +0 -50
  314. phoenix/experimental/evals/templates/default_templates.py +0 -472
  315. phoenix/experimental/evals/templates/template.py +0 -195
  316. phoenix/experimental/evals/utils/__init__.py +0 -172
  317. phoenix/experimental/evals/utils/threads.py +0 -27
  318. phoenix/server/api/helpers.py +0 -11
  319. phoenix/server/api/routers/evaluation_handler.py +0 -109
  320. phoenix/server/api/routers/span_handler.py +0 -70
  321. phoenix/server/api/routers/trace_handler.py +0 -60
  322. phoenix/server/api/types/DatasetRole.py +0 -23
  323. phoenix/server/static/index.css +0 -6
  324. phoenix/server/static/index.js +0 -7447
  325. phoenix/storage/span_store/__init__.py +0 -23
  326. phoenix/storage/span_store/text_file.py +0 -85
  327. phoenix/trace/dsl/missing.py +0 -60
  328. phoenix/trace/langchain/__init__.py +0 -3
  329. phoenix/trace/langchain/instrumentor.py +0 -35
  330. phoenix/trace/llama_index/__init__.py +0 -3
  331. phoenix/trace/llama_index/callback.py +0 -102
  332. phoenix/trace/openai/__init__.py +0 -3
  333. phoenix/trace/openai/instrumentor.py +0 -30
  334. {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
  335. {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
  336. /phoenix/{datasets → db/insertion}/__init__.py +0 -0
  337. /phoenix/{experimental → db/migrations}/__init__.py +0 -0
  338. /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
@@ -1,199 +1,542 @@
  import ast
- import inspect
+ import re
  import sys
+ import typing
  from dataclasses import dataclass, field
  from difflib import SequenceMatcher
- from typing import (
-     Any,
-     Dict,
-     Iterable,
-     Iterator,
-     Mapping,
-     Optional,
-     Protocol,
-     Sequence,
-     Tuple,
-     cast,
- )
+ from itertools import chain
+ from types import MappingProxyType
+ from uuid import uuid4

- from openinference.semconv import trace
- from typing_extensions import TypeGuard
+ import sqlalchemy
+ from sqlalchemy.orm import Mapped, aliased
+ from sqlalchemy.orm.util import AliasedClass
+ from sqlalchemy.sql.expression import Select
+ from typing_extensions import TypeAlias, TypeGuard, assert_never

  import phoenix.trace.v1 as pb
- from phoenix.trace.dsl.missing import MISSING
- from phoenix.trace.schemas import ComputedAttributes, Span, SpanID
+ from phoenix.db import models

- _VALID_EVAL_ATTRIBUTES: Tuple[str, ...] = tuple(
+ _VALID_EVAL_ATTRIBUTES: tuple[str, ...] = tuple(
      field.name for field in pb.Evaluation.Result.DESCRIPTOR.fields
  )


- class SupportsGetSpanEvaluation(Protocol):
-     def get_span_evaluation(self, span_id: SpanID, name: str) -> Optional[pb.Evaluation]: ...
+ AnnotationType: TypeAlias = typing.Literal["annotations", "evals"]
+ AnnotationAttribute: TypeAlias = typing.Literal["label", "score"]
+ AnnotationExpression: TypeAlias = str
+ AnnotationName: TypeAlias = str
+
+ EVAL_EXPRESSION_PATTERN = re.compile(
+     r"""\b((annotations|evals)\[(".*?"|'.*?')\][.](label|score))\b"""
+ )
+
+
+ @dataclass(frozen=True)
+ class AliasedAnnotationRelation:
+     """
+     Represents an aliased `span_annotation` relation (i.e., SQL table). Used to
+     perform joins on span evaluations during filtering. An alias is required
+     because the `span_annotation` may be joined multiple times for different
+     evaluation names.
+     """
+
+     index: int
+     name: str
+     table: AliasedClass[models.SpanAnnotation] = field(init=False, repr=False)
+     _label_attribute_alias: str = field(init=False, repr=False)
+     _score_attribute_alias: str = field(init=False, repr=False)
+
+     def __post_init__(self) -> None:
+         table_alias = f"span_annotation_{self.index}"
+         alias_id = uuid4().hex
+         label_attribute_alias = f"{table_alias}_label_{alias_id}"
+         score_attribute_alias = f"{table_alias}_score_{alias_id}"
+
+         table = aliased(models.SpanAnnotation, name=table_alias)
+         object.__setattr__(self, "_label_attribute_alias", label_attribute_alias)
+         object.__setattr__(self, "_score_attribute_alias", score_attribute_alias)
+         object.__setattr__(self, "table", table)
+
+     @property
+     def attributes(self) -> typing.Iterator[tuple[str, Mapped[typing.Any]]]:
+         """
+         Alias names and attributes (i.e., columns) of the `span_annotation`
+         relation.
+         """
+         yield self._label_attribute_alias, self.table.label
+         yield self._score_attribute_alias, self.table.score
+
+     def attribute_alias(self, attribute: AnnotationAttribute) -> str:
+         """
+         Returns an alias for the given attribute (i.e., column).
+         """
+         if attribute == "label":
+             return self._label_attribute_alias
+         if attribute == "score":
+             return self._score_attribute_alias
+         assert_never(attribute)
+
+
+ # Because postgresql is strongly typed, we cast JSON values to string
+ # by default unless it's hinted otherwise as done here.
+ _FLOAT_ATTRIBUTES: frozenset[str] = frozenset(
+     {
+         "llm.token_count.completion",
+         "llm.token_count.prompt",
+         "llm.token_count.total",
+     }
+ )
+
+ _STRING_NAMES: typing.Mapping[str, sqlalchemy.SQLColumnExpression[typing.Any]] = MappingProxyType(
+     {
+         "span_id": models.Span.span_id,
+         "trace_id": models.Trace.trace_id,
+         "context.span_id": models.Span.span_id,
+         "context.trace_id": models.Trace.trace_id,
+         "parent_id": models.Span.parent_id,
+         "span_kind": models.Span.span_kind,
+         "name": models.Span.name,
+         "status_code": models.Span.status_code,
+         "status_message": models.Span.status_message,
+     }
+ )
+ _FLOAT_NAMES: typing.Mapping[str, sqlalchemy.SQLColumnExpression[typing.Any]] = MappingProxyType(
+     {
+         "latency_ms": models.Span.latency_ms,
+         "cumulative_llm_token_count_completion": models.Span.cumulative_llm_token_count_completion,
+         "cumulative_llm_token_count_prompt": models.Span.cumulative_llm_token_count_prompt,
+         "cumulative_llm_token_count_total": models.Span.cumulative_llm_token_count_total,
+     }
+ )
+ _DATETIME_NAMES: typing.Mapping[str, sqlalchemy.SQLColumnExpression[typing.Any]] = MappingProxyType(
+     {
+         "start_time": models.Span.start_time,
+         "end_time": models.Span.end_time,
+     }
+ )
+ _NAMES: typing.Mapping[str, sqlalchemy.SQLColumnExpression[typing.Any]] = MappingProxyType(
+     {
+         **_STRING_NAMES,
+         **_FLOAT_NAMES,
+         **_DATETIME_NAMES,
+         "attributes": models.Span.attributes,
+         "events": models.Span.events,
+     }
+ )
+ _BACKWARD_COMPATIBILITY_REPLACEMENTS: typing.Mapping[str, str] = MappingProxyType(
+     {
+         # for backward-compatibility
+         "context.span_id": "span_id",
+         "context.trace_id": "trace_id",
+         "cumulative_token_count.completion": "cumulative_llm_token_count_completion",
+         "cumulative_token_count.prompt": "cumulative_llm_token_count_prompt",
+         "cumulative_token_count.total": "cumulative_llm_token_count_total",
+     }
+ )


  @dataclass(frozen=True)
  class SpanFilter:
      condition: str = ""
-     evals: Optional[SupportsGetSpanEvaluation] = None
-     valid_eval_names: Optional[Sequence[str]] = None
+     valid_eval_names: typing.Optional[typing.Sequence[str]] = None
      translated: ast.Expression = field(init=False, repr=False)
-     compiled: Any = field(init=False, repr=False)
+     compiled: typing.Any = field(init=False, repr=False)
+     _aliased_annotation_relations: tuple[AliasedAnnotationRelation] = field(init=False, repr=False)
+     _aliased_annotation_attributes: dict[str, Mapped[typing.Any]] = field(init=False, repr=False)

      def __bool__(self) -> bool:
          return bool(self.condition)

      def __post_init__(self) -> None:
-         condition = self.condition or "True"  # default to no op
-         root = ast.parse(condition, mode="eval")
-         if self.condition:
-             _validate_expression(root, condition, valid_eval_names=self.valid_eval_names)
-         translated = _Translator(condition).visit(root)
+         if not (source := self.condition):
+             return
+         root = ast.parse(source, mode="eval")
+         _validate_expression(root, valid_eval_names=self.valid_eval_names)
+         source, aliased_annotation_relations = _apply_eval_aliasing(source)
+         root = ast.parse(source, mode="eval")
+         translated = _FilterTranslator(
+             reserved_keywords=(
+                 alias
+                 for aliased_annotation in aliased_annotation_relations
+                 for alias, _ in aliased_annotation.attributes
+             ),
+         ).visit(root)
          ast.fix_missing_locations(translated)
          compiled = compile(translated, filename="", mode="eval")
+         aliased_annotation_attributes = {
+             alias: attribute
+             for aliased_annotation in aliased_annotation_relations
+             for alias, attribute in aliased_annotation.attributes
+         }
          object.__setattr__(self, "translated", translated)
          object.__setattr__(self, "compiled", compiled)
-         object.__setattr__(self, "evals", self.evals or MISSING)
+         object.__setattr__(self, "_aliased_annotation_relations", aliased_annotation_relations)
+         object.__setattr__(self, "_aliased_annotation_attributes", aliased_annotation_attributes)

-     def __call__(self, span: Span) -> bool:
-         return cast(
-             bool,
+     def __call__(self, select: Select[typing.Any]) -> Select[typing.Any]:
+         if not self.condition:
+             return select
+         return self._join_aliased_relations(select).where(
              eval(
                  self.compiled,
-                 {"span": span, "_MISSING": MISSING, "evals": self.evals},
-             ),
+                 {
+                     **_NAMES,
+                     **self._aliased_annotation_attributes,
+                     "not_": sqlalchemy.not_,
+                     "and_": sqlalchemy.and_,
+                     "or_": sqlalchemy.or_,
+                     "cast": sqlalchemy.cast,
+                     "Float": sqlalchemy.Float,
+                     "String": sqlalchemy.String,
+                     "TextContains": models.TextContains,
+                 },
+             )
          )

-     def to_dict(self) -> Dict[str, Any]:
+     def to_dict(self) -> dict[str, typing.Any]:
          return {"condition": self.condition}

      @classmethod
      def from_dict(
          cls,
-         obj: Mapping[str, Any],
-         evals: Optional[SupportsGetSpanEvaluation] = None,
-         valid_eval_names: Optional[Sequence[str]] = None,
+         obj: typing.Mapping[str, typing.Any],
+         valid_eval_names: typing.Optional[typing.Sequence[str]] = None,
      ) -> "SpanFilter":
          return cls(
              condition=obj.get("condition") or "",
-             evals=evals,
              valid_eval_names=valid_eval_names,
          )

+     def _join_aliased_relations(self, stmt: Select[typing.Any]) -> Select[typing.Any]:
+         """
+         Joins the aliased relations to the given statement. E.g., for the filter condition:
+
+         ```
+         evals["Hallucination"].score > 0.5
+         ```
+
+         an alias (e.g., `A`) is generated for the `span_annotations` relation. An input statement
+         `select(Span)` is transformed to:
+
+         ```
+         A = aliased(SpanAnnotation)
+         select(Span).join(A, onclause=(and_(Span.id == A.span_rowid, A.name == "Hallucination")))
+         ```
+         """
+         for eval_alias in self._aliased_annotation_relations:
+             eval_name = eval_alias.name
+             AliasedSpanAnnotation = eval_alias.table
+             stmt = stmt.outerjoin(
+                 AliasedSpanAnnotation,
+                 onclause=(
+                     sqlalchemy.and_(
+                         AliasedSpanAnnotation.span_rowid == models.Span.id,
+                         AliasedSpanAnnotation.name == eval_name,
+                     )
+                 ),
+             )
+         return stmt

- def _replace_none_with_missing(
-     value: ast.expr,
-     as_str: bool = False,
- ) -> ast.IfExp:
-     """
-     E.g. `value` becomes
-     `_MISSING if (_VALUE := value) is None else _VALUE`
-     """
-     _store_VALUE = ast.Name(id="_VALUE", ctx=ast.Store())
-     _load_VALUE = ast.Name(id="_VALUE", ctx=ast.Load())
-     return ast.IfExp(
-         test=ast.Compare(
-             left=ast.NamedExpr(target=_store_VALUE, value=value),
-             ops=[ast.Is()],
-             comparators=[ast.Constant(value=None)],
+
+ @dataclass(frozen=True)
+ class Projector:
+     expression: str
+     translated: ast.Expression = field(init=False, repr=False)
+     compiled: typing.Any = field(init=False, repr=False)
+
+     def __post_init__(self) -> None:
+         if not (source := self.expression):
+             raise ValueError("missing expression")
+         root = ast.parse(source, mode="eval")
+         translated = _ProjectionTranslator(source).visit(root)
+         ast.fix_missing_locations(translated)
+         compiled = compile(translated, filename="", mode="eval")
+         object.__setattr__(self, "translated", translated)
+         object.__setattr__(self, "compiled", compiled)
+
+     def __call__(self) -> sqlalchemy.SQLColumnExpression[typing.Any]:
+         return typing.cast(
+             sqlalchemy.SQLColumnExpression[typing.Any],
+             eval(self.compiled, {**_NAMES}),
+         )
+
+
+ def _is_string_constant(node: typing.Any) -> TypeGuard[ast.Constant]:
+     return isinstance(node, ast.Constant) and isinstance(node.value, str)
+
+
+ def _is_float_constant(node: typing.Any) -> TypeGuard[ast.Constant]:
+     return isinstance(node, ast.Constant) and isinstance(node.value, typing.SupportsFloat)
+
+
+ def _is_string_attribute(node: typing.Any) -> TypeGuard[ast.Call]:
+     return (
+         isinstance(node, ast.Call)
+         and isinstance(func := node.func, ast.Attribute)
+         and func.attr == "as_string"
+         and isinstance(value := func.value, ast.Subscript)
+         and isinstance(name := value.value, ast.Name)
+         and name.id == "attributes"
+     )
+
+
+ def _is_float_attribute(node: typing.Any) -> TypeGuard[ast.Call]:
+     return (
+         isinstance(node, ast.Call)
+         and isinstance(func := node.func, ast.Attribute)
+         and func.attr == "as_float"
+         and isinstance(value := func.value, ast.Subscript)
+         and isinstance(name := value.value, ast.Name)
+         and name.id == "attributes"
+     )
+
+
+ def _as_string_attribute(node: typing.Union[ast.Subscript, ast.Call]) -> ast.Call:
+     if isinstance(node, ast.Call):
+         value = typing.cast(ast.Attribute, node.func).value
+     elif isinstance(node, ast.Subscript):
+         value = node
+     else:
+         assert_never(node)
+     return ast.Call(
+         func=ast.Attribute(
+             value=value,
+             attr="as_string",
+             ctx=ast.Load(),
+         ),
+         args=[],
+         keywords=[],
+     )
+
+
+ def _as_float_attribute(node: typing.Union[ast.Subscript, ast.Call]) -> ast.Call:
+     if isinstance(node, ast.Call):
+         value = typing.cast(ast.Attribute, node.func).value
+     elif isinstance(node, ast.Subscript):
+         value = node
+     else:
+         assert_never(node)
+     return ast.Call(
+         func=ast.Attribute(
+             value=value,
+             attr="as_float",
+             ctx=ast.Load(),
          ),
-         body=ast.Name(id="_MISSING", ctx=ast.Load()),
-         orelse=_as_str(_load_VALUE) if as_str else _load_VALUE,
+         args=[],
+         keywords=[],
      )


- def _as_str(value: ast.expr) -> ast.Call:
-     """E.g. `value` becomes `str(value)`"""
-     return ast.Call(func=ast.Name(id="str", ctx=ast.Load()), args=[value], keywords=[])
+ def _is_cast(
+     node: typing.Any,
+     type_: typing.Optional[typing.Literal["Float", "String"]] = None,
+ ) -> TypeGuard[ast.Call]:
+     return (
+         isinstance(node, ast.Call)
+         and isinstance(func := node.func, ast.Name)
+         and func.id == "cast"
+         and len(node.args) == 2
+         and isinstance(name := node.args[1], ast.Name)
+         and (not type_ or name.id == type_)
+     )


- def _ast_replacement(expression: str) -> ast.expr:
-     as_str = expression in (
-         "span.status_code",
-         "span.span_kind",
-         "span.parent_id",
-         "span.context.span_id",
-         "span.context.trace_id",
+ def _remove_cast(node: typing.Any) -> typing.Any:
+     return node.args[0] if _is_cast(node) else node
+
+
+ def _cast_as(
+     type_: typing.Literal["Float", "String"],
+     node: typing.Any,
+ ) -> ast.Call:
+     if type_ == "Float" and (_is_subscript(node, "attributes") or _is_string_attribute(node)):
+         return _as_float_attribute(node)
+     if type_ == "String" and (_is_subscript(node, "attributes") or _is_float_attribute(node)):
+         return _as_string_attribute(node)
+     return ast.Call(
+         func=ast.Name(id="cast", ctx=ast.Load()),
+         args=[
+             _remove_cast(node),
+             ast.Name(id=type_, ctx=ast.Load()),
+         ],
+         keywords=[],
      )
-     return _replace_none_with_missing(ast.parse(expression, mode="eval").body, as_str)
-
-
- def _allowed_replacements() -> Iterator[Tuple[str, ast.expr]]:
-     for source_segment, ast_replacement in {
-         "name": _ast_replacement("span.name"),
-         "status_code": _ast_replacement("span.status_code"),
-         "span_kind": _ast_replacement("span.span_kind"),
-         "parent_id": _ast_replacement("span.parent_id"),
-     }.items():
-         yield source_segment, ast_replacement
-         yield "span." + source_segment, ast_replacement
-
-     for source_segment, ast_replacement in {
-         "span_id": _ast_replacement("span.context.span_id"),
-         "trace_id": _ast_replacement("span.context.trace_id"),
-     }.items():
-         yield source_segment, ast_replacement
-         yield "context." + source_segment, ast_replacement
-         yield "span.context." + source_segment, ast_replacement
-
-     for field_name in (
-         getattr(klass, attr)
-         for name in dir(trace)
-         if name.endswith("Attributes") and inspect.isclass(klass := getattr(trace, name))
-         for attr in dir(klass)
-         if attr.isupper()
-     ):
-         source_segment = field_name
-         ast_replacement = _ast_replacement(f"span.attributes.get('{field_name}')")
-         yield source_segment, ast_replacement
-         yield "attributes." + source_segment, ast_replacement
-         yield "span.attributes." + source_segment, ast_replacement
-
-     for computed_attribute in ComputedAttributes:
-         source_segment = computed_attribute.value
-         ast_replacement = _ast_replacement(f"span.get_computed_value('{source_segment}')")
-         yield source_segment, ast_replacement
-
-
- class _Translator(ast.NodeTransformer):
-     _allowed_fields: Mapping[str, ast.expr] = dict(_allowed_replacements())
-
-     def __init__(self, source: str) -> None:
-         # Regarding the need for `source: str` for getting source segments:
-         # In Python 3.8, we have to use `ast.get_source_segment(source, node)`.
-         # In Python 3.9+, we can use `ast.unparse(node)` (no need for `source`).
-         self._source = source
-
-     def visit_Subscript(self, node: ast.Subscript) -> Any:
-         if _is_metadata(node) and (key := _get_subscript_key(node)):
-             return _ast_metadata_subscript(key)
-         source_segment: str = cast(str, ast.get_source_segment(self._source, node))
-         raise SyntaxError(f"invalid expression: {source_segment}")  # TODO: add details
-
-     def visit_Attribute(self, node: ast.Attribute) -> Any:
-         if _is_eval(node.value) and (eval_name := _get_subscript_key(node.value)):
-             # e.g. `evals["name"].score`
-             return _ast_evaluation_result_value(eval_name, node.attr)
-         source_segment: str = cast(str, ast.get_source_segment(self._source, node))
-         if replacement := self._allowed_fields.get(source_segment):
-             return replacement
-         raise SyntaxError(f"invalid expression: {source_segment}")  # TODO: add details

-     def visit_Name(self, node: ast.Name) -> Any:
-         source_segment: str = cast(str, ast.get_source_segment(self._source, node))
-         if replacement := self._allowed_fields.get(source_segment):
-             return replacement
-         raise SyntaxError(f"invalid expression: {source_segment}")  # TODO: add details

-     def visit_Constant(self, node: ast.Constant) -> Any:
-         return ast.Name(id="_MISSING", ctx=ast.Load()) if node.value is None else node
+ def _is_string(node: typing.Any) -> TypeGuard[ast.Call]:
+     return (
+         isinstance(node, ast.Name)
+         and node.id in _STRING_NAMES
+         or _is_cast(node, "String")
+         or _is_string_constant(node)
+         or _is_string_attribute(node)
+         or isinstance(node, (ast.List, ast.Tuple))
+         and len(node.elts) > 0
+         and _is_string(node.elts[0])
+     )
+
+
+ def _is_float(node: typing.Any) -> TypeGuard[ast.Call]:
+     return (
+         isinstance(node, ast.Name)
+         and node.id in _FLOAT_NAMES
+         or _is_cast(node, "Float")
+         or _is_float_constant(node)
+         or _is_float_attribute(node)
+         or isinstance(node, (ast.List, ast.Tuple))
+         and len(node.elts) > 0
+         and _is_float(node.elts[0])
+         or isinstance(node, ast.BinOp)
+         and (not isinstance(node.op, ast.Add) or (_is_float(node.left) or _is_float(node.right)))
+         or isinstance(node, ast.UnaryOp)
+         and isinstance(node.op, (ast.USub, ast.UAdd))
+     )
+
+
+ class _ProjectionTranslator(ast.NodeTransformer):
+     def __init__(self, reserved_keywords: typing.Iterable[str] = ()) -> None:
+         self._reserved_keywords = frozenset(
+             chain(
+                 reserved_keywords,
+                 _STRING_NAMES.keys(),
+                 _FLOAT_NAMES.keys(),
+                 _DATETIME_NAMES.keys(),
+             )
+         )
+
+     def visit_generic(self, node: ast.AST) -> typing.Any:
+         raise SyntaxError(f"invalid expression: {ast.unparse(node)}")
+
+     def visit_Expression(self, node: ast.Expression) -> typing.Any:
+         return ast.Expression(body=self.visit(node.body))
+
+     def visit_Attribute(self, node: ast.Attribute) -> typing.Any:
+         source_segment = ast.unparse(node)
+         if replacement := _BACKWARD_COMPATIBILITY_REPLACEMENTS.get(source_segment):
+             return ast.Name(id=replacement, ctx=ast.Load())
+         if (keys := _get_attribute_keys_list(node)) is not None:
+             return _as_attribute(keys)
+         raise SyntaxError(f"invalid expression: {source_segment}")
+
+     def visit_Name(self, node: ast.Name) -> typing.Any:
+         source_segment = ast.unparse(node)
+         if source_segment in self._reserved_keywords:
+             return node
+         name = source_segment
+         return _as_attribute([ast.Constant(value=name, kind=None)])
+
+     def visit_Subscript(self, node: ast.Subscript) -> typing.Any:
+         if (keys := _get_attribute_keys_list(node)) is not None:
+             return _as_attribute(keys)
+         raise SyntaxError(f"invalid expression: {ast.unparse(node)}")
+
+
+ class _FilterTranslator(_ProjectionTranslator):
+     def visit_Compare(self, node: ast.Compare) -> typing.Any:
+         if len(node.comparators) > 1:
+             args: list[typing.Any] = []
+             left = node.left
+             for i, (op, comparator) in enumerate(zip(node.ops, node.comparators)):
+                 args.append(self.visit(ast.Compare(left=left, ops=[op], comparators=[comparator])))
+                 left = comparator
+             return ast.Call(func=ast.Name(id="and_", ctx=ast.Load()), args=args, keywords=[])
+         left, op, right = self.visit(node.left), node.ops[0], self.visit(node.comparators[0])
+         if _is_subscript(left, "attributes"):
+             left = _cast_as("String", left)
+         if _is_subscript(right, "attributes"):
+             right = _cast_as("String", right)
+         if _is_float(left) and not _is_float(right):
+             right = _cast_as("Float", right)
+         elif not _is_float(left) and _is_float(right):
+             left = _cast_as("Float", left)
+         if isinstance(op, (ast.In, ast.NotIn)):
+             if _is_string_attribute(right) or ast.unparse(right) in _NAMES:
+                 call = ast.Call(
+                     func=ast.Name(id="TextContains", ctx=ast.Load()),
+                     args=[right, left],
+                     keywords=[],
+                 )
+                 if isinstance(op, ast.NotIn):
+                     call = ast.Call(
+                         func=ast.Name(id="not_", ctx=ast.Load()), args=[call], keywords=[]
+                     )
+                 return call
+             elif isinstance(right, (ast.List, ast.Tuple)):
+                 attr = "in_" if isinstance(op, ast.In) else "not_in"
+                 return ast.Call(
+                     func=ast.Attribute(value=left, attr=attr, ctx=ast.Load()),
+                     args=[right],
+                     keywords=[],
+                 )
+             else:
+                 raise SyntaxError(f"invalid expression: {ast.unparse(op)}")
+         if isinstance(op, ast.Is):
+             op = ast.Eq()
+         elif isinstance(op, ast.IsNot):
+             op = ast.NotEq()
+         return ast.Compare(left=left, ops=[op], comparators=[right])
+
+     def visit_BoolOp(self, node: ast.BoolOp) -> typing.Any:
+         if isinstance(node.op, ast.And):
+             func = ast.Name(id="and_", ctx=ast.Load())
+         elif isinstance(node.op, ast.Or):
+             func = ast.Name(id="or_", ctx=ast.Load())
+         else:
+             raise SyntaxError(f"invalid expression: {ast.unparse(node)}")
+         args = [self.visit(value) for value in node.values]
+         return ast.Call(func=func, args=args, keywords=[])
+
+     def visit_UnaryOp(self, node: ast.UnaryOp) -> typing.Any:
+         operand = self.visit(node.operand)
+         if isinstance(node.op, ast.Not):
+             return ast.Call(
+                 func=ast.Name(id="not_", ctx=ast.Load()),
+                 args=[operand],
+                 keywords=[],
+             )
+         node = ast.UnaryOp(op=node.op, operand=operand)
+         if isinstance(node.op, (ast.USub, ast.UAdd)):
+             if not _is_float(node.operand):
+                 operand = _cast_as("Float", node.operand)
+                 return ast.UnaryOp(op=ast.USub(), operand=operand)
+             return node
+         return node
+
+     def visit_BinOp(self, node: ast.BinOp) -> typing.Any:
+         left, op, right = self.visit(node.left), node.op, self.visit(node.right)
+         if _is_subscript(left, "attributes"):
+             left = _cast_as("String", left)
+         if _is_subscript(right, "attributes"):
+             right = _cast_as("String", right)
+         type_: typing.Literal["Float", "String"] = "String"
+         if not isinstance(op, ast.Add) or _is_float(left) or _is_float(right):
+             type_ = "Float"
+             if not _is_float(left):
+                 left = _cast_as(type_, left)
+             if not _is_float(right):
+                 right = _cast_as(type_, right)
+             return ast.BinOp(left=left, op=op, right=right)
+         return _cast_as(type_, ast.BinOp(left=left, op=op, right=right))
+
+     def visit_Call(self, node: ast.Call) -> typing.Any:
+         source_segment = ast.unparse(node)
+         if len(node.args) != 1:
+             raise SyntaxError(f"invalid expression: {source_segment}")
+         if not isinstance(node.func, ast.Name) or node.func.id not in ("str", "float", "int"):
+             raise SyntaxError(f"invalid expression: {ast.unparse(node.func)}")
+         arg = self.visit(node.args[0])
+         if node.func.id in ("float", "int") and not _is_float(arg):
+             return _cast_as("Float", arg)
+         if node.func.id in ("str",) and not _is_string(arg):
+             return _cast_as("String", arg)
+         return arg


  def _validate_expression(
      expression: ast.Expression,
-     source: str,
-     valid_eval_names: Optional[Sequence[str]] = None,
-     valid_eval_attributes: Tuple[str, ...] = _VALID_EVAL_ATTRIBUTES,
+     valid_eval_names: typing.Optional[typing.Sequence[str]] = None,
+     valid_eval_attributes: tuple[str, ...] = _VALID_EVAL_ATTRIBUTES,
  ) -> None:
      """
      Validate primarily the structural (i.e. not semantic) characteristics of an
@@ -204,23 +547,26 @@ def _validate_expression(
      additional exceptions may be raised later by the NodeTransformer regarding
      either structural and semantic issues.
      """
-     # Regarding the need for `source: str` for getting source segments:
-     # In Python 3.8, we have to use `ast.get_source_segment(source, node)`.
-     # In Python 3.9+, we can use `ast.unparse(node)` (no need for `source`).
      if not isinstance(expression, ast.Expression):
-         raise SyntaxError(f"invalid expression: {source}")  # TODO: add details
+         raise SyntaxError(f"invalid expression: {ast.unparse(expression)}")
      for i, node in enumerate(ast.walk(expression.body)):
          if i == 0:
-             if isinstance(node, (ast.BoolOp, ast.Compare)):
+             if (
+                 isinstance(node, (ast.BoolOp, ast.Compare))
+                 or isinstance(node, ast.UnaryOp)
+                 and isinstance(node.op, ast.Not)
+             ):
                  continue
-         elif _is_metadata(node):
+         elif (
+             _is_subscript(node, "metadata") or _is_subscript(node, "attributes")
+         ) and _get_attribute_keys_list(node) is not None:
              continue
-         elif _is_eval(node):
+         elif _is_annotation(node) and _get_subscript_key(node) is not None:
              # e.g. `evals["name"]`
              if not (eval_name := _get_subscript_key(node)) or (
                  valid_eval_names is not None and eval_name not in valid_eval_names
              ):
-                 source_segment = cast(str, ast.get_source_segment(source, node))
+                 source_segment = ast.unparse(node)
                  if eval_name and valid_eval_names:
                      # suggest a valid eval name most similar to the one given
                      choice, score = _find_best_match(eval_name, valid_eval_names)
@@ -237,10 +583,10 @@ def _validate_expression(
                      else ""
                  )
              continue
-         elif isinstance(node, ast.Attribute) and _is_eval(node.value):
+         elif isinstance(node, ast.Attribute) and _is_annotation(node.value):
              # e.g. `evals["name"].score`
              if (attr := node.attr) not in valid_eval_attributes:
-                 source_segment = cast(str, ast.get_source_segment(source, node))
+                 source_segment = ast.unparse(node)
                  # suggest a valid attribute most similar to the one given
                  choice, score = _find_best_match(attr, valid_eval_attributes)
                  if choice and score > 0.75:  # arbitrary threshold
@@ -256,6 +602,13 @@ def _validate_expression(
                      else ""
                  )
              continue
+         elif (
+             isinstance(node, ast.Call)
+             and isinstance(node.func, ast.Name)
+             and node.func.id in ("str", "float", "int")
+         ):
+             # allow type casting functions
+             continue
          elif isinstance(
              node,
              (
@@ -273,86 +626,113 @@ def _validate_expression(
                  ast.cmpop,
                  ast.operator,
                  ast.unaryop,
-                 # Prior to Python 3.9, `ast.Index` is part of `ast.Subscript`,
-                 # so it needs to allowed here, but note that `ast.Subscript` is
-                 # not allowed in general except in the case of `evals["name"]`.
-                 # Note that `ast.Index` is deprecated in Python 3.9+.
-                 *((ast.Index,) if sys.version_info < (3, 9) else ()),
              ),
          ):
              continue
-         source_segment = cast(str, ast.get_source_segment(source, node))
-         raise SyntaxError(f"invalid expression: {source_segment}")  # TODO: add details
-
-
- def _ast_evaluation_result_value(name: str, attr: str) -> ast.expr:
-     source = (
-         f"_RESULT.{attr}.value if ("
-         f" _RESULT := ("
-         f" _MISSING if ("
-         f" _VALUE := evals.get_span_evaluation("
-         f" span.context.span_id, '{name}'"
-         f" )"
-         f" ) is None "
-         f" else _VALUE"
-         f" ).result"
-         f").HasField('{attr}') "
-         f"else _MISSING"
-     )
-     return ast.parse(source, mode="eval").body
-
-
- def _ast_metadata_subscript(key: str) -> ast.expr:
-     source = (
-         f"_MISSING if ("
-         f" _MD := span.attributes.get('metadata')"
-         f") is None else ("
-         f" _MISSING if not hasattr(_MD, 'get') or ("
-         f" _VALUE := _MD.get('{key}')"
-         f" ) is None else _VALUE"
-         f")"
+         source_segment = ast.unparse(node)
+         raise SyntaxError(f"invalid expression: {source_segment}")
+
+
+ def _as_attribute(
+     keys: list[ast.Constant],
+     # as_float: typing.Optional[bool] = None,
+ ) -> ast.Subscript:
+     return ast.Subscript(
+         value=ast.Name(id="attributes", ctx=ast.Load()),
+         slice=ast.List(elts=keys, ctx=ast.Load())  # type: ignore[arg-type]
+         if sys.version_info >= (3, 9)
+         else ast.Index(value=ast.List(elts=keys, ctx=ast.Load())),  # type: ignore
+         ctx=ast.Load(),
      )
-     return ast.parse(source, mode="eval").body


- def _is_eval(node: Any) -> TypeGuard[ast.Subscript]:
+ def _is_annotation(node: typing.Any) -> TypeGuard[ast.Subscript]:
      # e.g. `evals["name"]`
      return (
          isinstance(node, ast.Subscript)
          and isinstance(value := node.value, ast.Name)
-         and value.id == "evals"
+         and value.id in ["evals", "annotations"]
      )


- def _is_metadata(node: Any) -> TypeGuard[ast.Subscript]:
-     # e.g. `metadata["name"]`
-     return (
-         isinstance(node, ast.Subscript)
-         and isinstance(value := node.value, ast.Name)
-         and value.id == "metadata"
-     )
+ def _is_subscript(
+     node: typing.Any,
+     id_: typing.Literal["attributes", "metadata"],
+ ) -> TypeGuard[ast.Subscript]:
+     # e.g. `attributes["key"]`
+     # e.g. `attributes[["a", "b.c", "d"]]`
+     # e.g. `attributes["a"]["b.c"]["d"]`
+     while isinstance(node, ast.Subscript):
+         node = node.value
+     if isinstance(node, ast.Name) and node.id == id_:
+         return True
+     return False
+
+
+ def _get_attribute_keys_list(
+     node: typing.Any,
+ ) -> typing.Optional[list[ast.Constant]]:
+     # e.g. `attributes["key"]` -> `["key"]`
+     # e.g. `attributes["a"]["b.c"][["d"]]` -> `["a", "b.c", "d"]`
+     # e.g. `attributes["a"][["b.c", "d"]]` -> `["a", "b.c", "d"]`
+     # e.g. `metadata["key"]` -> `["metadata", "key"]`
+     # e.g. `metadata["a"]["b.c"][["d"]]` -> `["metadata", "a", "b.c", "d"]`
+     # e.g. `metadata["a"][["b.c", "d"]]` -> `["metadata", "a", "b.c", "d"]`
+     keys: list[ast.Constant] = []
+     if isinstance(node, ast.Attribute):
+         while isinstance(node, ast.Attribute):
+             keys.append(ast.Constant(value=node.attr, kind=None))
+             node = node.value
+         if isinstance(node, ast.Name):
+             keys.append(ast.Constant(value=node.id, kind=None))
+             return keys[::-1]
+     elif isinstance(node, ast.Subscript):
+         while isinstance(node, ast.Subscript):
+             if not (sub_keys := _get_subscript_keys_list(node)):
+                 return None
+             keys.extend(reversed(sub_keys))
+             node = node.value
+         if isinstance(node, ast.Name):
+             if not isinstance(keys[-1].value, str):
+                 return None
+             if node.id == "metadata":
+                 keys.append(ast.Constant(value="metadata", kind=None))
+             return keys[::-1]
+     return None
+
+
+ def _get_subscript_keys_list(
+     node: ast.Subscript,
+ ) -> typing.Optional[list[ast.Constant]]:
+     child = node.slice
+     if isinstance(child, ast.Constant):
+         if not isinstance(child.value, (str, int)) or isinstance(child.value, bool):
+             return None
+         return [child]
+     if not (
+         isinstance(child, ast.List)
+         and (elts := child.elts)
+         and all(
+             isinstance(elt, ast.Constant)
+             and isinstance(elt.value, (str, int))
+             and not isinstance(elt.value, bool)
+             for elt in elts
+         )
+     ):
+         return None
+     return [typing.cast(ast.Constant, elt) for elt in elts]


- def _get_subscript_key(node: ast.Subscript) -> Optional[str]:
-     if sys.version_info < (3, 9):
-         # Note that `ast.Index` is deprecated in Python 3.9+, but is necessary
-         # for Python 3.8 as part of `ast.Subscript`.
-         return (
-             eval_name
-             if isinstance(node_slice := node.slice, ast.Index)
-             and isinstance(slice_value := node_slice.value, ast.Constant)
-             and isinstance(eval_name := slice_value.value, str)
-             else None
-         )
-     return (
-         eval_name
-         if isinstance(node_slice := node.slice, ast.Constant)
-         and isinstance(eval_name := node_slice.value, str)
-         else None
-     )
+ def _get_subscript_key(
+     node: ast.Subscript,
+ ) -> typing.Optional[str]:
+     child = node.slice
+     if not (isinstance(child, ast.Constant) and isinstance(child.value, str)):
+         return None
+     return child.value


- def _disjunction(choices: Sequence[str]) -> str:
+ def _disjunction(choices: typing.Sequence[str]) -> str:
      """
      E.g. `["a", "b", "c"]` becomes `"one of a, b, or c"`
      """
@@ -365,10 +745,80 @@ def _disjunction(choices: Sequence[str]) -> str:
      return f"one of {', '.join(choices[:-1])}, or {choices[-1]}"


- def _find_best_match(source: str, choices: Iterable[str]) -> Tuple[Optional[str], float]:
+ def _find_best_match(
+     source: str, choices: typing.Iterable[str]
+ ) -> tuple[typing.Optional[str], float]:
      best_choice, best_score = None, 0.0
      for choice in choices:
          score = SequenceMatcher(None, source, choice).ratio()
          if score > best_score:
              best_choice, best_score = choice, score
      return best_choice, best_score
+
+
+ def _apply_eval_aliasing(
+     source: str,
+ ) -> tuple[
+     str,
+     tuple[AliasedAnnotationRelation, ...],
+ ]:
+     """
+     Substitutes `evals[<eval-name>].<attribute>` with aliases. Returns the
+     updated source code in addition to the aliased relations.
+
+     Example:
+
+     input:
+
+     ```
+     evals['Hallucination'].label == 'correct' or evals['Hallucination'].score < 0.5
+     ```
+
+     output:
+
+     ```
+     span_annotation_0_label_123 == 'correct' or span_annotation_0_score_456 < 0.5
+     ```
+     """
+     eval_aliases: dict[AnnotationName, AliasedAnnotationRelation] = {}
+     for (
+         annotation_expression,
+         annotation_type,
+         annotation_name,
+         annotation_attribute,
+     ) in _parse_annotation_expressions_and_names(source):
+         if (eval_alias := eval_aliases.get(annotation_name)) is None:
+             eval_alias = AliasedAnnotationRelation(index=len(eval_aliases), name=annotation_name)
+             eval_aliases[annotation_name] = eval_alias
+         alias_name = eval_alias.attribute_alias(annotation_attribute)
+         source = source.replace(annotation_expression, alias_name)
+     return source, tuple(eval_aliases.values())
+
+
+ def _parse_annotation_expressions_and_names(
+     source: str,
+ ) -> typing.Iterator[
+     tuple[AnnotationExpression, AnnotationType, AnnotationName, AnnotationAttribute]
+ ]:
+     """
+     Parses filter conditions for evaluation expressions of the form:
+
+     ```
+     evals["<eval-name>"].<attribute>
+     annotations["eval-name"].<attribute>
+     ```
+     """
+     for match in EVAL_EXPRESSION_PATTERN.finditer(source):
+         (
+             annotation_expression,
+             annotation_type,
+             quoted_eval_name,
+             evaluation_attribute_name,
+         ) = match.groups()
+         annotation_type = typing.cast(AnnotationType, annotation_type)
+         yield (
+             annotation_expression,
+             annotation_type,
+             quoted_eval_name[1:-1],
+             typing.cast(AnnotationAttribute, evaluation_attribute_name),
+         )
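
Taken together, the hunks above replace the old in-memory span filtering (the removed _Translator, MISSING, and SupportsGetSpanEvaluation machinery) with translation of the same condition language into SQLAlchemy expressions that are joined and evaluated in the database. A minimal usage sketch follows; it assumes this diff corresponds to phoenix/trace/dsl/filter.py (entry 268 in the file list above) and that a configured SQLAlchemy session for the Phoenix database is available. The condition string, the "Hallucination" eval name, and the session variable are illustrative only.

    from sqlalchemy import select

    from phoenix.db import models
    from phoenix.trace.dsl.filter import SpanFilter

    # The condition is parsed, validated, and compiled into SQLAlchemy column
    # expressions; the evals[...] reference is rewritten into an aliased outer
    # join against the span_annotations table rather than a per-span lookup.
    span_filter = SpanFilter(
        condition="evals['Hallucination'].score < 0.5 and span_kind == 'LLM'",
    )

    # Applying the filter returns a new Select with the join and WHERE clause attached.
    stmt = span_filter(select(models.Span))
    # spans = session.execute(stmt).scalars().all()  # hypothetical session usage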