arize-phoenix 5.5.2__py3-none-any.whl → 5.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

Files changed (186) hide show
  1. {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.7.0.dist-info}/METADATA +4 -7
  2. arize_phoenix-5.7.0.dist-info/RECORD +330 -0
  3. phoenix/config.py +50 -8
  4. phoenix/core/model.py +3 -3
  5. phoenix/core/model_schema.py +41 -50
  6. phoenix/core/model_schema_adapter.py +17 -16
  7. phoenix/datetime_utils.py +2 -2
  8. phoenix/db/bulk_inserter.py +10 -20
  9. phoenix/db/engines.py +2 -1
  10. phoenix/db/enums.py +2 -2
  11. phoenix/db/helpers.py +8 -7
  12. phoenix/db/insertion/dataset.py +9 -19
  13. phoenix/db/insertion/document_annotation.py +14 -13
  14. phoenix/db/insertion/helpers.py +6 -16
  15. phoenix/db/insertion/span_annotation.py +14 -13
  16. phoenix/db/insertion/trace_annotation.py +14 -13
  17. phoenix/db/insertion/types.py +19 -30
  18. phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +8 -8
  19. phoenix/db/models.py +28 -28
  20. phoenix/experiments/evaluators/base.py +2 -1
  21. phoenix/experiments/evaluators/code_evaluators.py +4 -5
  22. phoenix/experiments/evaluators/llm_evaluators.py +157 -4
  23. phoenix/experiments/evaluators/utils.py +3 -2
  24. phoenix/experiments/functions.py +10 -21
  25. phoenix/experiments/tracing.py +2 -1
  26. phoenix/experiments/types.py +20 -29
  27. phoenix/experiments/utils.py +2 -1
  28. phoenix/inferences/errors.py +6 -5
  29. phoenix/inferences/fixtures.py +6 -5
  30. phoenix/inferences/inferences.py +37 -37
  31. phoenix/inferences/schema.py +11 -10
  32. phoenix/inferences/validation.py +13 -14
  33. phoenix/logging/_formatter.py +3 -3
  34. phoenix/metrics/__init__.py +5 -4
  35. phoenix/metrics/binning.py +2 -1
  36. phoenix/metrics/metrics.py +2 -1
  37. phoenix/metrics/mixins.py +7 -6
  38. phoenix/metrics/retrieval_metrics.py +2 -1
  39. phoenix/metrics/timeseries.py +5 -4
  40. phoenix/metrics/wrappers.py +2 -2
  41. phoenix/pointcloud/clustering.py +3 -4
  42. phoenix/pointcloud/pointcloud.py +7 -5
  43. phoenix/pointcloud/umap_parameters.py +2 -1
  44. phoenix/server/api/dataloaders/annotation_summaries.py +12 -19
  45. phoenix/server/api/dataloaders/average_experiment_run_latency.py +2 -2
  46. phoenix/server/api/dataloaders/cache/two_tier_cache.py +3 -2
  47. phoenix/server/api/dataloaders/dataset_example_revisions.py +3 -8
  48. phoenix/server/api/dataloaders/dataset_example_spans.py +2 -5
  49. phoenix/server/api/dataloaders/document_evaluation_summaries.py +12 -18
  50. phoenix/server/api/dataloaders/document_evaluations.py +3 -7
  51. phoenix/server/api/dataloaders/document_retrieval_metrics.py +6 -13
  52. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +4 -8
  53. phoenix/server/api/dataloaders/experiment_error_rates.py +2 -5
  54. phoenix/server/api/dataloaders/experiment_run_annotations.py +3 -7
  55. phoenix/server/api/dataloaders/experiment_run_counts.py +1 -5
  56. phoenix/server/api/dataloaders/experiment_sequence_number.py +2 -5
  57. phoenix/server/api/dataloaders/latency_ms_quantile.py +21 -30
  58. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +7 -13
  59. phoenix/server/api/dataloaders/project_by_name.py +3 -3
  60. phoenix/server/api/dataloaders/record_counts.py +11 -18
  61. phoenix/server/api/dataloaders/span_annotations.py +3 -7
  62. phoenix/server/api/dataloaders/span_dataset_examples.py +3 -8
  63. phoenix/server/api/dataloaders/span_descendants.py +3 -7
  64. phoenix/server/api/dataloaders/span_projects.py +2 -2
  65. phoenix/server/api/dataloaders/token_counts.py +12 -19
  66. phoenix/server/api/dataloaders/trace_row_ids.py +3 -7
  67. phoenix/server/api/dataloaders/user_roles.py +3 -3
  68. phoenix/server/api/dataloaders/users.py +3 -3
  69. phoenix/server/api/helpers/__init__.py +4 -3
  70. phoenix/server/api/helpers/dataset_helpers.py +10 -9
  71. phoenix/server/api/helpers/playground_clients.py +671 -0
  72. phoenix/server/api/helpers/playground_registry.py +70 -0
  73. phoenix/server/api/helpers/playground_spans.py +325 -0
  74. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +2 -2
  75. phoenix/server/api/input_types/AddSpansToDatasetInput.py +2 -2
  76. phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
  77. phoenix/server/api/input_types/ChatCompletionMessageInput.py +13 -1
  78. phoenix/server/api/input_types/ClusterInput.py +2 -2
  79. phoenix/server/api/input_types/DeleteAnnotationsInput.py +1 -3
  80. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +2 -2
  81. phoenix/server/api/input_types/DeleteExperimentsInput.py +1 -3
  82. phoenix/server/api/input_types/DimensionFilter.py +4 -4
  83. phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
  84. phoenix/server/api/input_types/Granularity.py +1 -1
  85. phoenix/server/api/input_types/InvocationParameters.py +156 -13
  86. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +2 -2
  87. phoenix/server/api/input_types/TemplateOptions.py +10 -0
  88. phoenix/server/api/mutations/__init__.py +4 -0
  89. phoenix/server/api/mutations/chat_mutations.py +374 -0
  90. phoenix/server/api/mutations/dataset_mutations.py +4 -4
  91. phoenix/server/api/mutations/experiment_mutations.py +1 -2
  92. phoenix/server/api/mutations/export_events_mutations.py +7 -7
  93. phoenix/server/api/mutations/span_annotations_mutations.py +4 -4
  94. phoenix/server/api/mutations/trace_annotations_mutations.py +4 -4
  95. phoenix/server/api/mutations/user_mutations.py +4 -4
  96. phoenix/server/api/openapi/schema.py +2 -2
  97. phoenix/server/api/queries.py +61 -72
  98. phoenix/server/api/routers/oauth2.py +4 -4
  99. phoenix/server/api/routers/v1/datasets.py +22 -36
  100. phoenix/server/api/routers/v1/evaluations.py +6 -5
  101. phoenix/server/api/routers/v1/experiment_evaluations.py +2 -2
  102. phoenix/server/api/routers/v1/experiment_runs.py +2 -2
  103. phoenix/server/api/routers/v1/experiments.py +4 -4
  104. phoenix/server/api/routers/v1/spans.py +13 -12
  105. phoenix/server/api/routers/v1/traces.py +5 -5
  106. phoenix/server/api/routers/v1/utils.py +5 -5
  107. phoenix/server/api/schema.py +42 -10
  108. phoenix/server/api/subscriptions.py +347 -494
  109. phoenix/server/api/types/AnnotationSummary.py +3 -3
  110. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +44 -0
  111. phoenix/server/api/types/Cluster.py +8 -7
  112. phoenix/server/api/types/Dataset.py +5 -4
  113. phoenix/server/api/types/Dimension.py +3 -3
  114. phoenix/server/api/types/DocumentEvaluationSummary.py +8 -7
  115. phoenix/server/api/types/EmbeddingDimension.py +6 -5
  116. phoenix/server/api/types/EvaluationSummary.py +3 -3
  117. phoenix/server/api/types/Event.py +7 -7
  118. phoenix/server/api/types/Experiment.py +3 -3
  119. phoenix/server/api/types/ExperimentComparison.py +2 -4
  120. phoenix/server/api/types/GenerativeProvider.py +27 -3
  121. phoenix/server/api/types/Inferences.py +9 -8
  122. phoenix/server/api/types/InferencesRole.py +2 -2
  123. phoenix/server/api/types/Model.py +2 -2
  124. phoenix/server/api/types/Project.py +11 -18
  125. phoenix/server/api/types/Segments.py +3 -3
  126. phoenix/server/api/types/Span.py +45 -7
  127. phoenix/server/api/types/TemplateLanguage.py +9 -0
  128. phoenix/server/api/types/TimeSeries.py +8 -7
  129. phoenix/server/api/types/Trace.py +2 -2
  130. phoenix/server/api/types/UMAPPoints.py +6 -6
  131. phoenix/server/api/types/User.py +3 -3
  132. phoenix/server/api/types/node.py +1 -3
  133. phoenix/server/api/types/pagination.py +4 -4
  134. phoenix/server/api/utils.py +2 -4
  135. phoenix/server/app.py +76 -37
  136. phoenix/server/bearer_auth.py +4 -10
  137. phoenix/server/dml_event.py +3 -3
  138. phoenix/server/dml_event_handler.py +10 -24
  139. phoenix/server/grpc_server.py +3 -2
  140. phoenix/server/jwt_store.py +22 -21
  141. phoenix/server/main.py +17 -4
  142. phoenix/server/oauth2.py +3 -2
  143. phoenix/server/rate_limiters.py +5 -8
  144. phoenix/server/static/.vite/manifest.json +31 -31
  145. phoenix/server/static/assets/components-Csu8UKOs.js +1612 -0
  146. phoenix/server/static/assets/{index-DCzakdJq.js → index-Bk5C9EA7.js} +2 -2
  147. phoenix/server/static/assets/{pages-CAL1FDMt.js → pages-UeWaKXNs.js} +337 -442
  148. phoenix/server/static/assets/{vendor-6IcPAw_j.js → vendor-CtqfhlbC.js} +6 -6
  149. phoenix/server/static/assets/{vendor-arizeai-DRZuoyuF.js → vendor-arizeai-C_3SBz56.js} +2 -2
  150. phoenix/server/static/assets/{vendor-codemirror-DVE2_WBr.js → vendor-codemirror-wfdk9cjp.js} +1 -1
  151. phoenix/server/static/assets/{vendor-recharts-DwrexFA4.js → vendor-recharts-BiVnSv90.js} +1 -1
  152. phoenix/server/templates/index.html +1 -0
  153. phoenix/server/thread_server.py +1 -1
  154. phoenix/server/types.py +17 -29
  155. phoenix/services.py +8 -3
  156. phoenix/session/client.py +12 -24
  157. phoenix/session/data_extractor.py +3 -3
  158. phoenix/session/evaluation.py +1 -2
  159. phoenix/session/session.py +26 -21
  160. phoenix/trace/attributes.py +16 -28
  161. phoenix/trace/dsl/filter.py +17 -21
  162. phoenix/trace/dsl/helpers.py +3 -3
  163. phoenix/trace/dsl/query.py +13 -22
  164. phoenix/trace/fixtures.py +11 -17
  165. phoenix/trace/otel.py +5 -15
  166. phoenix/trace/projects.py +3 -2
  167. phoenix/trace/schemas.py +2 -2
  168. phoenix/trace/span_evaluations.py +9 -8
  169. phoenix/trace/span_json_decoder.py +3 -3
  170. phoenix/trace/span_json_encoder.py +2 -2
  171. phoenix/trace/trace_dataset.py +6 -5
  172. phoenix/trace/utils.py +6 -6
  173. phoenix/utilities/deprecation.py +3 -2
  174. phoenix/utilities/error_handling.py +3 -2
  175. phoenix/utilities/json.py +2 -1
  176. phoenix/utilities/logging.py +2 -2
  177. phoenix/utilities/project.py +1 -1
  178. phoenix/utilities/re.py +3 -4
  179. phoenix/utilities/template_formatters.py +16 -5
  180. phoenix/version.py +1 -1
  181. arize_phoenix-5.5.2.dist-info/RECORD +0 -321
  182. phoenix/server/static/assets/components-hX0LgYz3.js +0 -1428
  183. {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.7.0.dist-info}/WHEEL +0 -0
  184. {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.7.0.dist-info}/entry_points.txt +0 -0
  185. {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.7.0.dist-info}/licenses/IP_NOTICE +0 -0
  186. {arize_phoenix-5.5.2.dist-info → arize_phoenix-5.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,7 @@
1
1
  import re
2
+ from collections.abc import Callable
2
3
  from types import MappingProxyType
3
- from typing import Any, Callable, Optional, Type
4
+ from typing import Any, Optional
4
5
 
5
6
  from phoenix.evals.models.base import BaseModel as LLMBaseModel
6
7
  from phoenix.evals.utils import snap_to_rail
@@ -121,7 +122,7 @@ class LLMCriteriaEvaluator(LLMEvaluator):
121
122
 
122
123
  def criteria_evaluator_factory(
123
124
  class_name: str, criteria: str, description: str, default_name: str
124
- ) -> Type[ExperimentEvaluator]:
125
+ ) -> type[ExperimentEvaluator]:
125
126
  def _init(self, model: LLMBaseModel, name: str = default_name) -> None: # type: ignore
126
127
  LLMCriteriaEvaluator.__init__(self, model, criteria, description, name=name)
127
128
 
@@ -240,8 +241,8 @@ class RelevanceEvaluator(LLMEvaluator):
240
241
  "LABEL: *true or false*\n\n"
241
242
  "Follow this template for the following example:\n\n"
242
243
  "CRITERIA: the response is 'relevant' to the query\n"
243
- "QUERY: {reference}\n"
244
- "RESPONSE: {submission}\n"
244
+ "QUERY: {query}\n"
245
+ "RESPONSE: {response}\n"
245
246
  "EXPLANATION: "
246
247
  )
247
248
 
@@ -318,3 +319,155 @@ class RelevanceEvaluator(LLMEvaluator):
318
319
  formatted_template = self._format_eval_template(output, input, metadata)
319
320
  unparsed_response = await self.model._async_generate(formatted_template)
320
321
  return self._parse_eval_output(unparsed_response)
322
+
323
+
324
+ class LLMRelationalEvaluator(LLMEvaluator):
325
+ """
326
+ An LLM experiment evaluator that checks how a response is related to reference text.
327
+
328
+ `LLMRelationalEvaluator` uses the chain-of-thought technique to perform a binary evaluation of
329
+ how a response is related to reference text in a specified manner. When used as an experiment
330
+ evaluator, `LLMRelationalEvaluator` will return a score of 1.0 if the response is related to
331
+ the reference text in the specified manner and a score of 0.0 if not. The explanation
332
+ produced by the chain-of-thought technique will be included in the experiment evaluation as
333
+ well.
334
+
335
+ In order to evaluate how a response is related to reference text, a specific relation and
336
+ description of that relation must be specified. The relation should be a phrase that can be
337
+ used in the following manner: "The response '{relation}' the reference". The description
338
+ should complete the sentence "In this context, '{relation}' means '{description}'".
339
+
340
+ Example relations and descriptions:
341
+ - "is a good summary of" - "the response clearly and concisely summarizes the reference"
342
+ - "directly quotes" - "the response contains specific information from the reference"
343
+ - "professionally addresses" - "the response is respectful and relevant to the reference"
344
+
345
+ Args:
346
+ model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
347
+ the `phoenix.evals` module.
348
+ relation: The relation to evaluate the text against, the relation should be a phrase that
349
+ can be used in the following manner: "The response '{relation}' the reference".
350
+ description (str): A description of the relation, used to clarify instructions to the LLM.
351
+ The description should complete the sentence "In this context, '{relation}'
352
+ means {description}". It is helpful to specifically use the words "response" and
353
+ "reference" to describe the relation.
354
+ name (str): The name of the evaluator
355
+ get_reference (callable, optional): A function that extracts the reference from the input of
356
+ the experiment task. The function should take the input and metadata of the dataset
357
+ example and return a string. By default, the function will return the string
358
+ representation of the input.
359
+ get_response (callable, optional): A function that extracts the response from the output of
360
+ the experiment task. The function should take the output and metadata of the experiment
361
+ task and return a string. By default, the function will return the string representation
362
+ of the output.
363
+ """
364
+
365
+ _base_template = (
366
+ "Determine if the following response '{relation}' the reference. {description}"
367
+ "First, explain step-by-step why you think the response '{relation}' the reference. "
368
+ "Then provide a single word label; 'true' if the response '{relation}' the reference or "
369
+ "'false' if the response does not '{relation}' the reference. "
370
+ "Here is an example template for your response:\n\n"
371
+ "CRITERIA: the response '{relation}' the reference\n"
372
+ "REFERENCE: *text that contains a reference*\n"
373
+ "RESPONSE: *a response that may or may not be '{relation}' to the reference*\n"
374
+ "EXPLANATION: *a step by step explanation of your reasoning for whether or not the "
375
+ "response '{relation}' the reference*\n"
376
+ "LABEL: *true or false*\n\n"
377
+ "Follow this template for the following example:\n\n"
378
+ "CRITERIA: the response '{relation}' the reference\n"
379
+ "REFERENCE: {reference}\n"
380
+ "RESPONSE: {response}\n"
381
+ "EXPLANATION: "
382
+ )
383
+ _description = "In this context, '{relation}' means '{description}'. "
384
+
385
+ def __init__(
386
+ self,
387
+ model: LLMBaseModel,
388
+ relation: str,
389
+ description: str,
390
+ name: str,
391
+ get_reference: Optional[Callable[[ExampleInput, ExampleMetadata], str]] = None,
392
+ get_response: Optional[Callable[[Optional[TaskOutput], ExampleMetadata], str]] = None,
393
+ ):
394
+ self.model = model
395
+ self._name = name
396
+ self.relation = relation
397
+ self.description = description
398
+ self.template = self._format_base_template(self.relation, self.description)
399
+ self.get_reference = get_reference or self._default_get_reference
400
+ self.get_response = get_response or self._default_get_response
401
+
402
+ @classmethod
403
+ def _format_base_template(cls, relation: str, description: Optional[str] = None) -> str:
404
+ formatted_description = cls._description.format(relation=relation, description=description)
405
+ formatted_template = cls._base_template.format(
406
+ relation=relation,
407
+ description=formatted_description,
408
+ response="{response}", # leave the response field as a placeholder
409
+ reference="{reference}", # leave the reference field as a placeholder
410
+ )
411
+ return formatted_template
412
+
413
+ def _format_eval_template(
414
+ self,
415
+ output: Optional[TaskOutput] = None,
416
+ input: ExampleInput = MappingProxyType({}),
417
+ metadata: ExampleMetadata = MappingProxyType({}),
418
+ ) -> str:
419
+ assert output is not None
420
+ reference = self.get_reference(input, metadata)
421
+ response = self.get_response(output, metadata)
422
+ return self.template.format(reference=reference, response=response)
423
+
424
+ def _parse_eval_output(self, unparsed_response: str) -> EvaluationResult:
425
+ raw_label, explanation = (
426
+ _parse_label_from_explanation(unparsed_response),
427
+ unparsed_response,
428
+ )
429
+ label = snap_to_rail(raw_label, ["true", "false"])
430
+ if label == "true":
431
+ score = 1.0
432
+ elif label == "false":
433
+ score = 0.0
434
+ else:
435
+ raise RuntimeError(f"Could not parse LLM evaluation: {unparsed_response}")
436
+ return EvaluationResult(
437
+ score=score,
438
+ explanation=explanation,
439
+ metadata={},
440
+ )
441
+
442
+ def _default_get_reference(self, input: ExampleInput, *args: Any, **kwargs: Any) -> str:
443
+ return str(input)
444
+
445
+ def _default_get_response(
446
+ self, output: Optional[TaskOutput] = None, *args: Any, **kwargs: Any
447
+ ) -> str:
448
+ assert output is not None
449
+ return str(unwrap_json(output))
450
+
451
+ def evaluate(
452
+ self,
453
+ *,
454
+ output: Optional[TaskOutput] = None,
455
+ metadata: ExampleMetadata = MappingProxyType({}),
456
+ input: ExampleInput = MappingProxyType({}),
457
+ **_: Any,
458
+ ) -> EvaluationResult:
459
+ formatted_template = self._format_eval_template(output, input, metadata)
460
+ unparsed_response = self.model._generate(formatted_template)
461
+ return self._parse_eval_output(unparsed_response)
462
+
463
+ async def async_evaluate(
464
+ self,
465
+ *,
466
+ output: Optional[TaskOutput] = None,
467
+ metadata: ExampleMetadata = MappingProxyType({}),
468
+ input: ExampleInput = MappingProxyType({}),
469
+ **_: Any,
470
+ ) -> EvaluationResult:
471
+ formatted_template = self._format_eval_template(output, input, metadata)
472
+ unparsed_response = await self.model._async_generate(formatted_template)
473
+ return self._parse_eval_output(unparsed_response)
@@ -1,6 +1,7 @@
1
1
  import functools
2
2
  import inspect
3
- from typing import TYPE_CHECKING, Any, Callable, Optional, Union
3
+ from collections.abc import Callable
4
+ from typing import TYPE_CHECKING, Any, Optional, Union
4
5
 
5
6
  from phoenix.experiments.types import (
6
7
  AnnotatorKind,
@@ -134,7 +135,7 @@ def create_evaluator(
134
135
  from textdistance import levenshtein
135
136
 
136
137
  @create_evaluator(kind="CODE", name="levenshtein-distance")
137
- def ld(output: str, expected: str) -> Tuple[float, str]:
138
+ def ld(output: str, expected: str) -> tuple[float, str]:
138
139
  return (
139
140
  levenshtein(output, expected),
140
141
  f"Levenshtein distance between {output} and {expected}"
@@ -4,24 +4,13 @@ import inspect
4
4
  import json
5
5
  import traceback
6
6
  from binascii import hexlify
7
+ from collections.abc import Awaitable, Mapping, Sequence
7
8
  from contextlib import ExitStack
8
9
  from copy import deepcopy
9
10
  from dataclasses import replace
10
11
  from datetime import datetime, timezone
11
12
  from itertools import product
12
- from typing import (
13
- Any,
14
- Awaitable,
15
- Dict,
16
- Literal,
17
- Mapping,
18
- Optional,
19
- Sequence,
20
- Tuple,
21
- Type,
22
- Union,
23
- cast,
24
- )
13
+ from typing import Any, Literal, Optional, Union, cast
25
14
  from urllib.parse import urljoin
26
15
 
27
16
  import httpx
@@ -76,7 +65,7 @@ from phoenix.utilities.client import VersionedAsyncClient, VersionedClient
76
65
  from phoenix.utilities.json import jsonify
77
66
 
78
67
 
79
- def _phoenix_clients() -> Tuple[httpx.Client, httpx.AsyncClient]:
68
+ def _phoenix_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
80
69
  return VersionedClient(
81
70
  base_url=get_base_url(),
82
71
  ), VersionedAsyncClient(
@@ -91,7 +80,7 @@ Evaluators: TypeAlias = Union[
91
80
  ]
92
81
 
93
82
 
94
- RateLimitErrors: TypeAlias = Union[Type[BaseException], Sequence[Type[BaseException]]]
83
+ RateLimitErrors: TypeAlias = Union[type[BaseException], Sequence[type[BaseException]]]
95
84
 
96
85
 
97
86
  def run_experiment(
@@ -369,7 +358,7 @@ def run_experiment(
369
358
  exp_run = replace(exp_run, id=resp.json()["data"]["id"])
370
359
  return exp_run
371
360
 
372
- _errors: Tuple[Type[BaseException], ...]
361
+ _errors: tuple[type[BaseException], ...]
373
362
  if not isinstance(rate_limit_errors, Sequence):
374
363
  _errors = (rate_limit_errors,) if rate_limit_errors is not None else ()
375
364
  else:
@@ -498,7 +487,7 @@ def evaluate_experiment(
498
487
  root_span_kind = EVALUATOR
499
488
 
500
489
  def sync_evaluate_run(
501
- obj: Tuple[Example, ExperimentRun, Evaluator],
490
+ obj: tuple[Example, ExperimentRun, Evaluator],
502
491
  ) -> ExperimentEvaluationRun:
503
492
  example, experiment_run, evaluator = obj
504
493
  result: Optional[EvaluationResult] = None
@@ -550,7 +539,7 @@ def evaluate_experiment(
550
539
  return eval_run
551
540
 
552
541
  async def async_evaluate_run(
553
- obj: Tuple[Example, ExperimentRun, Evaluator],
542
+ obj: tuple[Example, ExperimentRun, Evaluator],
554
543
  ) -> ExperimentEvaluationRun:
555
544
  example, experiment_run, evaluator = obj
556
545
  result: Optional[EvaluationResult] = None
@@ -611,7 +600,7 @@ def evaluate_experiment(
611
600
  eval_run = replace(eval_run, id=resp.json()["data"]["id"])
612
601
  return eval_run
613
602
 
614
- _errors: Tuple[Type[BaseException], ...]
603
+ _errors: tuple[type[BaseException], ...]
615
604
  if not isinstance(rate_limit_errors, Sequence):
616
605
  _errors = (rate_limit_errors,) if rate_limit_errors is not None else ()
617
606
  else:
@@ -649,7 +638,7 @@ def evaluate_experiment(
649
638
 
650
639
 
651
640
  def _evaluators_by_name(obj: Optional[Evaluators]) -> Mapping[EvaluatorName, Evaluator]:
652
- evaluators_by_name: Dict[EvaluatorName, Evaluator] = {}
641
+ evaluators_by_name: dict[EvaluatorName, Evaluator] = {}
653
642
  if obj is None:
654
643
  return evaluators_by_name
655
644
  if isinstance(mapping := obj, Mapping):
@@ -678,7 +667,7 @@ def _evaluators_by_name(obj: Optional[Evaluators]) -> Mapping[EvaluatorName, Eva
678
667
  return evaluators_by_name
679
668
 
680
669
 
681
- def _get_tracer(project_name: Optional[str] = None) -> Tuple[Tracer, Resource]:
670
+ def _get_tracer(project_name: Optional[str] = None) -> tuple[Tracer, Resource]:
682
671
  resource = Resource({ResourceAttributes.PROJECT_NAME: project_name} if project_name else {})
683
672
  tracer_provider = trace_sdk.TracerProvider(resource=resource)
684
673
  span_processor = (
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from collections.abc import Callable, Iterator
3
4
  from contextlib import contextmanager
4
5
  from contextvars import ContextVar
5
6
  from threading import Lock
6
- from typing import Any, Callable, Iterator, Optional
7
+ from typing import Any, Optional
7
8
 
8
9
  from opentelemetry.sdk.resources import Resource
9
10
  from opentelemetry.sdk.trace import ReadableSpan
@@ -3,6 +3,13 @@ from __future__ import annotations
3
3
  import json
4
4
  import textwrap
5
5
  from collections import Counter
6
+ from collections.abc import (
7
+ Awaitable,
8
+ Callable,
9
+ Iterable,
10
+ Iterator,
11
+ Mapping,
12
+ )
6
13
  from copy import copy, deepcopy
7
14
  from dataclasses import dataclass, field, fields
8
15
  from datetime import datetime
@@ -10,23 +17,7 @@ from enum import Enum
10
17
  from functools import cached_property
11
18
  from importlib.metadata import version
12
19
  from random import getrandbits
13
- from typing import (
14
- Any,
15
- Awaitable,
16
- Callable,
17
- Dict,
18
- FrozenSet,
19
- Iterable,
20
- Iterator,
21
- List,
22
- Mapping,
23
- Optional,
24
- Tuple,
25
- TypeVar,
26
- Union,
27
- cast,
28
- overload,
29
- )
20
+ from typing import Any, Optional, TypeVar, Union, cast, overload
30
21
 
31
22
  import pandas as pd
32
23
  from typing_extensions import TypeAlias
@@ -41,7 +32,7 @@ class AnnotatorKind(Enum):
41
32
  LLM = "LLM"
42
33
 
43
34
 
44
- JSONSerializable: TypeAlias = Optional[Union[Dict[str, Any], List[Any], str, int, float, bool]]
35
+ JSONSerializable: TypeAlias = Optional[Union[dict[str, Any], list[Any], str, int, float, bool]]
45
36
  ExperimentId: TypeAlias = str
46
37
  DatasetId: TypeAlias = str
47
38
  DatasetVersionId: TypeAlias = str
@@ -63,7 +54,7 @@ Explanation: TypeAlias = Optional[str]
63
54
  EvaluatorName: TypeAlias = str
64
55
  EvaluatorKind: TypeAlias = str
65
56
  EvaluatorOutput: TypeAlias = Union[
66
- "EvaluationResult", bool, int, float, str, Tuple[Score, Label, Explanation]
57
+ "EvaluationResult", bool, int, float, str, tuple[Score, Label, Explanation]
67
58
  ]
68
59
 
69
60
  DRY_RUN: ExperimentId = "DRY_RUN"
@@ -135,14 +126,14 @@ class Dataset:
135
126
  return iter(self.examples.values())
136
127
 
137
128
  @cached_property
138
- def _keys(self) -> Tuple[str, ...]:
129
+ def _keys(self) -> tuple[str, ...]:
139
130
  return tuple(self.examples.keys())
140
131
 
141
132
  @overload
142
133
  def __getitem__(self, key: int) -> Example: ...
143
134
  @overload
144
- def __getitem__(self, key: slice) -> List[Example]: ...
145
- def __getitem__(self, key: Union[int, slice]) -> Union[Example, List[Example]]:
135
+ def __getitem__(self, key: slice) -> list[Example]: ...
136
+ def __getitem__(self, key: Union[int, slice]) -> Union[Example, list[Example]]:
146
137
  if isinstance(key, int):
147
138
  return self.examples[self._keys[key]]
148
139
  return [self.examples[k] for k in self._keys[key]]
@@ -306,7 +297,7 @@ class ExperimentParameters:
306
297
 
307
298
  @dataclass(frozen=True)
308
299
  class EvaluationParameters:
309
- eval_names: FrozenSet[str]
300
+ eval_names: frozenset[str]
310
301
  exp_params: ExperimentParameters
311
302
 
312
303
 
@@ -485,8 +476,8 @@ class RanExperiment(Experiment):
485
476
  dataset: Dataset = field(repr=False)
486
477
  runs: Mapping[ExperimentRunId, ExperimentRun] = field(repr=False)
487
478
  task_summary: TaskSummary = field(repr=False)
488
- eval_runs: Tuple[ExperimentEvaluationRun, ...] = field(repr=False, default=())
489
- eval_summaries: Tuple[EvaluationSummary, ...] = field(repr=False, default=())
479
+ eval_runs: tuple[ExperimentEvaluationRun, ...] = field(repr=False, default=())
480
+ eval_summaries: tuple[EvaluationSummary, ...] = field(repr=False, default=())
490
481
 
491
482
  @property
492
483
  def url(self) -> str:
@@ -514,14 +505,14 @@ class RanExperiment(Experiment):
514
505
  return iter(self.runs.values())
515
506
 
516
507
  @cached_property
517
- def _keys(self) -> Tuple[str, ...]:
508
+ def _keys(self) -> tuple[str, ...]:
518
509
  return tuple(self.runs.keys())
519
510
 
520
511
  @overload
521
512
  def __getitem__(self, key: int) -> ExperimentRun: ...
522
513
  @overload
523
- def __getitem__(self, key: slice) -> List[ExperimentRun]: ...
524
- def __getitem__(self, key: Union[int, slice]) -> Union[ExperimentRun, List[ExperimentRun]]:
514
+ def __getitem__(self, key: slice) -> list[ExperimentRun]: ...
515
+ def __getitem__(self, key: Union[int, slice]) -> Union[ExperimentRun, list[ExperimentRun]]:
525
516
  if isinstance(key, int):
526
517
  return self.runs[self._keys[key]]
527
518
  return [self.runs[k] for k in self._keys[key]]
@@ -596,7 +587,7 @@ class RanExperiment(Experiment):
596
587
  raise NotImplementedError
597
588
 
598
589
 
599
- def _asdict(dc: Any) -> Dict[str, Any]:
590
+ def _asdict(dc: Any) -> dict[str, Any]:
600
591
  # non-recursive version of `dataclasses.asdict()`
601
592
  return {field.name: getattr(dc, field.name) for field in fields(dc)}
602
593
 
@@ -1,5 +1,6 @@
1
1
  import functools
2
- from typing import Any, Callable
2
+ from collections.abc import Callable
3
+ from typing import Any
3
4
 
4
5
  from phoenix.config import get_web_base_url
5
6
 
@@ -1,5 +1,6 @@
1
1
  from abc import abstractmethod
2
- from typing import Any, Iterable, List, Union
2
+ from collections.abc import Iterable
3
+ from typing import Any, Union
3
4
 
4
5
 
5
6
  class ValidationError(Exception):
@@ -57,8 +58,8 @@ class InvalidSchemaError(ValidationError):
57
58
  class DatasetError(Exception):
58
59
  """An error raised when the dataset is invalid or incomplete"""
59
60
 
60
- def __init__(self, errors: Union[ValidationError, List[ValidationError]]):
61
- self.errors: List[ValidationError] = errors if isinstance(errors, list) else [errors]
61
+ def __init__(self, errors: Union[ValidationError, list[ValidationError]]):
62
+ self.errors: list[ValidationError] = errors if isinstance(errors, list) else [errors]
62
63
 
63
64
  def __str__(self) -> str:
64
65
  return "\n".join(map(str, self.errors))
@@ -142,7 +143,7 @@ class EmbeddingVectorSizeMismatch(ValidationError):
142
143
  vector lengths"""
143
144
 
144
145
  def __init__(
145
- self, embedding_feature_name: str, vector_column_name: str, vector_lengths: List[int]
146
+ self, embedding_feature_name: str, vector_column_name: str, vector_lengths: list[int]
146
147
  ) -> None:
147
148
  self.embedding_feature_name = embedding_feature_name
148
149
  self.vector_column_name = vector_column_name
@@ -238,5 +239,5 @@ class MissingTimestampColumnName(ValidationError):
238
239
  class SchemaError(Exception):
239
240
  """An error raised when the Schema is invalid or incomplete"""
240
241
 
241
- def __init__(self, errors: Union[ValidationError, List[ValidationError]]):
242
+ def __init__(self, errors: Union[ValidationError, list[ValidationError]]):
242
243
  self.errors = errors
@@ -1,9 +1,10 @@
1
1
  import json
2
2
  import logging
3
+ from collections.abc import Iterator
3
4
  from dataclasses import dataclass, replace
4
5
  from enum import Enum, auto
5
6
  from pathlib import Path
6
- from typing import Iterator, NamedTuple, Optional, Tuple
7
+ from typing import NamedTuple, Optional
7
8
  from urllib import request
8
9
  from urllib.parse import quote, urljoin
9
10
 
@@ -39,7 +40,7 @@ class Fixture:
39
40
  corpus_file_name: Optional[str] = None
40
41
  corpus_schema: Optional[Schema] = None
41
42
 
42
- def paths(self) -> Iterator[Tuple[InferencesRole, Path]]:
43
+ def paths(self) -> Iterator[tuple[InferencesRole, Path]]:
43
44
  return (
44
45
  (role, Path(self.prefix) / name)
45
46
  for role, name in zip(
@@ -397,7 +398,7 @@ wikipedia_fixture = Fixture(
397
398
  corpus_file_name="corpus.parquet",
398
399
  )
399
400
 
400
- FIXTURES: Tuple[Fixture, ...] = (
401
+ FIXTURES: tuple[Fixture, ...] = (
401
402
  sentiment_classification_language_drift_fixture,
402
403
  image_classification_fixture,
403
404
  fashion_mnist_fixture,
@@ -416,7 +417,7 @@ NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}
416
417
  def get_inferences(
417
418
  fixture_name: str,
418
419
  no_internet: bool = False,
419
- ) -> Tuple[Inferences, Optional[Inferences], Optional[Inferences]]:
420
+ ) -> tuple[Inferences, Optional[Inferences], Optional[Inferences]]:
420
421
  """
421
422
  Downloads primary and reference inferences for a fixture if they are not found
422
423
  locally.
@@ -550,7 +551,7 @@ class GCSAssets(NamedTuple):
550
551
  )
551
552
 
552
553
 
553
- def _download(fixture: Fixture, location: Path) -> Iterator[Tuple[InferencesRole, Path]]:
554
+ def _download(fixture: Fixture, location: Path) -> Iterator[tuple[InferencesRole, Path]]:
554
555
  for role, path in fixture.paths():
555
556
  yield role, GCSAssets().metadata(path).save_artifact(location)
556
557