arize-phoenix 4.12.1rc1__py3-none-any.whl → 4.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.15.0.dist-info}/METADATA +10 -6
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.15.0.dist-info}/RECORD +70 -68
- phoenix/db/bulk_inserter.py +5 -4
- phoenix/db/engines.py +2 -1
- phoenix/experiments/evaluators/base.py +4 -0
- phoenix/experiments/evaluators/code_evaluators.py +80 -0
- phoenix/experiments/evaluators/llm_evaluators.py +77 -1
- phoenix/experiments/evaluators/utils.py +70 -21
- phoenix/experiments/functions.py +17 -16
- phoenix/server/api/context.py +5 -3
- phoenix/server/api/dataloaders/__init__.py +2 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +25 -25
- phoenix/server/api/dataloaders/dataset_example_revisions.py +2 -4
- phoenix/server/api/dataloaders/dataset_example_spans.py +2 -4
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -4
- phoenix/server/api/dataloaders/document_evaluations.py +2 -4
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +2 -4
- phoenix/server/api/dataloaders/evaluation_summaries.py +2 -4
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +2 -4
- phoenix/server/api/dataloaders/experiment_error_rates.py +32 -14
- phoenix/server/api/dataloaders/experiment_run_counts.py +20 -9
- phoenix/server/api/dataloaders/experiment_sequence_number.py +2 -4
- phoenix/server/api/dataloaders/latency_ms_quantile.py +2 -3
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +2 -4
- phoenix/server/api/dataloaders/project_by_name.py +3 -3
- phoenix/server/api/dataloaders/record_counts.py +2 -4
- phoenix/server/api/dataloaders/span_annotations.py +2 -4
- phoenix/server/api/dataloaders/span_dataset_examples.py +36 -0
- phoenix/server/api/dataloaders/span_descendants.py +2 -4
- phoenix/server/api/dataloaders/span_evaluations.py +2 -4
- phoenix/server/api/dataloaders/span_projects.py +3 -3
- phoenix/server/api/dataloaders/token_counts.py +2 -4
- phoenix/server/api/dataloaders/trace_evaluations.py +2 -4
- phoenix/server/api/dataloaders/trace_row_ids.py +2 -4
- phoenix/server/api/input_types/{CreateSpanAnnotationsInput.py → CreateSpanAnnotationInput.py} +4 -2
- phoenix/server/api/input_types/{CreateTraceAnnotationsInput.py → CreateTraceAnnotationInput.py} +4 -2
- phoenix/server/api/input_types/{PatchAnnotationsInput.py → PatchAnnotationInput.py} +4 -2
- phoenix/server/api/mutations/span_annotations_mutations.py +20 -9
- phoenix/server/api/mutations/trace_annotations_mutations.py +20 -9
- phoenix/server/api/routers/v1/datasets.py +132 -10
- phoenix/server/api/routers/v1/evaluations.py +3 -5
- phoenix/server/api/routers/v1/experiments.py +1 -1
- phoenix/server/api/types/Experiment.py +2 -2
- phoenix/server/api/types/Inferences.py +1 -2
- phoenix/server/api/types/Model.py +1 -2
- phoenix/server/api/types/Span.py +5 -0
- phoenix/server/api/utils.py +4 -4
- phoenix/server/app.py +21 -18
- phoenix/server/grpc_server.py +2 -2
- phoenix/server/main.py +5 -9
- phoenix/server/static/.vite/manifest.json +31 -31
- phoenix/server/static/assets/{components-C8sm_r1F.js → components-kGgeFkHp.js} +150 -110
- phoenix/server/static/assets/index-BctFO6S7.js +100 -0
- phoenix/server/static/assets/{pages-bN7juCjh.js → pages-DabDCmVd.js} +432 -255
- phoenix/server/static/assets/{vendor-CUDAPm8e.js → vendor-CP0b0YG0.js} +2 -2
- phoenix/server/static/assets/{vendor-arizeai-Do2HOmcL.js → vendor-arizeai-B5Hti8OB.js} +27 -27
- phoenix/server/static/assets/vendor-codemirror-DtdPDzrv.js +15 -0
- phoenix/server/static/assets/{vendor-recharts-PKRvByVe.js → vendor-recharts-A0DA1O99.js} +1 -1
- phoenix/server/types.py +18 -0
- phoenix/session/client.py +9 -6
- phoenix/session/session.py +2 -2
- phoenix/trace/dsl/filter.py +40 -25
- phoenix/trace/fixtures.py +17 -23
- phoenix/trace/utils.py +23 -0
- phoenix/utilities/client.py +116 -0
- phoenix/utilities/project.py +1 -1
- phoenix/version.py +1 -1
- phoenix/server/api/routers/v1/dataset_examples.py +0 -157
- phoenix/server/static/assets/index-BEKPzgQs.js +0 -100
- phoenix/server/static/assets/vendor-codemirror-CrdxOlMs.js +0 -12
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.15.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.15.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.15.0.dist-info}/licenses/LICENSE +0 -0
phoenix/experiments/evaluators/llm_evaluators.py
CHANGED

@@ -18,6 +18,31 @@ from phoenix.experiments.types import (
 
 
 class LLMCriteriaEvaluator(LLMEvaluator):
+    """
+    An experiment evaluator that uses an LLM to evaluate whether the text meets a custom criteria.
+
+    This evaluator uses the chain-of-thought technique to perform a binary evaluation of text based
+    on a custom criteria and description. When used as an experiment evaluator,
+    `LLMCriteriaEvaluator` will return a score of 1.0 if the text meets the criteria and a score of
+    0.0 if not. The explanation produced by the chain-of-thought technique will be included in the
+    experiment evaluation as well.
+
+    Example criteria and descriptions:
+    - "thoughtfulness" - "shows careful consideration and fair judgement"
+    - "clarity" - "is easy to understand and follow"
+    - "professionalism" - "is respectful and appropriate for a formal setting"
+
+    Args:
+        model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
+            the `phoenix.evals` module.
+        criteria: The criteria to evaluate the text against, the criteria should be able to be used
+            as a noun in a sentence.
+        description (str): A description of the criteria, used to clarify instructions to the LLM.
+            The description should complete this sentence: "{criteria} means the text
+            {description}".
+        name (str): The name of the evaluator
+    """
+
     _base_template = (
         "Determine if the following text is {criteria}. {description}"
         "First, explain step-by-step why you think the text is or is not {criteria}. Then provide "
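For orientation, here is a minimal usage sketch of the evaluator documented above. The import paths and the `OpenAIModel` choice are assumptions based on the docstring ("Compatible models can be imported from the `phoenix.evals` module"), not something this diff shows.

```python
# Sketch only: model wrapper, criteria, and description below are illustrative.
from phoenix.evals import OpenAIModel
from phoenix.experiments.evaluators import LLMCriteriaEvaluator

politeness = LLMCriteriaEvaluator(
    model=OpenAIModel(model="gpt-4o"),            # hypothetical model choice
    criteria="politeness",                        # used as a noun in the prompt
    description="is courteous and respectful",    # completes "{criteria} means the text {description}"
    name="Politeness",
)
# Passed to run_experiment(...) as an evaluator, it yields a 1.0/0.0 score plus
# the chain-of-thought explanation described in the docstring.
```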
@@ -117,6 +142,14 @@ ConcisenessEvaluator = criteria_evaluator_factory(
     description="is just a few sentences and easy to follow",
     default_name="Conciseness",
 )
+"""
+An experiment evaluator that uses an LLM to evaluate whether the text is concise.
+
+Args:
+    model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
+        the `phoenix.evals` module.
+    name (str, optional): The name of the evaluator, defaults to "Conciseness".
+"""
 
 
 HelpfulnessEvaluator = criteria_evaluator_factory(
@@ -125,6 +158,14 @@ HelpfulnessEvaluator = criteria_evaluator_factory(
     description="provides useful information",
     default_name="Helpfulness",
 )
+"""
+An experiment evaluator that uses an LLM to evaluate whether the text is helpful.
+
+Args:
+    model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
+        the `phoenix.evals` module.
+    name (str, optional): The name of the evaluator, defaults to "Helpfulness".
+"""
 
 
 CoherenceEvaluator = criteria_evaluator_factory(
@@ -133,6 +174,14 @@ CoherenceEvaluator = criteria_evaluator_factory(
     description="is coherent, well-structured, and logically sound",
     default_name="Coherence",
 )
+"""
+An experiment evaluator that uses an LLM to evaluate whether the text is coherent.
+
+Args:
+    model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
+        the `phoenix.evals` module.
+    name (str, optional): The name of the evaluator, defaults to "Coherence".
+"""
 
 
 def _parse_label_from_explanation(raw_string: str) -> str:
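The three factory-built evaluators above need only a model (and optionally a name); a brief sketch, again assuming the import paths implied by the docstrings:

```python
# Sketch only: import paths and model choice are assumptions.
from phoenix.evals import OpenAIModel
from phoenix.experiments.evaluators import CoherenceEvaluator, ConcisenessEvaluator

model = OpenAIModel(model="gpt-4o")
evaluators = [
    ConcisenessEvaluator(model=model),              # name defaults to "Conciseness"
    CoherenceEvaluator(model=model, name="Logic"),  # the default name can be overridden
]
```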
@@ -149,6 +198,33 @@ def _parse_label_from_explanation(raw_string: str) -> str:
 
 
 class RelevanceEvaluator(LLMEvaluator):
+    """
+    An experiment evaluator that uses an LLM to evaluate whether a response is relevant to a query.
+
+    This evaluator uses the chain-of-thought technique to perform a binary evaluation of whether
+    the output "response" of an experiment is relevant to its input "query". When used as an
+    experiment evaluator, `RelevanceEvaluator` will return a score of 1.0 if the response is
+    relevant to the query and a score of 0.0 if not. The explanation produced by the
+    chain-of-thought technique will be included in the experiment evaluation as well.
+
+    Optionally, you can provide custom functions to extract the query and response from the input
+    and output of the experiment task. By default, the evaluator will use the dataset example as
+    the input and the output of the experiment task as the response.
+
+    Args:
+        model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
+            the `phoenix.evals` module.
+        get_query (callable, optional): A function that extracts the query from the input of the
+            experiment task. The function should take the input and metadata of the dataset example
+            and return a string. By default, the function will return the string representation of
+            the input.
+        get_response (callable, optional): A function that extracts the response from the output of
+            the experiment task. The function should take the output and metadata of the experiment
+            task and return a string. By default, the function will return the string representation
+            of the output.
+        name (str, optional): The name of the evaluator. Defaults to "Relevance".
+    """
+
     template = (
         "Determine if the following response is relevant to the query. In this context, "
         "'relevance' means that the response directly addresses the core question or topic of the "
@@ -174,7 +250,7 @@ class RelevanceEvaluator(LLMEvaluator):
         model: LLMBaseModel,
         get_query: Optional[Callable[[ExampleInput, ExampleMetadata], str]] = None,
         get_response: Optional[Callable[[Optional[TaskOutput], ExampleMetadata], str]] = None,
-        name: str = "
+        name: str = "Relevance",
     ):
         self.model = model
         self._name = name
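A usage sketch for the documented `get_query`/`get_response` hooks; the JSON field names are hypothetical:

```python
# Sketch only: assumes structured example inputs/outputs with "question"/"answer" keys.
from phoenix.evals import OpenAIModel
from phoenix.experiments.evaluators import RelevanceEvaluator

relevance = RelevanceEvaluator(
    model=OpenAIModel(model="gpt-4o"),
    # extract the query from the dataset example input instead of using str(input)
    get_query=lambda example_input, metadata: example_input["question"],
    # extract the response text from the task output instead of using str(output)
    get_response=lambda task_output, metadata: task_output["answer"],
)  # name now defaults to "Relevance", per the fix above
```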
phoenix/experiments/evaluators/utils.py
CHANGED

@@ -1,6 +1,5 @@
 import functools
 import inspect
-from itertools import chain, islice, repeat
 from typing import TYPE_CHECKING, Any, Callable, Optional, Union
 
 from phoenix.experiments.types import (
@@ -75,6 +74,72 @@ def create_evaluator(
     name: Optional[str] = None,
     scorer: Optional[Callable[[Any], EvaluationResult]] = None,
 ) -> Callable[[Callable[..., Any]], "Evaluator"]:
+    """
+    A decorator that configures a sync or async function to be used as an experiment evaluator.
+
+    If the `evaluator` is a function of one argument then that argument will be
+    bound to the `output` of an experiment task. Alternatively, the `evaluator` can be a function
+    of any combination of specific argument names that will be bound to special values:
+        `input`: The input field of the dataset example
+        `output`: The output of an experiment task
+        `expected`: The expected or reference output of the dataset example
+        `reference`: An alias for `expected`
+        `metadata`: Metadata associated with the dataset example
+
+    Args:
+        kind (str | AnnotatorKind): Broadly indicates how the evaluator scores an experiment run.
+            Valid kinds are: "CODE", "LLM". Defaults to "CODE".
+        name (str, optional): The name of the evaluator. If not provided, the name of the function
+            will be used.
+        scorer (callable, optional): An optional function that converts the output of the wrapped
+            function into an `EvaluationResult`. This allows configuring the evaluation
+            payload by setting a label, score and explanation. By default, numeric outputs will
+            be recorded as scores, boolean outputs will be recorded as scores and labels, and
+            string outputs will be recorded as labels. If the output is a 2-tuple, the first item
+            will be recorded as the score and the second item will recorded as the explanation.
+
+    Examples:
+        Configuring an evaluator that returns a boolean
+
+        .. code-block:: python
+            @create_evaluator(kind="CODE", name="exact-match")
+            def match(output: str, expected: str) -> bool:
+                return output == expected
+
+        Configuring an evaluator that returns a label
+
+        .. code-block:: python
+            client = openai.Client()
+
+            @create_evaluator(kind="LLM")
+            def label(output: str) -> str:
+                res = client.chat.completions.create(
+                    model = "gpt-4",
+                    messages = [
+                        {
+                            "role": "user",
+                            "content": (
+                                "in one word, characterize the sentiment of the following customer "
+                                f"request: {output}"
+                            )
+                        },
+                    ],
+                )
+                label = res.choices[0].message.content
+                return label
+
+        Configuring an evaluator that returns a score and explanation
+
+        .. code-block:: python
+            from textdistance import levenshtein
+
+            @create_evaluator(kind="CODE", name="levenshtein-distance")
+            def ld(output: str, expected: str) -> Tuple[float, str]:
+                return (
+                    levenshtein(output, expected),
+                    f"Levenshtein distance between {output} and {expected}"
+                )
+    """
     if scorer is None:
         scorer = _default_eval_scorer
 
@@ -163,24 +228,8 @@ def _default_eval_scorer(result: Any) -> EvaluationResult:
         return EvaluationResult(score=float(result))
     if isinstance(result, str):
         return EvaluationResult(label=result)
-    if isinstance(result, (tuple, list)) and
-        #
-        #
-
-        # - 1-tuple: (Score, ) or (Label, )
-        # Note that (Score, Label) conflicts with (Score, Explanation) and we
-        # pick the latter because it's probably more prevalent. To get
-        # (Score, Label), use a 3-tuple instead, i.e. (Score, Label, None).
-        a, b, c = islice(chain(result, repeat(None)), 3)
-        score, label, explanation = None, a, b
-        if hasattr(a, "__float__"):
-            try:
-                score = float(a)
-            except ValueError:
-                pass
-            else:
-                label, explanation = (None, b) if len(result) < 3 else (b, c)
-        return EvaluationResult(score=score, label=label, explanation=explanation)
-    if result is None:
-        return EvaluationResult(score=0)
+    if isinstance(result, (tuple, list)) and len(result) == 2:
+        # If the result is a 2-tuple, the first item will be recorded as the score
+        # and the second item will recorded as the explanation.
+        return EvaluationResult(score=float(result[0]), explanation=str(result[1]))
     raise ValueError(f"Unsupported evaluation result type: {type(result)}")
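The net effect is a stricter default scorer: only 2-tuples are special-cased now. A sketch of what a decorated evaluator should therefore return (assuming `create_evaluator` is re-exported from `phoenix.experiments.evaluators`):

```python
# Sketch only: the evaluator body is illustrative.
from phoenix.experiments.evaluators import create_evaluator

@create_evaluator(kind="CODE", name="has-citation")
def has_citation(output: str) -> tuple:
    score = 1.0 if "[1]" in output else 0.0
    return score, f"looked for a '[1]' marker in: {output[:40]}"  # (score, explanation)

# A (score, explanation) 2-tuple maps to EvaluationResult(score=..., explanation=...);
# 1-tuples, 3-tuples, and bare None are no longer accepted and now raise ValueError.
```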
phoenix/experiments/functions.py
CHANGED

@@ -72,15 +72,16 @@ from phoenix.experiments.types import (
 )
 from phoenix.experiments.utils import get_dataset_experiments_url, get_experiment_url, get_func_name
 from phoenix.trace.attributes import flatten
+from phoenix.utilities.client import VersionedAsyncClient, VersionedClient
 from phoenix.utilities.json import jsonify
 
 
 def _phoenix_clients() -> Tuple[httpx.Client, httpx.AsyncClient]:
     headers = get_env_client_headers()
-    return
+    return VersionedClient(
         base_url=get_base_url(),
         headers=headers,
-    ),
+    ), VersionedAsyncClient(
         base_url=get_base_url(),
         headers=headers,
     )
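`phoenix/utilities/client.py` is new in this release but its body is not part of this excerpt; the sketch below only illustrates the usual shape of such a wrapper (an `httpx.Client` subclass that injects default headers) and is not Phoenix's actual implementation.

```python
# Hypothetical stand-in for the pattern; the header name and value are assumptions.
import httpx

class VersionedClientSketch(httpx.Client):
    def __init__(self, *args, **kwargs):
        headers = dict(kwargs.pop("headers", None) or {})
        headers.setdefault("x-client-version", "4.15.0")  # hypothetical header
        super().__init__(*args, headers=headers, **kwargs)
```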
@@ -120,21 +121,23 @@ def run_experiment(
     output. If the `task` is a function of one argument then that argument will be bound to the
     `input` field of the dataset example. Alternatively, the `task` can be a function of any
     combination of specific argument names that will be bound to special values:
-
-
-
-
-
+
+    - `input`: The input field of the dataset example
+    - `expected`: The expected or reference output of the dataset example
+    - `reference`: An alias for `expected`
+    - `metadata`: Metadata associated with the dataset example
+    - `example`: The dataset `Example` object with all associated fields
 
     An `evaluator` is either a synchronous or asynchronous function that returns either a boolean
     or numeric "score". If the `evaluator` is a function of one argument then that argument will be
     bound to the `output` of the task. Alternatively, the `evaluator` can be a function of any
     combination of specific argument names that will be bound to special values:
-
-
-
-
-
+
+    - `input`: The input field of the dataset example
+    - `output`: The output of the task
+    - `expected`: The expected or reference output of the dataset example
+    - `reference`: An alias for `expected`
+    - `metadata`: Metadata associated with the dataset example
 
     Phoenix also provides pre-built evaluators in the `phoenix.experiments.evaluators` module.
 
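A sketch of a task that relies on the argument-name binding described above; the dataset fields are hypothetical:

```python
# Sketch only: parameter names are bound by run_experiment as documented above.
def task(input, expected, metadata) -> str:
    # `input` and `expected` come from the dataset example; `metadata` is its metadata dict
    question = input["question"]
    return f"({metadata.get('source', 'n/a')}) draft answer to: {question}"

# experiment = run_experiment(dataset, task, evaluators=[relevance])
```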
@@ -366,10 +369,9 @@ def run_experiment(
         return exp_run
 
     _errors: Tuple[Type[BaseException], ...]
-    if not
+    if not isinstance(rate_limit_errors, Sequence):
         _errors = (rate_limit_errors,) if rate_limit_errors is not None else ()
     else:
-        rate_limit_errors = cast(Sequence[Type[BaseException]], rate_limit_errors)
         _errors = tuple(filter(None, rate_limit_errors))
     rate_limiters = [RateLimiter(rate_limit_error=rate_limit_error) for rate_limit_error in _errors]
 
@@ -606,10 +608,9 @@ def evaluate_experiment(
         return eval_run
 
     _errors: Tuple[Type[BaseException], ...]
-    if not
+    if not isinstance(rate_limit_errors, Sequence):
         _errors = (rate_limit_errors,) if rate_limit_errors is not None else ()
     else:
-        rate_limit_errors = cast(Sequence[Type[BaseException]], rate_limit_errors)
         _errors = tuple(filter(None, rate_limit_errors))
     rate_limiters = [RateLimiter(rate_limit_error=rate_limit_error) for rate_limit_error in _errors]
 
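The same normalization now appears in both `run_experiment` and `evaluate_experiment`; restated standalone for clarity (this is an illustration, not Phoenix code):

```python
from collections.abc import Sequence

def normalize(rate_limit_errors):
    # a bare exception type (or None) vs. a sequence of types, as in the hunks above
    if not isinstance(rate_limit_errors, Sequence):
        return (rate_limit_errors,) if rate_limit_errors is not None else ()
    return tuple(filter(None, rate_limit_errors))

assert normalize(TimeoutError) == (TimeoutError,)
assert normalize([TimeoutError, None, ValueError]) == (TimeoutError, ValueError)
assert normalize(None) == ()
```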
phoenix/server/api/context.py
CHANGED

@@ -1,9 +1,8 @@
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
-from typing import
+from typing import Callable, Optional
 
-from sqlalchemy.ext.asyncio import AsyncSession
 from strawberry.fastapi import BaseContext
 from typing_extensions import TypeAlias
 
@@ -26,6 +25,7 @@ from phoenix.server.api.dataloaders import (
     ProjectByNameDataLoader,
     RecordCountDataLoader,
     SpanAnnotationsDataLoader,
+    SpanDatasetExamplesDataLoader,
     SpanDescendantsDataLoader,
     SpanEvaluationsDataLoader,
     SpanProjectsDataLoader,
@@ -33,6 +33,7 @@ from phoenix.server.api.dataloaders import (
     TraceEvaluationsDataLoader,
     TraceRowIdsDataLoader,
 )
+from phoenix.server.types import DbSessionFactory
 
 
 @dataclass
@@ -51,6 +52,7 @@ class DataLoaders:
     latency_ms_quantile: LatencyMsQuantileDataLoader
     min_start_or_max_end_times: MinStartOrMaxEndTimeDataLoader
     record_counts: RecordCountDataLoader
+    span_dataset_examples: SpanDatasetExamplesDataLoader
     span_descendants: SpanDescendantsDataLoader
     span_evaluations: SpanEvaluationsDataLoader
     span_projects: SpanProjectsDataLoader
@@ -66,7 +68,7 @@ ProjectRowId: TypeAlias = int
 
 @dataclass
 class Context(BaseContext):
-    db:
+    db: DbSessionFactory
     data_loaders: DataLoaders
     cache_for_dataloaders: Optional[CacheForDataLoaders]
     model: Model
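The body of `phoenix/server/types.py` is not shown in this excerpt, but the call sites in the dataloaders below (`async with self._db() as session:`) imply that `DbSessionFactory` is a callable returning an async session context manager. A sketch of that shape, under that assumption:

```python
# Sketch only: an assumed minimal shape inferred from the dataloaders' call sites.
from contextlib import asynccontextmanager
from typing import AsyncContextManager, Callable

from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from typing_extensions import TypeAlias

DbSessionFactorySketch: TypeAlias = Callable[[], AsyncContextManager[AsyncSession]]

engine = create_async_engine("sqlite+aiosqlite:///:memory:")  # illustrative engine
_session_maker = async_sessionmaker(engine, expire_on_commit=False)

@asynccontextmanager
async def db():  # satisfies DbSessionFactorySketch
    async with _session_maker() as session:
        yield session
```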
phoenix/server/api/dataloaders/__init__.py
CHANGED

@@ -27,6 +27,7 @@ from .min_start_or_max_end_times import MinStartOrMaxEndTimeCache, MinStartOrMax
 from .project_by_name import ProjectByNameDataLoader
 from .record_counts import RecordCountCache, RecordCountDataLoader
 from .span_annotations import SpanAnnotationsDataLoader
+from .span_dataset_examples import SpanDatasetExamplesDataLoader
 from .span_descendants import SpanDescendantsDataLoader
 from .span_evaluations import SpanEvaluationsDataLoader
 from .span_projects import SpanProjectsDataLoader
@@ -50,6 +51,7 @@ __all__ = [
     "LatencyMsQuantileDataLoader",
     "MinStartOrMaxEndTimeDataLoader",
     "RecordCountDataLoader",
+    "SpanDatasetExamplesDataLoader",
     "SpanDescendantsDataLoader",
     "SpanEvaluationsDataLoader",
     "SpanProjectsDataLoader",
phoenix/server/api/dataloaders/average_experiment_run_latency.py
CHANGED

@@ -1,18 +1,14 @@
-from typing import (
-    AsyncContextManager,
-    Callable,
-    List,
-)
+from typing import List, Optional
 
 from sqlalchemy import func, select
-from sqlalchemy.ext.asyncio import AsyncSession
 from strawberry.dataloader import DataLoader
 from typing_extensions import TypeAlias
 
 from phoenix.db import models
+from phoenix.server.types import DbSessionFactory
 
 ExperimentID: TypeAlias = int
-RunLatency: TypeAlias = float
+RunLatency: TypeAlias = Optional[float]
 Key: TypeAlias = ExperimentID
 Result: TypeAlias = RunLatency
 
@@ -20,33 +16,37 @@ Result: TypeAlias = RunLatency
 class AverageExperimentRunLatencyDataLoader(DataLoader[Key, Result]):
     def __init__(
         self,
-        db:
+        db: DbSessionFactory,
     ) -> None:
         super().__init__(load_fn=self._load_fn)
         self._db = db
 
     async def _load_fn(self, keys: List[Key]) -> List[Result]:
         experiment_ids = keys
+        resolved_experiment_ids = (
+            select(models.Experiment.id)
+            .where(models.Experiment.id.in_(set(experiment_ids)))
+            .subquery()
+        )
+        query = (
+            select(
+                resolved_experiment_ids.c.id,
+                func.avg(
+                    func.extract("epoch", models.ExperimentRun.end_time)
+                    - func.extract("epoch", models.ExperimentRun.start_time)
+                ),
+            )
+            .outerjoin_from(
+                from_=resolved_experiment_ids,
+                target=models.ExperimentRun,
+                onclause=resolved_experiment_ids.c.id == models.ExperimentRun.experiment_id,
+            )
+            .group_by(resolved_experiment_ids.c.id)
+        )
         async with self._db() as session:
             avg_latencies = {
                 experiment_id: avg_latency
-                async for experiment_id, avg_latency in await session.stream(
-                    select(
-                        models.ExperimentRun.experiment_id,
-                        func.avg(
-                            func.extract(
-                                "epoch",
-                                models.ExperimentRun.end_time,
-                            )
-                            - func.extract(
-                                "epoch",
-                                models.ExperimentRun.start_time,
-                            )
-                        ),
-                    )
-                    .where(models.ExperimentRun.experiment_id.in_(set(experiment_ids)))
-                    .group_by(models.ExperimentRun.experiment_id)
-                )
+                async for experiment_id, avg_latency in await session.stream(query)
             }
         return [
             avg_latencies.get(experiment_id, ValueError(f"Unknown experiment: {experiment_id}"))
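Because the query now LEFT OUTER JOINs from the requested experiment IDs, an experiment with no runs yields a NULL average instead of being absent from the result, which is why `RunLatency` became `Optional[float]`. A made-up illustration of the loader's per-key results:

```python
# Illustrative values only.
avg_latencies = {101: 12.5, 102: None}   # experiment 102 has no runs, so its average is NULL/None
assert [avg_latencies.get(k) for k in [101, 102]] == [12.5, None]
# Before this change, 102 would have had no row at all and fallen through to the
# ValueError placeholder in the return statement above.
```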
phoenix/server/api/dataloaders/dataset_example_revisions.py
CHANGED

@@ -1,6 +1,4 @@
 from typing import (
-    AsyncContextManager,
-    Callable,
     List,
     Optional,
     Tuple,
@@ -8,12 +6,12 @@ from typing import (
 )
 
 from sqlalchemy import Integer, case, func, literal, or_, select, union
-from sqlalchemy.ext.asyncio import AsyncSession
 from strawberry.dataloader import DataLoader
 from typing_extensions import TypeAlias
 
 from phoenix.db import models
 from phoenix.server.api.types.DatasetExampleRevision import DatasetExampleRevision
+from phoenix.server.types import DbSessionFactory
 
 ExampleID: TypeAlias = int
 VersionID: TypeAlias = Optional[int]
@@ -22,7 +20,7 @@ Result: TypeAlias = DatasetExampleRevision
 
 
 class DatasetExampleRevisionsDataLoader(DataLoader[Key, Result]):
-    def __init__(self, db:
+    def __init__(self, db: DbSessionFactory) -> None:
         super().__init__(load_fn=self._load_fn)
         self._db = db
 
phoenix/server/api/dataloaders/dataset_example_spans.py
CHANGED

@@ -1,17 +1,15 @@
 from typing import (
-    AsyncContextManager,
-    Callable,
     List,
     Optional,
 )
 
 from sqlalchemy import select
-from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import joinedload
 from strawberry.dataloader import DataLoader
 from typing_extensions import TypeAlias
 
 from phoenix.db import models
+from phoenix.server.types import DbSessionFactory
 
 ExampleID: TypeAlias = int
 Key: TypeAlias = ExampleID
@@ -19,7 +17,7 @@ Result: TypeAlias = Optional[models.Span]
 
 
 class DatasetExampleSpansDataLoader(DataLoader[Key, Result]):
-    def __init__(self, db:
+    def __init__(self, db: DbSessionFactory) -> None:
         super().__init__(load_fn=self._load_fn)
         self._db = db
 
phoenix/server/api/dataloaders/document_evaluation_summaries.py
CHANGED

@@ -2,8 +2,6 @@ from collections import defaultdict
 from datetime import datetime
 from typing import (
     Any,
-    AsyncContextManager,
-    Callable,
     DefaultDict,
     List,
     Optional,
@@ -14,7 +12,6 @@ import numpy as np
 from aioitertools.itertools import groupby
 from cachetools import LFUCache, TTLCache
 from sqlalchemy import Select, select
-from sqlalchemy.ext.asyncio import AsyncSession
 from strawberry.dataloader import AbstractCache, DataLoader
 from typing_extensions import TypeAlias
 
@@ -24,6 +21,7 @@ from phoenix.metrics.retrieval_metrics import RetrievalMetrics
 from phoenix.server.api.dataloaders.cache import TwoTierCache
 from phoenix.server.api.input_types.TimeRange import TimeRange
 from phoenix.server.api.types.DocumentEvaluationSummary import DocumentEvaluationSummary
+from phoenix.server.types import DbSessionFactory
 from phoenix.trace.dsl import SpanFilter
 
 ProjectRowId: TypeAlias = int
@@ -77,7 +75,7 @@ class DocumentEvaluationSummaryCache(
 class DocumentEvaluationSummaryDataLoader(DataLoader[Key, Result]):
     def __init__(
         self,
-        db:
+        db: DbSessionFactory,
         cache_map: Optional[AbstractCache[Key, Result]] = None,
     ) -> None:
         super().__init__(
phoenix/server/api/dataloaders/document_evaluations.py
CHANGED

@@ -1,25 +1,23 @@
 from collections import defaultdict
 from typing import (
-    AsyncContextManager,
-    Callable,
     DefaultDict,
     List,
 )
 
 from sqlalchemy import select
-from sqlalchemy.ext.asyncio import AsyncSession
 from strawberry.dataloader import DataLoader
 from typing_extensions import TypeAlias
 
 from phoenix.db import models
 from phoenix.server.api.types.Evaluation import DocumentEvaluation
+from phoenix.server.types import DbSessionFactory
 
 Key: TypeAlias = int
 Result: TypeAlias = List[DocumentEvaluation]
 
 
 class DocumentEvaluationsDataLoader(DataLoader[Key, Result]):
-    def __init__(self, db:
+    def __init__(self, db: DbSessionFactory) -> None:
         super().__init__(load_fn=self._load_fn)
         self._db = db
 
phoenix/server/api/dataloaders/document_retrieval_metrics.py
CHANGED

@@ -1,7 +1,5 @@
 from collections import defaultdict
 from typing import (
-    AsyncContextManager,
-    Callable,
     DefaultDict,
     Dict,
     List,
@@ -13,13 +11,13 @@ from typing import (
 import numpy as np
 from aioitertools.itertools import groupby
 from sqlalchemy import select
-from sqlalchemy.ext.asyncio import AsyncSession
 from strawberry.dataloader import DataLoader
 from typing_extensions import TypeAlias
 
 from phoenix.db import models
 from phoenix.metrics.retrieval_metrics import RetrievalMetrics
 from phoenix.server.api.types.DocumentRetrievalMetrics import DocumentRetrievalMetrics
+from phoenix.server.types import DbSessionFactory
 
 RowId: TypeAlias = int
 NumDocs: TypeAlias = int
@@ -30,7 +28,7 @@ Result: TypeAlias = List[DocumentRetrievalMetrics]
 
 
 class DocumentRetrievalMetricsDataLoader(DataLoader[Key, Result]):
-    def __init__(self, db:
+    def __init__(self, db: DbSessionFactory) -> None:
         super().__init__(load_fn=self._load_fn)
         self._db = db
 
phoenix/server/api/dataloaders/evaluation_summaries.py
CHANGED

@@ -2,8 +2,6 @@ from collections import defaultdict
 from datetime import datetime
 from typing import (
     Any,
-    AsyncContextManager,
-    Callable,
     DefaultDict,
     List,
     Literal,
@@ -15,7 +13,6 @@ import pandas as pd
 from aioitertools.itertools import groupby
 from cachetools import LFUCache, TTLCache
 from sqlalchemy import Select, func, or_, select
-from sqlalchemy.ext.asyncio import AsyncSession
 from strawberry.dataloader import AbstractCache, DataLoader
 from typing_extensions import TypeAlias, assert_never
 
@@ -23,6 +20,7 @@ from phoenix.db import models
 from phoenix.server.api.dataloaders.cache import TwoTierCache
 from phoenix.server.api.input_types.TimeRange import TimeRange
 from phoenix.server.api.types.EvaluationSummary import EvaluationSummary
+from phoenix.server.types import DbSessionFactory
 from phoenix.trace.dsl import SpanFilter
 
 Kind: TypeAlias = Literal["span", "trace"]
@@ -77,7 +75,7 @@ class EvaluationSummaryCache(
 class EvaluationSummaryDataLoader(DataLoader[Key, Result]):
     def __init__(
         self,
-        db:
+        db: DbSessionFactory,
         cache_map: Optional[AbstractCache[Key, Result]] = None,
     ) -> None:
         super().__init__(
phoenix/server/api/dataloaders/experiment_annotation_summaries.py
CHANGED

@@ -1,19 +1,17 @@
 from collections import defaultdict
 from dataclasses import dataclass
 from typing import (
-    AsyncContextManager,
-    Callable,
     DefaultDict,
     List,
     Optional,
 )
 
 from sqlalchemy import func, select
-from sqlalchemy.ext.asyncio import AsyncSession
 from strawberry.dataloader import AbstractCache, DataLoader
 from typing_extensions import TypeAlias
 
 from phoenix.db import models
+from phoenix.server.types import DbSessionFactory
 
 
 @dataclass
@@ -34,7 +32,7 @@ Result: TypeAlias = List[ExperimentAnnotationSummary]
 class ExperimentAnnotationSummaryDataLoader(DataLoader[Key, Result]):
     def __init__(
         self,
-        db:
+        db: DbSessionFactory,
         cache_map: Optional[AbstractCache[Key, Result]] = None,
     ) -> None:
         super().__init__(load_fn=self._load_fn)