PyPI - arize-phoenix - Versions diffs - 4.7.1__py3-none-any.whl → 4.7.2__py3-none-any.whl - Mend

arize-phoenix 4.7.1__py3-none-any.whl → 4.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arize-phoenix might be problematic. Click here for more details.

Files changed (10) hide show

{arize_phoenix-4.7.1.dist-info → arize_phoenix-4.7.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: arize-phoenix
-Version: 4.7.1
+Version: 4.7.2
 Summary: AI Observability and Evaluation
 Project-URL: Documentation, https://docs.arize.com/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues

{arize_phoenix-4.7.1.dist-info → arize_phoenix-4.7.2.dist-info}/RECORD RENAMED Viewed

@@ -5,7 +5,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
 phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
 phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
-phoenix/version.py,sha256=7suIQcI50zQCokBqNExDKrJDN157VogyLAp_ZZ4xqCM,22
+phoenix/version.py,sha256=cmBVf5yWtMWpLVrALEkcO90h-q8n53EpZSufTL15jyo,22
 phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
 phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
@@ -31,9 +31,9 @@ phoenix/db/migrations/types.py,sha256=Frq1AKSyBKQQ0FLzON-EmgTqE4kNkOpHMsbWnI-WgC
 phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=l69yZfScFrjfZZpY0gnqwhsDUEctLeo02qMgA_aOGDg,8155
 phoenix/db/migrations/versions/cf03bd6bae1d_init.py,sha256=CbWT3ZTR0CZqeT3zWLoTWhboFmnOy3Ju1z6Ztpq8WIM,8122
 phoenix/experiments/__init__.py,sha256=6JGwgUd7xCbGpuHqYZlsmErmYvVgv7N_j43bn3dUqsk,123
-phoenix/experiments/functions.py,sha256=m4PlpkTLoZxmt7ZV2KqwcWY5q18Yu3ZTrcEzAqLN-o4,31656
+phoenix/experiments/functions.py,sha256=2ZWCcIs0dh_UNB21t0J_PzWcYhu_32Ai3J6sEMjbBGE,32310
 phoenix/experiments/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
-phoenix/experiments/types.py,sha256=HQ9k7dUTlOLZl0iGtZOnToUtZBYGos6afwvO44subAM,24035
+phoenix/experiments/types.py,sha256=VuvDCcvUGeHIQuXS_xpz7Jq5xHdt3qu-O_C7IQ3DvF8,23397
 phoenix/experiments/utils.py,sha256=wLu5Kvt1b4a8rGPRWq5G8RQ9XSiV8fCIVm51zWBI3-g,758
 phoenix/experiments/evaluators/__init__.py,sha256=j63fi3fa3U7-itVPHa82GowhjQRU-wO6yhO34u_lhsA,714
 phoenix/experiments/evaluators/base.py,sha256=ani0F2TN7DMN0KLhV89LIr9-W4g-ccEl2YQJgfp44Js,5325
@@ -135,7 +135,7 @@ phoenix/server/api/routers/v1/dataset_examples.py,sha256=XfqOvDKF1oxb0pkeYfBycww
 phoenix/server/api/routers/v1/datasets.py,sha256=f2gLG-geu-_wtEw4mKSzNWK2cFb5TYOyRL3tQ7Fl7Es,31544
 phoenix/server/api/routers/v1/evaluations.py,sha256=8g6P_e2BweV3RDU0esFmpkb0L5fCwonQPXiJ0y6HLwg,9126
 phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=H_psVyuGUQImo0oxdEAKAMQ-oyVwkVIq5yaMHzHIiPc,5455
-phoenix/server/api/routers/v1/experiment_runs.py,sha256=u4Kgz1i5AffmCF2LHtC9Oo1hlGscZ3Dm8JlTRhM55yU,8307
+phoenix/server/api/routers/v1/experiment_runs.py,sha256=jy4SynmzdtQMoUzlowmG6wsVU14SsLAzfcW4JOhXjeQ,8154
 phoenix/server/api/routers/v1/experiments.py,sha256=cG-LyIGRdB1jVTL42Xi2__nsXibVe9Up7m3hFiTIYYY,11886
 phoenix/server/api/routers/v1/spans.py,sha256=PFeS3ayKj4cUle0CH-f-CpM1fRi-JicEG7BEtkANzAo,4074
 phoenix/server/api/routers/v1/traces.py,sha256=dYEf5pThenAQCgfQljHdrnwd4tC_tAXm6Kvk6GphPYs,2774
@@ -202,7 +202,7 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
 phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
 phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
 phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
-phoenix/server/static/index.js,sha256=tmyBBBaYS-HaFhd0dqqXiIsy5Lnvf-klOxQ0gRHAoDM,3521766
+phoenix/server/static/index.js,sha256=LmaCH-kjVuEBKvMhK3pOUxK12Wv26htH5juAzju9VWE,3524823
 phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
 phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/templates/index.html,sha256=S4z7qSoNSwnKFAH9r96AR-YJEyoKMd-VMWVlJ_IdzME,2039
@@ -247,8 +247,8 @@ phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,
 phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
 phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
 phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arize_phoenix-4.7.1.dist-info/METADATA,sha256=4l6GZ3dH0ALP9ifyZ7mSk32mjC_i7rWj_ahg2Mrvf-c,11451
-arize_phoenix-4.7.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-arize_phoenix-4.7.1.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
-arize_phoenix-4.7.1.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
-arize_phoenix-4.7.1.dist-info/RECORD,,
+arize_phoenix-4.7.2.dist-info/METADATA,sha256=m2l2uSMpqkXD-NpYdOBqsKlNF5jMUoLoSjnxo9is9h0,11451
+arize_phoenix-4.7.2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+arize_phoenix-4.7.2.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
+arize_phoenix-4.7.2.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
+arize_phoenix-4.7.2.dist-info/RECORD,,

phoenix/experiments/functions.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import asyncio
 import functools
 import inspect
 import json
@@ -62,7 +63,6 @@ from phoenix.experiments.types import (
     ExperimentEvaluationRun,
     ExperimentParameters,
     ExperimentRun,
-    ExperimentRunOutput,
     ExperimentTask,
     RanExperiment,
     TaskSummary,
@@ -284,7 +284,7 @@ def run_experiment(
             experiment_id=experiment.id,
             dataset_example_id=example.id,
             repetition_number=repetition_number,
-            experiment_run_output=ExperimentRunOutput(task_output=output),
+            output=output,
             error=repr(error) if error else None,
             trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
         )
@@ -345,13 +345,21 @@ def run_experiment(
             experiment_id=experiment.id,
             dataset_example_id=example.id,
             repetition_number=repetition_number,
-            experiment_run_output=ExperimentRunOutput(task_output=output),
+            output=output,
             error=repr(error) if error else None,
             trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
         )
         if not dry_run:
-            resp = await async_client.post(
-                f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
+            # Below is a workaround to avoid timeout errors sometimes
+            # encountered when the task is a synchronous function that
+            # blocks for too long.
+            resp = await asyncio.get_running_loop().run_in_executor(
+                None,
+                functools.partial(
+                    sync_client.post,
+                    url=f"/v1/experiments/{experiment.id}/runs",
+                    json=jsonify(exp_run),
+                ),
             )
             resp.raise_for_status()
             exp_run = replace(exp_run, id=resp.json()["data"]["id"])
@@ -393,7 +401,7 @@ def run_experiment(
     ran_experiment.__init__(  # type: ignore[misc]
         params=params,
         dataset=dataset,
-        runs={r.id: r for r in task_runs},
+        runs={r.id: r for r in task_runs if r is not None},
         task_summary=task_summary,
         **_asdict(experiment),
     )
@@ -498,7 +506,7 @@ def evaluate_experiment(
             stack.enter_context(capture_spans(resource))
             try:
                 result = evaluator.evaluate(
-                    output=experiment_run.output,
+                    output=deepcopy(experiment_run.output),
                     expected=example.output,
                     reference=example.output,
                     input=example.input,
@@ -550,7 +558,7 @@ def evaluate_experiment(
             stack.enter_context(capture_spans(resource))
             try:
                 result = await evaluator.async_evaluate(
-                    output=experiment_run.output,
+                    output=deepcopy(experiment_run.output),
                     expected=example.output,
                     reference=example.output,
                     input=example.input,
@@ -582,7 +590,17 @@ def evaluate_experiment(
             trace_id=_str_trace_id(span.get_span_context().trace_id),  # type: ignore[no-untyped-call]
         )
         if not dry_run:
-            resp = await async_client.post("/v1/experiment_evaluations", json=jsonify(eval_run))
+            # Below is a workaround to avoid timeout errors sometimes
+            # encountered when the evaluator is a synchronous function
+            # that blocks for too long.
+            resp = await asyncio.get_running_loop().run_in_executor(
+                None,
+                functools.partial(
+                    sync_client.post,
+                    url="/v1/experiment_evaluations",
+                    json=jsonify(eval_run),
+                ),
+            )
             resp.raise_for_status()
             eval_run = replace(eval_run, id=resp.json()["data"]["id"])
         return eval_run

phoenix/experiments/types.py CHANGED Viewed

@@ -198,20 +198,6 @@ class Experiment:
         )
-@dataclass(frozen=True)
-class ExperimentRunOutput:
-    task_output: TaskOutput
-    def __post_init__(self) -> None:
-        object.__setattr__(self, "task_output", _make_read_only(self.task_output))
-    @classmethod
-    def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> ExperimentRunOutput:
-        if not obj:
-            return cls(task_output=None)
-        return cls(task_output=obj["task_output"])
 @dataclass(frozen=True)
 class ExperimentRun:
     start_time: datetime
@@ -219,15 +205,11 @@ class ExperimentRun:
     experiment_id: ExperimentId
     dataset_example_id: ExampleId
     repetition_number: RepetitionNumber
-    experiment_run_output: ExperimentRunOutput
+    output: JSONSerializable
     error: Optional[str] = None
     id: ExperimentRunId = field(default_factory=_dry_run_id)
     trace_id: Optional[TraceId] = None
-    @property
-    def output(self) -> Optional[TaskOutput]:
-        return deepcopy(self.experiment_run_output.task_output)
     @classmethod
     def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentRun:
         return cls(
@@ -236,14 +218,14 @@ class ExperimentRun:
             experiment_id=obj["experiment_id"],
             dataset_example_id=obj["dataset_example_id"],
             repetition_number=obj.get("repetition_number") or 1,
-            experiment_run_output=ExperimentRunOutput.from_dict(obj["experiment_run_output"]),
+            output=_make_read_only(obj.get("output")),
             error=obj.get("error"),
             id=obj["id"],
             trace_id=obj.get("trace_id"),
         )
     def __post_init__(self) -> None:
-        if bool(self.experiment_run_output) == bool(self.error):
+        if bool(self.output) == bool(self.error):
             ValueError("Must specify exactly one of experiment_run_output or error")
@@ -381,7 +363,7 @@ class EvaluationSummary(_HasStats):
         if df.empty:
             df = pd.DataFrame.from_records(
                 [
-                    {"evaluator": name, "error": True, "score": None, "label": None}
+                    {"evaluator": name, "error": None, "score": None, "label": None}
                     for name in params.eval_names
                 ]
             )
@@ -571,7 +553,7 @@ class RanExperiment(Experiment):
                 {
                     "run_id": run.id,
                     "error": run.error,
-                    "output": deepcopy(run.experiment_run_output.task_output),
+                    "output": deepcopy(run.output),
                     "input": deepcopy((ex := self.dataset.examples[run.dataset_example_id]).input),
                     "expected": deepcopy(ex.output),
                     "metadata": deepcopy(ex.metadata),

phoenix/server/api/routers/v1/experiment_runs.py CHANGED Viewed

@@ -7,9 +7,8 @@ from starlette.status import HTTP_404_NOT_FOUND
 from strawberry.relay import GlobalID
 from phoenix.db import models
-from phoenix.experiments.types import ExperimentRun, ExperimentRunOutput
+from phoenix.db.models import ExperimentRunOutput
 from phoenix.server.api.types.node import from_global_id_with_expected_type
-from phoenix.utilities.json import jsonify
 async def create_experiment_run(request: Request) -> Response:
@@ -39,9 +38,8 @@ async def create_experiment_run(request: Request) -> Response:
               trace_id:
                 type: string
                 description: Optional trace ID for tracking
-              experiment_run_output:
-                type: object
-                description: The output of the experiment run
+              output:
+                description: The output of the experiment task
               repetition_number:
                 type: integer
                 description: The repetition number of the experiment run
@@ -101,7 +99,7 @@ async def create_experiment_run(request: Request) -> Response:
         )
     trace_id = payload.get("trace_id", None)
-    output = payload["experiment_run_output"]
+    task_output = payload["output"]
     repetition_number = payload["repetition_number"]
     start_time = payload["start_time"]
     end_time = payload["end_time"]
@@ -112,7 +110,7 @@ async def create_experiment_run(request: Request) -> Response:
             experiment_id=experiment_id,
             dataset_example_id=dataset_example_id,
             trace_id=trace_id,
-            output=output,
+            output=ExperimentRunOutput(task_output=task_output),
             repetition_number=repetition_number,
             start_time=datetime.fromisoformat(start_time),
             end_time=datetime.fromisoformat(end_time),
@@ -170,9 +168,8 @@ async def list_experiment_runs(request: Request) -> Response:
                         type: string
                         format: date-time
                         description: The end time of the experiment run in ISO format
-                      experiment_run_output:
-                        type: object
-                        description: The output of the experiment run
+                      output:
+                        description: The output of the experiment task
                       error:
                         type: string
                         description: Error message if the experiment run encountered an error
@@ -205,16 +202,16 @@ async def list_experiment_runs(request: Request) -> Response:
             experiment_gid = GlobalID("Experiment", str(exp_run.experiment_id))
             example_gid = GlobalID("DatasetExample", str(exp_run.dataset_example_id))
             runs.append(
-                ExperimentRun(
-                    start_time=exp_run.start_time,
-                    end_time=exp_run.end_time,
+                dict(
+                    start_time=exp_run.start_time.isoformat(),
+                    end_time=exp_run.end_time.isoformat(),
                     experiment_id=str(experiment_gid),
                     dataset_example_id=str(example_gid),
                     repetition_number=exp_run.repetition_number,
-                    experiment_run_output=ExperimentRunOutput.from_dict(exp_run.output),
+                    output=exp_run.output.get("task_output"),
                     error=exp_run.error,
                     id=str(run_gid),
                     trace_id=exp_run.trace_id,
                 )
             )
-    return JSONResponse(content={"data": jsonify(runs)}, status_code=200)
+    return JSONResponse(content={"data": runs}, status_code=200)

arize-phoenix 4.7.1__py3-none-any.whl → 4.7.2__py3-none-any.whl

Potentially problematic release.

arize-phoenix 4.7.1__py3-none-any.whl → 4.7.2__py3-none-any.whl