arize-phoenix 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize_phoenix-2.5.0.dist-info → arize_phoenix-2.7.0.dist-info}/METADATA +1 -1
- {arize_phoenix-2.5.0.dist-info → arize_phoenix-2.7.0.dist-info}/RECORD +24 -23
- phoenix/config.py +32 -7
- phoenix/core/evals.py +53 -0
- phoenix/datasets/fixtures.py +46 -0
- phoenix/experimental/evals/evaluators.py +4 -0
- phoenix/experimental/evals/functions/classify.py +16 -6
- phoenix/experimental/evals/functions/generate.py +6 -3
- phoenix/experimental/evals/models/anthropic.py +3 -4
- phoenix/experimental/evals/models/base.py +1 -0
- phoenix/experimental/evals/models/bedrock.py +4 -2
- phoenix/experimental/evals/models/openai.py +2 -0
- phoenix/experimental/evals/models/vertex.py +6 -0
- phoenix/experimental/evals/templates/default_templates.py +0 -7
- phoenix/server/static/index.js +1 -1
- phoenix/session/evaluation.py +16 -10
- phoenix/session/session.py +19 -0
- phoenix/trace/errors.py +5 -0
- phoenix/trace/span_evaluations.py +46 -61
- phoenix/trace/trace_dataset.py +111 -4
- phoenix/version.py +1 -1
- {arize_phoenix-2.5.0.dist-info → arize_phoenix-2.7.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-2.5.0.dist-info → arize_phoenix-2.7.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-2.5.0.dist-info → arize_phoenix-2.7.0.dist-info}/licenses/LICENSE +0 -0

{arize_phoenix-2.5.0.dist-info → arize_phoenix-2.7.0.dist-info}/RECORD
CHANGED

@@ -1,13 +1,13 @@
 phoenix/__init__.py,sha256=EEh0vZGRQS8686h34GQ64OjQoZ7neKYO_iO5j6Oa9Jw,1402
-phoenix/config.py,sha256=
+phoenix/config.py,sha256=RbQw8AkVyI4SSo5CD520AjUNcwkDNOGZA6_ErE48R7A,3454
 phoenix/datetime_utils.py,sha256=D955QLrkgrrSdUM6NyqbCeAu2SMsjhR5rHVQEsVUdng,2773
 phoenix/exceptions.py,sha256=igIWGAg3m8jm5YwQDeCY1p8ml_60A7zaGVXJ1yZhY9s,44
 phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 phoenix/services.py,sha256=f6AeyKTuOpy9RCcTCjVH3gx5nYZhbTMFOuv1WSUOB5o,4992
-phoenix/version.py,sha256=
+phoenix/version.py,sha256=EtKWW0Hnl5oWglRNH0HZigvcDT2FEs58ek8buJdwW1E,22
 phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
-phoenix/core/evals.py,sha256=
+phoenix/core/evals.py,sha256=gJyqQzpud5YjtoY8h4pgXvHDsdubGfqmEewLuZHPPmQ,10224
 phoenix/core/model.py,sha256=vQ6RxpUPlncezJvur5u6xBN0Lkrk2gW0cTyb-qqaSqA,4713
 phoenix/core/model_schema.py,sha256=rR9VdhL_oXxbprDTPQJBXs5hw5sMPQmzx__m6Kwsxug,50394
 phoenix/core/model_schema_adapter.py,sha256=3GkyzqUST4fYi-Bgs8qAam5hwMCdQRZTDLjZ9Bnzdm4,8268
@@ -15,29 +15,29 @@ phoenix/core/traces.py,sha256=O01L6qwQfHxHUHNZemKBBsAgqDo1tAIO5-1fK2g0NwE,14618
 phoenix/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/datasets/dataset.py,sha256=scKVZ7zc6Dpc_ntt-pWhzY-KWqOJEwKePuyNnKSVTGE,30515
 phoenix/datasets/errors.py,sha256=cGp9vxnw4SewFoWBV3ZGMkhE0Kh73lPIv3Ppz_H_RoA,8261
-phoenix/datasets/fixtures.py,sha256=
+phoenix/datasets/fixtures.py,sha256=rGnVnufPvt25cyrlat0vKKtlu08olOuZvbp7EnR33aU,20668
 phoenix/datasets/schema.py,sha256=bF1d2Md6NyqQZuC4Ym5A52f2_IcazkyxGFZ11HPqSg0,6668
 phoenix/datasets/validation.py,sha256=dZ9lCFUV0EY7HCkQkQBrs-GLAEIZdpOqUxwD5l4dp88,8294
 phoenix/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/experimental/evals/__init__.py,sha256=q96YKLMt2GJD9zL8sjugvWx1INfw40Wa7E9OsHo2S4s,1885
-phoenix/experimental/evals/evaluators.py,sha256=
+phoenix/experimental/evals/evaluators.py,sha256=r7fXrS-l4gn58SUhLAZSfY3P8lxysouSVJwHddrZJ_Q,15956
 phoenix/experimental/evals/retrievals.py,sha256=o3fqrsYbYZjyGj_jWkN_9VQVyXjLkDKDw5Ws7l8bwdI,3828
 phoenix/experimental/evals/functions/__init__.py,sha256=NNd0-_cmIopdV7vm3rspjfgM726qoQJ4DPq_vqbnaxQ,180
-phoenix/experimental/evals/functions/classify.py,sha256=
+phoenix/experimental/evals/functions/classify.py,sha256=A-seuYrwiNFdc4IK9WJkQVKY78YdBHxaCMSDPL4_SXE,19523
 phoenix/experimental/evals/functions/executor.py,sha256=bM7PI2rcPukQQzZ2rWqN_-Kfo_a935YJj0bh1Red8Ps,13406
-phoenix/experimental/evals/functions/generate.py,sha256=
+phoenix/experimental/evals/functions/generate.py,sha256=8LnnPAjBM9yxitdkaGZ67OabuDTOWBF3fvinJ_uCFRg,5584
 phoenix/experimental/evals/functions/processing.py,sha256=F4xtLsulLV4a8CkuLldRddsCim75dSTIShEJUYN6I6w,1823
 phoenix/experimental/evals/models/__init__.py,sha256=j1N7DhiOPbcaemtVBONcQ0miNnGQwEXz4u3P3Vwe6-4,320
-phoenix/experimental/evals/models/anthropic.py,sha256=
-phoenix/experimental/evals/models/base.py,sha256=
-phoenix/experimental/evals/models/bedrock.py,sha256=
+phoenix/experimental/evals/models/anthropic.py,sha256=VRYYbZr8ZFvC-19VxScMNux_Yp_9DzSRXiSmWUuhlOc,6309
+phoenix/experimental/evals/models/base.py,sha256=z8xB18s6JI_Weihq2yG22Rte2RBde_cdHq9rINAXHYw,8086
+phoenix/experimental/evals/models/bedrock.py,sha256=VrLNifBxmgHVMFqp6j9d1aGQIvDDuw8yjBM8CdIZCH4,8009
 phoenix/experimental/evals/models/litellm.py,sha256=YvlYeAV-gG0IxFoVJ_OuRYwVwQ0LEtYBuWmp-uPGrNU,4368
-phoenix/experimental/evals/models/openai.py,sha256=
+phoenix/experimental/evals/models/openai.py,sha256=Yht-AZDq2iiwMUlkG3ghv3tCxZY8p-L7xxhSeGPtfaM,17238
 phoenix/experimental/evals/models/rate_limiters.py,sha256=5GVN0RQKt36Przg3-9jLgocRmyg-tbeO-cdbuLIx89w,10160
-phoenix/experimental/evals/models/vertex.py,sha256=
+phoenix/experimental/evals/models/vertex.py,sha256=52A1g8j54_VkahjQmLj0eguPKJdQj0xtI4dAlrLsgtY,6592
 phoenix/experimental/evals/models/vertexai.py,sha256=NfBpQq0l7XzP-wDEDsK27IRiQBzA1GXEdfwlAf8leX4,5609
 phoenix/experimental/evals/templates/__init__.py,sha256=GSJSoWJ4jwyoUANniidmWMUtXQhNQYbTJbfFqCvuYuo,1470
-phoenix/experimental/evals/templates/default_templates.py,sha256=
+phoenix/experimental/evals/templates/default_templates.py,sha256=dVKmoLwqgAyGcRuezz9WKnXSHhw7-qk1R8j6wSmqh0s,20722
 phoenix/experimental/evals/templates/template.py,sha256=ImFSaTPo9oalPNwq7cNdOCndrvuwLuIyIFKsgDVcoJE,6715
 phoenix/experimental/evals/utils/__init__.py,sha256=608EX7sG0f5oDG__II16J8xnFJiNpY9dI9AC8vXwR00,5601
 phoenix/experimental/evals/utils/threads.py,sha256=ksI-egarPnlxit0qKKjtjZ2L82qGLxqxZ6s92O0eBA4,1005
@@ -125,24 +125,25 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
 phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
 phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
 phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
-phoenix/server/static/index.js,sha256=
+phoenix/server/static/index.js,sha256=4MEBiTUm4u7QrSnPE7OJrBEYSkFjmyZPugfrowtQOCI,3259882
 phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
 phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/templates/index.html,sha256=DlfcGoq1V5C2QkJWqP1j4Nu6_kPfsOzOrtzYF3ogghE,1900
 phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/session/evaluation.py,sha256=
-phoenix/session/session.py,sha256=
+phoenix/session/evaluation.py,sha256=DaAtA0XYJbXRJO_StGywa-9APlz2ORSmCXzxrtn3rvI,4997
+phoenix/session/session.py,sha256=94hRilOwlEWo6npLNjutaYRCevDPLPnAQdnuP07qeGc,20826
 phoenix/trace/__init__.py,sha256=4d_MqzUIFmlY9WWcFeTONJ4xL5mPGoWZaPM2TJ0ZDBQ,266
+phoenix/trace/errors.py,sha256=DbXSJnNErV7305tKv7pUWLD6jcVHJ6EBdSu4mZJ6IM4,112
 phoenix/trace/evaluation_conventions.py,sha256=t8jydM3U0-T5YpiQKRJ3tWdWGlHtzKyttYdw-ddvPOk,1048
 phoenix/trace/exporter.py,sha256=z3xrGJhIRh7XMy4Q1FkR3KmFZym-GX0XxLTZ6eSnN0Q,4347
 phoenix/trace/fixtures.py,sha256=GGNOVi8Cjj9eduxOenyYLF8mhl-XTbXHtnraP5vLlxQ,6341
 phoenix/trace/otel.py,sha256=Efc6S0IuvI-NEJ_Mv1VWEzQS94-lR_6nJ3ecTzwmyQ4,13933
 phoenix/trace/schemas.py,sha256=m1wVlYFT6qL3FovD3TtTYsEgN6OHvv52gNdJkoPCmuY,5400
 phoenix/trace/semantic_conventions.py,sha256=u6NG85ZhbreriZr8cqJaddldM_jUcew7JilszY7JUk8,4652
-phoenix/trace/span_evaluations.py,sha256=
+phoenix/trace/span_evaluations.py,sha256=asGug9lUHUufBwK1nL_PnHIDKsOc5X4ws7cur9lfoyI,12421
 phoenix/trace/span_json_decoder.py,sha256=Xv-0uCsHgwzQb0dqTa7CuuDeXAPaXjQICyCFK3ZQaSs,3089
 phoenix/trace/span_json_encoder.py,sha256=C5y7rkyOcV08oJC5t8TZqVxsKCZMJKad7bBQzAgLoDs,1763
-phoenix/trace/trace_dataset.py,sha256=
+phoenix/trace/trace_dataset.py,sha256=KW0TzmhlKuX8PUPLV172iTK08myYE0QXUC75KiIqJ7k,13204
 phoenix/trace/tracer.py,sha256=S8UfhI4Qhl_uulD9bj9qFdSB5vwcB42hXd8-qURGcmo,3662
 phoenix/trace/utils.py,sha256=7LurVGXn245cjj4MJsc7v6jq4DSJkpK6YGBfIaSywuw,1307
 phoenix/trace/dsl/__init__.py,sha256=WIQIjJg362XD3s50OsPJJ0xbDsGp41bSv7vDllLrPuA,144
@@ -165,8 +166,8 @@ phoenix/trace/v1/evaluation_pb2.pyi,sha256=cCbbx06gwQmaH14s3J1X25TtaARh-k1abbxQd
 phoenix/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/utilities/error_handling.py,sha256=7b5rpGFj9EWZ8yrZK1IHvxB89suWk3lggDayUQcvZds,1946
 phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,222
-arize_phoenix-2.
-arize_phoenix-2.
-arize_phoenix-2.
-arize_phoenix-2.
-arize_phoenix-2.
+arize_phoenix-2.7.0.dist-info/METADATA,sha256=G2XhPSpRh7gJHrTc5_MhOvrpFBTWv0_mjb_mZueDuWI,26479
+arize_phoenix-2.7.0.dist-info/WHEEL,sha256=mRYSEL3Ih6g5a_CVMIcwiF__0Ae4_gLYh01YFNwiq1k,87
+arize_phoenix-2.7.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
+arize_phoenix-2.7.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
+arize_phoenix-2.7.0.dist-info/RECORD,,
phoenix/config.py
CHANGED

@@ -12,6 +12,11 @@ ENV_PHOENIX_COLLECTOR_ENDPOINT = "PHOENIX_COLLECTOR_ENDPOINT"
 The endpoint traces and evals are sent to. This must be set if the Phoenix
 server is running on a remote instance.
 """
+ENV_WORKING_DIR = "PHOENIX_WORKING_DIR"
+"""
+The directory in which to save, load, and export datasets. This directory must
+be accessible by both the Phoenix server and the notebook environment.
+"""
 
 
 def _get_temp_path() -> Path:
@@ -36,13 +41,16 @@ def get_running_pid() -> Optional[int]:
     return None
 
 
-
-
-
-
-
-
-
+def get_working_dir() -> Path:
+    """
+    Get the working directory for saving, loading, and exporting datasets.
+    """
+    working_dir_str = os.getenv(ENV_WORKING_DIR)
+    if working_dir_str is not None:
+        return Path(working_dir_str)
+    # Fall back to ~/.phoenix if PHOENIX_WORKING_DIR is not set
+    return Path.home().resolve() / ".phoenix"
+
 
 PHOENIX_DIR = Path(__file__).resolve().parent
 # Server config
@@ -53,6 +61,23 @@ HOST = "0.0.0.0"
 PORT = 6006
 # The prefix of datasets that are auto-assigned a name
 GENERATED_DATASET_NAME_PREFIX = "phoenix_dataset_"
+# The work directory for saving, loading, and exporting datasets
+WORKING_DIR = get_working_dir()
+
+try:
+    for path in (
+        ROOT_DIR := WORKING_DIR,
+        EXPORT_DIR := ROOT_DIR / "exports",
+        DATASET_DIR := ROOT_DIR / "datasets",
+        TRACE_DATASET_DIR := ROOT_DIR / "trace_datasets",
+    ):
+        path.mkdir(parents=True, exist_ok=True)
+except Exception as e:
+    print(
+        f"⚠️ Failed to initialize the working directory at {WORKING_DIR} due to an error: {str(e)}"
+    )
+    print("⚠️ While phoenix will still run, you will not be able to save, load, or export data")
+    print("ℹ️ To change, set the `{ENV_WORKING_DIR}` environment variable before importing phoenix.")
 
 
 def get_exported_files(directory: Path) -> List[Path]:
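
Because the subdirectories are created at import time, the override has to happen before phoenix is first imported. A minimal sketch (the path is illustrative):

    import os

    # PHOENIX_WORKING_DIR must be set before phoenix is imported, because the
    # exports/, datasets/, and trace_datasets/ subdirectories are created at
    # import time. The path below is illustrative.
    os.environ["PHOENIX_WORKING_DIR"] = "/mnt/shared/phoenix"

    from phoenix.config import WORKING_DIR  # noqa: E402

    print(WORKING_DIR)  # /mnt/shared/phoenix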
phoenix/core/evals.py
CHANGED

@@ -9,10 +9,12 @@ from typing import DefaultDict, Dict, List, Optional, Set, Tuple
 
 import numpy as np
 from google.protobuf.json_format import MessageToDict
+from pandas import DataFrame, Index, MultiIndex
 from typing_extensions import TypeAlias, assert_never
 
 import phoenix.trace.v1 as pb
 from phoenix.trace.schemas import SpanID, TraceID
+from phoenix.trace.span_evaluations import DocumentEvaluations, Evaluations, SpanEvaluations
 
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
@@ -171,3 +173,54 @@ class Evals:
             if result.HasField("score") and document_position < num_documents:
                 scores[document_position] = result.score.value
         return scores
+
+    def export_evaluations(self) -> List[Evaluations]:
+        evaluations: List[Evaluations] = []
+        evaluations.extend(self._export_span_evaluations())
+        evaluations.extend(self._export_document_evaluations())
+        return evaluations
+
+    def _export_span_evaluations(self) -> List[SpanEvaluations]:
+        span_evaluations = []
+        with self._lock:
+            span_evaluations_by_name = tuple(self._span_evaluations_by_name.items())
+        for eval_name, _span_evaluations_by_id in span_evaluations_by_name:
+            span_ids = []
+            rows = []
+            with self._lock:
+                span_evaluations_by_id = tuple(_span_evaluations_by_id.items())
+            for span_id, pb_eval in span_evaluations_by_id:
+                span_ids.append(span_id)
+                rows.append(MessageToDict(pb_eval.result))
+            dataframe = DataFrame(rows, index=Index(span_ids, name="context.span_id"))
+            span_evaluations.append(SpanEvaluations(eval_name, dataframe))
+        return span_evaluations
+
+    def _export_document_evaluations(self) -> List[DocumentEvaluations]:
+        evaluations = []
+        with self._lock:
+            document_evaluations_by_name = tuple(self._document_evaluations_by_name.items())
+        for eval_name, _document_evaluations_by_id in document_evaluations_by_name:
+            span_ids = []
+            document_positions = []
+            rows = []
+            with self._lock:
+                document_evaluations_by_id = tuple(_document_evaluations_by_id.items())
+            for span_id, _document_evaluations_by_position in document_evaluations_by_id:
+                with self._lock:
+                    document_evaluations_by_position = sorted(
+                        _document_evaluations_by_position.items()
+                    )  # ensure the evals are sorted by document position
+                for document_position, pb_eval in document_evaluations_by_position:
+                    span_ids.append(span_id)
+                    document_positions.append(document_position)
+                    rows.append(MessageToDict(pb_eval.result))
+            dataframe = DataFrame(
+                rows,
+                index=MultiIndex.from_arrays(
+                    (span_ids, document_positions),
+                    names=("context.span_id", "document_position"),
+                ),
+            )
+            evaluations.append(DocumentEvaluations(eval_name, dataframe))
+        return evaluations
phoenix/datasets/fixtures.py
CHANGED

@@ -240,6 +240,51 @@ click_through_rate_fixture = Fixture(
     reference_file_name="click_through_rate_train.parquet",
 )
 
+chatbot_queries_schema = Schema(
+    prediction_id_column_name="id",
+    prompt_column_names=RetrievalEmbeddingColumnNames(
+        vector_column_name="prompt",
+        raw_data_column_name="prompt_text",
+        context_retrieval_ids_column_name="document_ids",
+        context_retrieval_scores_column_name="document_scores",
+    ),
+    response_column_names="response",
+    tag_column_names=[
+        "answer_relevancy",
+        "context_relevancy",
+        "faithfulness",
+        "document_similarity_0",
+        "document_similarity_1",
+        "openai_relevance_0",
+        "openai_relevance_1",
+        "user_feedback",
+    ],
+)
+
+chatbot_database_schema = Schema(
+    prediction_id_column_name="document_id",
+    prompt_column_names=EmbeddingColumnNames(
+        vector_column_name="text_vector",
+        raw_data_column_name="text",
+    ),
+)
+
+chatbot_fixture = Fixture(
+    name="chatbot",
+    description="""
+    Investigate RAG performance for a chatbot built on top of Arize's documentation.
+    This use-case highlights how embedding visualizations for a RAG application can
+    highlight issues with the application's retrieval and performance.
+
+    The data contains relevance metrics generated by LLM Evals as well as RAGAS.
+    """,
+    primary_schema=chatbot_queries_schema,
+    corpus_schema=chatbot_database_schema,
+    prefix="unstructured/llm/chatbot",
+    primary_file_name="chatbot_queries_with_ragas.parquet",
+    corpus_file_name="chatbot_database_ds.parquet",
+)
+
 wide_data_primary_schema = Schema(
     actual_label_column_name="actual_label",
     prediction_label_column_name="predicted_label",
@@ -363,6 +408,7 @@ FIXTURES: Tuple[Fixture, ...] = (
     deep_data_fixture,
     llm_summarization_fixture,
     wikipedia_fixture,
+    chatbot_fixture,
 )
 NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}
 
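
A sketch of how the new fixture might be pulled up, assuming the load_example helper exposed by Phoenix releases of this era resolves fixture names through NAME_TO_FIXTURE and returns the fixture's primary and corpus datasets:

    import phoenix as px

    # "chatbot" is the fixture name registered above; fetching the two
    # parquet files requires network access.
    datasets = px.load_example("chatbot")
    session = px.launch_app(primary=datasets.primary, corpus=datasets.corpus)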
phoenix/experimental/evals/functions/classify.py
CHANGED

@@ -73,7 +73,7 @@ def llm_classify(
     include_prompt: bool = False,
     include_response: bool = False,
     run_sync: bool = False,
-    concurrency: int =
+    concurrency: Optional[int] = None,
 ) -> pd.DataFrame:
     """Classifies each input row of the dataframe using an LLM. Returns a pandas.DataFrame
     where the first column is named `label` and contains the classification labels. An optional
@@ -116,8 +116,9 @@ def llm_classify(
         run_sync (bool, default=False): If True, forces synchronous request submission. Otherwise
             evaluations will be run asynchronously if possible.
 
-        concurrency (int, default=
-            possible.
+        concurrency (Optional[int], default=None): The number of concurrent evals if async
+            submission is possible. If not provided, a recommended default concurrency is set on a
+            per-model basis.
 
     Returns:
         pandas.DataFrame: A dataframe where the `label` column (at column position 0) contains
@@ -127,6 +128,7 @@ def llm_classify(
         from the entries in the rails argument or "NOT_PARSABLE" if the model's output could
         not be parsed.
     """
+    concurrency = concurrency or model.default_concurrency
     # clients need to be reloaded to ensure that async evals work properly
    model.reload_client()
 
@@ -353,7 +355,7 @@ def run_evals(
     provide_explanation: bool = False,
     use_function_calling_if_available: bool = True,
     verbose: bool = False,
-    concurrency: int =
+    concurrency: Optional[int] = None,
 ) -> List[DataFrame]:
     """
     Applies a list of evaluators to a dataframe. Outputs a list of dataframes in
@@ -381,13 +383,21 @@ def run_evals(
         as model invocation parameters and details about retries and snapping to
         rails.
 
-        concurrency (int,
-            submission is possible.
+        concurrency (Optional[int], default=None): The number of concurrent evals if async
+            submission is possible. If not provided, a recommended default concurrency is set on a
+            per-model basis.
 
     Returns:
         List[DataFrame]: A list of dataframes, one for each evaluator, all of
         which have the same number of rows as the input dataframe.
     """
+    # use the minimum default concurrency of all the models
+    if concurrency is None:
+        if len(evaluators) == 0:
+            concurrency = 1
+        else:
+            concurrency = min(evaluator.default_concurrency for evaluator in evaluators)
+
     # clients need to be reloaded to ensure that async evals work properly
     for evaluator in evaluators:
         evaluator.reload_client()
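
A sketch of the resulting calling convention; the toy template and the OpenAIModel model_name parameter are assumptions for illustration, and an OPENAI_API_KEY is required in the environment:

    import pandas as pd
    from phoenix.experimental.evals import OpenAIModel, llm_classify

    # A toy template; the dataframe must supply a column for each template variable.
    template = (
        "Is the following statement true of the physical world? "
        "Answer 'factual' or 'hallucinated'.\nStatement: {statement}"
    )
    df = pd.DataFrame({"statement": ["The earth orbits the sun."]})

    # concurrency is omitted, so model.default_concurrency is used.
    labels = llm_classify(
        dataframe=df,
        model=OpenAIModel(model_name="gpt-4"),
        template=template,
        rails=["factual", "hallucinated"],
    )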
phoenix/experimental/evals/functions/generate.py
CHANGED

@@ -31,7 +31,7 @@ def llm_generate(
     include_prompt: bool = False,
     include_response: bool = False,
     run_sync: bool = False,
-    concurrency: int =
+    concurrency: Optional[int] = None,
 ) -> pd.DataFrame:
     """
     Generates a text using a template using an LLM. This function is useful
@@ -70,14 +70,17 @@ def llm_generate(
         run_sync (bool, default=False): If True, forces synchronous request submission. Otherwise
             evaluations will be run asynchronously if possible.
 
-        concurrency (int, default=
-            possible.
+        concurrency (Optional[int], default=None): The number of concurrent evals if async
+            submission is possible. If not provided, a recommended default concurrency is set on a
+            per-model basis.
 
     Returns:
         generations_dataframe (pandas.DataFrame): A dataframe where each row
             represents the generated output
 
     """
+    concurrency = concurrency or model.default_concurrency
+
     # clients need to be reloaded to ensure that async evals work properly
     model.reload_client()
 
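
The same fallback applies to generation; a short sketch under the same assumptions as the llm_classify example above:

    import pandas as pd
    from phoenix.experimental.evals import OpenAIModel, llm_generate

    df = pd.DataFrame({"topic": ["tracing", "evals"]})

    # concurrency is omitted, so model.default_concurrency is used here too.
    generations = llm_generate(
        dataframe=df,
        template="Write one sentence about {topic}.",
        model=OpenAIModel(model_name="gpt-4"),
    )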
phoenix/experimental/evals/models/anthropic.py
CHANGED

@@ -1,4 +1,3 @@
-import logging
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
@@ -8,8 +7,6 @@ from phoenix.experimental.evals.models.rate_limiters import RateLimiter
 if TYPE_CHECKING:
     from tiktoken import Encoding
 
-logger = logging.getLogger(__name__)
-
 MODEL_TOKEN_LIMIT_MAPPING = {
     "claude-2.1": 200000,
     "claude-2.0": 100000,
@@ -80,7 +77,6 @@ class AnthropicModel(BaseEvalModel):
         try:
             encoding = self._tiktoken.encoding_for_model(self.model)
         except KeyError:
-            logger.warning("Warning: model not found. Using cl100k_base encoding.")
             encoding = self._tiktoken.get_encoding("cl100k_base")
         self._tiktoken_encoding = encoding
 
@@ -149,6 +145,9 @@ class AnthropicModel(BaseEvalModel):
         return _completion_with_retry(**kwargs)
 
     async def _async_generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
+        # instruction is an invalid input to Anthropic models, it is passed in by
+        # BaseEvalModel.__call__ and needs to be removed
+        kwargs.pop("instruction", None)
         invocation_parameters = self.invocation_parameters()
         invocation_parameters.update(kwargs)
         response = await self._async_generate_with_retry(
phoenix/experimental/evals/models/bedrock.py
CHANGED

@@ -87,7 +87,6 @@ class BedrockModel(BaseEvalModel):
         try:
             encoding = self._tiktoken.encoding_for_model(self.model_id)
         except KeyError:
-            logger.warning("Warning: model not found. Using cl100k_base encoding.")
             encoding = self._tiktoken.get_encoding("cl100k_base")
         self._tiktoken_encoding = encoding
 
@@ -165,7 +164,7 @@ class BedrockModel(BaseEvalModel):
                 "temperature": self.temperature,
                 "topP": self.top_p,
                 "maxTokens": self.max_tokens,
-                "stopSequences":
+                "stopSequences": self.stop_sequences,
             },
             **self.extra_parameters,
         }
@@ -204,6 +203,9 @@ class BedrockModel(BaseEvalModel):
         elif self.model_id.startswith("anthropic"):
             body = json.loads(response.get("body").read().decode())
             return body.get("completion")
+        elif self.model_id.startswith("amazon"):
+            body = json.loads(response.get("body").read())
+            return body.get("results")[0].get("outputText")
         else:
             body = json.loads(response.get("body").read())
             return body.get("results")[0].get("data").get("outputText")
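
The new branch targets Amazon Titan text models, whose response bodies carry completions under results[0].outputText. A hand-written illustration of the payload shape (not captured output):

    import json

    # Illustrative Titan-style payload; mirrors the parsing in the new
    # "amazon" branch above.
    raw_body = json.dumps({"results": [{"outputText": "The answer is relevant."}]})
    body = json.loads(raw_body)
    print(body.get("results")[0].get("outputText"))  # The answer is relevant.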
phoenix/experimental/evals/models/openai.py
CHANGED

@@ -31,6 +31,8 @@ MODEL_TOKEN_LIMIT_MAPPING = {
     "gpt-4-0613": 8192,  # Current gpt-4 default
     "gpt-4-32k-0314": 32768,
     "gpt-4-32k-0613": 32768,
+    "gpt-4-1106-preview": 128000,
+    "gpt-4-vision-preview": 128000,
 }
 LEGACY_COMPLETION_API_MODELS = ("gpt-3.5-turbo-instruct",)
 logger = logging.getLogger(__name__)
phoenix/experimental/evals/models/vertex.py
CHANGED

@@ -21,6 +21,9 @@ MODEL_TOKEN_LIMIT_MAPPING = {
 
 @dataclass
 class GeminiModel(BaseEvalModel):
+    # The vertex SDK runs into connection pool limits at high concurrency
+    default_concurrency: int = 5
+
     model: str = "gemini-pro"
     """The model name to use."""
     temperature: float = 0.0
@@ -50,6 +53,9 @@ class GeminiModel(BaseEvalModel):
         max_retries=self.max_retries,
     )
 
+    def reload_client(self) -> None:
+        self._init_client()
+
     def _init_client(self) -> None:
         try:
             from google.api_core import exceptions  # type:ignore
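
Combined with the run_evals change in classify.py, the lower Gemini ceiling propagates automatically: with concurrency unset, run_evals takes the minimum default_concurrency across its evaluators. A sketch, where the evaluator class, its expected column names, and the GCP credentials GeminiModel needs are assumptions:

    import pandas as pd
    from phoenix.experimental.evals import GeminiModel, HallucinationEvaluator, run_evals

    # Column names are assumptions about what HallucinationEvaluator expects.
    df = pd.DataFrame(
        {
            "input": ["What is Phoenix?"],
            "reference": ["Phoenix is an ML observability library."],
            "output": ["Phoenix is a database."],
        }
    )

    model = GeminiModel()  # default_concurrency == 5, per the change above

    # With concurrency=None, run_evals uses
    # min(evaluator.default_concurrency for evaluator in evaluators) == 5.
    (hallucination_df,) = run_evals(dataframe=df, evaluators=[HallucinationEvaluator(model)])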
phoenix/experimental/evals/templates/default_templates.py
CHANGED

@@ -73,13 +73,6 @@ your response.
 [END DATA]
 
 Is the answer above factual or hallucinated based on the query and reference text?
-
-Your response should be a single word: either "factual" or "hallucinated", and
-it should not include any other text or characters. "hallucinated" indicates that the answer
-provides factually inaccurate information to the query based on the reference text. "factual"
-indicates that the answer to the question is correct relative to the reference text, and does not
-contain made up information. Please read the query and reference text carefully before determining
-your response.
 """
 HALLUCINATION_PROMPT_TEMPLATE_WITH_EXPLANATION = """
 In this task, you will be presented with a query, a reference text and an answer. The answer is
phoenix/server/static/index.js
CHANGED

@@ -6717,7 +6717,7 @@ fragment SpanEvaluationsTable_evals on Span {
   gap: var(--ac-global-dimension-static-size-200);
 `,children:i.map((o,l)=>x("li",{children:_(ft,{padding:"size-200",backgroundColor:"purple-100",borderColor:"purple-700",borderWidth:"thin",borderRadius:"medium",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"embedded text"}),x("pre",{css:ee`
   margin: var(--ac-global-dimension-static-size-100) 0;
-`,children:o[mtt]})]})},l))})}):null})}function Xxn(t){let{spanAttributes:e}=t,n=(0,br.useMemo)(()=>{let l=e[wr.tool];return typeof l=="object"?l:{}},[e]);if(!(Object.keys(n).length>0))return null;let r=n[vB.name],a=n[vB.description],o=n[vB.parameters];return x(Be,{direction:"column",gap:"size-200",children:x(uu,{title:"Tool"+(typeof r=="string"?`: ${r}`:""),...eg,children:_(Be,{direction:"column",children:[a!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",backgroundColor:"light",children:_(Be,{direction:"column",alignItems:"start",gap:"size-50",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Description"}),x(Me,{children:a})]})}):null,o!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",children:_(Be,{direction:"column",alignItems:"start",width:"100%",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Parameters"}),x(Tc,{value:JSON.stringify(o),mimeType:"json"})]})}):null]})})})}var Sxn=["irrelevant"];function Gse({document:t,documentEvaluations:e,backgroundColor:n,borderColor:i,labelColor:r}){let a=t[htt],o=e&&e.length;return x(ft,{borderRadius:"medium",backgroundColor:n,borderColor:i,borderWidth:"thin",children:_(Be,{direction:"column",children:[x(ft,{width:"100%",borderBottomWidth:"thin",borderBottomColor:i,children:_(Be,{direction:"row",justifyContent:"space-between",margin:"size-200",alignItems:"center",children:[_(Be,{direction:"row",gap:"size-50",alignItems:"center",children:[x(pt,{svg:x(Et.FileOutline,{})}),_(Nn,{level:4,children:["document ",t[Itt]]})]}),typeof t[Wse]=="number"&&x(Zs,{color:r,children:`score ${mh(t[Wse])}`})]})}),x("pre",{css:ee`
+`,children:o[mtt]})]})},l))})}):null})}function Xxn(t){let{spanAttributes:e}=t,n=(0,br.useMemo)(()=>{let l=e[wr.tool];return typeof l=="object"?l:{}},[e]);if(!(Object.keys(n).length>0))return null;let r=n[vB.name],a=n[vB.description],o=n[vB.parameters];return x(Be,{direction:"column",gap:"size-200",children:x(uu,{title:"Tool"+(typeof r=="string"?`: ${r}`:""),...eg,children:_(Be,{direction:"column",children:[a!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",backgroundColor:"light",children:_(Be,{direction:"column",alignItems:"start",gap:"size-50",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Description"}),x(Me,{children:a})]})}):null,o!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",children:_(Be,{direction:"column",alignItems:"start",width:"100%",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Parameters"}),x(Tc,{value:JSON.stringify(o),mimeType:"json"})]})}):null]})})})}var Sxn=["irrelevant","unrelated"];function Gse({document:t,documentEvaluations:e,backgroundColor:n,borderColor:i,labelColor:r}){let a=t[htt],o=e&&e.length;return x(ft,{borderRadius:"medium",backgroundColor:n,borderColor:i,borderWidth:"thin",children:_(Be,{direction:"column",children:[x(ft,{width:"100%",borderBottomWidth:"thin",borderBottomColor:i,children:_(Be,{direction:"row",justifyContent:"space-between",margin:"size-200",alignItems:"center",children:[_(Be,{direction:"row",gap:"size-50",alignItems:"center",children:[x(pt,{svg:x(Et.FileOutline,{})}),_(Nn,{level:4,children:["document ",t[Itt]]})]}),typeof t[Wse]=="number"&&x(Zs,{color:r,children:`score ${mh(t[Wse])}`})]})}),x("pre",{css:ee`
   padding: var(--ac-global-dimension-static-size-200);
   white-space: normal;
   margin: 0;
phoenix/session/evaluation.py
CHANGED

@@ -9,6 +9,7 @@ import math
 from time import sleep
 from typing import (
     Any,
+    Iterator,
     Optional,
     Sequence,
     Tuple,
@@ -33,24 +34,29 @@ __all__ = [
 from phoenix.trace.span_evaluations import Evaluations
 
 
-def
-
-evaluations
-
-
-    index_names = evaluations.index.names
-    for index, row in evaluations.iterrows():
+def encode_evaluations(evaluations: Evaluations) -> Iterator[pb.Evaluation]:
+    dataframe = evaluations.dataframe
+    eval_name = evaluations.eval_name
+    index_names = dataframe.index.names
+    for index, row in dataframe.iterrows():
         subject_id = _extract_subject_id_from_index(
             index_names,
             cast(Union[str, Tuple[Any]], index),
         )
         if (result := _extract_result(row)) is None:
             continue
-
-        name=
+        yield pb.Evaluation(
+            name=eval_name,
             result=result,
             subject_id=subject_id,
         )
+
+
+def add_evaluations(
+    exporter: HttpExporter,
+    evaluations: Evaluations,
+) -> None:
+    for evaluation in encode_evaluations(evaluations):
         exporter.export(evaluation)
 
 
@@ -130,7 +136,7 @@ def log_evaluations(
         return
     exporter = HttpExporter(endpoint=endpoint, host=host, port=port)
     for eval in filter(bool, evals):
-        add_evaluations(exporter, eval
+        add_evaluations(exporter, eval)
     with tqdm(total=n, desc="Sending Evaluations") as pbar:
         while n:
             sleep(0.1)
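
A sketch of the refactored pipeline end to end: build a SpanEvaluations object and ship it to a running Phoenix collector via log_evaluations, which iterates encode_evaluations under the hood (the span ID and scores are made up):

    import pandas as pd
    from phoenix.session.evaluation import log_evaluations
    from phoenix.trace.span_evaluations import SpanEvaluations

    evals_df = pd.DataFrame(
        {"label": ["relevant"], "score": [1.0]},
        index=pd.Index(["7e2f08cb43bbbf29"], name="context.span_id"),
    )

    # Each row is encoded as a pb.Evaluation and exported over HTTP.
    log_evaluations(SpanEvaluations(eval_name="relevance", dataframe=evals_df))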
phoenix/session/session.py
CHANGED

@@ -30,6 +30,7 @@ from phoenix.pointcloud.umap_parameters import get_umap_parameters
 from phoenix.server.app import create_app
 from phoenix.server.thread_server import ThreadServer
 from phoenix.services import AppService
+from phoenix.session.evaluation import encode_evaluations
 from phoenix.trace.dsl import SpanFilter
 from phoenix.trace.dsl.query import SpanQuery
 from phoenix.trace.otel import encode
@@ -46,6 +47,8 @@ logger = logging.getLogger(__name__)
 # type workaround
 # https://github.com/python/mypy/issues/5264#issuecomment-399407428
 if TYPE_CHECKING:
+    from phoenix.trace import Evaluations
+
     _BaseList = UserList[pd.DataFrame]
 else:
     _BaseList = UserList
@@ -123,6 +126,10 @@ class Session(ABC):
                 self.traces.put(encode(span))
 
         self.evals: Evals = Evals()
+        if trace_dataset:
+            for evaluations in trace_dataset.evaluations:
+                for pb_evaluation in encode_evaluations(evaluations):
+                    self.evals.put(pb_evaluation)
 
         self.host = host or get_env_host()
         self.port = port or get_env_port()
@@ -213,6 +220,15 @@ class Session(ABC):
             return None
         return pd.json_normalize(data, max_level=1).set_index("context.span_id", drop=False)
 
+    def get_evaluations(self) -> List["Evaluations"]:
+        return self.evals.export_evaluations()
+
+    def get_trace_dataset(self) -> Optional[TraceDataset]:
+        if (dataframe := self.get_spans_dataframe()) is None:
+            return None
+        evaluations = self.get_evaluations()
+        return TraceDataset(dataframe=dataframe, evaluations=evaluations)
+
 
 _session: Optional[Session] = None
 
@@ -479,6 +495,9 @@ def _get_url(host: str, port: int, notebook_env: NotebookEnvironment) -> str:
     if notebook_env == NotebookEnvironment.DATABRICKS:
         context = _get_databricks_context()
         return f"{_get_databricks_notebook_base_url(context)}/{port}/"
+    if host == "0.0.0.0" or host == "127.0.0.1":
+        # The app is running locally, so use localhost
+        return f"http://localhost:{port}/"
     return f"http://{host}:{port}/"
 
 
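
Together these additions let a user snapshot a running session, evals included. A minimal sketch:

    import phoenix as px

    session = px.active_session()  # assumes an app was launched earlier
    if (trace_ds := session.get_trace_dataset()) is not None:
        dataset_id = trace_ds.save()  # persists spans plus attached evaluations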
phoenix/trace/errors.py
ADDED

phoenix/trace/span_evaluations.py
CHANGED

@@ -4,21 +4,18 @@ from dataclasses import dataclass, field
 from itertools import product
 from pathlib import Path
 from types import MappingProxyType
-from typing import Any, Callable,
+from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Type, Union
 from uuid import UUID, uuid4
 
 import pandas as pd
 from pandas.api.types import is_integer_dtype, is_numeric_dtype, is_string_dtype
-from pyarrow import Table, parquet
+from pyarrow import Schema, Table, parquet
 
 from phoenix.config import TRACE_DATASET_DIR
-from phoenix.
+from phoenix.trace.errors import InvalidParquetMetadataError
 
 EVAL_NAME_COLUMN_PREFIX = "eval."
-
-
-class InvalidParquetMetadataError(PhoenixException):
-    pass
+EVAL_PARQUET_FILE_NAME = "evaluations-{id}.parquet"
 
 
 class NeedsNamedIndex(ABC):
@@ -164,20 +161,21 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
         tuple(sorted(prod)) for prod in product(*cls.index_names.keys())
     )
 
-    def
-        """
+    def save(self, directory: Optional[Union[str, Path]] = None) -> UUID:
+        """
+        Persists the evaluations to disk.
 
         Args:
             directory (Optional[Union[str, Path]], optional): An optional path
-                to a directory where the
-
+                to a directory where the data will be saved. If not provided, the
+                data will be saved to a default location.
 
         Returns:
-
-
+            UUID: The ID of the evaluations, which can be used as a key to load
+                the evaluations from disk using `load`.
         """
         directory = Path(directory) if directory else TRACE_DATASET_DIR
-        path = directory /
+        path = directory / EVAL_PARQUET_FILE_NAME.format(id=self.id)
         table = Table.from_pandas(self.dataframe)
         table = table.replace_schema_metadata(
             {
@@ -193,23 +191,38 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
             }
         )
         parquet.write_table(table, path)
-        return
+        return self.id
 
     @classmethod
-    def
-
+    def load(
+        cls, id: Union[str, UUID], directory: Optional[Union[str, Path]] = None
+    ) -> "Evaluations":
+        """
+        Loads the evaluations from disk.
 
         Args:
-
-
+            id (Union[str, UUID]): The ID of the evaluations to load.
+
+            directory(Optional[Union[str, Path]], optional): The path to the
+                directory containing the persisted evaluations. If not provided, the
+                parquet file will be loaded from the same default location used by
+                `save`.
 
         Returns:
             Evaluations: The loaded evaluations. The type of the returned
                 evaluations will be the same as the type of the evaluations that
                 were originally persisted.
         """
+        if not isinstance(id, UUID):
+            id = UUID(id)
+        path = Path(directory or TRACE_DATASET_DIR) / EVAL_PARQUET_FILE_NAME.format(id=id)
         schema = parquet.read_schema(path)
-        eval_id, eval_name, evaluations_cls = _parse_schema_metadata(schema
+        eval_id, eval_name, evaluations_cls = _parse_schema_metadata(schema)
+        if id != eval_id:
+            raise InvalidParquetMetadataError(
+                f"The input id {id} does not match the id {eval_id} in the parquet metadata. "
+                "Ensure that you have not renamed the parquet file."
+            )
         table = parquet.read_table(path)
         dataframe = table.to_pandas()
         evaluations = evaluations_cls(eval_name=eval_name, dataframe=dataframe)
@@ -301,48 +314,20 @@ class TraceEvaluations(
     ...
 
 
-def _parse_schema_metadata(
-    """
-
-
-    Args:
-        metadata (Dict[bytes, Any]): A dictionary of schema metadata from a
-            parquet file.
-
-    Returns:
-        Tuple[str, ModuleType]: The evaluation name and the evaluations class.
+def _parse_schema_metadata(schema: Schema) -> Tuple[UUID, str, Type[Evaluations]]:
+    """
+    Validates and parses the pyarrow schema metadata.
     """
-    if not (arize_metadata_json := metadata.get(b"arize")):
-        raise InvalidParquetMetadataError('Schema metadata is missing "arize" key')
     try:
-
-
+        metadata = schema.metadata
+        arize_metadata = json.loads(metadata[b"arize"])
+        eval_classes = {subclass.__name__: subclass for subclass in Evaluations.__subclasses__()}
+        eval_id = UUID(arize_metadata["eval_id"])
+        if not isinstance((eval_name := arize_metadata["eval_name"]), str):
+            raise ValueError('Arize metadata must contain a string value for key "eval_name"')
+        evaluations_cls = eval_classes[arize_metadata["eval_type"]]
+        return eval_id, eval_name, evaluations_cls
+    except Exception as err:
         raise InvalidParquetMetadataError(
-
+            "An error occurred while parsing parquet schema metadata"
         ) from err
-    evaluations_classes = {subclass.__name__: subclass for subclass in Evaluations.__subclasses__()}
-    if not (
-        isinstance(arize_metadata, dict)
-        and (eval_id := _to_uuid(arize_metadata.get("eval_id")))
-        and isinstance(eval_name := arize_metadata.get("eval_name"), str)
-        and (eval_type := arize_metadata.get("eval_type"))
-        and (evaluations_cls := evaluations_classes.get(eval_type))
-    ):
-        raise InvalidParquetMetadataError(f"Invalid Arize metadata: {arize_metadata}")
-    return eval_id, eval_name, evaluations_cls
-
-
-def _to_uuid(value: Any) -> Optional[UUID]:
-    """
-    Converts an input to a UUID if possible, otherwise returns None.
-
-    Args:
-        value (Any): The value to convert to a UUID.
-
-    Returns:
-        Optional[UUID]: A UUID if the value could be converted, otherwise None.
-    """
-    try:
-        return UUID(value)
-    except Exception:
-        return None
phoenix/trace/trace_dataset.py
CHANGED

@@ -1,14 +1,18 @@
 import json
-import uuid
 from datetime import datetime
-from
+from pathlib import Path
+from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union, cast
+from uuid import UUID, uuid4
+from warnings import warn
 
 import pandas as pd
 from pandas import DataFrame, read_parquet
+from pyarrow import Schema, Table, parquet
 
 from phoenix.datetime_utils import normalize_timestamps
+from phoenix.trace.errors import InvalidParquetMetadataError
 
-from ..config import DATASET_DIR, GENERATED_DATASET_NAME_PREFIX
+from ..config import DATASET_DIR, GENERATED_DATASET_NAME_PREFIX, TRACE_DATASET_DIR
 from .schemas import ATTRIBUTE_PREFIX, CONTEXT_PREFIX, Span
 from .semantic_conventions import (
     DOCUMENT_METADATA,
@@ -43,6 +47,8 @@ DOCUMENT_COLUMNS = [
     RERANKER_OUTPUT_DOCUMENTS_COLUMN_NAME,
 ]
 
+TRACE_DATASET_PARQUET_FILE_NAME = "trace_dataset-{id}.parquet"
+
 
 def normalize_dataframe(dataframe: DataFrame) -> "DataFrame":
     """Makes the dataframe have appropriate data types"""
@@ -94,6 +100,7 @@ class TraceDataset:
     name: str
     dataframe: pd.DataFrame
     evaluations: List[Evaluations] = []
+    _id: UUID = uuid4()
     _data_file_name: str = "data.parquet"
 
     def __init__(
@@ -122,7 +129,7 @@ class TraceDataset:
                 f"The dataframe is missing some required columns: {', '.join(missing_columns)}"
             )
         self.dataframe = normalize_dataframe(dataframe)
-        self.name = name or f"{GENERATED_DATASET_NAME_PREFIX}{str(
+        self.name = name or f"{GENERATED_DATASET_NAME_PREFIX}{str(uuid4())}"
         self.evaluations = list(evaluations)
 
     @classmethod
@@ -199,6 +206,89 @@ class TraceDataset:
             coerce_timestamps="ms",
         )
 
+    def save(self, directory: Optional[Union[str, Path]] = None) -> UUID:
+        """
+        Writes the trace dataset to disk. If any evaluations have been appended
+        to the dataset, those evaluations will be saved to separate files within
+        the same directory.
+
+        Args:
+            directory (Optional[Union[str, Path]], optional): An optional path
+                to a directory where the data will be written. If not provided, the
+                data will be written to a default location.
+
+        Returns:
+            UUID: The id of the trace dataset, which can be used as key to load
+                the dataset from disk using `load`.
+        """
+        directory = Path(directory or TRACE_DATASET_DIR)
+        for evals in self.evaluations:
+            evals.save(directory)
+        path = directory / TRACE_DATASET_PARQUET_FILE_NAME.format(id=self._id)
+        dataframe = get_serializable_spans_dataframe(self.dataframe)
+        dataframe.to_parquet(
+            path,
+            allow_truncated_timestamps=True,
+            coerce_timestamps="ms",
+        )
+        table = Table.from_pandas(self.dataframe)
+        table = table.replace_schema_metadata(
+            {
+                **(table.schema.metadata or {}),
+                # explicitly encode keys and values, which are automatically encoded regardless
+                b"arize": json.dumps(
+                    {
+                        "dataset_id": str(self._id),
+                        "dataset_name": self.name,
+                        "eval_ids": [str(evals.id) for evals in self.evaluations],
+                    }
+                ).encode("utf-8"),
+            }
+        )
+        parquet.write_table(table, path)
+        return self._id
+
+    @classmethod
+    def load(
+        cls, id: Union[str, UUID], directory: Optional[Union[str, Path]] = None
+    ) -> "TraceDataset":
+        """
+        Reads in a trace dataset from disk. Any associated evaluations will
+        automatically be read from disk and attached to the trace dataset.
+
+        Args:
+            id (Union[str, UUID]): The ID of the trace dataset to be loaded.
+
+            directory (Optional[Union[str, Path]], optional): The path to the
+                directory containing the persisted trace dataset parquet file. If
+                not provided, the parquet file will be loaded from the same default
+                location used by `save`.
+
+        Returns:
+            TraceDataset: The loaded trace dataset.
+        """
+        if not isinstance(id, UUID):
+            id = UUID(id)
+        path = Path(directory or TRACE_DATASET_DIR) / TRACE_DATASET_PARQUET_FILE_NAME.format(id=id)
+        schema = parquet.read_schema(path)
+        dataset_id, dataset_name, eval_ids = _parse_schema_metadata(schema)
+        if id != dataset_id:
+            raise InvalidParquetMetadataError(
+                f"The input id {id} does not match the id {dataset_id} in the parquet metadata. "
+                "Ensure that you have not renamed the parquet file."
+            )
+        evaluations = []
+        for eval_id in eval_ids:
+            try:
+                evaluations.append(Evaluations.load(eval_id, path.parent))
+            except Exception:
+                warn(f'Failed to load evaluations with id: "{eval_id}"')
+        table = parquet.read_table(path)
+        dataframe = table.to_pandas()
+        ds = cls(dataframe, dataset_name, evaluations)
+        ds._id = dataset_id
+        return ds
+
     def append_evaluations(self, evaluations: Evaluations) -> None:
         """adds an evaluation to the traces"""
         # Append the evaluations to the list of evaluations
@@ -233,3 +323,20 @@ class TraceDataset:
         # Make sure the index is set to the span_id
         df = self.dataframe.set_index("context.span_id", drop=False)
         return pd.concat([df, evals_df], axis=1)
+
+
+def _parse_schema_metadata(schema: Schema) -> Tuple[UUID, str, List[UUID]]:
+    """
+    Returns parsed metadata from a parquet schema or raises an exception if the
+    metadata is invalid.
+    """
+    try:
+        metadata = schema.metadata
+        arize_metadata = json.loads(metadata[b"arize"])
+        dataset_id = UUID(arize_metadata["dataset_id"])
+        if not isinstance(dataset_name := arize_metadata["dataset_name"], str):
+            raise ValueError("Arize metadata must contain a dataset_name key with string value")
+        eval_ids = [UUID(eval_id) for eval_id in arize_metadata["eval_ids"]]
+        return dataset_id, dataset_name, eval_ids
+    except Exception as err:
+        raise InvalidParquetMetadataError("Unable to parse parquet metadata") from err
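
The dataset-level API follows the same ID-keyed pattern; a sketch of the intended round trip, assuming ds is an existing TraceDataset with evaluations appended via append_evaluations:

    from phoenix.trace.trace_dataset import TraceDataset

    dataset_id = ds.save()  # evaluations are written alongside the dataset parquet

    # Later, e.g. in a fresh process, spans and evaluations come back together.
    restored = TraceDataset.load(dataset_id)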
phoenix/version.py
CHANGED

@@ -1 +1 @@
-__version__ = "2.5.0"
+__version__ = "2.7.0"

{arize_phoenix-2.5.0.dist-info → arize_phoenix-2.7.0.dist-info}/WHEEL
File without changes

{arize_phoenix-2.5.0.dist-info → arize_phoenix-2.7.0.dist-info}/licenses/IP_NOTICE
File without changes

{arize_phoenix-2.5.0.dist-info → arize_phoenix-2.7.0.dist-info}/licenses/LICENSE
File without changes