PyPI - arize-phoenix - Versions diffs - 4.4.4rc3__py3-none-any.whl → 4.4.4rc4__py3-none-any.whl - Mend

arize-phoenix 4.4.4rc3__py3-none-any.whl → 4.4.4rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

{arize_phoenix-4.4.4rc3.dist-info → arize_phoenix-4.4.4rc4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: arize-phoenix
-Version: 4.4.4rc3
+Version: 4.4.4rc4
 Summary: AI Observability and Evaluation
 Project-URL: Documentation, https://docs.arize.com/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues

{arize_phoenix-4.4.4rc3.dist-info → arize_phoenix-4.4.4rc4.dist-info}/RECORD RENAMED Viewed

@@ -5,17 +5,20 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
 phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
 phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
-phoenix/version.py,sha256=vkzG2Z0dkYNWJYkiDnpu7yJxir6A-qjTBfFVeklU7TY,25
+phoenix/version.py,sha256=NZ2gYPUT2LKOK3V9-dZJ34v1J27mnLmDtx-pKAXd1W0,25
 phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
 phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
 phoenix/core/model_schema.py,sha256=F2dbbVnkDLsPYoyZDv1q03uhvP8LcU1wXp0g-exiWs0,50551
 phoenix/core/model_schema_adapter.py,sha256=0Tm_Y_gV-WED8fKBCaFXAEFwE3CTEZS1dowqnTZ7x7g,8426
 phoenix/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-phoenix/datasets/evaluators.py,sha256=_ezCRB6cyhuJsxsQJRFt2CKg3vqV-KgtBi9NNtkdeJQ,10410
-phoenix/datasets/experiments.py,sha256=D1gmdCbOC7tkjrFjyC_WPNFyY41YrqKESsPE0CebTtc,19223
+phoenix/datasets/experiments.py,sha256=MhuhJWJ-bBqZ_aR3FewudEeo6RUrLgm0hmDlGjWVsrU,19314
 phoenix/datasets/tracing.py,sha256=Ieb2Uo-9qHpmv65uf1VsFSsWo5Yxj6VHwGS6dxu9NHQ,2248
 phoenix/datasets/types.py,sha256=w0KoSP7AdlcFlV3I6qVtvKOOWoK0yiY6_s4CvH0flcs,5753
+phoenix/datasets/evaluators/__init__.py,sha256=KSr9fNG4O93swYxNdPj_UihP9Itl_5mj0a492wi_4_0,465
+phoenix/datasets/evaluators/_utils.py,sha256=-MaNdoN1hA3FLzLyIDplUUkUtmM56BMIV83Gh-sgAsU,436
+phoenix/datasets/evaluators/code_evaluators.py,sha256=fwoKfyHD7_xBaHY8Ax78xcry7PtB8Y1FxIn82guAV5M,4640
+phoenix/datasets/evaluators/llm_evaluators.py,sha256=Ghg3bIBtQCdd6LuQ6VdcbkNQKI9ouZXwjlJV5GcdxOg,8675
 phoenix/db/README.md,sha256=IvKaZyf9ECbGBYYePaRhBveKZwDbxAc-c7BMxJYZh6Q,595
 phoenix/db/__init__.py,sha256=pDjEFXukHmJBM-1D8RjmXkvLsz85YWNxMQczt81ec3A,118
 phoenix/db/alembic.ini,sha256=p8DjVqGUs_tTx8oU56JP7qj-rMUebNFizItUSv_hPhs,3763
@@ -173,7 +176,7 @@ phoenix/server/api/types/Retrieval.py,sha256=OhMK2ncjoyp5h1yjKhjlKpoTbQrMHuxmgSF
 phoenix/server/api/types/ScalarDriftMetricEnum.py,sha256=IUAcRPpgL41WdoIgK6cNk2Te38SspXGyEs-S1fY23_A,232
 phoenix/server/api/types/Segments.py,sha256=m2yoegrxA1Tn7ZAy1rMjjD1isc752MaAXMoffkBlvrM,2921
 phoenix/server/api/types/SortDir.py,sha256=OUpXhlCzCxPoXSDkJJygEs9Rw9pMymfaZUG5zPTrw4Y,152
-phoenix/server/api/types/Span.py,sha256=Nk0Of6JyHSI7OqrEodyV3d5UUvzCWnDkNSZUcmCvq-I,13837
+phoenix/server/api/types/Span.py,sha256=W4Rsg85bgqbDhgYwpjgOTrIQKbkwpFQPpL6nqMyzhCs,13865
 phoenix/server/api/types/TimeSeries.py,sha256=wjzuxHFqCey0O7Ys25qiXyuqXK8an-osyNWUE8A_8G4,5227
 phoenix/server/api/types/Trace.py,sha256=ep-mPexub1ijxAnBvc2KrGsNVXO2SfDR1WxqER2wcD8,2376
 phoenix/server/api/types/UMAPPoints.py,sha256=5sOuruzM8saXa8C2XiyUfk2XPrkVGmhqKpclMYRw1dk,1656
@@ -194,7 +197,7 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
 phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
 phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
 phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
-phoenix/server/static/index.js,sha256=88OQ_pBKrFdD5usFU6Frpm1vBzxL19zO4JS9ChoHWEo,3487681
+phoenix/server/static/index.js,sha256=n8qF_l7ijW-7E8m63oViD8SpXOYjN3wvZUhgB8H6ZLo,3489949
 phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
 phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 phoenix/server/templates/index.html,sha256=S4z7qSoNSwnKFAH9r96AR-YJEyoKMd-VMWVlJ_IdzME,2039
@@ -211,7 +214,7 @@ phoenix/trace/exporter.py,sha256=eAYemdvDCHMugDJiaR29BFFMTQBdf3oerdkz34Cl3hE,473
 phoenix/trace/fixtures.py,sha256=gBGFG2gkcBsSDzolzzR9AJDrB_fdOQfUaGgHV-EHdco,14204
 phoenix/trace/otel.py,sha256=WA720jvRadiZBAKjsYoPyXzypHwbyEK2OZRVUwtbjB8,9976
 phoenix/trace/projects.py,sha256=2BwlNjFE-uwpqYtCu5YyBiYZk9wRPpM13vh3-Cv7GkA,2157
-phoenix/trace/schemas.py,sha256=JiFKhGD2JF6Eai7UOhPF5urcuKGkpMLHc3Vltbe1msk,5967
+phoenix/trace/schemas.py,sha256=Mjc6fD9OyeMnEk5wPPSbveqnNUYWK3p3BxpOvSGanHU,5950
 phoenix/trace/span_evaluations.py,sha256=GaADtJLi2njra4aYaie0BIwkSgdxPB_SNseglI4ykZA,13104
 phoenix/trace/span_json_decoder.py,sha256=IAFakPRqSMYxTPKYFMiXYxm7U-FipdN8_xbvapDS0Qc,3131
 phoenix/trace/span_json_encoder.py,sha256=tzSCIQJbeFBm33K68G8A5M12n_86tCDyuU0WAobxEz4,2010
@@ -239,8 +242,8 @@ phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,
 phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
 phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
 phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arize_phoenix-4.4.4rc3.dist-info/METADATA,sha256=VuX8kXsqxcbsdYmi9-jCDMHgMJ182JMbDYCY-3N74jU,11012
-arize_phoenix-4.4.4rc3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-arize_phoenix-4.4.4rc3.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
-arize_phoenix-4.4.4rc3.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
-arize_phoenix-4.4.4rc3.dist-info/RECORD,,
+arize_phoenix-4.4.4rc4.dist-info/METADATA,sha256=YEUoxXSRba4zRgzM8-lcq7TIp9GNPZSjY_QGoyIJN-w,11012
+arize_phoenix-4.4.4rc4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+arize_phoenix-4.4.4rc4.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
+arize_phoenix-4.4.4rc4.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
+arize_phoenix-4.4.4rc4.dist-info/RECORD,,

phoenix/datasets/evaluators/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+from phoenix.datasets.evaluators.code_evaluators import ContainsKeyword, JSONParsable
+from phoenix.datasets.evaluators.llm_evaluators import (
+    CoherenceEvaluator,
+    ConcisenessEvaluator,
+    HelpfulnessEvaluator,
+    LLMCriteriaEvaluator,
+    RelevanceEvaluator,
+)
+__all__ = [
+    "ContainsKeyword",
+    "JSONParsable",
+    "CoherenceEvaluator",
+    "ConcisenessEvaluator",
+    "LLMCriteriaEvaluator",
+    "HelpfulnessEvaluator",
+    "RelevanceEvaluator",
+]

phoenix/datasets/evaluators/_utils.py ADDED Viewed

@@ -0,0 +1,13 @@
+from phoenix.datasets.types import JSONSerializable
+def _unwrap_json(obj: JSONSerializable) -> JSONSerializable:
+    if isinstance(obj, dict):
+        if len(obj) == 1:
+            key = next(iter(obj.keys()))
+            output = obj[key]
+            assert isinstance(
+                output, (dict, list, str, int, float, bool, type(None))
+            ), "Output must be JSON serializable"
+            return output
+    return obj

phoenix/datasets/evaluators/code_evaluators.py ADDED Viewed

@@ -0,0 +1,127 @@
+from __future__ import annotations
+import json
+import re
+from typing import TYPE_CHECKING, List, Optional, Union
+from phoenix.datasets.evaluators._utils import _unwrap_json
+from phoenix.datasets.types import EvaluationResult, Example, ExperimentEvaluator, ExperimentRun
+class JSONParsable:
+    annotator_kind = "CODE"
+    name = "JSONParsable"
+    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
+        assert exp_run.output is not None
+        output = _unwrap_json(exp_run.output.result)
+        assert isinstance(output, str), "Experiment run output must be a string"
+        try:
+            json.loads(output)
+            json_parsable = True
+        except BaseException:
+            json_parsable = False
+        return EvaluationResult(
+            score=int(json_parsable),
+        )
+class ContainsKeyword:
+    annotator_kind = "CODE"
+    def __init__(self, keyword: str, name: Optional[str] = None) -> None:
+        self.keyword = keyword
+        self.name = name or f"Contains({repr(keyword)})"
+    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
+        assert exp_run.output is not None
+        result = _unwrap_json(exp_run.output.result)
+        assert isinstance(result, str), "Experiment run output must be a string"
+        found = self.keyword in result
+        return EvaluationResult(
+            score=float(found),
+            explanation=(
+                f"the string {repr(self.keyword)} was "
+                f"{'found' if found else 'not found'} in the output"
+            ),
+        )
+class ContainsAnyKeyword:
+    annotator_kind = "CODE"
+    def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
+        self.keywords = keywords
+        self.name = name or f"ContainsAny({keywords})"
+    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
+        assert exp_run.output is not None
+        result = _unwrap_json(exp_run.output.result)
+        assert isinstance(result, str), "Experiment run output must be a string"
+        found = [keyword for keyword in self.keywords if keyword in result]
+        if found:
+            explanation = f"the keywords {found} were found in the output"
+        else:
+            explanation = f"none of the keywords {self.keywords} were found in the output"
+        return EvaluationResult(
+            score=float(bool(found)),
+            explanation=explanation,
+        )
+class ContainsAllKeywords:
+    annotator_kind = "CODE"
+    def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
+        self.keywords = keywords
+        self.name = name or f"ContainsAll({keywords})"
+    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
+        assert exp_run.output is not None
+        result = _unwrap_json(exp_run.output.result)
+        assert isinstance(result, str), "Experiment run output must be a string"
+        not_found = [keyword for keyword in self.keywords if keyword not in result]
+        if not_found:
+            contains_all = False
+            explanation = f"the keywords {not_found} were not found in the output"
+        else:
+            contains_all = True
+            explanation = f"all of the keywords {self.keywords} were found in the output"
+        return EvaluationResult(
+            score=float(contains_all),
+            explanation=explanation,
+        )
+class MatchesRegex:
+    annotator_kind = "CODE"
+    def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
+        if isinstance(pattern, str):
+            pattern = re.compile(pattern)
+        self.pattern = pattern
+        assert isinstance(pattern, re.Pattern)
+        self.name = name or f"matches_({pattern})"
+    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
+        assert exp_run.output is not None
+        result = _unwrap_json(exp_run.output.result)
+        assert isinstance(result, str), "Experiment run output must be a string"
+        matches = self.pattern.findall(result)
+        if matches:
+            explanation = (
+                f"the substrings {matches} matched the regex pattern {self.pattern.pattern}"
+            )
+        else:
+            explanation = f"no substrings matched the regex pattern {self.pattern.pattern}"
+        return EvaluationResult(
+            score=float(bool(matches)),
+            explanation=explanation,
+        )
+# Someday we'll do typing checking in unit tests.
+if TYPE_CHECKING:
+    _: ExperimentEvaluator
+    _ = JSONParsable()
+    _ = ContainsKeyword("test")

phoenix/datasets/{evaluators.py → evaluators/llm_evaluators.py} RENAMED Viewed

@@ -1,70 +1,12 @@
-import json
 import re
-from typing import TYPE_CHECKING, Callable, Optional, Type
+from typing import Callable, Optional, Type
-from phoenix.datasets.types import (
-    EvaluationResult,
-    Example,
-    ExperimentEvaluator,
-    ExperimentRun,
-    JSONSerializable,
-)
+from phoenix.datasets.evaluators._utils import _unwrap_json
+from phoenix.datasets.types import EvaluationResult, Example, ExperimentEvaluator, ExperimentRun
 from phoenix.evals.models.base import BaseModel as LLMBaseModel
 from phoenix.evals.utils import snap_to_rail
-def _unwrap_json(obj: JSONSerializable) -> JSONSerializable:
-    if isinstance(obj, dict):
-        if len(obj) == 1:
-            key = next(iter(obj.keys()))
-            output = obj[key]
-            assert isinstance(
-                output, (dict, list, str, int, float, bool, type(None))
-            ), "Output must be JSON serializable"
-            return output
-    return obj
-class JSONParsable:
-    annotator_kind = "CODE"
-    name = "JSONParsable"
-    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        assert exp_run.output is not None
-        output = _unwrap_json(exp_run.output.result)
-        assert isinstance(output, str), "Experiment run output must be a string"
-        try:
-            json.loads(output)
-            json_parsable = True
-        except BaseException:
-            json_parsable = False
-        return EvaluationResult(
-            score=int(json_parsable),
-        )
-class ContainsKeyword:
-    annotator_kind = "CODE"
-    def __init__(self, keyword: str) -> None:
-        super().__init__()
-        self.keyword = keyword
-        self.name = f"ContainsKeyword({keyword})"
-    def evaluate(self, example: Example, exp_run: ExperimentRun) -> EvaluationResult:
-        assert exp_run.output is not None
-        result = _unwrap_json(exp_run.output.result)
-        assert isinstance(result, str), "Experiment run output must be a string"
-        found = self.keyword in result
-        return EvaluationResult(
-            score=float(found),
-            explanation=(
-                f"the string {repr(self.keyword)} was "
-                f"{'found' if found else 'not found'} in the output"
-            ),
-        )
 class LLMCriteriaEvaluator:
     annotator_kind = "LLM"
     _base_template = (
@@ -77,7 +19,7 @@ class LLMCriteriaEvaluator:
         "EXPLANATION: *a step by step explanation of your reasoning for whether the text meets "
         "the criteria*\n"
         "LABEL: *true or false*\n\n"
-        "Follow this template for the following text:\n\n"
+        "Follow this template for the following example:\n\n"
         "CRITERIA: the text is '{criteria}'\n"
         "TEXT: {text}\n"
         "EXPLANATION: "
@@ -142,40 +84,43 @@ class LLMCriteriaEvaluator:
 def criteria_evaluator_factory(
-    class_name: str, criteria: str, description: str
+    class_name: str, criteria: str, description: str, default_name: str
 ) -> Type[ExperimentEvaluator]:
+    def _init(self, model: LLMBaseModel, name: str = default_name) -> None:  # type: ignore
+        LLMCriteriaEvaluator.__init__(self, model, criteria, description, name=name)
     return type(
         class_name,
         (LLMCriteriaEvaluator,),
         {
-            "__init__": lambda self, model: LLMCriteriaEvaluator.__init__(
-                self, model, criteria, description, name=class_name
-            ),
+            "__init__": _init,
             "__module__": __name__,
-            "name": class_name,
             "template": LLMCriteriaEvaluator._format_base_template(criteria, description),
         },
     )
-LLMConcisenessEvaluator = criteria_evaluator_factory(
-    class_name="LLMConcisenessEvaluator",
+ConcisenessEvaluator = criteria_evaluator_factory(
+    class_name="ConcisenessEvaluator",
     criteria="concise",
     description="is just a few sentences and easy to follow",
+    default_name="Conciseness",
 )
-LLMHelpfulnessEvaluator = criteria_evaluator_factory(
-    class_name="LLMHelpfulnessEvaluator",
+HelpfulnessEvaluator = criteria_evaluator_factory(
+    class_name="HelpfulnessEvaluator",
     criteria="helpful",
     description="provides useful information",
+    default_name="Helpfulness",
 )
-LLMCoherenceEvaluator = criteria_evaluator_factory(
-    class_name="LLMCoherenceEvaluator",
+CoherenceEvaluator = criteria_evaluator_factory(
+    class_name="CoherenceEvaluator",
     criteria="coherent",
-    description="is coherent, well-structured, and organized",
+    description="is coherent, well-structured, and logically sound",
+    default_name="Coherence",
 )
@@ -266,10 +211,3 @@ class RelevanceEvaluator:
         formatted_template = self._format_eval_template(example, exp_run)
         unparsed_response = await self.model._async_generate(formatted_template)
         return self._parse_eval_output(unparsed_response)
-# Someday we'll do typing checking in unit tests.
-if TYPE_CHECKING:
-    _: ExperimentEvaluator
-    _ = JSONParsable()
-    _ = ContainsKeyword("test")

phoenix/datasets/experiments.py CHANGED Viewed

@@ -458,6 +458,7 @@ def _evaluate_experiment(
         max_retries=0,
         exit_on_error=False,
         fallback_return_value=None,
+        tqdm_bar_format=get_tqdm_progress_bar_formatter("running experiment evaluations"),
     )
     evaluation_payloads, _execution_details = executor.run(evaluation_inputs)
     for payload in evaluation_payloads:

phoenix/server/api/types/Span.py CHANGED Viewed

@@ -59,6 +59,7 @@ class SpanKind(Enum):
     embedding = "EMBEDDING"
     agent = "AGENT"
     reranker = "RERANKER"
+    evaluator = "EVALUATOR"
     unknown = "UNKNOWN"
     @classmethod

arize-phoenix 4.4.4rc3__py3-none-any.whl → 4.4.4rc4__py3-none-any.whl

arize-phoenix 4.4.4rc3__py3-none-any.whl → 4.4.4rc4__py3-none-any.whl