PyPI - arize-phoenix - Versions diffs - 1.9.1rc2__tar.gz → 2.0.0__tar.gz - Mend

arize-phoenix 1.9.1rc2__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arize-phoenix might be problematic. Click here for more details.

Files changed (174) hide show

{arize_phoenix-1.9.1rc2 → arize_phoenix-2.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: arize-phoenix
-Version: 1.9.1rc2
+Version: 2.0.0
 Summary: ML Observability in your notebook
 Project-URL: Documentation, https://docs.arize.com/phoenix/
 Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -55,7 +55,7 @@ Requires-Dist: strawberry-graphql[debug-server]==0.208.2; extra == 'dev'
 Provides-Extra: experimental
 Requires-Dist: tenacity; extra == 'experimental'
 Provides-Extra: llama-index
-Requires-Dist: llama-index>=0.9.14; extra == 'llama-index'
+Requires-Dist: llama-index==0.9.14; extra == 'llama-index'
 Description-Content-Type: text/markdown
 <p align="center">

{arize_phoenix-1.9.1rc2 → arize_phoenix-2.0.0}/pyproject.toml RENAMED Viewed

@@ -66,7 +66,7 @@ experimental = [
   "tenacity",
 ]
 llama-index = [
-  "llama-index>=0.9.14",
+  "llama-index==0.9.14",  # always pin to a version that keeps our notebooks working
 ]
 [project.urls]

{arize_phoenix-1.9.1rc2 → arize_phoenix-2.0.0}/src/phoenix/__init__.py RENAMED Viewed

@@ -6,7 +6,7 @@ from .session.session import NotebookEnvironment, Session, active_session, close
 from .trace.fixtures import load_example_traces
 from .trace.trace_dataset import TraceDataset
-__version__ = "1.9.1rc2"
+__version__ = "2.0.0"
 # module level doc-string
 __doc__ = """

{arize_phoenix-1.9.1rc2 → arize_phoenix-2.0.0}/src/phoenix/core/traces.py RENAMED Viewed

@@ -169,7 +169,7 @@ class Traces:
                 if (
                     (span := self[span_id])
                     and start_time <= span.start_time < stop_time
-                    and (span.parent_id is None) == bool(root_spans_only)
+                    and (not root_spans_only or span.parent_id is None)
                 ):
                     yield span
             return

arize_phoenix-2.0.0/src/phoenix/exceptions.py ADDED Viewed

@@ -0,0 +1,2 @@
+class PhoenixException(Exception):
+    pass

{arize_phoenix-1.9.1rc2 → arize_phoenix-2.0.0}/src/phoenix/experimental/evals/__init__.py RENAMED Viewed

@@ -1,3 +1,4 @@
+from .evaluators import LLMEvaluator
 from .functions import llm_classify, llm_generate, run_relevance_eval
 from .models import BedrockModel, LiteLLMModel, OpenAIModel, VertexAIModel
 from .retrievals import compute_precisions_at_k
@@ -8,7 +9,6 @@ from .templates import (
     HALLUCINATION_PROMPT_TEMPLATE,
     HUMAN_VS_AI_PROMPT_RAILS_MAP,
     HUMAN_VS_AI_PROMPT_TEMPLATE,
-    NOT_PARSABLE,
     QA_PROMPT_RAILS_MAP,
     QA_PROMPT_TEMPLATE,
     RAG_RELEVANCY_PROMPT_RAILS_MAP,
@@ -18,7 +18,7 @@ from .templates import (
     ClassificationTemplate,
     PromptTemplate,
 )
-from .utils import download_benchmark_dataset
+from .utils import NOT_PARSABLE, download_benchmark_dataset
 __all__ = [
     "compute_precisions_at_k",
@@ -29,6 +29,7 @@ __all__ = [
     "VertexAIModel",
     "BedrockModel",
     "LiteLLMModel",
+    "LLMEvaluator",
     "PromptTemplate",
     "ClassificationTemplate",
     "CODE_READABILITY_PROMPT_RAILS_MAP",

{arize_phoenix-1.9.1rc2 → arize_phoenix-2.0.0}/src/phoenix/experimental/evals/evaluators.py RENAMED Viewed

@@ -1,10 +1,11 @@
-from typing import List, Mapping, Optional
+from typing import List, Mapping, Optional, Tuple
 from phoenix.experimental.evals.models import set_verbosity
+from phoenix.experimental.evals.utils import parse_openai_function_call, snap_to_rail
 from phoenix.utilities.logging import printif
 from .models import BaseEvalModel
-from .templates import ClassificationTemplate, PromptTemplate
+from .templates import ClassificationTemplate, PromptOptions, PromptTemplate
 Record = Mapping[str, str]
@@ -20,51 +21,80 @@ class LLMEvaluator:
         self,
         model: BaseEvalModel,
         template: ClassificationTemplate,
-        name: str,
-        verbose: bool = False,
     ) -> None:
         """Initializer for LLMEvaluator.
         Args:
             model (BaseEvalModel): The LLM model to use for evaluation.
             template (ClassificationTemplate): The evaluation template.
-            name (str): The name of the evaluator.
-            verbose (bool, optional): Whether to print verbose output.
         """
         self._model = model
         self._template = template
-        self.name = name
-        self._verbose = verbose
-    def evaluate(self, record: Record) -> str:
-        """Evaluates a single record.
+    def evaluate(
+        self,
+        record: Record,
+        provide_explanation: bool = False,
+        verbose: bool = False,
+    ) -> Tuple[str, Optional[str]]:
+        """
+        Evaluates a single record.
         Args:
             record (Record): The record to evaluate.
+            provide_explanation (bool, optional): Whether to provide an
+            explanation.
+            verbose (bool, optional): Whether to print verbose output.
         Returns:
-            EvaluationResult: The result of the evaluation
+            Tuple[str, Optional[str]]: The label and explanation (if provided).
         """
-        prompt = self._template.format(record)
-        with set_verbosity(self._model, self._verbose) as verbose_model:
+        prompt = self._template.format(
+            record, options=PromptOptions(provide_explanation=provide_explanation)
+        )
+        with set_verbosity(self._model, verbose) as verbose_model:
             unparsed_output = verbose_model(prompt)
-        parsed_output = _snap_to_rail(unparsed_output, self._template.rails, self._verbose)
-        return parsed_output
-    async def aevaluate(self, record: Record) -> str:
-        """Evaluates a single record.
+        label, explanation = _extract_label_and_explanation(
+            unparsed_output=unparsed_output,
+            template=self._template,
+            use_openai_function_call=False,
+            provide_explanation=provide_explanation,
+            verbose=verbose,
+        )
+        return label, explanation
+    async def aevaluate(
+        self, record: Record, provide_explanation: bool = False, verbose: bool = False
+    ) -> Tuple[str, Optional[str]]:
+        """
+        Evaluates a single record.
         Args:
             record (Record): The record to evaluate.
+            provide_explanation (bool, optional): Whether to provide an
+            explanation.
+            verbose (bool, optional): Whether to print verbose output.
         Returns:
-            EvaluationResult: The result of the evaluation
+            Tuple[str, Optional[str]]: The label and explanation (if provided).
         """
-        prompt = self._template.format(dict(record))
-        with set_verbosity(self._model, self._verbose) as verbose_model:
+        prompt = self._template.format(
+            record, options=PromptOptions(provide_explanation=provide_explanation)
+        )
+        with set_verbosity(self._model, verbose) as verbose_model:
             unparsed_output = await verbose_model._async_generate(prompt)
-        parsed_output = _snap_to_rail(unparsed_output, self._template.rails, self._verbose)
-        return parsed_output
+        label, explanation = _extract_label_and_explanation(
+            unparsed_output=unparsed_output,
+            template=self._template,
+            use_openai_function_call=False,
+            provide_explanation=provide_explanation,
+            verbose=verbose,
+        )
+        return label, explanation
 class MapReducer:
@@ -202,33 +232,46 @@ class Refiner:
         return model(reduce_prompt)
-def _snap_to_rail(raw_string: Optional[str], rails: List[str], verbose: bool = False) -> str:
+def _extract_label_and_explanation(
+    unparsed_output: str,
+    template: ClassificationTemplate,
+    provide_explanation: bool,
+    use_openai_function_call: bool,
+    verbose: bool,
+) -> Tuple[str, Optional[str]]:
     """
-    Snaps a string to the nearest rail, or returns None if the string cannot be
-    snapped to a rail.
+    Extracts the label and explanation from the unparsed output.
     Args:
-        raw_string (str): An input to be snapped to a rail.
+        unparsed_output (str): The raw output to be parsed.
+        template (ClassificationTemplate): The template used to generate the
+        output.
-        rails (List[str]): The target set of strings to snap to.
+        provide_explanation (bool): Whether the output includes an explanation.
+        use_openai_function_call (bool): Whether the output was generated using
+        function calling.
+        verbose (bool): If True, print verbose output to stdout.
     Returns:
-        str: A string from the rails argument or "UNPARSABLE" if the input
-        string could not be snapped.
+        Tuple[str, Optional[str]]: A tuple containing the label and an
+        explanation (if one is provided).
     """
-    if not raw_string:
-        return NOT_PARSABLE
-    snap_string = raw_string.lower()
-    rails = list(set(rail.lower() for rail in rails))
-    rails.sort(key=len, reverse=True)
-    found_rails = set()
-    for rail in rails:
-        if rail in snap_string:
-            found_rails.add(rail)
-            snap_string = snap_string.replace(rail, "")
-    if len(found_rails) != 1:
-        printif(verbose, f"- Cannot snap {repr(raw_string)} to rails")
-        return NOT_PARSABLE
-    rail = list(found_rails)[0]
-    printif(verbose, f"- Snapped {repr(raw_string)} to rail: {rail}")
-    return rail
+    if not use_openai_function_call:
+        if provide_explanation:
+            unrailed_label, explanation = (
+                template.extract_label_from_explanation(unparsed_output),
+                unparsed_output,
+            )
+            printif(
+                verbose and unrailed_label == NOT_PARSABLE,
+                f"- Could not parse {repr(unparsed_output)}",
+            )
+        else:
+            unrailed_label = unparsed_output
+            explanation = None
+    else:
+        unrailed_label, explanation = parse_openai_function_call(unparsed_output)
+    return snap_to_rail(unrailed_label, template.rails, verbose=verbose), explanation

arize-phoenix 1.9.1rc2__tar.gz → 2.0.0__tar.gz

Potentially problematic release.

arize-phoenix 1.9.1rc2__tar.gz → 2.0.0__tar.gz