ibm-watsonx-orchestrate-evaluation-framework 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info/METADATA +35 -0
- {ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info}/RECORD +65 -60
- wxo_agentic_evaluation/analytics/tools/analyzer.py +36 -21
- wxo_agentic_evaluation/analytics/tools/main.py +18 -7
- wxo_agentic_evaluation/analytics/tools/types.py +26 -11
- wxo_agentic_evaluation/analytics/tools/ux.py +75 -31
- wxo_agentic_evaluation/analyze_run.py +69 -48
- wxo_agentic_evaluation/annotate.py +6 -4
- wxo_agentic_evaluation/arg_configs.py +9 -3
- wxo_agentic_evaluation/batch_annotate.py +78 -25
- wxo_agentic_evaluation/data_annotator.py +18 -13
- wxo_agentic_evaluation/description_quality_checker.py +20 -14
- wxo_agentic_evaluation/evaluation.py +42 -0
- wxo_agentic_evaluation/evaluation_package.py +117 -70
- wxo_agentic_evaluation/external_agent/__init__.py +18 -7
- wxo_agentic_evaluation/external_agent/external_validate.py +46 -35
- wxo_agentic_evaluation/external_agent/performance_test.py +32 -20
- wxo_agentic_evaluation/external_agent/types.py +12 -5
- wxo_agentic_evaluation/inference_backend.py +183 -79
- wxo_agentic_evaluation/llm_matching.py +4 -3
- wxo_agentic_evaluation/llm_rag_eval.py +7 -4
- wxo_agentic_evaluation/llm_user.py +7 -3
- wxo_agentic_evaluation/main.py +175 -67
- wxo_agentic_evaluation/metrics/llm_as_judge.py +2 -2
- wxo_agentic_evaluation/metrics/metrics.py +26 -12
- wxo_agentic_evaluation/otel_support/evaluate_tau.py +67 -0
- wxo_agentic_evaluation/otel_support/evaluate_tau_traces.py +176 -0
- wxo_agentic_evaluation/otel_support/otel_message_conversion.py +21 -0
- wxo_agentic_evaluation/otel_support/tasks_test.py +1226 -0
- wxo_agentic_evaluation/prompt/template_render.py +32 -11
- wxo_agentic_evaluation/quick_eval.py +49 -23
- wxo_agentic_evaluation/record_chat.py +70 -33
- wxo_agentic_evaluation/red_teaming/attack_evaluator.py +58 -18
- wxo_agentic_evaluation/red_teaming/attack_generator.py +38 -18
- wxo_agentic_evaluation/red_teaming/attack_runner.py +43 -27
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/base.py +3 -1
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/loader.py +23 -15
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/adapters.py +13 -8
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py +41 -13
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/semantic_checker.py +26 -16
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/static_checker.py +17 -11
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py +44 -29
- wxo_agentic_evaluation/referenceless_eval/metrics/field.py +13 -5
- wxo_agentic_evaluation/referenceless_eval/metrics/metric.py +16 -5
- wxo_agentic_evaluation/referenceless_eval/metrics/metrics_runner.py +8 -3
- wxo_agentic_evaluation/referenceless_eval/metrics/prompt.py +6 -2
- wxo_agentic_evaluation/referenceless_eval/metrics/utils.py +5 -1
- wxo_agentic_evaluation/referenceless_eval/prompt/runner.py +16 -3
- wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py +23 -12
- wxo_agentic_evaluation/resource_map.py +2 -1
- wxo_agentic_evaluation/service_instance.py +103 -21
- wxo_agentic_evaluation/service_provider/__init__.py +33 -13
- wxo_agentic_evaluation/service_provider/model_proxy_provider.py +216 -34
- wxo_agentic_evaluation/service_provider/ollama_provider.py +10 -11
- wxo_agentic_evaluation/service_provider/provider.py +0 -1
- wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py +34 -21
- wxo_agentic_evaluation/service_provider/watsonx_provider.py +50 -22
- wxo_agentic_evaluation/tool_planner.py +128 -44
- wxo_agentic_evaluation/type.py +12 -9
- wxo_agentic_evaluation/utils/__init__.py +1 -0
- wxo_agentic_evaluation/utils/open_ai_tool_extractor.py +41 -20
- wxo_agentic_evaluation/utils/rich_utils.py +23 -9
- wxo_agentic_evaluation/utils/utils.py +83 -52
- ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info/METADATA +0 -386
- {ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info}/WHEEL +0 -0
- {ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info}/top_level.txt +0 -0
wxo_agentic_evaluation/utils/open_ai_tool_extractor.py

@@ -1,11 +1,12 @@
 import ast
 import re
 from pathlib import Path
-from typing import
+from typing import Any, List, Mapping, Union
+

 class PythonTypeToJsonType:
     OPTIONAL_PARAM_EXTRACT = re.compile(r"[Oo]ptional\[(\w+)\]")
-
+
     @staticmethod
     def python_to_json_type(python_annotation: str):
         if not python_annotation:
@@ -25,30 +26,33 @@ class PythonTypeToJsonType:
             return "object"
         if python_annotation.startswith("optional"):
             # extract the type within Optional[T]
-            inner_type = PythonTypeToJsonType.OPTIONAL_PARAM_EXTRACT.search(python_annotation).group(1)
+            inner_type = PythonTypeToJsonType.OPTIONAL_PARAM_EXTRACT.search(
+                python_annotation
+            ).group(1)
             return PythonTypeToJsonType.python_to_json_type(inner_type)

         return "string"

+
 class ToolExtractionOpenAIFormat:
     @staticmethod
     def get_default_arguments(node):
-        """
+        """Returns the default arguments (if any)

         The default arguments are stored in args.default array.
         Since, in Python, the default arguments only come after positional arguments,
         we can index the argument array starting from the last `n` arguments, where n is
         the length of the default arguments.

-        ex.
+        ex.
             def add(a, b=5):
                 pass
-
+
         Then we have,
             args = [a, b]
             defaults = [Constant(value=5)]

-        args[-len(defaults):] = [b]
+        args[-len(defaults):] = [b]

         (
             "FunctionDef(
@@ -70,12 +74,12 @@ class ToolExtractionOpenAIFormat:
         if num_defaults > 0:
             for arg in node.args.args[-num_defaults:]:
                 default_arguments.add(arg)
-
+
         return default_arguments

     @staticmethod
     def from_file(tools_path: Union[str, Path]) -> Mapping[str, Any]:
-        """
+        """Uses `extract_tool_signatures` function, but converts the response
         to open-ai format

         ```
@@ -100,7 +104,11 @@ class ToolExtractionOpenAIFormat:
         parsed_code = ast.parse(code)
         for node in parsed_code.body:
             if isinstance(node, ast.FunctionDef):
-                parameters = {"type": "object", "properties": {}, "required": []}
+                parameters = {
+                    "type": "object",
+                    "properties": {},
+                    "required": [],
+                }
                 function_name = node.name
                 for arg in node.args.args:
                     type_annotation = None
@@ -109,16 +117,25 @@ class ToolExtractionOpenAIFormat:
                     if arg.annotation:
                         type_annotation = ast.unparse(arg.annotation)

-                    parameter_type = PythonTypeToJsonType.python_to_json_type(type_annotation)
+                    parameter_type = (
+                        PythonTypeToJsonType.python_to_json_type(
+                            type_annotation
+                        )
+                    )
                     parameters["properties"][arg.arg] = {
                         "type": parameter_type,
-                        "description": "",
+                        "description": "",  # todo
                     }

-                    if type_annotation and "Optional" not in type_annotation:
+                    if (
+                        type_annotation
+                        and "Optional" not in type_annotation
+                    ):
                         parameters["required"].append(arg.arg)

-                default_arguments = ToolExtractionOpenAIFormat.get_default_arguments(node)
+                default_arguments = (
+                    ToolExtractionOpenAIFormat.get_default_arguments(node)
+                )
                 for arg_name in parameters["required"]:
                     if arg_name in default_arguments:
                         parameters.remove(arg_name)
@@ -128,8 +145,10 @@ class ToolExtractionOpenAIFormat:
                     "function": {
                         "name": function_name,
                         "parameters": parameters,
-                        "description": ast.get_docstring(node),  # fix (does not do :params)
-                    },
+                        "description": ast.get_docstring(
+                            node
+                        ),  # fix (does not do :params)
+                    },
                 }
                 tool_data.append(open_ai_format_fn)

@@ -149,9 +168,11 @@ class ToolExtractionOpenAIFormat:
         elif tools_path.is_dir():
             files_to_parse.extend(tools_path.glob("**/*.py"))
         else:
-            raise ValueError(
-                f"Tools path {tools_path} is neither a file nor directory")
+            raise ValueError(
+                f"Tools path {tools_path} is neither a file nor directory"
+            )
+
         for file_path in files_to_parse:
             all_tools.extend(ToolExtractionOpenAIFormat.from_file(file_path))
-
-        return all_tools
+
+        return all_tools
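The get_default_arguments docstring above leans on one AST fact: node.args.defaults always lines up with the trailing positional arguments. A minimal standalone sketch of that indexing trick, using the add(a, b=5) example from the docstring (variable names here are illustrative, not from the package):

import ast

source = "def add(a, b=5):\n    pass"
node = ast.parse(source).body[0]

# Default values live in node.args.defaults and always pair with the
# last len(defaults) positional arguments, so index from the end.
defaults = node.args.defaults                # [Constant(value=5)]
defaulted = node.args.args[-len(defaults):]  # [arg(arg='b')]

print([a.arg for a in defaulted])            # prints ['b']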
wxo_agentic_evaluation/utils/rich_utils.py

@@ -1,6 +1,7 @@
-from
-
+from typing import Any, List, Optional
+
 import rich
+from rich.text import Text


 def pretty_print(content: Any, style: Optional[str] = None):
@@ -33,13 +34,17 @@ def warn(


 def is_ok(
-    message: str, style: Optional[str] = "bold green", prompt: Optional[str] = "OK ✅ :",
+    message: str,
+    style: Optional[str] = "bold green",
+    prompt: Optional[str] = "OK ✅ :",
 ) -> Text:
     """Utility function for formatting an OK message."""
     return Text(f"{prompt}{message}\n\n", style=style)


-def print_done(prompt: Optional[str] = "Done ✅", style: Optional[str] = "bold cyan"):
+def print_done(
+    prompt: Optional[str] = "Done ✅", style: Optional[str] = "bold cyan"
+):
     """
     Prints a prompt indicating completion of a process/routine.
     :param prompt: default is `"Done ✅"`
@@ -63,7 +68,9 @@ def print_success(


 def print_failure(
-    message: str, style: Optional[str] = "bold red", prompt: Optional[str] = "❌ FAILED",
+    message: str,
+    style: Optional[str] = "bold red",
+    prompt: Optional[str] = "❌ FAILED",
 ):
     """
     Prints a failure message.
@@ -108,7 +115,9 @@ class IncorrectParameterUtils:
     ]

     @staticmethod
-    def format_bad_description_message(tool_name: str, tool_desc: str) -> List[Text]:
+    def format_bad_description_message(
+        tool_name: str, tool_desc: str
+    ) -> List[Text]:

         return [
             warn(
@@ -139,12 +148,15 @@ class TestingUtils:
         For example, this can be read as: `"{\n⚙️ Testing} {20} {good tool descriptions}"`.
         """
         pretty_print(
-            content=f"{prompt} {test_case_count} {test_description}", style=style,
+            content=f"{prompt} {test_case_count} {test_description}",
+            style=style,
         )

     @staticmethod
     def print_error_details(
-        expected: List[str], detected: List[str], style: Optional[str] = "bold red",
+        expected: List[str],
+        detected: List[str],
+        style: Optional[str] = "bold red",
     ):
         """
         Print detailed error information.
@@ -169,6 +181,8 @@ class TestingUtils:
         :param style: The style for the text (default is bold red).
         """
         if failed_cases:
-            pretty_print(content=f"{prompt} ({len(failed_cases)}):", style=style)
+            pretty_print(
+                content=f"{prompt} ({len(failed_cases)}):", style=style
+            )
             for case in failed_cases:
                 pretty_print(content=f" - {case}", style=style)
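Going by the signatures visible in the hunks above, a usage sketch of the reworked helpers; the output comments are approximations based on the defaults shown in the diff, not captured output:

import rich

from wxo_agentic_evaluation.utils.rich_utils import is_ok, print_done, print_failure

# is_ok returns a rich Text; with the defaults above it renders
# roughly "OK ✅ :all tool descriptions passed" in bold green.
rich.print(is_ok("all tool descriptions passed"))

# print_done and print_failure print directly, with default prompts
# "Done ✅" (bold cyan) and "❌ FAILED" (bold red) per their signatures.
print_done()
print_failure("2 tool descriptions need attention")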
wxo_agentic_evaluation/utils/utils.py

@@ -1,25 +1,31 @@
+import glob
+import json
+import os
+import re
+from typing import List, Optional, Union
 from urllib.parse import urlparse
+
+import yaml
+from rich import box, print
 from rich.console import Console, Group
-from rich.table import Table
 from rich.panel import Panel
 from rich.rule import Rule
-from rich import box
-from rich import print
-import re
 from rich.style import Style
-
-from typing import List, Optional, Union
-import json
-import yaml
-import glob
-import os
+from rich.table import Table

 from wxo_agentic_evaluation.metrics.llm_as_judge import Faithfulness
-from wxo_agentic_evaluation.metrics.metrics import
-
+from wxo_agentic_evaluation.metrics.metrics import (
+    KnowledgeBaseMetricSummary,
+    ReferenceLessEvalMetrics,
+)
+from wxo_agentic_evaluation.type import (
+    ConversationalConfidenceThresholdScore,
+    Message,
+)

 console = Console()

+
 class AttackResultsTable:
     def __init__(self, attack_results: dict):
         self.table = Table(
@@ -35,11 +41,21 @@ class AttackResultsTable:
         n_on_policy = attack_results.get("n_on_policy_attacks", 0)
         n_off_policy = attack_results.get("n_off_policy_attacks", 0)
         n_on_policy_successful = attack_results.get("n_on_policy_successful", 0)
-        n_off_policy_successful = attack_results.get("n_off_policy_successful", 0)
+        n_off_policy_successful = attack_results.get(
+            "n_off_policy_successful", 0
+        )

         # Calculate success rates
-        on_policy_rate = f"{round(100 * safe_divide(n_on_policy_successful, n_on_policy))}%" if n_on_policy else "0%"
-        off_policy_rate = f"{round(100 * safe_divide(n_off_policy_successful, n_off_policy))}%" if n_off_policy else "0%"
+        on_policy_rate = (
+            f"{round(100 * safe_divide(n_on_policy_successful, n_on_policy))}%"
+            if n_on_policy
+            else "0%"
+        )
+        off_policy_rate = (
+            f"{round(100 * safe_divide(n_off_policy_successful, n_off_policy))}%"
+            if n_off_policy
+            else "0%"
+        )

         self.table.add_row("On Policy", str(n_on_policy), on_policy_rate)
         self.table.add_row("Off Policy", str(n_off_policy), off_policy_rate)
@@ -47,6 +63,7 @@ class AttackResultsTable:
     def print(self):
         console.print(self.table)

+
 class AgentMetricsTable:
     def __init__(self, data):
         self.table = Table(
@@ -90,7 +107,8 @@ def safe_divide(nom, denom):
     if denom == 0:
         return 0
     else:
-        return nom/denom
+        return nom / denom
+

 def is_saas_url(service_url: str) -> bool:
     hostname = urlparse(service_url).hostname
@@ -103,19 +121,17 @@ def is_ibm_cloud_url(service_url: str) -> bool:


 def add_line_seperator(
-    style_config: Optional[
-        Union[str,Style]
-    ]=None,
+    style_config: Optional[Union[str, Style]] = None,
 ):
-
+
     if not style_config:
-        style="grey42"
+        style = "grey42"
     else:
-        style=style_config
-
+        style = style_config
+
     console.print(
         Rule(
-            style=style,
+            style=style,
         )
     )

@@ -124,14 +140,18 @@ class FaithfulnessTable:
     def __init__(
         self, faithfulness_metrics: List[Faithfulness], tool_call_ids: List[str]
     ):
-        self.table = Table(title="Faithfulness", box=box.ROUNDED, show_lines=True)
+        self.table = Table(
+            title="Faithfulness", box=box.ROUNDED, show_lines=True
+        )

         self.table.add_column("Tool Call Id", style="blue")
         self.table.add_column("Faithfulness Score", style="blue3")
         self.table.add_column("Evidence", style="cyan")
         self.table.add_column("Reasoning", style="yellow3")

-        for tool_call_id, faithfulness in zip(tool_call_ids, faithfulness_metrics):
+        for tool_call_id, faithfulness in zip(
+            tool_call_ids, faithfulness_metrics
+        ):
             faithfulness = faithfulness.table()
             self.table.add_row(
                 tool_call_id,
@@ -185,7 +205,9 @@ class KnowledgePanel:
         self.confidence_scores = ConversationalSearchTable(
             confidence_scores, tool_call_id
         )
-        self.group = Group(self.faithfulness.table, self.confidence_scores.table)
+        self.group = Group(
+            self.faithfulness.table, self.confidence_scores.table
+        )

         # Panel acts as a section
         self.section = Panel(
@@ -240,35 +262,32 @@ class Tokenizer:
         \w+|                # Regular words (letters, numbers, underscores)
         [^\w\s]             # Punctuation marks (anything that's not word chars or whitespace)
     """
-
+
     def __init__(self):
         self.compiled_pattern = re.compile(
-            self.PATTERN,
-            re.VERBOSE | re.IGNORECASE
+            self.PATTERN, re.VERBOSE | re.IGNORECASE
         )
-
+
     def __call__(self, text: str) -> List[str]:
         """
         Tokenizes text by splitting on punctuation and handling contractions.

         Args:
             text: Input text to tokenize.
-
+
         Returns:
             List of tokenized words (lowercase, no punctuation).
-
+
         Examples:
             - "I'm fine" -> ['i', 'm', 'fine']
-            - "don't go" -> ['do', "n't", 'go']
+            - "don't go" -> ['do', "n't", 'go']
            - "Hello, world!" -> ['hello', 'world']
         """
-
-        tokens = self.compiled_pattern.findall(
-            text
-        )
-
+
+        tokens = self.compiled_pattern.findall(text)
+
         return self._clean_tokens(tokens)
-
+
     def _clean_tokens(self, raw_tokens: List[str]) -> List[str]:
         """
         Applies some basic post-processing to tokenized messages.
@@ -276,12 +295,11 @@ class Tokenizer:
         Args:
             raw_tokens: list of tokens extracted from a message.
         """
-
+
         filtered_tokens = [
-            token.lower()
-            for token in raw_tokens
-            if token.strip()
-            and not (len(token) == 1 and not token.isalnum())
+            token.lower()
+            for token in raw_tokens
+            if token.strip() and not (len(token) == 1 and not token.isalnum())
         ]

         return filtered_tokens
@@ -296,10 +314,22 @@ class ReferencelessEvalPanel:
         )

         self.table.add_column("Dataset", style="yellow", justify="center")
-        self.table.add_column("Tool Calls", style="deep_sky_blue1", justify="center")
-        self.table.add_column("Successful Tool Calls", style="magenta", justify="center")
-        self.table.add_column("Tool Calls Failed due to Schema Mismatch", style="deep_sky_blue1", justify="center")
-        self.table.add_column("Tool Calls Failed due to Hallucination", style="magenta", justify="center")
+        self.table.add_column(
+            "Tool Calls", style="deep_sky_blue1", justify="center"
+        )
+        self.table.add_column(
+            "Successful Tool Calls", style="magenta", justify="center"
+        )
+        self.table.add_column(
+            "Tool Calls Failed due to Schema Mismatch",
+            style="deep_sky_blue1",
+            justify="center",
+        )
+        self.table.add_column(
+            "Tool Calls Failed due to Hallucination",
+            style="magenta",
+            justify="center",
+        )

         for metric in referenceless_metrics:
             self.table.add_row(
@@ -307,12 +337,13 @@ class ReferencelessEvalPanel:
                 str(metric.number_of_tool_calls),
                 str(metric.number_of_successful_tool_calls),
                 str(metric.number_of_static_failed_tool_calls),
-                str(metric.number_of_semantic_failed_tool_calls)
+                str(metric.number_of_semantic_failed_tool_calls),
             )

     def print(self):
         console.print(self.table)

+
 # Function to load messages from JSON file
 def load_messages(file_path):
     with open(file_path, "r") as f:
@@ -339,9 +370,9 @@ def load_agents(agents_path: str):
     for agent_path in agents_json:
         with open(agent_path, "r") as f:
             agents.append(json.load(f))
-
+
     for agent_path in agents_yaml:
         with open(agent_path, "r") as f:
             agents.append(yaml.safe_load(f))
-
+
     return agents
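The Tokenizer hunks expose only two branches of PATTERN (the full pattern evidently has a contraction branch as well, given the "don't" -> ['do', "n't", 'go'] example), but the visible branches plus the _clean_tokens filter are enough to reproduce the "Hello, world!" case. A self-contained sketch under that assumption:

import re

# Partial pattern: just the two branches shown in the hunk above.
PATTERN = r"""
    \w+|        # Regular words (letters, numbers, underscores)
    [^\w\s]     # Punctuation marks (anything that's not word chars or whitespace)
"""
compiled = re.compile(PATTERN, re.VERBOSE | re.IGNORECASE)

raw = compiled.findall("Hello, world!")  # ['Hello', ',', 'world', '!']
tokens = [
    t.lower()
    for t in raw
    # same filter as _clean_tokens: drop whitespace and lone punctuation
    if t.strip() and not (len(t) == 1 and not t.isalnum())
]
print(tokens)                            # prints ['hello', 'world']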