PyPI - ibm-watsonx-orchestrate-evaluation-framework - Versions diffs - 1.1.6__py3-none-any.whl → 1.1.7__py3-none-any.whl - Mend

ibm-watsonx-orchestrate-evaluation-framework 1.1.6__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ibm-watsonx-orchestrate-evaluation-framework might be problematic. Click here for more details.

Files changed (42)

wxo_agentic_evaluation/utils/__init__.py CHANGED Viewed

@@ -1,9 +1,47 @@
 import json
-from wxo_agentic_evaluation.utils.utils import TestCaseResources, add_line_seperator, list_run_files, load_run_metrics, N_A
-from wxo_agentic_evaluation.utils.open_ai_tool_extractor import ToolExtractionOpenAIFormat
+import os
+import tempfile
+from pathlib import Path
+from wxo_agentic_evaluation.utils.open_ai_tool_extractor import (
+    ToolExtractionOpenAIFormat,
+)
 from wxo_agentic_evaluation.utils.parsers import ReferencelessEvalParser
+from wxo_agentic_evaluation.utils.utils import (
+    N_A,
+    TestCaseResources,
+    add_line_seperator,
+    list_run_files,
+    load_run_metrics,
+)
+def json_dump(output_path, obj):
+    """
+    Atomically dump JSON to `output_path`.
+    - Writes to a temporary file first
+    - Then atomically replaces the target file
+    - Prevents corrupted/half-written JSON if process is interrupted
+    """
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
-def json_dump(output_path, object):
-    with open(output_path, "w", encoding="utf-8") as f:
-        json.dump(object, f, indent=4)
+    fd, tmp_path = tempfile.mkstemp(
+        dir=output_path.parent,
+        prefix=output_path.stem,
+        suffix=".tmp",
+        text=True,
+    )
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
+            json.dump(obj, f, indent=4, ensure_ascii=False)
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, output_path)
+    except Exception:
+        try:
+            os.remove(tmp_path)
+        except OSError:
+            pass
+        raise

wxo_agentic_evaluation/utils/evaluation_discovery.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""
+Evaluation discovery mechanism.
+This module provides functionality for discovering classes that inherit from Evaluation.
+"""
+import importlib.util
+import inspect
+import os
+def find_evaluation_subclasses(directory: str, base_class_name="Evaluation"):
+    """
+    Dynamically import Python files under 'directory' and find classes that
+    inherit from a class named 'Evaluation'. Returns a list of non-abstract
+    class objects.
+    """
+    subclasses = []
+    for root, _, files in os.walk(directory):
+        for file in files:
+            if file.endswith(".py") and not file.startswith("__"):
+                filepath = os.path.join(root, file)
+                module_name = os.path.splitext(os.path.basename(filepath))[0]
+                spec = importlib.util.spec_from_file_location(
+                    module_name, filepath
+                )
+                if spec and spec.loader:
+                    module = importlib.util.module_from_spec(spec)
+                    try:
+                        spec.loader.exec_module(module)
+                    except Exception as e:
+                        print(f"Skipping {filepath} due to import error: {e}")
+                        continue
+                    # Inspect for subclasses
+                    for name, obj in inspect.getmembers(
+                        module, inspect.isclass
+                    ):
+                        if any(
+                            base.__name__ == base_class_name
+                            for base in obj.__mro__[1:]
+                        ) and not inspect.isabstract(obj):
+                            subclasses.append(obj)
+    return subclasses

wxo_agentic_evaluation/utils/gateway_provider_utils.py ADDED Viewed

@@ -0,0 +1,39 @@
+import os
+from functools import lru_cache
+from wxo_agentic_evaluation.arg_configs import AuthConfig
+from wxo_agentic_evaluation.service_provider import USE_GATEWAY_MODEL_PROVIDER
+from wxo_agentic_evaluation.wxo_client import get_wxo_client
+WXO_AUTH_CONFIG_DEFAULTS = AuthConfig(
+    url=os.getenv("WXO_URL", "http://localhost:4321"),
+    tenant_name=os.getenv("WXO_TENANT", "wxo-dev"),
+    token=os.getenv("WXO_TOKEN", None),
+)
+@lru_cache(maxsize=1)
+def _get_cached_wxo_client():
+    # TODO: remove this once the client is implemented as a Singleton.
+    return get_wxo_client(
+        WXO_AUTH_CONFIG_DEFAULTS.url,
+        WXO_AUTH_CONFIG_DEFAULTS.tenant_name,
+        WXO_AUTH_CONFIG_DEFAULTS.token,
+    )
+def get_provider_kwargs(**base_kwargs: dict) -> dict:
+    if not USE_GATEWAY_MODEL_PROVIDER:
+        return base_kwargs
+    if "instance_url" in base_kwargs and "token" in base_kwargs:
+        return base_kwargs
+    wxo_client = _get_cached_wxo_client()
+    return {
+        **base_kwargs,
+        "instance_url": wxo_client.service_url,
+        "token": wxo_client.api_key,
+    }

wxo_agentic_evaluation/utils/messages_parser.py ADDED Viewed

@@ -0,0 +1,30 @@
+from typing import Optional
+from pydantic import BaseModel, Field
+from wxo_agentic_evaluation.type import ContentType, Message
+class ParsedMessages(BaseModel):
+    """
+    A parsed history of messages.
+    """
+    messages: list[Message] = Field(description="The list of messages")
+    @property
+    def user_input(self) -> Optional[str]:
+        """Find the original user message."""
+        for message in self.messages:
+            if message.role == "user" and message.type == ContentType.text:
+                return str(message.content)
+        return None
+    @property
+    def agent_response(self) -> Optional[str]:
+        """Find the most recent assistant message."""
+        messages_in_reverse = reversed(self.messages)
+        for message in messages_in_reverse:
+            if message.role == "assistant" and message.type == ContentType.text:
+                return str(message.content)
+        return None

wxo_agentic_evaluation/utils/utils.py CHANGED Viewed

@@ -1,15 +1,15 @@
+import csv
 import glob
 import json
 import os
 import re
-import csv
 from collections import defaultdict
 from pathlib import Path
-from typing import List, Optional, Union, Mapping, Tuple, Any
+from typing import Any, List, Mapping, Optional, Tuple, Union
 from urllib.parse import urlparse
-import yaml
 import rich
+import yaml
 from rich import box, print
 from rich.console import Console, Group
 from rich.panel import Panel
@@ -19,15 +19,15 @@ from rich.table import Table
 from wxo_agentic_evaluation.metrics.llm_as_judge import Faithfulness
 from wxo_agentic_evaluation.metrics.metrics import (
+    EnhancedAnalyzeMetrics,
     KnowledgeBaseMetricSummary,
     ReferenceLessEvalMetrics,
     ToolCallAndRoutingMetrics,
-    EnhancedAnalyzeMetrics,
 )
 from wxo_agentic_evaluation.type import (
     ConversationalConfidenceThresholdScore,
-    Message,
     ExtendedMessage,
+    Message,
 )
 console = Console()
@@ -37,6 +37,7 @@ RUN_FILE_RE = re.compile(
 )
 N_A = "N/A"
 class AttackResultsTable:
     def __init__(self, attack_results: dict):
         self.table = Table(
@@ -164,9 +165,11 @@ class TestCaseResources:
 class AgentMetricsTable:
-    def __init__(self, data):
+    def __init__(self, data, title: Optional[str] = None):
+        if title is None:
+            title = "Agent Metrics"
         self.table = Table(
-            title="Agent Metrics",
+            title=title,
             box=box.ROUNDED,
             show_lines=True,
         )
@@ -187,7 +190,9 @@ class AgentMetricsTable:
         console.print(self.table)
-def create_table(data: List[dict]) -> AgentMetricsTable:
+def create_table(
+    data: List[dict], title: Optional[str] = None
+) -> AgentMetricsTable:
     """
     Generate a Rich table from a list of dictionaries.
     Returns the AgentMetricsTable instance.
@@ -199,7 +204,7 @@ def create_table(data: List[dict]) -> AgentMetricsTable:
         print("create_table() received an empty dataset. No table generated.")
         return None
-    return AgentMetricsTable(data)
+    return AgentMetricsTable(data, title=title)
 def safe_divide(nom, denom):

wxo_agentic_evaluation/wxo_client.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import os
+from typing import Any, Dict, Optional
 import requests
 import urllib3
 from urllib3.exceptions import InsecureRequestWarning
-from typing import Dict, Any, Optional
 from wxo_agentic_evaluation.service_instance import tenant_setup

{ibm_watsonx_orchestrate_evaluation_framework-1.1.6.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{ibm_watsonx_orchestrate_evaluation_framework-1.1.6.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

ibm-watsonx-orchestrate-evaluation-framework 1.1.6__py3-none-any.whl → 1.1.7__py3-none-any.whl

Potentially problematic release.

ibm-watsonx-orchestrate-evaluation-framework 1.1.6__py3-none-any.whl → 1.1.7__py3-none-any.whl