PyPI - ibm-watsonx-orchestrate-evaluation-framework - Versions diffs - 1.1.2__py3-none-any.whl → 1.1.4__py3-none-any.whl - Mend

ibm-watsonx-orchestrate-evaluation-framework 1.1.2__py3-none-any.whl → 1.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ibm-watsonx-orchestrate-evaluation-framework might be problematic. Click here for more details.

Files changed (27) hide show

wxo_agentic_evaluation/service_provider/model_proxy_provider.py CHANGED Viewed

@@ -1,15 +1,23 @@
 import os
-import requests
 import time
-from typing import List, Tuple
 from threading import Lock
+from typing import List, Tuple
+import requests
 from wxo_agentic_evaluation.service_provider.provider import Provider
 from wxo_agentic_evaluation.utils.utils import is_ibm_cloud_url
-AUTH_ENDPOINT_AWS = "https://iam.platform.saas.ibm.com/siusermgr/api/1.0/apikeys/token"
+AUTH_ENDPOINT_AWS = (
+    "https://iam.platform.saas.ibm.com/siusermgr/api/1.0/apikeys/token"
+)
 AUTH_ENDPOINT_IBM_CLOUD = "https://iam.cloud.ibm.com/identity/token"
-DEFAULT_PARAM = {"min_new_tokens": 1, "decoding_method": "greedy", "max_new_tokens": 400}
+DEFAULT_PARAM = {
+    "min_new_tokens": 1,
+    "decoding_method": "greedy",
+    "max_new_tokens": 400,
+}
 def _infer_cpd_auth_url(instance_url: str) -> str:
     inst = (instance_url or "").rstrip("/")
@@ -36,49 +44,71 @@ class ModelProxyProvider(Provider):
         instance_url=None,
         timeout=300,
         embedding_model_id=None,
-        params=None
+        params=None,
     ):
         super().__init__()
         instance_url = os.environ.get("WO_INSTANCE", instance_url)
         if not instance_url:
-            raise RuntimeError("instance url must be specified to use WO model proxy")
+            raise RuntimeError(
+                "instance url must be specified to use WO model proxy"
+            )
         self.timeout = timeout
-        self.model_id = os.environ.get("MODEL_OVERRIDE",model_id)
+        self.model_id = os.environ.get("MODEL_OVERRIDE", model_id)
         self.embedding_model_id = embedding_model_id
         self.api_key = os.environ.get("WO_API_KEY", api_key)
         self.username = os.environ.get("WO_USERNAME", None)
         self.password = os.environ.get("WO_PASSWORD", None)
-        self.auth_type = os.environ.get("WO_AUTH_TYPE", "").lower()  # explicit override if set, otherwise inferred- match ADK values
+        self.auth_type = os.environ.get(
+            "WO_AUTH_TYPE", ""
+        ).lower()  # explicit override if set, otherwise inferred- match ADK values
         explicit_auth_url = os.environ.get("AUTHORIZATION_URL", None)
         self.is_ibm_cloud = is_ibm_cloud_url(instance_url)
         self.instance_url = instance_url.rstrip("/")
-        self.auth_mode, self.auth_url = self._resolve_auth_mode_and_url(explicit_auth_url=explicit_auth_url)
-        self._wo_ssl_verify = os.environ.get("WO_SSL_VERIFY", "true").lower() != "false"
+        self.auth_mode, self.auth_url = self._resolve_auth_mode_and_url(
+            explicit_auth_url=explicit_auth_url
+        )
+        self._wo_ssl_verify = (
+            os.environ.get("WO_SSL_VERIFY", "true").lower() != "false"
+        )
         env_space_id = os.environ.get("WATSONX_SPACE_ID", None)
         if self.auth_mode == "cpd":
             if not env_space_id or not env_space_id.strip():
-                raise RuntimeError("CPD mode requires WATSONX_SPACE_ID environment variable to be set")
+                raise RuntimeError(
+                    "CPD mode requires WATSONX_SPACE_ID environment variable to be set"
+                )
             self.space_id = env_space_id.strip()
         else:
-            self.space_id = (env_space_id.strip() if env_space_id and env_space_id.strip() else "1")
+            self.space_id = (
+                env_space_id.strip()
+                if env_space_id and env_space_id.strip()
+                else "1"
+            )
         if self.auth_mode == "cpd":
             if "/orchestrate" in self.instance_url:
-                self.instance_url = self.instance_url.split("/orchestrate", 1)[0].rstrip("/")
+                self.instance_url = self.instance_url.split("/orchestrate", 1)[
+                    0
+                ].rstrip("/")
             if not self.username:
                 raise RuntimeError("CPD auth requires WO_USERNAME to be set")
             if not (self.password or self.api_key):
-                raise RuntimeError("CPD auth requires either WO_PASSWORD or WO_API_KEY to be set (with WO_USERNAME)")
+                raise RuntimeError(
+                    "CPD auth requires either WO_PASSWORD or WO_API_KEY to be set (with WO_USERNAME)"
+                )
         else:
             if not self.api_key:
-                raise RuntimeError("WO_API_KEY must be specified for SaaS or IBM IAM auth")
+                raise RuntimeError(
+                    "WO_API_KEY must be specified for SaaS or IBM IAM auth"
+                )
-        self.url = self.instance_url + "/ml/v1/text/generation?version=2024-05-01"
+        self.url = (
+            self.instance_url + "/ml/v1/text/generation?version=2024-05-01"
+        )
         self.embedding_url = self.instance_url + "/ml/v1/text/embeddings"
         self.lock = Lock()
@@ -86,8 +116,7 @@ class ModelProxyProvider(Provider):
         self.params = params if params else DEFAULT_PARAM
     def _resolve_auth_mode_and_url(
-        self,
-        explicit_auth_url: str | None
+        self, explicit_auth_url: str | None
     ) -> Tuple[str, str]:
         """
         Returns (auth_mode, auth_url)
@@ -128,32 +157,61 @@ class ModelProxyProvider(Provider):
         exchange_url = self.auth_url
         if self.auth_mode == "ibm_iam":
-            headers = {"Accept": "application/json", "Content-Type": "application/x-www-form-urlencoded"}
+            headers = {
+                "Accept": "application/json",
+                "Content-Type": "application/x-www-form-urlencoded",
+            }
             form_data = {
                 "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
-                "apikey": self.api_key
+                "apikey": self.api_key,
             }
             post_args = {"data": form_data}
-            resp = requests.post(exchange_url, headers=headers, timeout=timeout, verify=self._wo_ssl_verify, **post_args)
+            resp = requests.post(
+                exchange_url,
+                headers=headers,
+                timeout=timeout,
+                verify=self._wo_ssl_verify,
+                **post_args,
+            )
         elif self.auth_mode == "cpd":
-            headers = {"Accept": "application/json", "Content-Type": "application/json"}
+            headers = {
+                "Accept": "application/json",
+                "Content-Type": "application/json",
+            }
             body = {"username": self.username}
             if self.password:
                 body["password"] = self.password
             else:
                 body["api_key"] = self.api_key
             timeout = self.timeout
-            resp = requests.post(exchange_url, headers=headers, json=body, timeout=timeout, verify=self._wo_ssl_verify)
+            resp = requests.post(
+                exchange_url,
+                headers=headers,
+                json=body,
+                timeout=timeout,
+                verify=self._wo_ssl_verify,
+            )
         else:
-            headers = {"Accept": "application/json", "Content-Type": "application/json"}
+            headers = {
+                "Accept": "application/json",
+                "Content-Type": "application/json",
+            }
             post_args = {"json": {"apikey": self.api_key}}
-            resp = requests.post(exchange_url, headers=headers, timeout=timeout, verify=self._wo_ssl_verify, **post_args)
+            resp = requests.post(
+                exchange_url,
+                headers=headers,
+                timeout=timeout,
+                verify=self._wo_ssl_verify,
+                **post_args,
+            )
         if resp.status_code == 200:
             json_obj = resp.json()
             token = json_obj.get("access_token") or json_obj.get("token")
             if not token:
-                raise RuntimeError(f"No token field found in response: {json_obj!r}")
+                raise RuntimeError(
+                    f"No token field found in response: {json_obj!r}"
+                )
             expires_in = json_obj.get("expires_in")
             try:
@@ -179,13 +237,24 @@ class ModelProxyProvider(Provider):
     def encode(self, sentences: List[str]) -> List[list]:
         if self.embedding_model_id is None:
-            raise Exception("embedding model id must be specified for text generation")
+            raise Exception(
+                "embedding model id must be specified for text generation"
+            )
         self.refresh_token_if_expires()
         headers = self.get_header()
-        payload = {"inputs": sentences, "model_id": self.embedding_model_id, "space_id": self.space_id}
-                   #"timeout": self.timeout}
-        resp = requests.post(self.embedding_url, json=payload, headers=headers, verify=self._wo_ssl_verify)
+        payload = {
+            "inputs": sentences,
+            "model_id": self.embedding_model_id,
+            "space_id": self.space_id,
+        }
+        # "timeout": self.timeout}
+        resp = requests.post(
+            self.embedding_url,
+            json=payload,
+            headers=headers,
+            verify=self._wo_ssl_verify,
+        )
         if resp.status_code == 200:
             json_obj = resp.json()
@@ -198,9 +267,16 @@ class ModelProxyProvider(Provider):
             raise Exception("model id must be specified for text generation")
         self.refresh_token_if_expires()
         headers = self.get_header()
-        payload = {"input": sentence, "model_id": self.model_id, "space_id": self.space_id,
-                   "timeout": self.timeout, "parameters": self.params}
-        resp = requests.post(self.url, json=payload, headers=headers, verify=self._wo_ssl_verify)
+        payload = {
+            "input": sentence,
+            "model_id": self.model_id,
+            "space_id": self.space_id,
+            "timeout": self.timeout,
+            "parameters": self.params,
+        }
+        resp = requests.post(
+            self.url, json=payload, headers=headers, verify=self._wo_ssl_verify
+        )
         if resp.status_code == 200:
             return resp.json()["results"][0]["generated_text"]
@@ -208,5 +284,8 @@ class ModelProxyProvider(Provider):
 if __name__ == "__main__":
-    provider = ModelProxyProvider(model_id="meta-llama/llama-3-3-70b-instruct", embedding_model_id="ibm/slate-30m-english-rtrvr")
-    print(provider.query("ok"))
+    provider = ModelProxyProvider(
+        model_id="meta-llama/llama-3-3-70b-instruct",
+        embedding_model_id="ibm/slate-30m-english-rtrvr",
+    )
+    print(provider.query("ok"))

wxo_agentic_evaluation/utils/utils.py CHANGED Viewed

@@ -17,6 +17,7 @@ from wxo_agentic_evaluation.metrics.llm_as_judge import Faithfulness
 from wxo_agentic_evaluation.metrics.metrics import (
     KnowledgeBaseMetricSummary,
     ReferenceLessEvalMetrics,
+    ToolCallAndRoutingMetrics,
 )
 from wxo_agentic_evaluation.type import (
     ConversationalConfidenceThresholdScore,
@@ -376,3 +377,34 @@ def load_agents(agents_path: str):
             agents.append(yaml.safe_load(f))
     return agents
+RUN_FILE_RE = re.compile(
+    r"^(?P<base>.+)\.run(?P<run>\d+)\.(?P<kind>messages(?:\.analyze)?|metrics)\.json$"
+)
+def list_run_files(messages_dir: str, dataset_base: str):
+    """
+    Returns: dict[run_id] -> {"analyze": path|None, "metrics": path|None}
+    (We only need analyze+metrics for this feature.)
+    """
+    runs = {}
+    for fn in os.listdir(messages_dir):
+        m = RUN_FILE_RE.match(fn)
+        if not m or m.group("base") != dataset_base:
+            continue
+        run_id = int(m.group("run"))
+        kind = m.group("kind")
+        entry = runs.setdefault(run_id, {"analyze": None, "metrics": None})
+        full = os.path.join(messages_dir, fn)
+        if kind == "messages.analyze":
+            entry["analyze"] = full
+        elif kind == "metrics":
+            entry["metrics"] = full
+    return runs
+def load_run_metrics(metrics_path: str) -> ToolCallAndRoutingMetrics:
+    with open(metrics_path, "r", encoding="utf-8") as f:
+        return ToolCallAndRoutingMetrics(**json.load(f))

{ibm_watsonx_orchestrate_evaluation_framework-1.1.2.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{ibm_watsonx_orchestrate_evaluation_framework-1.1.2.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

ibm-watsonx-orchestrate-evaluation-framework 1.1.2__py3-none-any.whl → 1.1.4__py3-none-any.whl

Potentially problematic release.

ibm-watsonx-orchestrate-evaluation-framework 1.1.2__py3-none-any.whl → 1.1.4__py3-none-any.whl