PyPI - ibm-watsonx-orchestrate-evaluation-framework - Versions diffs - 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

ibm-watsonx-orchestrate-evaluation-framework 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ibm-watsonx-orchestrate-evaluation-framework might be problematic. Click here for more details.

Files changed (10) hide show

{ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ibm-watsonx-orchestrate-evaluation-framework
-Version: 1.0.9
+Version: 1.1.1
 Summary: The WxO evaluation framework
 Author-email: Haode Qi <Haode.Qi@ibm.com>
 License: MIT
@@ -13,6 +13,7 @@ Requires-Dist: jinja2~=3.1.5
 Requires-Dist: python-dotenv
 Requires-Dist: dataclasses-json~=0.6.7
 Requires-Dist: jsonargparse~=4.37.0
+Requires-Dist: jsonschema~=4.23.0
 Provides-Extra: dev
 Requires-Dist: setuptools~=70.3.0; extra == "dev"
 Requires-Dist: pytest<9.0.0,>=8.3.4; extra == "dev"

{ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info}/RECORD RENAMED Viewed

@@ -53,7 +53,7 @@ wxo_agentic_evaluation/red_teaming/attack_generator.py,sha256=YQi9xoaFATBNGe_Neb
 wxo_agentic_evaluation/red_teaming/attack_list.py,sha256=edphWARWqDtXFtcHTVbRXngvO0YfG5SgrfPtrBRXuFw,4734
 wxo_agentic_evaluation/red_teaming/attack_runner.py,sha256=qBZY4GK1352NUMyED5LVjjbcvpdCcxG6mDIN1HvxKIc,4340
 wxo_agentic_evaluation/referenceless_eval/__init__.py,sha256=lijXMgQ8nQe-9eIfade2jLfHMlXfYafMZIwXtC9KDZo,106
-wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py,sha256=82jutvZ21GO76rRcjGWux5kAanDtzE728BijC7trSxY,4297
+wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py,sha256=ypEMOeAwaztGkOuDr_2JArSQWwos7XcBTwo8lFs2N5w,4262
 wxo_agentic_evaluation/referenceless_eval/function_calling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 wxo_agentic_evaluation/referenceless_eval/function_calling/consts.py,sha256=UidTaT9g5IxbcakfQqP_9c5civ1wDqY-PpPUf0uOXJo,915
 wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -67,7 +67,7 @@ wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_sele
 wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics_runtime.json,sha256=o4oRur1MiXO2RYzmzj07QOBzX75DyU7T7yd-gFsgFdo,30563
 wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/adapters.py,sha256=kMMFq4ABX5q6cPnDdublLMVqXu4Ij-x4OlxZyePWIjc,3599
-wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py,sha256=rKKBtL47gvN_fFy3FVbQTaa0U1bhv8bls0HPZi66EZ8,9279
+wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py,sha256=44HNEoIt3_jKZczs1qB8WGltCG-vn3ZI5aNhucxSDeM,9272
 wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/semantic_checker.py,sha256=z_k-qdFoUJqstkPYn9Zmhlp2YTVQKJtoDZCIdKow664,17306
 wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/static_checker.py,sha256=_Er2KfCkc3HFmOmxZT6eb-e7qF7ukqsf6Si5CJTqPPg,6016
 wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/transformation_prompts.py,sha256=tW1wc87WIm8BZh2lhdj1RDP6VdRLqZBWSMmemSttbGs,22034
@@ -80,17 +80,18 @@ wxo_agentic_evaluation/referenceless_eval/metrics/prompt.py,sha256=CGQ5LvhQrmxAy
 wxo_agentic_evaluation/referenceless_eval/metrics/utils.py,sha256=jurmc4KFFKH4hwnvor2xg97H91b-xJc3cUKYaU2I8uM,1370
 wxo_agentic_evaluation/referenceless_eval/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 wxo_agentic_evaluation/referenceless_eval/prompt/runner.py,sha256=FFmcSWXQnLmylpYyj8LZuPwb6nqwQp-jj6Mv9g8zby0,5052
-wxo_agentic_evaluation/service_provider/__init__.py,sha256=EaY4jjKp58M3W8N3b3a8PNC2S81xA7YV2_QkTIy9DfI,1600
-wxo_agentic_evaluation/service_provider/model_proxy_provider.py,sha256=Y36Ryv4nPG8RdVP_zsQsRlEWv8F_hGi7-wOppWPQTwc,4026
+wxo_agentic_evaluation/service_provider/__init__.py,sha256=yNQ-urOIdjANbpCzVAhkPHNcpBY6hndDJgPZM1C2qeo,2107
+wxo_agentic_evaluation/service_provider/model_proxy_provider.py,sha256=EW1JIiIWoKaTTC-fqKURSsbdyo-dbVWYVrXY8-gEmvc,4081
 wxo_agentic_evaluation/service_provider/ollama_provider.py,sha256=HMHQVUGFbLSQI1dhysAn70ozJl90yRg-CbNd4vsz-Dc,1116
 wxo_agentic_evaluation/service_provider/provider.py,sha256=MsnRzLYAaQiU6y6xf6eId7kn6-CetQuNZl00EP-Nl28,417
-wxo_agentic_evaluation/service_provider/watsonx_provider.py,sha256=C_uezsx4FHqkvtCIQGIii82nu9_vSOKP70uAVXWj5fM,10619
+wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py,sha256=aJrCz8uco6HOQwNCSjEKviwnhlyLTNAGpLtsOAegQ70,5200
+wxo_agentic_evaluation/service_provider/watsonx_provider.py,sha256=ugXCXwrfi_XC2d9FPa96ccMKGQbTd1ElDw8RNR8TDB8,6544
 wxo_agentic_evaluation/utils/__init__.py,sha256=ItryTgc1jVc32rB3XktTFaYGA_A6bRIDZ1Pts_JGmv8,144
 wxo_agentic_evaluation/utils/open_ai_tool_extractor.py,sha256=Vyji_edgou2xMLbsGwFG-QI7xRBNvO3-1nbeOc8ZuFo,5646
 wxo_agentic_evaluation/utils/rich_utils.py,sha256=J9lzL4ETQeiAJcXKsUzXh82XdKvlDY7jmcgTQlwmL9s,6252
 wxo_agentic_evaluation/utils/rouge_score.py,sha256=WvcGh6mwF4rWH599J9_lAt3BfaHbAZKtKEJBsC61iKo,692
 wxo_agentic_evaluation/utils/utils.py,sha256=qQR_2W5p0Rk6KSE3-llRyZrWXkO5zG9JW7H1692L4PI,11428
-ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info/METADATA,sha256=OgVKCRnQdk7cYnzrexY8VPPgQW73uHdX2jfI9a7o5IE,16105
-ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info/top_level.txt,sha256=2okpqtpxyqHoLyb2msio4pzqSg7yPSzwI7ekks96wYE,23
-ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info/RECORD,,
+ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info/METADATA,sha256=9Na_jkG3ZSaXewhsm8llDVuHsYuCt6or78Ww5y2XVrE,16139
+ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info/top_level.txt,sha256=2okpqtpxyqHoLyb2msio4pzqSg7yPSzwI7ekks96wYE,23
+ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info/RECORD,,

wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py CHANGED Viewed

@@ -20,7 +20,7 @@ from wxo_agentic_evaluation.referenceless_eval.function_calling.pipeline.types i
     ToolSpec,
 )
 from wxo_agentic_evaluation.referenceless_eval.function_calling import metrics
-from wxo_agentic_evaluation.service_provider.watsonx_provider import WatsonXLLMKitWrapper
+from wxo_agentic_evaluation.service_provider.referenceless_provider_wrapper import LLMKitWrapper
 def metrics_dir():
     path = importlib.resources.files(metrics)
@@ -57,8 +57,8 @@ class ReflectionPipeline:
     def __init__(
         self,
-        metrics_client: WatsonXLLMKitWrapper,
-        codegen_client: Optional[WatsonXLLMKitWrapper] = None,
+        metrics_client: LLMKitWrapper,
+        codegen_client: Optional[LLMKitWrapper] = None,
         general_metrics: Optional[
             Union[Path, List[FunctionCallMetric], List[str]]
         ] = _DEFAULT_GENERAL_RUNTIME,

wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import json
-import os
 from typing import Any, List, Mapping
 import rich
@@ -16,7 +15,7 @@ from wxo_agentic_evaluation.referenceless_eval.function_calling.pipeline.types i
     ToolSpec,
 )
 from wxo_agentic_evaluation.type import Message
-from wxo_agentic_evaluation.service_provider.watsonx_provider import WatsonXLLMKitWrapper
+from wxo_agentic_evaluation.service_provider import get_provider
 class ReferencelessEvaluation:
     """
@@ -33,12 +32,12 @@ class ReferencelessEvaluation:
         messages: List[Message],
         model_id: str,
         task_n: str,
-        dataset_name: str,
-    ):
-        self.metrics_client = WatsonXLLMKitWrapper(
+        dataset_name: str,):
+        self.metrics_client = get_provider(
             model_id=model_id,
-            api_key=os.getenv("WATSONX_APIKEY", ""),
-            space_id=os.getenv("WATSONX_SPACE_ID")
+            params={"min_new_tokens": 0, "decoding_method": "greedy", "max_new_tokens": 4096},
+            referenceless_eval=True
         )
         self.pipeline = ReflectionPipeline(
@@ -57,7 +56,6 @@ class ReferencelessEvaluation:
     def _run_pipeline(self, examples: List[Mapping[str, Any]]):
         results = []
         for example in examples:
-            # self.pipeline.sy
             result = self.pipeline.run_sync(
                 conversation=example["context"],
                 inventory=self.apis_specs,

wxo_agentic_evaluation/service_provider/__init__.py CHANGED Viewed

@@ -1,21 +1,30 @@
 from wxo_agentic_evaluation.service_provider.ollama_provider import OllamaProvider
 from wxo_agentic_evaluation.service_provider.watsonx_provider import WatsonXProvider
 from wxo_agentic_evaluation.service_provider.model_proxy_provider import ModelProxyProvider
+from wxo_agentic_evaluation.service_provider.referenceless_provider_wrapper import ModelProxyProviderLLMKitWrapper, WatsonXLLMKitWrapper
 from wxo_agentic_evaluation.arg_configs import ProviderConfig
 import os
-def _instantiate_provider(config: ProviderConfig, **kwargs):
+def _instantiate_provider(config: ProviderConfig, is_referenceless_eval: bool = False, **kwargs):
     if config.provider == "watsonx":
-        return WatsonXProvider(model_id=config.model_id, **kwargs)
+        if is_referenceless_eval:
+            provider = WatsonXLLMKitWrapper
+        else:
+            provider = WatsonXProvider
+        return provider(model_id=config.model_id, **kwargs)
     elif config.provider == "ollama":
         return OllamaProvider(model_id=config.model_id, **kwargs)
     elif config.provider == "model_proxy":
-        return ModelProxyProvider(model_id=config.model_id, **kwargs)
+        if is_referenceless_eval:
+            provider = ModelProxyProviderLLMKitWrapper
+        else:
+            provider = ModelProxyProvider
+        return provider(model_id=config.model_id, **kwargs)
     else:
         raise RuntimeError(f"target provider is not supported {config.provider}")
-def get_provider(config: ProviderConfig = None, model_id: str = None, **kwargs):
+def get_provider(config: ProviderConfig = None, model_id: str = None, referenceless_eval: bool = False, **kwargs):
     if config:
         return _instantiate_provider(config, **kwargs)
@@ -24,11 +33,11 @@ def get_provider(config: ProviderConfig = None, model_id: str = None, **kwargs):
     if "WATSONX_APIKEY" in os.environ and "WATSONX_SPACE_ID" in os.environ:
         config = ProviderConfig(provider="watsonx", model_id=model_id)
-        return _instantiate_provider(config, **kwargs)
+        return _instantiate_provider(config, referenceless_eval, **kwargs)
     if "WO_API_KEY" in os.environ and "WO_INSTANCE" in os.environ:
         config = ProviderConfig(provider="model_proxy", model_id=model_id)
-        return _instantiate_provider(config, **kwargs)
+        return _instantiate_provider(config, referenceless_eval, **kwargs)
     raise RuntimeError(
         "No provider found. Please either provide a config or set the required environment variables."

wxo_agentic_evaluation/service_provider/model_proxy_provider.py CHANGED Viewed

@@ -38,9 +38,10 @@ class ModelProxyProvider(Provider):
         self.api_key = api_key
         self.is_ibm_cloud = is_ibm_cloud_url(instance_url)
         self.auth_url = AUTH_ENDPOINT_IBM_CLOUD if self.is_ibm_cloud else AUTH_ENDPOINT_AWS
-        self.url = instance_url + "/ml/v1/text/generation?version=2024-05-01"
-        self.embedding_url = instance_url + "/ml/v1/text/embeddings"
+        self.instance_url = instance_url
+        self.url = self.instance_url + "/ml/v1/text/generation?version=2024-05-01"
+        self.embedding_url = self.instance_url + "/ml/v1/text/embeddings"
         self.lock = Lock()
         self.token, self.refresh_time = self.get_token()

wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py ADDED Viewed

@@ -0,0 +1,138 @@
+import requests
+from typing import List, Mapping, Union, Optional, Any
+from abc import ABC, abstractmethod
+import rich
+from wxo_agentic_evaluation.service_provider.model_proxy_provider import ModelProxyProvider
+from wxo_agentic_evaluation.service_provider.watsonx_provider import WatsonXProvider
+class LLMResponse:
+    """
+    NOTE: Taken from LLM-Eval-Kit
+    Response object that can contain both content and tool calls
+    """
+    def __init__(self, content: str, tool_calls: Optional[List[Mapping[str, Any]]] = None):
+        self.content = content
+        self.tool_calls = tool_calls or []
+    def __str__(self) -> str:
+        """Return the content of the response as a string."""
+        return self.content
+    def __repr__(self) -> str:
+        """Return a string representation of the LLMResponse object."""
+        return f"LLMResponse(content='{self.content}', tool_calls={self.tool_calls})"
+class LLMKitWrapper(ABC):
+    """ In the future this wrapper won't be neccesary.
+    Right now the referenceless code requires a `generate()` function for the metrics client.
+    In refactor, rewrite referenceless code so this wrapper is not needed.
+    """
+    @abstractmethod
+    def chat():
+        pass
+    def generate(
+            self,
+            prompt: Union[str, List[Mapping[str, str]]],
+            *,
+            schema,
+            retries: int = 3,
+            generation_args: Optional[Any] = None,
+            **kwargs: Any
+        ):
+        """
+        In future, implement validation of response like in llmevalkit
+        """
+        for attempt in range(1, retries + 1):
+            try:
+                raw_response = self.chat(prompt)
+                response = self._parse_llm_response(raw_response)
+                return response
+            except Exception as e:
+                rich.print(f"[b][r] Generation failed with error '{str(e)}' during `quick-eval` ... Attempt ({attempt} / {retries}))")
+    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
+        """
+        Extract the generated text and tool calls from a watsonx response.
+        - For text generation: raw['results'][0]['generated_text']
+        - For chat:           raw['choices'][0]['message']['content']
+        """
+        content = ""
+        tool_calls = []
+        if isinstance(raw, dict) and "choices" in raw:
+            choices = raw["choices"]
+            if isinstance(choices, list) and choices:
+                first = choices[0]
+                msg = first.get("message")
+                if isinstance(msg, dict):
+                    content = msg.get("content", "")
+                    # Extract tool calls if present
+                    if "tool_calls" in msg and msg["tool_calls"]:
+                        tool_calls = []
+                        for tool_call in msg["tool_calls"]:
+                            tool_call_dict = {
+                                "id": tool_call.get("id"),
+                                "type": tool_call.get("type", "function"),
+                                "function": {
+                                    "name": tool_call.get("function", {}).get("name"),
+                                    "arguments": tool_call.get("function", {}).get(
+                                        "arguments"
+                                    ),
+                                },
+                            }
+                            tool_calls.append(tool_call_dict)
+                elif "text" in first:
+                    content = first["text"]
+        if not content and not tool_calls:
+            raise ValueError(f"Unexpected watsonx response format: {raw!r}")
+        # Return LLMResponse if tool calls exist, otherwise just content
+        if tool_calls:
+            return LLMResponse(content=content, tool_calls=tool_calls)
+        return content
+class ModelProxyProviderLLMKitWrapper(ModelProxyProvider, LLMKitWrapper):
+    def chat(self, sentence: List[str]):
+        if self.model_id is None:
+            raise Exception("model id must be specified for text generation")
+        chat_url = f"{self.instance_url}/ml/v1/text/chat?version=2023-10-25"
+        self.refresh_token_if_expires()
+        headers = self.get_header()
+        data = {
+            "model_id": self.model_id,
+            "messages": sentence,
+            "parameters": self.params,
+            "space_id": "1",
+            "timeout": self.timeout
+        }
+        resp = requests.post(url=chat_url, headers=headers, json=data)
+        if resp.status_code == 200:
+            return resp.json()
+        else:
+            resp.raise_for_status()
+class WatsonXLLMKitWrapper(WatsonXProvider, LLMKitWrapper):
+    def chat(self, sentence: list):
+        chat_url = f"{self.api_endpoint}/ml/v1/text/chat?version=2023-05-02"
+        headers = self.prepare_header()
+        data = {
+            "model_id": self.model_id,
+            "messages": sentence,
+            "parameters": self.params,
+            "space_id": self.space_id
+        }
+        resp = requests.post(url=chat_url, headers=headers, json=data)
+        if resp.status_code == 200:
+            return resp.json()
+        else:
+            resp.raise_for_status()

wxo_agentic_evaluation/service_provider/watsonx_provider.py CHANGED Viewed

@@ -2,12 +2,10 @@ import os
 import requests
 import json
 from types import MappingProxyType
-from typing import List, Mapping, Union, Optional, Any
-from functools import singledispatchmethod
+from typing import List, Mapping, Union
 import dataclasses
 from threading import Lock
 import time
-import rich
 from wxo_agentic_evaluation.service_provider.provider import Provider
 ACCESS_URL = "https://iam.cloud.ibm.com/identity/token"
@@ -90,12 +88,7 @@ class WatsonXProvider(Provider):
                   "Content-Type": "application/json"}
         return headers
-    @singledispatchmethod
-    def generate(self, sentence):
-        raise ValueError(f"Input must either be a string or a list of dictionaries")
-    @generate.register
-    def _(self, sentence: str):
+    def _query(self, sentence: str):
         headers = self.prepare_header()
         data = {"model_id": self.model_id, "input": sentence,
@@ -107,22 +100,6 @@ class WatsonXProvider(Provider):
         else:
             resp.raise_for_status()
-    @generate.register
-    def _(self, sentence: list):
-        chat_url = f"{self.api_endpoint}/ml/v1/text/chat?version=2023-05-02"
-        headers = self.prepare_header()
-        data = {
-            "model_id": self.model_id,
-            "messages": sentence,
-            "parameters": self.params,
-            "space_id": self.space_id
-        }
-        resp = requests.post(url=chat_url, headers=headers, json=data)
-        if resp.status_code == 200:
-            return resp.json()
-        else:
-            resp.raise_for_status()
     def _refresh_token(self):
         # if we do not have a token or the current timestamp is 9 minutes away from expire.
         if not self.access_token or time.time() > self.refresh_time:
@@ -134,7 +111,7 @@ class WatsonXProvider(Provider):
         if self.model_id is None:
             raise Exception("model id must be specified for text generation")
         try:
-            response = self.generate(sentence)
+            response = self._query(sentence)
             if (generated_text := response.get("generated_text")):
                 return generated_text
             elif (message := response.get("message")):
@@ -165,90 +142,6 @@ class WatsonXProvider(Provider):
         else:
             resp.raise_for_status()
-class LLMResponse:
-    """
-    NOTE: Taken from LLM-Eval-Kit
-    Response object that can contain both content and tool calls
-    """
-    def __init__(self, content: str, tool_calls: Optional[List[Mapping[str, Any]]] = None):
-        self.content = content
-        self.tool_calls = tool_calls or []
-    def __str__(self) -> str:
-        """Return the content of the response as a string."""
-        return self.content
-    def __repr__(self) -> str:
-        """Return a string representation of the LLMResponse object."""
-        return f"LLMResponse(content='{self.content}', tool_calls={self.tool_calls})"
-class WatsonXLLMKitWrapper(WatsonXProvider):
-    def generate(
-            self,
-            prompt: Union[str, List[Mapping[str, str]]],
-            *,
-            schema,
-            retries: int = 3,
-            generation_args: Optional[Any] = None,
-            **kwargs: Any
-        ):
-        """
-        In future, implement validation of response like in llmevalkit
-        """
-        for attempt in range(1, retries + 1):
-            try:
-                raw_response = super().generate(prompt)
-                response = self._parse_llm_response(raw_response)
-                return response
-            except Exception as e:
-                rich.print(f"[b][r] WatsonX generation failed with error '{str(e)}' during `quick-eval` ... Attempt ({attempt} / {retries}))")
-    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
-        """
-        Extract the generated text and tool calls from a watsonx response.
-        - For text generation: raw['results'][0]['generated_text']
-        - For chat:           raw['choices'][0]['message']['content']
-        """
-        content = ""
-        tool_calls = []
-        if isinstance(raw, dict) and "choices" in raw:
-            choices = raw["choices"]
-            if isinstance(choices, list) and choices:
-                first = choices[0]
-                msg = first.get("message")
-                if isinstance(msg, dict):
-                    content = msg.get("content", "")
-                    # Extract tool calls if present
-                    if "tool_calls" in msg and msg["tool_calls"]:
-                        tool_calls = []
-                        for tool_call in msg["tool_calls"]:
-                            tool_call_dict = {
-                                "id": tool_call.get("id"),
-                                "type": tool_call.get("type", "function"),
-                                "function": {
-                                    "name": tool_call.get("function", {}).get("name"),
-                                    "arguments": tool_call.get("function", {}).get(
-                                        "arguments"
-                                    ),
-                                },
-                            }
-                            tool_calls.append(tool_call_dict)
-                elif "text" in first:
-                    content = first["text"]
-        if not content and not tool_calls:
-            raise ValueError(f"Unexpected watsonx response format: {raw!r}")
-        # Return LLMResponse if tool calls exist, otherwise just content
-        if tool_calls:
-            return LLMResponse(content=content, tool_calls=tool_calls)
-        return content
 if __name__ == "__main__":
     provider = WatsonXProvider(model_id="meta-llama/llama-3-2-90b-vision-instruct")

{ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{ibm_watsonx_orchestrate_evaluation_framework-1.0.9.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

ibm-watsonx-orchestrate-evaluation-framework 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl

Potentially problematic release.

ibm-watsonx-orchestrate-evaluation-framework 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl