PyPI - ibm-watsonx-orchestrate-evaluation-framework - Versions diffs - 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl - Mend

ibm-watsonx-orchestrate-evaluation-framework 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

wxo_agentic_evaluation/service_provider/model_proxy_provider.py ADDED Viewed

@@ -0,0 +1,108 @@
+import os
+import requests
+import time
+from typing import List
+from threading import Lock
+from wxo_agentic_evaluation.service_provider.provider import Provider
+from wxo_agentic_evaluation.utils.utils import is_ibm_cloud_url
+AUTH_ENDPOINT_AWS = "https://iam.platform.saas.ibm.com/siusermgr/api/1.0/apikeys/token"  # token endpoint used when the instance URL is not classified as IBM Cloud
+AUTH_ENDPOINT_IBM_CLOUD = "https://iam.cloud.ibm.com/identity/token"  # token endpoint used when is_ibm_cloud_url(instance_url) is truthy
+WO_INSTANCE = os.environ.get("WO_INSTANCE")  # read once at import time; None when the variable is unset
+WO_API_KEY = os.environ.get("WO_API_KEY")  # read once at import time; None when the variable is unset
+DEFAULT_PARAM = {"min_new_tokens": 1, "decoding_method": "greedy", "max_new_tokens": 400}  # generation parameters used when the caller supplies no params
+class ModelProxyProvider(Provider):  # Provider that POSTs to the text generation / embeddings URLs under instance_url using a bearer token
+    def __init__(  # fetches a first token via get_token(), so constructing an instance performs an HTTP request
+        self,
+        model_id=None,
+        api_key=WO_API_KEY,  # default is the WO_API_KEY value captured when the module was imported
+        instance_url=WO_INSTANCE,  # default is the WO_INSTANCE value captured when the module was imported
+        timeout=300,
+        embedding_model_id=None,
+        params=None
+    ):
+        super().__init__()
+        if not instance_url or not api_key:
+            raise RuntimeError("instance url and WO apikey must be specified to use WO model proxy")
+        self.timeout = timeout  # only forwarded inside the query() JSON payload; none of the requests.post calls in this class pass timeout=
+        self.model_id = model_id
+        self.embedding_model_id = embedding_model_id
+        self.api_key = api_key
+        self.is_ibm_cloud = is_ibm_cloud_url(instance_url)  # decides both the auth endpoint and the token request format
+        self.auth_url = AUTH_ENDPOINT_IBM_CLOUD if self.is_ibm_cloud else AUTH_ENDPOINT_AWS
+        self.url = instance_url + "/ml/v1/text/generation?version=2024-05-01"
+        self.embedding_url = instance_url + "/ml/v1/text/embeddings"
+        self.lock = Lock()  # serializes the token refresh in refresh_token_if_expires
+        self.token, self.refresh_time = self.get_token()
+        self.params = params if params else DEFAULT_PARAM  # any falsy params (None or {}) falls back to the shared module-level dict, not a copy
+    def get_token(self):  # returns (token, refresh_time) on HTTP 200; otherwise falls through to resp.raise_for_status()
+        if self.is_ibm_cloud:
+            payload = {"grant_type": "urn:ibm:params:oauth:grant-type:apikey", "apikey": self.api_key}
+            resp = requests.post(self.auth_url, data=payload)  # IBM Cloud branch: payload sent form-encoded (data=)
+            token_key = "access_token"
+        else:
+            payload = {"apikey": self.api_key}
+            resp = requests.post(self.auth_url, json=payload)  # other branch: payload sent as JSON (json=)
+            token_key = "token"
+        if resp.status_code == 200:
+            json_obj = resp.json()
+            token = json_obj[token_key]
+            expires_in = json_obj["expires_in"]
+            refresh_time = time.time() + int(0.8*expires_in)  # schedule the refresh at 80% of the reported lifetime
+            return token, refresh_time
+        resp.raise_for_status()  # NOTE(review): for a non-200 status that requests does not treat as an error this returns None and the tuple unpacking in callers fails — confirm
+    def refresh_token_if_expires(self):  # replaces token/refresh_time once the current time has passed refresh_time
+        if time.time() > self.refresh_time:
+            with self.lock:
+                if time.time() > self.refresh_time:  # checked again after acquiring the lock, so a caller that waited does not fetch a second token
+                    self.token, self.refresh_time = self.get_token()
+    def get_header(self):  # Authorization header built from the currently stored token
+        return {"Authorization": f"Bearer {self.token}"}
+    def encode(self, sentences: List[str]) -> List[list]:  # POSTs the sentences to embedding_url after refreshing the token if due
+        if self.embedding_model_id is None:
+            raise Exception("embedding model id must be specified for text generation")  # NOTE(review): message says "text generation" although this guards the embedding path
+        self.refresh_token_if_expires()
+        headers = self.get_header()
+        payload = {"inputs": sentences, "model_id": self.embedding_model_id, "space_id": "1"}
+                   #"timeout": self.timeout}
+        resp = requests.post(self.embedding_url, json=payload, headers=headers)
+        if resp.status_code == 200:
+            json_obj = resp.json()
+            return json_obj["generated_text"]  # NOTE(review): reads "generated_text" from an embeddings response, while WatsonXProvider.encode in this diff reads entry["embedding"] from "results" — confirm the response schema
+        resp.raise_for_status()
+    def query(self, sentence: str) -> str:  # POSTs one prompt to the generation URL and returns results[0]["generated_text"]
+        if self.model_id is None:
+            raise Exception("model id must be specified for text generation")
+        self.refresh_token_if_expires()
+        headers = self.get_header()
+        payload = {"input": sentence, "model_id": self.model_id, "space_id": "1",
+                   "timeout": self.timeout, "parameters": self.params}  # timeout travels in the request body only
+        resp = requests.post(self.url, json=payload, headers=headers)
+        if resp.status_code == 200:
+            return resp.json()["results"][0]["generated_text"]
+        resp.raise_for_status()
+if __name__ == "__main__":  # manual smoke run: relies on the WO_INSTANCE / WO_API_KEY defaults and performs real HTTP requests
+    provider = ModelProxyProvider(model_id="meta-llama/llama-3-3-70b-instruct", embedding_model_id="ibm/slate-30m-english-rtrvr")
+    print(provider.query("ok"))

wxo_agentic_evaluation/service_provider/ollama_provider.py ADDED Viewed

@@ -0,0 +1,40 @@
+import requests
+import json
+from wxo_agentic_evaluation.service_provider.provider import Provider
+from typing import List
+import os
+OLLAMA_URL = os.environ.get("OLLAMA_HOST", "http://localhost:11434")  # base URL read once at import time; falls back to the localhost address when OLLAMA_HOST is unset
+class OllamaProvider(Provider):  # Provider that sends prompts to OLLAMA_URL + "/api/generate" and concatenates the streamed response fragments
+    def __init__(
+        self,
+        model_id=None
+    ):
+        self.url = OLLAMA_URL + "/api/generate"
+        self.model_id = model_id
+        super().__init__()
+    def query(self, sentence: str) -> str:  # returns the concatenation of every "response" fragment received before the "done" object
+        payload = {"model": self.model_id, "prompt": sentence}
+        resp = requests.post(self.url, json=payload, stream=True)  # streamed; the HTTP status code is never checked
+        final_text = ""
+        data = b''  # raw bytes accumulated until the buffer ends with a newline
+        for chunk in resp:
+            data += chunk
+            if data.endswith(b'\n'):
+                json_obj = json.loads(data)  # NOTE(review): parses the whole buffer as one JSON document; presumably breaks if the buffer holds two newline-separated objects — confirm, resp.iter_lines() would avoid depending on chunk boundaries
+                if not json_obj["done"] and json_obj["response"]:  # keep non-empty fragments from objects not flagged done
+                    final_text += json_obj["response"]
+                data = b''
+        return final_text  # bytes still buffered without a trailing newline are discarded
+    def encode(self, sentences: List[str]) -> List[list]:  # not implemented: the body is `pass`, so this returns None
+        pass
+if __name__ == "__main__":  # manual smoke run against the server at OLLAMA_URL
+    provider = OllamaProvider(model_id="llama3.1:8b")
+    print(provider.query("ok"))

wxo_agentic_evaluation/service_provider/provider.py ADDED Viewed

@@ -0,0 +1,19 @@
+from abc import ABC, abstractmethod
+from typing import List
+class Provider(ABC):  # abstract base for model providers: subclasses must implement query() and encode()
+    def __init__(self):
+        pass
+    @abstractmethod
+    def query(self, sentence: str) -> str:  # abstract; annotated as one prompt string in, one string out
+        pass
+    def batch_query(self, sentences: List[str]) -> List[str]:  # default implementation: one query() call per sentence, results in input order
+        return [self.query(sentence) for sentence in sentences]
+    @abstractmethod
+    def encode(self, sentences: List[str]) -> List[list]:  # abstract; annotated as a list of strings in, a list of lists out
+        pass

wxo_agentic_evaluation/{watsonx_provider.py → service_provider/watsonx_provider.py} RENAMED Viewed

@@ -4,10 +4,9 @@ import json
 from types import MappingProxyType
 from typing import List
 import dataclasses
-from ibm_watsonx_ai.foundation_models import ModelInference, Embeddings
-from ibm_watsonx_ai.credentials import Credentials
 from threading import Lock
+import time
+from wxo_agentic_evaluation.service_provider.provider import Provider
 ACCESS_URL = "https://iam.cloud.ibm.com/identity/token"
 ACCESS_HEADER = {
@@ -18,11 +17,11 @@ ACCESS_HEADER = {
 YPQA_URL = "https://yp-qa.ml.cloud.ibm.com"
 PROD_URL = "https://us-south.ml.cloud.ibm.com"
 DEFAULT_PARAM = MappingProxyType(
-    {"min_new_tokens": 0, "decoding_method": "greedy", "max_new_tokens": 100}
+    {"min_new_tokens": 1, "decoding_method": "greedy", "max_new_tokens": 400}
 )
-class WatsonXProvider:
+class WatsonXProvider(Provider):
     def __init__(
         self,
         model_id=None,
@@ -31,7 +30,7 @@ class WatsonXProvider:
         api_endpoint=PROD_URL,
         url=ACCESS_URL,
         timeout=60,
-        llm_decode_parameter=DEFAULT_PARAM,
+        params=None,
         embedding_model_id=None,
     ):
         super().__init__()
@@ -56,12 +55,15 @@ class WatsonXProvider:
         self.embedding_model_id = embedding_model_id
         self.lock = Lock()
-        if isinstance(llm_decode_parameter, MappingProxyType):
-            llm_decode_parameter = dict(llm_decode_parameter)
-        if dataclasses.is_dataclass(llm_decode_parameter):
-            llm_decode_parameter = dataclasses.asdict(llm_decode_parameter)
+        self.params = params if params else DEFAULT_PARAM
+        if isinstance(self.params, MappingProxyType):
+            self.params = dict(self.params)
+        if dataclasses.is_dataclass(self.params):
+            self.params = dataclasses.asdict(self.params)
-        self.decode_param = llm_decode_parameter
+        self.refresh_time = None
+        self.access_token = None
         self._refresh_token()
     def _get_access_token(self):
@@ -71,75 +73,70 @@ class WatsonXProvider:
         if response.status_code == 200:
             token_data = json.loads(response.text)
             token = token_data["access_token"]
-            return token
+            expiration = token_data["expiration"]
+            expires_in = token_data["expires_in"]
+            # 9 minutes before expire
+            refresh_time = expiration - int(0.15 * expires_in)
+            return token, refresh_time
         raise RuntimeError(
             f"try to acquire access token and get {response.status_code}"
         )
-    def _refresh_token(self):
-        self.access_token = self._get_access_token()
-        if self.embedding_model_id is not None:
-            self.embedding_client = Embeddings(
-                model_id=self.embedding_model_id,
-                credentials=Credentials(token=self.access_token, url=self.api_endpoint),
-                space_id=self.space_id,
-            )
-        else:
-            self.embedding_client = None
-        if self.model_id is not None:
-            self.client = ModelInference(
-                model_id=self.model_id,
-                params=self.decode_param,
-                credentials=Credentials(token=self.access_token, url=self.api_endpoint),
-                space_id=self.space_id,
-            )
+    def prepare_header(self):  # builds the JSON request headers from the currently stored access_token
+        headers = {"Authorization": f"Bearer {self.access_token}",
+                  "Content-Type": "application/json"}
+        return headers
+    def generate(self, sentence: str):  # POSTs one prompt to the text generation endpoint and returns results[0] of the JSON response
+        headers = self.prepare_header()  # uses whatever token is stored; this method does not call _refresh_token itself
+        data = {"model_id": self.model_id, "input": sentence,
+                "parameters": self.params, "space_id": self.space_id}
+        generation_url = f"{self.api_endpoint}/ml/v1/text/generation?version=2023-05-02"
+        resp = requests.post(url=generation_url, headers=headers, json=data)  # no timeout= argument is passed to requests here
+        if resp.status_code == 200:
+            return resp.json()["results"][0]
         else:
-            self.client = None
+            resp.raise_for_status()
-    def query(self, sentence: str) -> dict:
-        if self.model_id is None:
-            raise Exception("model id must be specified for text generation")
-        try:
-            return self.client.generate([sentence])[0][
-                "results"
-            ][  # pylint: disable=E1136
-                0
-            ]
-        except Exception as e:
+    def _refresh_token(self):  # fetches a new token only when none is stored or the current time has passed refresh_time
+        # if we do not have a token or the current timestamp is 9 minutes away from expire.
+        if not self.access_token or time.time() > self.refresh_time:
             with self.lock:
-                if "authentication_token_expired" in str(e):
-                    self._refresh_token()
-                raise e
+                if not self.access_token or time.time() > self.refresh_time:  # checked again after acquiring the lock
+                    self.access_token, self.refresh_time = self._get_access_token()
-    def batch_query(self, sentences: List[str]) -> List[dict]:
+    def query(self, sentence: str) -> str:  # returns the "generated_text" of a single generate() call
         if self.model_id is None:
             raise Exception("model id must be specified for text generation")
         try:
-            outputs = self.client.generate(sentences)
-            outputs = [output["results"][0] for output in outputs]
-            return outputs
+            return self.generate(sentence)["generated_text"]
         except Exception as e:
             with self.lock:
                 if "authentication_token_expired" in str(e):
                     self._refresh_token()  # NOTE(review): called while self.lock is held; the _refresh_token added in this diff also enters `with self.lock`, and self.lock is a threading.Lock (not reentrant) — confirm this path cannot block
                 raise e  # the original error is re-raised even after a refresh; the request is not retried
-        # pylint: disable=E1133
-        return []
+    def batch_query(self, sentences: List[str]) -> List[dict]:  # NOTE(review): annotated List[dict], but query() is annotated to return str
+        return [self.query(sentence) for sentence in sentences]  # sequential: one query() call per sentence, results in input order
     def encode(self, sentences: List[str]) -> List[list]:  # POSTs the sentences to the embeddings endpoint and returns one "embedding" entry per result
         if self.embedding_model_id is None:
             raise Exception("embedding model id must be specified for text encoding")
-        output = self.embedding_client.generate(sentences)
-        return [entry["embedding"] for entry in output["results"]]
+        headers = self.prepare_header()
+        url = f"{self.api_endpoint}/ml/v1/text/embeddings?version=2023-10-25"
-if __name__ == "__main__":
-    import os
+        data = {"inputs": sentences, "model_id": self.model_id, "space_id": self.space_id}  # NOTE(review): sends self.model_id although the guard above checks self.embedding_model_id — presumably the embedding model id was intended; confirm
+        resp = requests.post(url=url, headers=headers, json=data)
+        if resp.status_code == 200:
+            return [entry["embedding"] for entry in resp.json()["results"]]
+        else:
+            resp.raise_for_status()
+if __name__ == "__main__":
     provider = WatsonXProvider(model_id="meta-llama/llama-3-2-90b-vision-instruct")
     prompt = """
@@ -172,4 +169,4 @@ Usernwaters did not take anytime off during the period<|eot_id|>
 <|eot_id|><|start_header_id|>user<|end_header_id|>
 """
-    print(provider.query(prompt))
+    print(provider.query(prompt))

wxo_agentic_evaluation/test_prompt.py ADDED Viewed

@@ -0,0 +1,94 @@
+from wxo_agentic_evaluation.watsonx_provider import WatsonXProvider
+def parse_json_string(input_string):  # scans the text for brace-balanced spans, json.loads each one, and returns the list of objects that parsed
+    json_char_count = 0  # running count of characters attributed to JSON spans
+    json_objects = []
+    current_json = ""
+    brace_level = 0  # braces are counted wherever they occur, including inside JSON string values
+    inside_json = False
+    for i, char in enumerate(input_string):  # the index i is not used
+        if char == "{":
+            brace_level += 1
+            inside_json = True
+            json_char_count += 1
+        if inside_json:
+            current_json += char
+            json_char_count += 1  # an opening brace was already counted just above, so it is counted twice
+        if char == "}":
+            json_char_count += 1  # a closing brace inside a span was already counted just above, so it is counted twice
+            brace_level -= 1  # decremented even outside a span, so a stray "}" leaves the level negative and the next span is not closed at its matching brace
+            if brace_level == 0:
+                inside_json = False
+                try:
+                    json_objects.append(json.loads(current_json))  # json is imported further down this script, before the first call
+                except json.JSONDecodeError as e:
+                    print(f"Error decoding JSON: {e}")  # a span that fails to parse is reported and skipped
+                current_json = ""  # Reset current JSON string
+    # some threshold to say there are some non-funct calling step
+    is_thinking_step = len(input_string) - json_char_count > 10  # computed but neither used nor returned
+    return json_objects
+wai_client = WatsonXProvider(model_id="meta-llama/llama-3-405b-instruct")  # created at import time. NOTE(review): WatsonXProvider is imported above from wxo_agentic_evaluation.watsonx_provider, which this diff shows renamed to service_provider/watsonx_provider.py — confirm the import still resolves
+prompt =  """
+<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+You are trying to make tool calls. Given a raw input and tool output. Try to extract the information to make the tool call
+Example:
+    Tool description:
+    def get_payslips(user_id: str) -> PayslipsResponse:
+    Gets a user's payslips from Workday.
+    :param user_id: The user's id uniquely identifying them within the Workday API.
+    :return: The user's payslips.
+ Raw inputs:\{"tool_name": "get_payslips", "args": {"user_id": '$get_user_workday_ids'}}
+ tool output: {'user_id': UserWorkdayIDs(person_id='', user_id='6dcb8106e8b74b5aabb1fc3ab8ef2b92')}
+ <|start_header_id|>ipython<|end_header_id|>
+ {"tool_name": "get_payslips", "args": {"user_id": "6dcb8106e8b74b5aabb1fc3ab8ef2b92"}}
+ <|eot_id|>
+"""
+test_sample1 = """
+<|start_header_id|>assistant<|end_header_id|>
+    Tool description:
+    def update_direct_reports(email_id: str, members: List[str], notification:bool) -> PayslipsResponse:
+    update direct reports for a given user
+    :param email_id: The user's email-id uniquely identifying them within the Workday API.
+    :param members: a list of user ids to be added as direct reports
+    :param notification: do we send the notification to all members
+ Raw inputs:  {"tool_name": "update_direct_reports", "args": {"email_id": '$get_email_id', 'members': $get_user_by_dvision]}}
+ tool output: {"email_id": 'jalenm3@163.com'}
+              {'members': [UserProfile(name="Lan Smith", user_id="46873f8i93", email="lan_smith@gmail.com"), UserProfile(name="Mary Rubic", user_id="34sss31", email="MaryRobic@gmail.com"), UserProfile(name="Jason Dai", user_id="8e8ewer3", email="jd@gmail.com"])}
+ <|start_header_id|>ipython<|end_header_id|>"""
+test_sample2 = """
+<|start_header_id|>assistant<|end_header_id|>
+    Tool description:
+    def book_meeting(location: str, date: str, time: str) -> bool:
+    update direct reports for a given user
+    :param email_id: The user's email-id uniquely identifying them within the Workday API.
+    :param members: a list of user ids to be added as direct reports
+    :param notification: do we send the notification to all members
+ Raw inputs:  {"tool_name": "book_meeting", "args": {"email_id": '$get_email_id', 'members': $get_user_by_dvision]}}
+ tool output: {"email_id": 'jalenm3@163.com'}
+              {'members': [UserProfile(name="Lan Smith", user_id="46873f8i93", email="lan_smith@gmail.com"), UserProfile(name="Mary Rubic", user_id="34sss31", email="MaryRobic@gmail.com"), UserProfile(name="Jason Dai", user_id="8e8ewer3", email="jd@gmail.com"])}
+ <|start_header_id|>ipython<|end_header_id|>"""
+outputs = wai_client.query(prompt + test_sample1)  # runs at import time and calls the model service
+import json
+print(outputs["generated_text"])  # NOTE(review): the WatsonXProvider.query shown in this diff returns the generated_text string itself, so indexing it with a str key would raise TypeError — confirm
+json_obj = parse_json_string(outputs["generated_text"])[0]  # takes the first parsed object; raises IndexError when nothing parsed
+print(json_obj)

wxo_agentic_evaluation/tool_planner.py CHANGED Viewed

@@ -6,15 +6,25 @@ import importlib.util
 import re
 from jsonargparse import CLI
 import os
+import textwrap
+from dataclasses import is_dataclass, asdict
-from wxo_agentic_evaluation.watsonx_provider import WatsonXProvider
+from wxo_agentic_evaluation.service_provider import get_provider
 from wxo_agentic_evaluation.arg_configs import BatchAnnotateConfig
-from wxo_agentic_evaluation.prompt.template_render import ToolPlannerTemplateRenderer, ToolChainAgentTemplateRenderer
+from wxo_agentic_evaluation.prompt.template_render import ToolPlannerTemplateRenderer, ArgsExtractorTemplateRenderer
 from wxo_agentic_evaluation import __file__
 root_dir = os.path.dirname(__file__)
 TOOL_PLANNER_PROMPT_PATH = os.path.join(root_dir, "prompt", "tool_planner.jinja2")
+ARGS_EXTRACTOR_PROMPT_PATH = os.path.join(root_dir, "prompt", "args_extractor_prompt.jinja2")
+class UniversalEncoder(json.JSONEncoder):  # JSON encoder that falls back to asdict() for dataclasses and to __dict__ for other objects
+    def default(self, obj):  # consulted by the encoder only for objects it cannot serialize natively
+        if is_dataclass(obj):
+            return asdict(obj)  # NOTE(review): is_dataclass is also true for dataclass types, for which asdict raises TypeError — confirm only instances reach here
+        elif hasattr(obj, "__dict__"):
+            return obj.__dict__  # returns the live attribute dict, not a copy
+        return super().default(obj)  # the base implementation raises TypeError for unsupported objects
 def extract_first_json_list(raw: str) -> list:
     matches = re.findall(r"\[\s*{.*?}\s*]", raw, re.DOTALL)
@@ -29,6 +39,33 @@ def extract_first_json_list(raw: str) -> list:
     print(raw)
     return []
+def parse_json_string(input_string):  # scans the text for brace-balanced spans, json.loads each one, and returns the list of objects that parsed
+    json_char_count = 0  # incremented below but never read or returned in this function
+    json_objects = []
+    current_json = ""
+    brace_level = 0  # braces are counted wherever they occur, including inside JSON string values
+    inside_json = False
+    for i, char in enumerate(input_string):  # the index i is not used
+        if char == "{":
+            brace_level += 1
+            inside_json = True
+            json_char_count += 1
+        if inside_json:
+            current_json += char
+            json_char_count += 1
+        if char == "}":
+            json_char_count += 1
+            brace_level -= 1  # decremented even outside a span, so a stray "}" leaves the level negative and the next span is not closed at its matching brace
+            if brace_level == 0:
+                inside_json = False
+                try:
+                    json_objects.append(json.loads(current_json))
+                except json.JSONDecodeError as e:
+                    print(f"Error decoding JSON: {e}")  # a span that fails to parse is reported and skipped
+                current_json = ""  # Reset current JSON string
+    return json_objects
 def load_tools_module(tools_path: Path) -> dict:
     tools_dict = {}
@@ -93,8 +130,64 @@ def extract_tool_signatures(tools_path: Path) -> list:
     return tool_data
+def extract_tool_signatures_for_prompt(tools_path: Path) -> dict[str, str]:  # maps each function name found under tools_path to a "def ...: summary" text built from its signature and docstring
+    functions = {}
+    files_to_parse = []
+    # Handle both single file and directory cases
+    if tools_path.is_file():
+        files_to_parse.append(tools_path)
+    elif tools_path.is_dir():
+        files_to_parse.extend(tools_path.glob("**/*.py"))  # recursive: every .py file below the directory
+    else:
+        raise ValueError(f"Tools path {tools_path} is neither a file nor directory")
+    for file_path in files_to_parse:
+        try:
+            with file_path.open("r", encoding="utf-8") as f:
+                code = f.read()
+            parsed_code = ast.parse(code)
+            for node in parsed_code.body:  # module-level statements only; functions nested in classes or other functions are not visited
+                if isinstance(node, ast.FunctionDef):
+                    name = node.name
+                    # Get args and type annotations
+                    args = []
+                    for arg in node.args.args:  # node.args.args only; *args/**kwargs and keyword-only parameters live in other fields and are not listed
+                        if arg.arg == "self":
+                            continue
+                        annotation = ast.unparse(arg.annotation) if arg.annotation else "Any"  # unannotated parameters are rendered as "Any"
+                        args.append((arg.arg, annotation))
+                    # Get return type
+                    returns = ast.unparse(node.returns) if node.returns else "None"  # a missing return annotation is rendered as "None"
+                    # Get docstring
+                    docstring = ast.get_docstring(node)
+                    docstring = textwrap.dedent(docstring).strip() if docstring else ""
+                    # Format parameter descriptions if available in docstring
+                    doc_lines = docstring.splitlines()
+                    doc_summary = doc_lines[0] if doc_lines else ""
+                    param_descriptions = "\n".join([line for line in doc_lines[1:] if ":param" in line])  # keeps only docstring lines containing ":param"
+                    # Compose the final string
+                    args_str = ", ".join(f"{arg}: {type_}" for arg, type_ in args)
+                    function_str = f"""def {name}({args_str}) -> {returns}:
+    {doc_summary}"""
+                    if param_descriptions:
+                        function_str += f"\n    {param_descriptions}"  # the 4-space indent is added before the first :param line only
-def ensure_data_available(tool_name: str, inputs: dict, snapshot: dict, tools_module: dict) -> dict:
+                    functions[name] = function_str  # a later function with the same name overwrites the earlier entry
+        except Exception as e:
+            print(f"Warning: Failed to parse {file_path}: {str(e)}")  # any failure skips the rest of that file; entries already stored from it are kept
+            continue
+    return functions
+def ensure_data_available(step: dict, inputs: dict, snapshot: dict, tools_module: dict, tool_signatures_for_prompt) -> dict:
+    tool_name = step["tool_name"]
     cache = snapshot.setdefault("input_output_examples", {}).setdefault(tool_name, [])
     for entry in cache:
         if entry["inputs"] == inputs:
@@ -103,7 +196,27 @@ def ensure_data_available(tool_name: str, inputs: dict, snapshot: dict, tools_mo
     if tool_name not in tools_module:
         raise ValueError(f"Tool '{tool_name}' not found")
-    output = tools_module[tool_name](**inputs)
+    try:
+        output = tools_module[tool_name](**inputs)
+    except:
+        provider = get_provider(
+            model_id="meta-llama/llama-3-405b-instruct",
+            params={"min_new_tokens": 0, "decoding_method": "greedy", "max_new_tokens": 500},
+        )
+        renderer = ArgsExtractorTemplateRenderer(ARGS_EXTRACTOR_PROMPT_PATH)
+        prompt = renderer.render(
+            tool_signature=tool_signatures_for_prompt[tool_name],
+            step=step,
+            inputs=inputs,
+        )
+        response = provider.query(prompt)
+        json_obj = parse_json_string(response)[0]
+        try:
+            output = tools_module[json_obj["tool_name"]](**json_obj["inputs"])
+        except:
+            raise ValueError(f"Failed to execute tool '{tool_name}' with inputs {inputs}")
     cache.append({"inputs": inputs, "output": output})
     if not isinstance(output, dict):
         print(f" Tool {tool_name} returned non-dict output: {output}")
@@ -119,15 +232,14 @@ def plan_tool_calls_with_llm(story: str, agent_name: str, tool_signatures_str: s
         available_tools=tool_signatures_str,
     )
     response = provider.query(prompt)
-    raw = response.get("generated_text", "")
-    parsed = extract_first_json_list(raw)
+    parsed = extract_first_json_list(response)
     print("\n LLM Tool Plan:")
     print(json.dumps(parsed, indent=2))
     return parsed
 # --- Tool Execution Logic ---
-def run_tool_chain(tool_plan: list, snapshot: dict, tools_module) -> None:
+def run_tool_chain(tool_plan: list, snapshot: dict, tools_module, tool_signatures_for_prompt) -> None:
     memory = {}
     for step in tool_plan:
@@ -166,14 +278,14 @@ def run_tool_chain(tool_plan: list, snapshot: dict, tools_module) -> None:
                 item_inputs = resolved_inputs.copy()
                 item_inputs[list_key] = val
                 print(f" ⚙️ Running {name} with {list_key} = {val}")
-                output = ensure_data_available(name, item_inputs, snapshot, tools_module)
+                output = ensure_data_available(step, item_inputs, snapshot, tools_module, tool_signatures_for_prompt)
                 results.append(output)
                 memory[f"{name}_{idx}"] = output
             memory[name] = results
             print(f"Stored {len(results)} outputs under '{name}' and indexed as '{name}_i'")
         else:
-            output = ensure_data_available(name, resolved_inputs, snapshot, tools_module)
+            output = ensure_data_available(step, resolved_inputs, snapshot, tools_module, tool_signatures_for_prompt)
             memory[name] = output
             print(f"Stored output under tool name: {name} = {output}")
@@ -183,14 +295,11 @@ def build_snapshot(agent_name: str, tools_path: Path, stories: list, output_path
     agent = {"name": agent_name}
     tools_module = load_tools_module(tools_path)
     tool_signatures = extract_tool_signatures(tools_path)
+    tool_signatures_for_prompt = extract_tool_signatures_for_prompt(tools_path)
-    provider = WatsonXProvider(
+    provider = get_provider(
         model_id="meta-llama/llama-3-405b-instruct",
-        llm_decode_parameter={
-            "min_new_tokens": 50,
-            "decoding_method": "greedy",
-            "max_new_tokens": 200
-        }
+        params={"min_new_tokens": 1, "decoding_method": "greedy", "max_new_tokens": 2048},
     )
     snapshot = {
@@ -202,10 +311,14 @@ def build_snapshot(agent_name: str, tools_path: Path, stories: list, output_path
     for story in stories:
         print(f"\n📘 Planning tool calls for story: {story}")
         tool_plan = plan_tool_calls_with_llm(story, agent["name"], tool_signatures, provider)
-        run_tool_chain(tool_plan, snapshot, tools_module)
+        try:
+            run_tool_chain(tool_plan, snapshot, tools_module, tool_signatures_for_prompt)
+        except ValueError as e:
+            print(f"❌ Error running tool chain for story '{story}': {e}")
+            continue
     with output_path.open("w", encoding="utf-8") as f:
-        json.dump(snapshot, f, indent=2)
+        json.dump(snapshot, f, indent=2, cls=UniversalEncoder)
     print(f"\n✅ Snapshot saved to {output_path}")

ibm-watsonx-orchestrate-evaluation-framework 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

ibm-watsonx-orchestrate-evaluation-framework 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl