ibm-watsonx-orchestrate-evaluation-framework 1.1.1__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ibm-watsonx-orchestrate-evaluation-framework might be problematic.

Files changed (61)
  1. ibm_watsonx_orchestrate_evaluation_framework-1.1.2.dist-info/METADATA +34 -0
  2. {ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.2.dist-info}/RECORD +60 -60
  3. wxo_agentic_evaluation/analytics/tools/analyzer.py +36 -21
  4. wxo_agentic_evaluation/analytics/tools/main.py +18 -7
  5. wxo_agentic_evaluation/analytics/tools/types.py +26 -11
  6. wxo_agentic_evaluation/analytics/tools/ux.py +75 -31
  7. wxo_agentic_evaluation/analyze_run.py +69 -48
  8. wxo_agentic_evaluation/annotate.py +6 -4
  9. wxo_agentic_evaluation/arg_configs.py +8 -2
  10. wxo_agentic_evaluation/batch_annotate.py +78 -25
  11. wxo_agentic_evaluation/data_annotator.py +18 -13
  12. wxo_agentic_evaluation/description_quality_checker.py +20 -14
  13. wxo_agentic_evaluation/evaluation_package.py +114 -70
  14. wxo_agentic_evaluation/external_agent/__init__.py +18 -7
  15. wxo_agentic_evaluation/external_agent/external_validate.py +46 -35
  16. wxo_agentic_evaluation/external_agent/performance_test.py +32 -20
  17. wxo_agentic_evaluation/external_agent/types.py +12 -5
  18. wxo_agentic_evaluation/inference_backend.py +158 -73
  19. wxo_agentic_evaluation/llm_matching.py +4 -3
  20. wxo_agentic_evaluation/llm_rag_eval.py +7 -4
  21. wxo_agentic_evaluation/llm_user.py +7 -3
  22. wxo_agentic_evaluation/main.py +175 -67
  23. wxo_agentic_evaluation/metrics/llm_as_judge.py +2 -2
  24. wxo_agentic_evaluation/metrics/metrics.py +26 -12
  25. wxo_agentic_evaluation/prompt/template_render.py +32 -11
  26. wxo_agentic_evaluation/quick_eval.py +49 -23
  27. wxo_agentic_evaluation/record_chat.py +70 -33
  28. wxo_agentic_evaluation/red_teaming/attack_evaluator.py +58 -18
  29. wxo_agentic_evaluation/red_teaming/attack_generator.py +38 -18
  30. wxo_agentic_evaluation/red_teaming/attack_runner.py +43 -27
  31. wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/base.py +3 -1
  32. wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/loader.py +23 -15
  33. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/adapters.py +13 -8
  34. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py +41 -13
  35. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/semantic_checker.py +26 -16
  36. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/static_checker.py +17 -11
  37. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py +44 -29
  38. wxo_agentic_evaluation/referenceless_eval/metrics/field.py +13 -5
  39. wxo_agentic_evaluation/referenceless_eval/metrics/metric.py +16 -5
  40. wxo_agentic_evaluation/referenceless_eval/metrics/metrics_runner.py +8 -3
  41. wxo_agentic_evaluation/referenceless_eval/metrics/prompt.py +6 -2
  42. wxo_agentic_evaluation/referenceless_eval/metrics/utils.py +5 -1
  43. wxo_agentic_evaluation/referenceless_eval/prompt/runner.py +16 -3
  44. wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py +23 -12
  45. wxo_agentic_evaluation/resource_map.py +2 -1
  46. wxo_agentic_evaluation/service_instance.py +24 -11
  47. wxo_agentic_evaluation/service_provider/__init__.py +33 -13
  48. wxo_agentic_evaluation/service_provider/model_proxy_provider.py +129 -26
  49. wxo_agentic_evaluation/service_provider/ollama_provider.py +10 -11
  50. wxo_agentic_evaluation/service_provider/provider.py +0 -1
  51. wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py +34 -21
  52. wxo_agentic_evaluation/service_provider/watsonx_provider.py +50 -22
  53. wxo_agentic_evaluation/tool_planner.py +128 -44
  54. wxo_agentic_evaluation/type.py +12 -9
  55. wxo_agentic_evaluation/utils/__init__.py +1 -0
  56. wxo_agentic_evaluation/utils/open_ai_tool_extractor.py +41 -20
  57. wxo_agentic_evaluation/utils/rich_utils.py +23 -9
  58. wxo_agentic_evaluation/utils/utils.py +83 -52
  59. ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info/METADATA +0 -386
  60. {ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.2.dist-info}/WHEEL +0 -0
  61. {ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.2.dist-info}/top_level.txt +0 -0
wxo_agentic_evaluation/tool_planner.py
@@ -1,26 +1,35 @@
-import json
 import ast
 import csv
-from pathlib import Path
 import importlib.util
-import re
-from jsonargparse import CLI
+import json
 import os
+import re
 import sys
 import textwrap
-from dataclasses import is_dataclass, asdict
+from dataclasses import asdict, is_dataclass
+from pathlib import Path
+
+from jsonargparse import CLI
 
-from wxo_agentic_evaluation.service_provider import get_provider
-from wxo_agentic_evaluation.arg_configs import BatchAnnotateConfig
-from wxo_agentic_evaluation.prompt.template_render import ToolPlannerTemplateRenderer, ArgsExtractorTemplateRenderer
 from wxo_agentic_evaluation import __file__
+from wxo_agentic_evaluation.arg_configs import BatchAnnotateConfig
+from wxo_agentic_evaluation.prompt.template_render import (
+    ArgsExtractorTemplateRenderer,
+    ToolPlannerTemplateRenderer,
+)
+from wxo_agentic_evaluation.service_provider import get_provider
 
 root_dir = os.path.dirname(__file__)
-TOOL_PLANNER_PROMPT_PATH = os.path.join(root_dir, "prompt", "tool_planner.jinja2")
-ARGS_EXTRACTOR_PROMPT_PATH = os.path.join(root_dir, "prompt", "args_extractor_prompt.jinja2")
+TOOL_PLANNER_PROMPT_PATH = os.path.join(
+    root_dir, "prompt", "tool_planner.jinja2"
+)
+ARGS_EXTRACTOR_PROMPT_PATH = os.path.join(
+    root_dir, "prompt", "args_extractor_prompt.jinja2"
+)
 
 MISSING_DOCSTRING_PROMPT = "No description available"
 
+
 class UniversalEncoder(json.JSONEncoder):
     def default(self, obj):
         if is_dataclass(obj):
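UniversalEncoder is untouched here beyond the import reshuffle: it lets json.dumps serialize dataclasses (presumably via the asdict imported above) and falls back to __dict__ for other objects. A minimal usage sketch, with a Point dataclass that is illustrative rather than part of the package:

import json
from dataclasses import dataclass

@dataclass
class Point:  # hypothetical type, not from the package
    x: int
    y: int

# Dataclasses go through asdict(); other objects fall back to __dict__.
print(json.dumps({"p": Point(1, 2)}, cls=UniversalEncoder))
# -> {"p": {"x": 1, "y": 2}}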
@@ -29,12 +38,15 @@ class UniversalEncoder(json.JSONEncoder):
             return obj.__dict__
         return super().default(obj)
 
+
 def extract_first_json_list(raw: str) -> list:
     matches = re.findall(r"\[\s*{.*?}\s*]", raw, re.DOTALL)
     for match in matches:
         try:
             parsed = json.loads(match)
-            if isinstance(parsed, list) and all("tool_name" in step for step in parsed):
+            if isinstance(parsed, list) and all(
+                "tool_name" in step for step in parsed
+            ):
                 return parsed
         except Exception:
             continue
@@ -42,6 +54,7 @@ def extract_first_json_list(raw: str) -> list:
     print(raw)
     return []
 
+
 def parse_json_string(input_string):
     json_char_count = 0
     json_objects = []
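extract_first_json_list is the parser guard for raw planner output: it regex-scans the text for a bracketed list of objects, keeps the first one whose every element carries a tool_name key, and otherwise prints the raw text and returns []. Illustratively (field names other than tool_name are made up):

raw = 'Plan:\n[{"tool_name": "get_weather", "inputs": {"city": "Paris"}}] done'
extract_first_json_list(raw)
# -> [{"tool_name": "get_weather", "inputs": {"city": "Paris"}}]

extract_first_json_list("no JSON list here")
# -> []  (and the raw text is printed for debugging)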
@@ -79,12 +92,16 @@ def load_tools_module(tools_path: Path) -> dict:
     elif tools_path.is_dir():
         files_to_parse.extend(tools_path.glob("**/*.py"))
     else:
-        raise ValueError(f"Tools path {tools_path} is neither a file nor directory")
+        raise ValueError(
+            f"Tools path {tools_path} is neither a file nor directory"
+        )
 
     for file_path in files_to_parse:
         try:
             module_name = file_path.stem
-            spec = importlib.util.spec_from_file_location(module_name, file_path)
+            spec = importlib.util.spec_from_file_location(
+                module_name, file_path
+            )
             module = importlib.util.module_from_spec(spec)
             parent_dir = str(file_path.parent)
             sys_path_modified = False
@@ -99,7 +116,7 @@ def load_tools_module(tools_path: Path) -> dict:
             # Add all module's non-private functions to tools_dict
             for attr_name in dir(module):
                 attr = getattr(module, attr_name)
-                if callable(attr) and not attr_name.startswith('_'):
+                if callable(attr) and not attr_name.startswith("_"):
                     tools_dict[attr_name] = attr
         except Exception as e:
             print(f"Warning: Failed to load {file_path}: {str(e)}")
@@ -117,7 +134,9 @@ def extract_tool_signatures(tools_path: Path) -> list:
     elif tools_path.is_dir():
         files_to_parse.extend(tools_path.glob("**/*.py"))
     else:
-        raise ValueError(f"Tools path {tools_path} is neither a file nor directory")
+        raise ValueError(
+            f"Tools path {tools_path} is neither a file nor directory"
+        )
 
     for file_path in files_to_parse:
         try:
@@ -128,19 +147,24 @@ def extract_tool_signatures(tools_path: Path) -> list:
             for node in parsed_code.body:
                 if isinstance(node, ast.FunctionDef):
                     name = node.name
-                    args = [arg.arg for arg in node.args.args if arg.arg != "self"]
+                    args = [
+                        arg.arg for arg in node.args.args if arg.arg != "self"
+                    ]
                     docstring = ast.get_docstring(node)
-                    tool_data.append({
-                        "Function Name": name,
-                        "Arguments": args,
-                        "Docstring": docstring or MISSING_DOCSTRING_PROMPT
-                    })
+                    tool_data.append(
+                        {
+                            "Function Name": name,
+                            "Arguments": args,
+                            "Docstring": docstring or MISSING_DOCSTRING_PROMPT,
+                        }
+                    )
         except Exception as e:
             print(f"Warning: Failed to parse {file_path}: {str(e)}")
             continue
 
     return tool_data
 
+
 def extract_tool_signatures_for_prompt(tools_path: Path) -> dict[str, str]:
     functions = {}
     files_to_parse = []
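extract_tool_signatures walks the tool files with ast instead of importing them, so modules with missing runtime dependencies still yield records. For a hypothetical tools.py containing a single documented function, the result would be:

# tools.py (illustrative input)
# def get_weather(city: str) -> dict:
#     """Return current weather for a city."""
#     ...

extract_tool_signatures(Path("tools.py"))
# -> [{"Function Name": "get_weather",
#      "Arguments": ["city"],
#      "Docstring": "Return current weather for a city."}]
# Undocumented functions get MISSING_DOCSTRING_PROMPT ("No description available").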
@@ -151,7 +175,9 @@ def extract_tool_signatures_for_prompt(tools_path: Path) -> dict[str, str]:
     elif tools_path.is_dir():
         files_to_parse.extend(tools_path.glob("**/*.py"))
     else:
-        raise ValueError(f"Tools path {tools_path} is neither a file nor directory")
+        raise ValueError(
+            f"Tools path {tools_path} is neither a file nor directory"
+        )
 
     for file_path in files_to_parse:
         try:
@@ -168,23 +194,35 @@ def extract_tool_signatures_for_prompt(tools_path: Path) -> dict[str, str]:
                     for arg in node.args.args:
                         if arg.arg == "self":
                             continue
-                        annotation = ast.unparse(arg.annotation) if arg.annotation else "Any"
+                        annotation = (
+                            ast.unparse(arg.annotation)
+                            if arg.annotation
+                            else "Any"
+                        )
                         args.append((arg.arg, annotation))
 
                     # Get return type
-                    returns = ast.unparse(node.returns) if node.returns else "None"
+                    returns = (
+                        ast.unparse(node.returns) if node.returns else "None"
+                    )
 
                     # Get docstring
                     docstring = ast.get_docstring(node)
-                    docstring = textwrap.dedent(docstring).strip() if docstring else ""
+                    docstring = (
+                        textwrap.dedent(docstring).strip() if docstring else ""
+                    )
 
                     # Format parameter descriptions if available in docstring
                     doc_lines = docstring.splitlines()
                     doc_summary = doc_lines[0] if doc_lines else ""
-                    param_descriptions = "\n".join([line for line in doc_lines[1:] if ":param" in line])
+                    param_descriptions = "\n".join(
+                        [line for line in doc_lines[1:] if ":param" in line]
+                    )
 
                     # Compose the final string
-                    args_str = ", ".join(f"{arg}: {type_}" for arg, type_ in args)
+                    args_str = ", ".join(
+                        f"{arg}: {type_}" for arg, type_ in args
+                    )
                     function_str = f"""def {name}({args_str}) -> {returns}:
     {doc_summary}"""
                     if param_descriptions:
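The value this builds for each tool is a compact prompt-ready stub: a def line from the unparsed annotations, the docstring's first line, and (when present) its :param lines. For the hypothetical get_weather tool above, function_str would come out roughly as:

def get_weather(city: str) -> dict:
    Return current weather for a city.
:param city: name of the city   # appended only when the docstring contains ":param" lines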
@@ -197,9 +235,18 @@ def extract_tool_signatures_for_prompt(tools_path: Path) -> dict[str, str]:
 
     return functions
 
-def ensure_data_available(step: dict, inputs: dict, snapshot: dict, tools_module: dict, tool_signatures_for_prompt) -> dict:
+
+def ensure_data_available(
+    step: dict,
+    inputs: dict,
+    snapshot: dict,
+    tools_module: dict,
+    tool_signatures_for_prompt,
+) -> dict:
     tool_name = step["tool_name"]
-    cache = snapshot.setdefault("input_output_examples", {}).setdefault(tool_name, [])
+    cache = snapshot.setdefault("input_output_examples", {}).setdefault(
+        tool_name, []
+    )
     for entry in cache:
         if entry["inputs"] == inputs:
             return entry["output"]
@@ -212,7 +259,11 @@ def ensure_data_available(step: dict, inputs: dict, snapshot: dict, tools_module
     except:
         provider = get_provider(
             model_id="meta-llama/llama-3-405b-instruct",
-            params={"min_new_tokens": 0, "decoding_method": "greedy", "max_new_tokens": 500},
+            params={
+                "min_new_tokens": 0,
+                "decoding_method": "greedy",
+                "max_new_tokens": 500,
+            },
         )
     renderer = ArgsExtractorTemplateRenderer(ARGS_EXTRACTOR_PROMPT_PATH)
 
@@ -226,14 +277,19 @@
     try:
         output = tools_module[json_obj["tool_name"]](**json_obj["inputs"])
     except:
-        raise ValueError(f"Failed to execute tool '{tool_name}' with inputs {inputs}")
+        raise ValueError(
+            f"Failed to execute tool '{tool_name}' with inputs {inputs}"
+        )
 
     cache.append({"inputs": inputs, "output": output})
     if not isinstance(output, dict):
         print(f" Tool {tool_name} returned non-dict output: {output}")
     return output
 
-def plan_tool_calls_with_llm(story: str, agent_name: str, tool_signatures_str: str, provider) -> list:
+
+def plan_tool_calls_with_llm(
+    story: str, agent_name: str, tool_signatures_str: str, provider
+) -> list:
 
     renderer = ToolPlannerTemplateRenderer(TOOL_PLANNER_PROMPT_PATH)
 
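ensure_data_available memoizes tool executions inside the snapshot, keyed by tool name and exact inputs, so replaying a story never re-runs a tool on inputs it has already seen; the LLM-backed args-extractor path above is only the fallback when the direct call fails. The cache it maintains has this shape (values illustrative):

snapshot = {
    "agent": {"name": "weather_agent"},   # illustrative
    "tools": [...],                       # records from extract_tool_signatures
    "input_output_examples": {
        "get_weather": [
            {"inputs": {"city": "Paris"}, "output": {"temp_c": 18}},
        ],
    },
}
# A later call with inputs == {"city": "Paris"} returns the cached output directly.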
@@ -250,7 +306,9 @@ def plan_tool_calls_with_llm(story: str, agent_name: str, tool_signatures_str: s
 
 
 # --- Tool Execution Logic ---
-def run_tool_chain(tool_plan: list, snapshot: dict, tools_module, tool_signatures_for_prompt) -> None:
+def run_tool_chain(
+    tool_plan: list, snapshot: dict, tools_module, tool_signatures_for_prompt
+) -> None:
     memory = {}
 
     for step in tool_plan:
@@ -280,7 +338,9 @@ def run_tool_chain(tool_plan: list, snapshot: dict, tools_module, tool_signature
 
         if list_keys:
             if len(list_keys) > 1:
-                raise ValueError(f"Tool '{name}' received multiple list inputs. Only one supported for now.")
+                raise ValueError(
+                    f"Tool '{name}' received multiple list inputs. Only one supported for now."
+                )
             list_key = list_keys[0]
             value_list = resolved_inputs[list_key]
 
@@ -289,20 +349,36 @@ def run_tool_chain(tool_plan: list, snapshot: dict, tools_module, tool_signature
                 item_inputs = resolved_inputs.copy()
                 item_inputs[list_key] = val
                 print(f" ⚙️ Running {name} with {list_key} = {val}")
-                output = ensure_data_available(step, item_inputs, snapshot, tools_module, tool_signatures_for_prompt)
+                output = ensure_data_available(
+                    step,
+                    item_inputs,
+                    snapshot,
+                    tools_module,
+                    tool_signatures_for_prompt,
+                )
                 results.append(output)
                 memory[f"{name}_{idx}"] = output
 
             memory[name] = results
-            print(f"Stored {len(results)} outputs under '{name}' and indexed as '{name}_i'")
+            print(
+                f"Stored {len(results)} outputs under '{name}' and indexed as '{name}_i'"
+            )
         else:
-            output = ensure_data_available(step, resolved_inputs, snapshot, tools_module, tool_signatures_for_prompt)
+            output = ensure_data_available(
+                step,
+                resolved_inputs,
+                snapshot,
+                tools_module,
+                tool_signatures_for_prompt,
+            )
             memory[name] = output
             print(f"Stored output under tool name: {name} = {output}")
 
 
 # --- Main Snapshot Builder ---
-def build_snapshot(agent_name: str, tools_path: Path, stories: list, output_path: Path):
+def build_snapshot(
+    agent_name: str, tools_path: Path, stories: list, output_path: Path
+):
     agent = {"name": agent_name}
     tools_module = load_tools_module(tools_path)
     tool_signatures = extract_tool_signatures(tools_path)
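run_tool_chain fans out over at most one list-valued input per step: each element gets its own (cached) execution, and outputs land in memory under both indexed keys and the bare tool name. Sketching the effect for a hypothetical step:

# step inputs resolve to {"city": ["Paris", "Oslo"]}  (shape illustrative)
# after the loop:
#   memory["get_weather_0"]  -> output for "Paris"
#   memory["get_weather_1"]  -> output for "Oslo"
#   memory["get_weather"]    -> [both outputs, in order]
# Two list-valued inputs in one step raise ValueError ("Only one supported for now").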
@@ -310,20 +386,28 @@ def build_snapshot(agent_name: str, tools_path: Path, stories: list, output_path
 
     provider = get_provider(
         model_id="meta-llama/llama-3-405b-instruct",
-        params={"min_new_tokens": 1, "decoding_method": "greedy", "max_new_tokens": 2048},
+        params={
+            "min_new_tokens": 1,
+            "decoding_method": "greedy",
+            "max_new_tokens": 2048,
+        },
     )
 
     snapshot = {
         "agent": agent,
         "tools": tool_signatures,
-        "input_output_examples": {}
+        "input_output_examples": {},
     }
 
     for story in stories:
         print(f"\n📘 Planning tool calls for story: {story}")
-        tool_plan = plan_tool_calls_with_llm(story, agent["name"], tool_signatures, provider)
+        tool_plan = plan_tool_calls_with_llm(
+            story, agent["name"], tool_signatures, provider
+        )
         try:
-            run_tool_chain(tool_plan, snapshot, tools_module, tool_signatures_for_prompt)
+            run_tool_chain(
+                tool_plan, snapshot, tools_module, tool_signatures_for_prompt
+            )
         except ValueError as e:
             print(f"❌ Error running tool chain for story '{story}': {e}")
             continue
@@ -340,7 +424,7 @@ if __name__ == "__main__":
 
     stories = []
     agent_name = None
-    with stories_path.open("r", encoding="utf-8", newline='') as f:
+    with stories_path.open("r", encoding="utf-8", newline="") as f:
        csv_reader = csv.DictReader(f)
        for row in csv_reader:
            stories.append(row["story"])
@@ -349,4 +433,4 @@ if __name__ == "__main__":
 
     snapshot_path = stories_path.parent / f"{agent_name}_snapshot_llm.json"
 
-    build_snapshot(agent_name, tools_path, stories, snapshot_path)
+    build_snapshot(agent_name, tools_path, stories, snapshot_path)
wxo_agentic_evaluation/type.py
@@ -1,10 +1,7 @@
-from typing import Dict, List, Union, Any, Optional
-from pydantic import (
-    BaseModel,
-    ConfigDict,
-    Field
-)
 from enum import StrEnum
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import BaseModel, ConfigDict, Field
 from rich.text import Text
 
 
@@ -61,9 +58,13 @@ class ConversationalConfidenceThresholdScore(BaseModel):
     def table(self):
         return {
             "response_confidence": str(self.response_confidence),
-            "response_confidence_threshold": str(self.response_confidence_threshold),
+            "response_confidence_threshold": str(
+                self.response_confidence_threshold
+            ),
             "retrieval_confidence": str(self.retrieval_confidence),
-            "retrieval_confidence_threshold": str(self.retrieval_confidence_threshold),
+            "retrieval_confidence_threshold": str(
+                self.retrieval_confidence_threshold
+            ),
         }
 
 
@@ -120,12 +121,14 @@ class GoalDetail(BaseModel):
     keywords: List = None
     knowledge_base: KnowledgeBaseGoalDetail = KnowledgeBaseGoalDetail()
 
+
 class AttackData(BaseModel):
     attack_category: AttackCategory
     attack_type: str
     attack_name: str
     attack_instructions: str
 
+
 class AttackData(BaseModel):
     agent: str
     agents_path: str
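One thing this hunk makes easy to miss: the file contains two consecutive class AttackData(BaseModel) definitions with disjoint fields, and the formatting change preserves both. In Python the second definition rebinds the name, so the attack_category/attack_type variant is unreachable as AttackData; presumably one of the two was meant to carry a different name. A minimal demonstration of the shadowing:

from pydantic import BaseModel

class AttackData(BaseModel):
    attack_name: str

class AttackData(BaseModel):  # rebinds the name; the first class is discarded
    agent: str

AttackData(agent="hr_agent")      # validates against the second definition
AttackData(attack_name="probe")   # ValidationError: field "agent" required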
@@ -143,8 +146,8 @@ class EvaluationData(BaseModel):
     goal_details: List[GoalDetail]
     starting_sentence: str = None
 
+
 class ToolDefinition(BaseModel):
     tool_description: Optional[str]
     tool_name: str
     tool_params: List[str]
-
wxo_agentic_evaluation/utils/__init__.py
@@ -1,5 +1,6 @@
 import json
 
+
 def json_dump(output_path, object):
     with open(output_path, "w", encoding="utf-8") as f:
         json.dump(object, f, indent=4)
wxo_agentic_evaluation/utils/open_ai_tool_extractor.py
@@ -1,11 +1,12 @@
 import ast
 import re
 from pathlib import Path
-from typing import Union, Mapping, Any, List
+from typing import Any, List, Mapping, Union
+
 
 class PythonTypeToJsonType:
     OPTIONAL_PARAM_EXTRACT = re.compile(r"[Oo]ptional\[(\w+)\]")
-
+
     @staticmethod
     def python_to_json_type(python_annotation: str):
         if not python_annotation:
@@ -25,30 +26,33 @@ class PythonTypeToJsonType:
             return "object"
         if python_annotation.startswith("optional"):
             # extract the type within Optional[T]
-            inner_type = PythonTypeToJsonType.OPTIONAL_PARAM_EXTRACT.search(python_annotation).group(1)
+            inner_type = PythonTypeToJsonType.OPTIONAL_PARAM_EXTRACT.search(
+                python_annotation
+            ).group(1)
             return PythonTypeToJsonType.python_to_json_type(inner_type)
 
         return "string"
 
+
 class ToolExtractionOpenAIFormat:
     @staticmethod
     def get_default_arguments(node):
-        """ Returns the default arguments (if any)
+        """Returns the default arguments (if any)
 
         The default arguments are stored in args.default array.
         Since, in Python, the default arguments only come after positional arguments,
         we can index the argument array starting from the last `n` arguments, where n is
         the length of the default arguments.
 
-        ex.
+        ex.
         def add(a, b=5):
             pass
-
+
         Then we have,
         args = [a, b]
         defaults = [Constant(value=5)]
 
-        args[-len(defaults):] = [b]
+        args[-len(defaults):] = [b]
 
         (
             "FunctionDef(
@@ -70,12 +74,12 @@ class ToolExtractionOpenAIFormat:
         if num_defaults > 0:
             for arg in node.args.args[-num_defaults:]:
                 default_arguments.add(arg)
-
+
         return default_arguments
 
     @staticmethod
     def from_file(tools_path: Union[str, Path]) -> Mapping[str, Any]:
-        """ Uses `extract_tool_signatures` function, but converts the response
+        """Uses `extract_tool_signatures` function, but converts the response
         to open-ai format
 
         ```
@@ -100,7 +104,11 @@
             parsed_code = ast.parse(code)
             for node in parsed_code.body:
                 if isinstance(node, ast.FunctionDef):
-                    parameters = {"type": "object", "properties": {}, "required": []}
+                    parameters = {
+                        "type": "object",
+                        "properties": {},
+                        "required": [],
+                    }
                     function_name = node.name
                     for arg in node.args.args:
                         type_annotation = None
@@ -109,16 +117,25 @@
                         if arg.annotation:
                             type_annotation = ast.unparse(arg.annotation)
 
-                        parameter_type = PythonTypeToJsonType.python_to_json_type(type_annotation)
+                        parameter_type = (
+                            PythonTypeToJsonType.python_to_json_type(
+                                type_annotation
+                            )
+                        )
                         parameters["properties"][arg.arg] = {
                             "type": parameter_type,
-                            "description": "", # todo
+                            "description": "",  # todo
                         }
 
-                        if type_annotation and "Optional" not in type_annotation:
+                        if (
+                            type_annotation
+                            and "Optional" not in type_annotation
+                        ):
                             parameters["required"].append(arg.arg)
 
-                    default_arguments = ToolExtractionOpenAIFormat.get_default_arguments(node)
+                    default_arguments = (
+                        ToolExtractionOpenAIFormat.get_default_arguments(node)
+                    )
                     for arg_name in parameters["required"]:
                         if arg_name in default_arguments:
                             parameters.remove(arg_name)
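Two details of this pruning loop carry over to 1.1.2 unchanged and look suspect: get_default_arguments collects ast.arg nodes while parameters["required"] holds strings, so arg_name in default_arguments can never be true; and parameters is a dict, which has no .remove method, so the branch would raise AttributeError if it were ever reached. A sketch of what the step presumably intends (my reading, not code from the package):

num_defaults = len(node.args.defaults)
default_names = (
    {a.arg for a in node.args.args[-num_defaults:]} if num_defaults else set()
)
# Arguments with defaults are optional, so drop them from "required".
parameters["required"] = [
    name for name in parameters["required"] if name not in default_names
]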
@@ -128,8 +145,10 @@
                     "function": {
                         "name": function_name,
                         "parameters": parameters,
-                        "description": ast.get_docstring(node) # fix (does not do :params)
-                    }
+                        "description": ast.get_docstring(
+                            node
+                        ),  # fix (does not do :params)
+                    },
                 }
                 tool_data.append(open_ai_format_fn)
 
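For reference, the record assembled here follows the OpenAI function-calling schema; for the hypothetical get_weather tool the "function" sub-object would come out as below (any wrapper keys outside "function" sit before this hunk and are not shown in the diff):

{
    "function": {
        "name": "get_weather",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string", "description": ""}},
            "required": ["city"],
        },
        "description": "Return current weather for a city.",
    },
}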
@@ -149,9 +168,11 @@
         elif tools_path.is_dir():
             files_to_parse.extend(tools_path.glob("**/*.py"))
         else:
-            raise ValueError(f"Tools path {tools_path} is neither a file nor directory")
-
+            raise ValueError(
+                f"Tools path {tools_path} is neither a file nor directory"
+            )
+
         for file_path in files_to_parse:
             all_tools.extend(ToolExtractionOpenAIFormat.from_file(file_path))
-
-        return all_tools
+
+        return all_tools
wxo_agentic_evaluation/utils/rich_utils.py
@@ -1,6 +1,7 @@
-from rich.text import Text
-from typing import Optional, List, Any
+from typing import Any, List, Optional
+
 import rich
+from rich.text import Text
 
 
 def pretty_print(content: Any, style: Optional[str] = None):
@@ -33,13 +34,17 @@ def warn(
 
 
 def is_ok(
-    message: str, style: Optional[str] = "bold green", prompt: Optional[str] = "OK ✅ :"
+    message: str,
+    style: Optional[str] = "bold green",
+    prompt: Optional[str] = "OK ✅ :",
 ) -> Text:
     """Utility function for formatting an OK message."""
     return Text(f"{prompt}{message}\n\n", style=style)
 
 
-def print_done(prompt: Optional[str] = "Done ✅", style: Optional[str] = "bold cyan"):
+def print_done(
+    prompt: Optional[str] = "Done ✅", style: Optional[str] = "bold cyan"
+):
     """
     Prints a prompt indicating completion of a process/routine.
     :param prompt: default is `"Done ✅"`
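Note the split in responsibilities that the reflow keeps intact: is_ok formats and returns a rich Text object rather than printing, while the print_* helpers emit directly. A hedged usage sketch:

import rich

rich.print(is_ok("all tool descriptions resolved"))
# OK ✅ :all tool descriptions resolved   (bold green, trailing blank line)

print_done()  # prints "Done ✅" in bold cyan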
@@ -63,7 +68,9 @@ def print_success(
 
 
 def print_failure(
-    message: str, style: Optional[str] = "bold red", prompt: Optional[str] = "❌ FAILED"
+    message: str,
+    style: Optional[str] = "bold red",
+    prompt: Optional[str] = "❌ FAILED",
 ):
     """
     Prints a failure message.
@@ -108,7 +115,9 @@ class IncorrectParameterUtils:
     ]
 
     @staticmethod
-    def format_bad_description_message(tool_name: str, tool_desc: str) -> List[Text]:
+    def format_bad_description_message(
+        tool_name: str, tool_desc: str
+    ) -> List[Text]:
 
         return [
             warn(
@@ -139,12 +148,15 @@ class TestingUtils:
         For example, this can be read as: `"{\n⚙️ Testing} {20} {good tool descriptions}"`.
         """
         pretty_print(
-            content=f"{prompt} {test_case_count} {test_description}", style=style
+            content=f"{prompt} {test_case_count} {test_description}",
+            style=style,
         )
 
     @staticmethod
     def print_error_details(
-        expected: List[str], detected: List[str], style: Optional[str] = "bold red"
+        expected: List[str],
+        detected: List[str],
+        style: Optional[str] = "bold red",
     ):
         """
         Print detailed error information.
@@ -169,6 +181,8 @@ class TestingUtils:
         :param style: The style for the text (default is bold red).
         """
         if failed_cases:
-            pretty_print(content=f"{prompt} ({len(failed_cases)}):", style=style)
+            pretty_print(
+                content=f"{prompt} ({len(failed_cases)}):", style=style
+            )
             for case in failed_cases:
                 pretty_print(content=f" - {case}", style=style)