ibm-watsonx-orchestrate-evaluation-framework 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info/METADATA +35 -0
  2. {ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info}/RECORD +65 -60
  3. wxo_agentic_evaluation/analytics/tools/analyzer.py +36 -21
  4. wxo_agentic_evaluation/analytics/tools/main.py +18 -7
  5. wxo_agentic_evaluation/analytics/tools/types.py +26 -11
  6. wxo_agentic_evaluation/analytics/tools/ux.py +75 -31
  7. wxo_agentic_evaluation/analyze_run.py +69 -48
  8. wxo_agentic_evaluation/annotate.py +6 -4
  9. wxo_agentic_evaluation/arg_configs.py +9 -3
  10. wxo_agentic_evaluation/batch_annotate.py +78 -25
  11. wxo_agentic_evaluation/data_annotator.py +18 -13
  12. wxo_agentic_evaluation/description_quality_checker.py +20 -14
  13. wxo_agentic_evaluation/evaluation.py +42 -0
  14. wxo_agentic_evaluation/evaluation_package.py +117 -70
  15. wxo_agentic_evaluation/external_agent/__init__.py +18 -7
  16. wxo_agentic_evaluation/external_agent/external_validate.py +46 -35
  17. wxo_agentic_evaluation/external_agent/performance_test.py +32 -20
  18. wxo_agentic_evaluation/external_agent/types.py +12 -5
  19. wxo_agentic_evaluation/inference_backend.py +183 -79
  20. wxo_agentic_evaluation/llm_matching.py +4 -3
  21. wxo_agentic_evaluation/llm_rag_eval.py +7 -4
  22. wxo_agentic_evaluation/llm_user.py +7 -3
  23. wxo_agentic_evaluation/main.py +175 -67
  24. wxo_agentic_evaluation/metrics/llm_as_judge.py +2 -2
  25. wxo_agentic_evaluation/metrics/metrics.py +26 -12
  26. wxo_agentic_evaluation/otel_support/evaluate_tau.py +67 -0
  27. wxo_agentic_evaluation/otel_support/evaluate_tau_traces.py +176 -0
  28. wxo_agentic_evaluation/otel_support/otel_message_conversion.py +21 -0
  29. wxo_agentic_evaluation/otel_support/tasks_test.py +1226 -0
  30. wxo_agentic_evaluation/prompt/template_render.py +32 -11
  31. wxo_agentic_evaluation/quick_eval.py +49 -23
  32. wxo_agentic_evaluation/record_chat.py +70 -33
  33. wxo_agentic_evaluation/red_teaming/attack_evaluator.py +58 -18
  34. wxo_agentic_evaluation/red_teaming/attack_generator.py +38 -18
  35. wxo_agentic_evaluation/red_teaming/attack_runner.py +43 -27
  36. wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/base.py +3 -1
  37. wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/loader.py +23 -15
  38. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/adapters.py +13 -8
  39. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py +41 -13
  40. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/semantic_checker.py +26 -16
  41. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/static_checker.py +17 -11
  42. wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py +44 -29
  43. wxo_agentic_evaluation/referenceless_eval/metrics/field.py +13 -5
  44. wxo_agentic_evaluation/referenceless_eval/metrics/metric.py +16 -5
  45. wxo_agentic_evaluation/referenceless_eval/metrics/metrics_runner.py +8 -3
  46. wxo_agentic_evaluation/referenceless_eval/metrics/prompt.py +6 -2
  47. wxo_agentic_evaluation/referenceless_eval/metrics/utils.py +5 -1
  48. wxo_agentic_evaluation/referenceless_eval/prompt/runner.py +16 -3
  49. wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py +23 -12
  50. wxo_agentic_evaluation/resource_map.py +2 -1
  51. wxo_agentic_evaluation/service_instance.py +103 -21
  52. wxo_agentic_evaluation/service_provider/__init__.py +33 -13
  53. wxo_agentic_evaluation/service_provider/model_proxy_provider.py +216 -34
  54. wxo_agentic_evaluation/service_provider/ollama_provider.py +10 -11
  55. wxo_agentic_evaluation/service_provider/provider.py +0 -1
  56. wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py +34 -21
  57. wxo_agentic_evaluation/service_provider/watsonx_provider.py +50 -22
  58. wxo_agentic_evaluation/tool_planner.py +128 -44
  59. wxo_agentic_evaluation/type.py +12 -9
  60. wxo_agentic_evaluation/utils/__init__.py +1 -0
  61. wxo_agentic_evaluation/utils/open_ai_tool_extractor.py +41 -20
  62. wxo_agentic_evaluation/utils/rich_utils.py +23 -9
  63. wxo_agentic_evaluation/utils/utils.py +83 -52
  64. ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info/METADATA +0 -386
  65. {ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info}/WHEEL +0 -0
  66. {ibm_watsonx_orchestrate_evaluation_framework-1.1.1.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.3.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  import json
2
2
 
3
+
3
4
  def json_dump(output_path, object):
4
5
  with open(output_path, "w", encoding="utf-8") as f:
5
6
  json.dump(object, f, indent=4)
@@ -1,11 +1,12 @@
1
1
  import ast
2
2
  import re
3
3
  from pathlib import Path
4
- from typing import Union, Mapping, Any, List
4
+ from typing import Any, List, Mapping, Union
5
+
5
6
 
6
7
  class PythonTypeToJsonType:
7
8
  OPTIONAL_PARAM_EXTRACT = re.compile(r"[Oo]ptional\[(\w+)\]")
8
-
9
+
9
10
  @staticmethod
10
11
  def python_to_json_type(python_annotation: str):
11
12
  if not python_annotation:
@@ -25,30 +26,33 @@ class PythonTypeToJsonType:
25
26
  return "object"
26
27
  if python_annotation.startswith("optional"):
27
28
  # extract the type within Optional[T]
28
- inner_type = PythonTypeToJsonType.OPTIONAL_PARAM_EXTRACT.search(python_annotation).group(1)
29
+ inner_type = PythonTypeToJsonType.OPTIONAL_PARAM_EXTRACT.search(
30
+ python_annotation
31
+ ).group(1)
29
32
  return PythonTypeToJsonType.python_to_json_type(inner_type)
30
33
 
31
34
  return "string"
32
35
 
36
+
33
37
  class ToolExtractionOpenAIFormat:
34
38
  @staticmethod
35
39
  def get_default_arguments(node):
36
- """ Returns the default arguments (if any)
40
+ """Returns the default arguments (if any)
37
41
 
38
42
  The default arguments are stored in args.default array.
39
43
  Since, in Python, the default arguments only come after positional arguments,
40
44
  we can index the argument array starting from the last `n` arguments, where n is
41
45
  the length of the default arguments.
42
46
 
43
- ex.
47
+ ex.
44
48
  def add(a, b=5):
45
49
  pass
46
-
50
+
47
51
  Then we have,
48
52
  args = [a, b]
49
53
  defaults = [Constant(value=5)]
50
54
 
51
- args[-len(defaults):] = [b]
55
+ args[-len(defaults):] = [b]
52
56
 
53
57
  (
54
58
  "FunctionDef(
@@ -70,12 +74,12 @@ class ToolExtractionOpenAIFormat:
70
74
  if num_defaults > 0:
71
75
  for arg in node.args.args[-num_defaults:]:
72
76
  default_arguments.add(arg)
73
-
77
+
74
78
  return default_arguments
75
79
 
76
80
  @staticmethod
77
81
  def from_file(tools_path: Union[str, Path]) -> Mapping[str, Any]:
78
- """ Uses `extract_tool_signatures` function, but converts the response
82
+ """Uses `extract_tool_signatures` function, but converts the response
79
83
  to open-ai format
80
84
 
81
85
  ```
@@ -100,7 +104,11 @@ class ToolExtractionOpenAIFormat:
100
104
  parsed_code = ast.parse(code)
101
105
  for node in parsed_code.body:
102
106
  if isinstance(node, ast.FunctionDef):
103
- parameters = {"type": "object", "properties": {}, "required": []}
107
+ parameters = {
108
+ "type": "object",
109
+ "properties": {},
110
+ "required": [],
111
+ }
104
112
  function_name = node.name
105
113
  for arg in node.args.args:
106
114
  type_annotation = None
@@ -109,16 +117,25 @@ class ToolExtractionOpenAIFormat:
109
117
  if arg.annotation:
110
118
  type_annotation = ast.unparse(arg.annotation)
111
119
 
112
- parameter_type = PythonTypeToJsonType.python_to_json_type(type_annotation)
120
+ parameter_type = (
121
+ PythonTypeToJsonType.python_to_json_type(
122
+ type_annotation
123
+ )
124
+ )
113
125
  parameters["properties"][arg.arg] = {
114
126
  "type": parameter_type,
115
- "description": "", # todo
127
+ "description": "", # todo
116
128
  }
117
129
 
118
- if type_annotation and "Optional" not in type_annotation:
130
+ if (
131
+ type_annotation
132
+ and "Optional" not in type_annotation
133
+ ):
119
134
  parameters["required"].append(arg.arg)
120
135
 
121
- default_arguments = ToolExtractionOpenAIFormat.get_default_arguments(node)
136
+ default_arguments = (
137
+ ToolExtractionOpenAIFormat.get_default_arguments(node)
138
+ )
122
139
  for arg_name in parameters["required"]:
123
140
  if arg_name in default_arguments:
124
141
  parameters.remove(arg_name)
@@ -128,8 +145,10 @@ class ToolExtractionOpenAIFormat:
128
145
  "function": {
129
146
  "name": function_name,
130
147
  "parameters": parameters,
131
- "description": ast.get_docstring(node) # fix (does not do :params)
132
- }
148
+ "description": ast.get_docstring(
149
+ node
150
+ ), # fix (does not do :params)
151
+ },
133
152
  }
134
153
  tool_data.append(open_ai_format_fn)
135
154
 
@@ -149,9 +168,11 @@ class ToolExtractionOpenAIFormat:
149
168
  elif tools_path.is_dir():
150
169
  files_to_parse.extend(tools_path.glob("**/*.py"))
151
170
  else:
152
- raise ValueError(f"Tools path {tools_path} is neither a file nor directory")
153
-
171
+ raise ValueError(
172
+ f"Tools path {tools_path} is neither a file nor directory"
173
+ )
174
+
154
175
  for file_path in files_to_parse:
155
176
  all_tools.extend(ToolExtractionOpenAIFormat.from_file(file_path))
156
-
157
- return all_tools
177
+
178
+ return all_tools
@@ -1,6 +1,7 @@
1
- from rich.text import Text
2
- from typing import Optional, List, Any
1
+ from typing import Any, List, Optional
2
+
3
3
  import rich
4
+ from rich.text import Text
4
5
 
5
6
 
6
7
  def pretty_print(content: Any, style: Optional[str] = None):
@@ -33,13 +34,17 @@ def warn(
33
34
 
34
35
 
35
36
  def is_ok(
36
- message: str, style: Optional[str] = "bold green", prompt: Optional[str] = "OK ✅ :"
37
+ message: str,
38
+ style: Optional[str] = "bold green",
39
+ prompt: Optional[str] = "OK ✅ :",
37
40
  ) -> Text:
38
41
  """Utility function for formatting an OK message."""
39
42
  return Text(f"{prompt}{message}\n\n", style=style)
40
43
 
41
44
 
42
- def print_done(prompt: Optional[str] = "Done ✅", style: Optional[str] = "bold cyan"):
45
+ def print_done(
46
+ prompt: Optional[str] = "Done ✅", style: Optional[str] = "bold cyan"
47
+ ):
43
48
  """
44
49
  Prints a prompt indicating completion of a process/routine.
45
50
  :param prompt: default is `"Done ✅"`
@@ -63,7 +68,9 @@ def print_success(
63
68
 
64
69
 
65
70
  def print_failure(
66
- message: str, style: Optional[str] = "bold red", prompt: Optional[str] = "❌ FAILED"
71
+ message: str,
72
+ style: Optional[str] = "bold red",
73
+ prompt: Optional[str] = "❌ FAILED",
67
74
  ):
68
75
  """
69
76
  Prints a failure message.
@@ -108,7 +115,9 @@ class IncorrectParameterUtils:
108
115
  ]
109
116
 
110
117
  @staticmethod
111
- def format_bad_description_message(tool_name: str, tool_desc: str) -> List[Text]:
118
+ def format_bad_description_message(
119
+ tool_name: str, tool_desc: str
120
+ ) -> List[Text]:
112
121
 
113
122
  return [
114
123
  warn(
@@ -139,12 +148,15 @@ class TestingUtils:
139
148
  For example, this can be read as: `"{\n⚙️ Testing} {20} {good tool descriptions}"`.
140
149
  """
141
150
  pretty_print(
142
- content=f"{prompt} {test_case_count} {test_description}", style=style
151
+ content=f"{prompt} {test_case_count} {test_description}",
152
+ style=style,
143
153
  )
144
154
 
145
155
  @staticmethod
146
156
  def print_error_details(
147
- expected: List[str], detected: List[str], style: Optional[str] = "bold red"
157
+ expected: List[str],
158
+ detected: List[str],
159
+ style: Optional[str] = "bold red",
148
160
  ):
149
161
  """
150
162
  Print detailed error information.
@@ -169,6 +181,8 @@ class TestingUtils:
169
181
  :param style: The style for the text (default is bold red).
170
182
  """
171
183
  if failed_cases:
172
- pretty_print(content=f"{prompt} ({len(failed_cases)}):", style=style)
184
+ pretty_print(
185
+ content=f"{prompt} ({len(failed_cases)}):", style=style
186
+ )
173
187
  for case in failed_cases:
174
188
  pretty_print(content=f" - {case}", style=style)
@@ -1,25 +1,31 @@
1
+ import glob
2
+ import json
3
+ import os
4
+ import re
5
+ from typing import List, Optional, Union
1
6
  from urllib.parse import urlparse
7
+
8
+ import yaml
9
+ from rich import box, print
2
10
  from rich.console import Console, Group
3
- from rich.table import Table
4
11
  from rich.panel import Panel
5
12
  from rich.rule import Rule
6
- from rich import box
7
- from rich import print
8
- import re
9
13
  from rich.style import Style
10
-
11
- from typing import List, Optional, Union
12
- import json
13
- import yaml
14
- import glob
15
- import os
14
+ from rich.table import Table
16
15
 
17
16
  from wxo_agentic_evaluation.metrics.llm_as_judge import Faithfulness
18
- from wxo_agentic_evaluation.metrics.metrics import KnowledgeBaseMetricSummary, ReferenceLessEvalMetrics
19
- from wxo_agentic_evaluation.type import ConversationalConfidenceThresholdScore, Message
17
+ from wxo_agentic_evaluation.metrics.metrics import (
18
+ KnowledgeBaseMetricSummary,
19
+ ReferenceLessEvalMetrics,
20
+ )
21
+ from wxo_agentic_evaluation.type import (
22
+ ConversationalConfidenceThresholdScore,
23
+ Message,
24
+ )
20
25
 
21
26
  console = Console()
22
27
 
28
+
23
29
  class AttackResultsTable:
24
30
  def __init__(self, attack_results: dict):
25
31
  self.table = Table(
@@ -35,11 +41,21 @@ class AttackResultsTable:
35
41
  n_on_policy = attack_results.get("n_on_policy_attacks", 0)
36
42
  n_off_policy = attack_results.get("n_off_policy_attacks", 0)
37
43
  n_on_policy_successful = attack_results.get("n_on_policy_successful", 0)
38
- n_off_policy_successful = attack_results.get("n_off_policy_successful", 0)
44
+ n_off_policy_successful = attack_results.get(
45
+ "n_off_policy_successful", 0
46
+ )
39
47
 
40
48
  # Calculate success rates
41
- on_policy_rate = f"{round(100 * safe_divide(n_on_policy_successful, n_on_policy))}%" if n_on_policy else "0%"
42
- off_policy_rate = f"{round(100 * safe_divide(n_off_policy_successful, n_off_policy))}%" if n_off_policy else "0%"
49
+ on_policy_rate = (
50
+ f"{round(100 * safe_divide(n_on_policy_successful, n_on_policy))}%"
51
+ if n_on_policy
52
+ else "0%"
53
+ )
54
+ off_policy_rate = (
55
+ f"{round(100 * safe_divide(n_off_policy_successful, n_off_policy))}%"
56
+ if n_off_policy
57
+ else "0%"
58
+ )
43
59
 
44
60
  self.table.add_row("On Policy", str(n_on_policy), on_policy_rate)
45
61
  self.table.add_row("Off Policy", str(n_off_policy), off_policy_rate)
@@ -47,6 +63,7 @@ class AttackResultsTable:
47
63
  def print(self):
48
64
  console.print(self.table)
49
65
 
66
+
50
67
  class AgentMetricsTable:
51
68
  def __init__(self, data):
52
69
  self.table = Table(
@@ -90,7 +107,8 @@ def safe_divide(nom, denom):
90
107
  if denom == 0:
91
108
  return 0
92
109
  else:
93
- return nom/denom
110
+ return nom / denom
111
+
94
112
 
95
113
  def is_saas_url(service_url: str) -> bool:
96
114
  hostname = urlparse(service_url).hostname
@@ -103,19 +121,17 @@ def is_ibm_cloud_url(service_url: str) -> bool:
103
121
 
104
122
 
105
123
  def add_line_seperator(
106
- style_config: Optional[
107
- Union[str,Style]
108
- ]=None,
124
+ style_config: Optional[Union[str, Style]] = None,
109
125
  ):
110
-
126
+
111
127
  if not style_config:
112
- style="grey42"
128
+ style = "grey42"
113
129
  else:
114
- style=style_config
115
-
130
+ style = style_config
131
+
116
132
  console.print(
117
133
  Rule(
118
- style=style,
134
+ style=style,
119
135
  )
120
136
  )
121
137
 
@@ -124,14 +140,18 @@ class FaithfulnessTable:
124
140
  def __init__(
125
141
  self, faithfulness_metrics: List[Faithfulness], tool_call_ids: List[str]
126
142
  ):
127
- self.table = Table(title="Faithfulness", box=box.ROUNDED, show_lines=True)
143
+ self.table = Table(
144
+ title="Faithfulness", box=box.ROUNDED, show_lines=True
145
+ )
128
146
 
129
147
  self.table.add_column("Tool Call Id", style="blue")
130
148
  self.table.add_column("Faithfulness Score", style="blue3")
131
149
  self.table.add_column("Evidence", style="cyan")
132
150
  self.table.add_column("Reasoning", style="yellow3")
133
151
 
134
- for tool_call_id, faithfulness in zip(tool_call_ids, faithfulness_metrics):
152
+ for tool_call_id, faithfulness in zip(
153
+ tool_call_ids, faithfulness_metrics
154
+ ):
135
155
  faithfulness = faithfulness.table()
136
156
  self.table.add_row(
137
157
  tool_call_id,
@@ -185,7 +205,9 @@ class KnowledgePanel:
185
205
  self.confidence_scores = ConversationalSearchTable(
186
206
  confidence_scores, tool_call_id
187
207
  )
188
- self.group = Group(self.faithfulness.table, self.confidence_scores.table)
208
+ self.group = Group(
209
+ self.faithfulness.table, self.confidence_scores.table
210
+ )
189
211
 
190
212
  # Panel acts as a section
191
213
  self.section = Panel(
@@ -240,35 +262,32 @@ class Tokenizer:
240
262
  \w+| # Regular words (letters, numbers, underscores)
241
263
  [^\w\s] # Punctuation marks (anything that's not word chars or whitespace)
242
264
  """
243
-
265
+
244
266
  def __init__(self):
245
267
  self.compiled_pattern = re.compile(
246
- self.PATTERN,
247
- re.VERBOSE | re.IGNORECASE
268
+ self.PATTERN, re.VERBOSE | re.IGNORECASE
248
269
  )
249
-
270
+
250
271
  def __call__(self, text: str) -> List[str]:
251
272
  """
252
273
  Tokenizes text by splitting on punctuation and handling contractions.
253
274
 
254
275
  Args:
255
276
  text: Input text to tokenize.
256
-
277
+
257
278
  Returns:
258
279
  List of tokenized words (lowercase, no punctuation).
259
-
280
+
260
281
  Examples:
261
282
  - "I'm fine" -> ['i', 'm', 'fine']
262
- - "don't go" -> ['do', "n't", 'go']
283
+ - "don't go" -> ['do', "n't", 'go']
263
284
  - "Hello, world!" -> ['hello', 'world']
264
285
  """
265
-
266
- tokens = self.compiled_pattern.findall(
267
- text
268
- )
269
-
286
+
287
+ tokens = self.compiled_pattern.findall(text)
288
+
270
289
  return self._clean_tokens(tokens)
271
-
290
+
272
291
  def _clean_tokens(self, raw_tokens: List[str]) -> List[str]:
273
292
  """
274
293
  Applies some basic post-processing to tokenized messages.
@@ -276,12 +295,11 @@ class Tokenizer:
276
295
  Args:
277
296
  raw_tokens: list of tokens extracted from a message.
278
297
  """
279
-
298
+
280
299
  filtered_tokens = [
281
- token.lower() \
282
- for token in raw_tokens \
283
- if token.strip() \
284
- and not (len(token) == 1 and not token.isalnum())
300
+ token.lower()
301
+ for token in raw_tokens
302
+ if token.strip() and not (len(token) == 1 and not token.isalnum())
285
303
  ]
286
304
 
287
305
  return filtered_tokens
@@ -296,10 +314,22 @@ class ReferencelessEvalPanel:
296
314
  )
297
315
 
298
316
  self.table.add_column("Dataset", style="yellow", justify="center")
299
- self.table.add_column("Tool Calls", style="deep_sky_blue1", justify="center")
300
- self.table.add_column("Successful Tool Calls", style="magenta", justify="center")
301
- self.table.add_column("Tool Calls Failed due to Schema Mismatch", style="deep_sky_blue1", justify="center")
302
- self.table.add_column("Tool Calls Failed due to Hallucination", style="magenta", justify="center")
317
+ self.table.add_column(
318
+ "Tool Calls", style="deep_sky_blue1", justify="center"
319
+ )
320
+ self.table.add_column(
321
+ "Successful Tool Calls", style="magenta", justify="center"
322
+ )
323
+ self.table.add_column(
324
+ "Tool Calls Failed due to Schema Mismatch",
325
+ style="deep_sky_blue1",
326
+ justify="center",
327
+ )
328
+ self.table.add_column(
329
+ "Tool Calls Failed due to Hallucination",
330
+ style="magenta",
331
+ justify="center",
332
+ )
303
333
 
304
334
  for metric in referenceless_metrics:
305
335
  self.table.add_row(
@@ -307,12 +337,13 @@ class ReferencelessEvalPanel:
307
337
  str(metric.number_of_tool_calls),
308
338
  str(metric.number_of_successful_tool_calls),
309
339
  str(metric.number_of_static_failed_tool_calls),
310
- str(metric.number_of_semantic_failed_tool_calls)
340
+ str(metric.number_of_semantic_failed_tool_calls),
311
341
  )
312
342
 
313
343
  def print(self):
314
344
  console.print(self.table)
315
345
 
346
+
316
347
  # Function to load messages from JSON file
317
348
  def load_messages(file_path):
318
349
  with open(file_path, "r") as f:
@@ -339,9 +370,9 @@ def load_agents(agents_path: str):
339
370
  for agent_path in agents_json:
340
371
  with open(agent_path, "r") as f:
341
372
  agents.append(json.load(f))
342
-
373
+
343
374
  for agent_path in agents_yaml:
344
375
  with open(agent_path, "r") as f:
345
376
  agents.append(yaml.safe_load(f))
346
-
377
+
347
378
  return agents