ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353) hide show
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
@@ -0,0 +1,298 @@
1
+ """
2
+ Utility functions for tool call comparison.
3
+ """
4
+
5
+ import json
6
+ import re
7
+ from typing import Any, Dict, List, Optional, Union
8
+ from difflib import SequenceMatcher
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def normalize_json_string(value: str) -> Union[Dict, List, str]:
    """Decode ``value`` as JSON, falling back to the input itself.

    Returns:
        Whatever ``json.loads`` produces for ``value``. If decoding raises
        ``json.JSONDecodeError`` or ``TypeError``, ``value`` is handed back
        unchanged.
    """
    try:
        decoded = json.loads(value)
    except (json.JSONDecodeError, TypeError):
        # Not valid JSON (or not a str/bytes input): keep the caller's value.
        decoded = value
    return decoded
20
+
21
+
22
def calculate_string_similarity(str1: str, str2: str) -> float:
    """Score how alike two strings are.

    Returns:
        ``1.0`` when the strings are identical, ``0.95`` when they differ
        only by letter case and/or surrounding whitespace, and otherwise the
        ``difflib.SequenceMatcher`` ratio of the lower-cased, stripped forms.
    """
    if str1 == str2:
        return 1.0

    # Compare case-insensitively and ignore leading/trailing whitespace.
    folded_a, folded_b = (text.lower().strip() for text in (str1, str2))

    if folded_a == folded_b:
        # Same content once normalised: a near-perfect, but not exact, match.
        return 0.95

    return SequenceMatcher(None, folded_a, folded_b).ratio()
37
+
38
+
39
def extract_numeric_value(value: Any) -> Optional[float]:
    """Pull a float out of a number or out of a string containing one.

    Returns:
        ``float(value)`` for ``int``/``float`` inputs; for strings, the first
        substring matching an optionally negative decimal number, converted
        to ``float``; ``None`` when nothing numeric is found or the input is
        of any other type.
    """
    if isinstance(value, str):
        # First run of digits, optionally signed and with a fractional part.
        found = re.search(r"-?\d+\.?\d*", value.strip())
        if found is None:
            return None
        try:
            return float(found.group())
        except ValueError:
            return None

    if isinstance(value, (int, float)):
        return float(value)

    return None
54
+
55
+
56
def is_semantically_equivalent_boolean(val1: Any, val2: Any) -> bool:
    """Check if two values represent the same boolean state.

    A value counts as "truthy" or "falsy" only if it is a member of the
    fixed vocabularies below (booleans, ``0``/``1``, and a few spellings of
    true/false, yes/no, on/off). Anything else has no boolean state.

    Args:
        val1: First value to classify.
        val2: Second value to classify.

    Returns:
        ``True`` when both values are recognised and fall in the same
        vocabulary; ``False`` otherwise, including when either value is
        unhashable (e.g. a list or dict) and so cannot be looked up.
    """
    truthy_values = {
        True,
        "true",
        "True",
        "TRUE",
        "yes",
        "Yes",
        "YES",
        "1",
        1,
        "on",
        "On",
        "ON",
    }
    falsy_values = {
        False,
        "false",
        "False",
        "FALSE",
        "no",
        "No",
        "NO",
        "0",
        0,
        "off",
        "Off",
        "OFF",
    }

    def _boolean_state(value: Any) -> Optional[bool]:
        """Return True/False for recognised values, None for everything else."""
        try:
            if value in truthy_values:
                return True
            if value in falsy_values:
                return False
        except TypeError:
            # Set membership hashes the value; unhashable objects (list,
            # dict, set, ...) can never be one of the recognised tokens.
            return None
        return None

    state1 = _boolean_state(val1)
    state2 = _boolean_state(val2)

    # Both must be recognised and agree.
    return state1 is not None and state1 is state2
95
+
96
+
97
def compare_numeric_with_tolerance(
    val1: Any, val2: Any, tolerance: float = 0.01
) -> bool:
    """Tell whether two values are numerically within ``tolerance`` of each other.

    Both inputs are first converted with ``extract_numeric_value``.

    Returns:
        ``True`` when both conversions succeed and the absolute difference
        is at most ``tolerance``; ``False`` otherwise.
    """
    left, right = extract_numeric_value(val1), extract_numeric_value(val2)

    if left is not None and right is not None:
        return abs(left - right) <= tolerance

    # At least one side has no numeric interpretation.
    return False
108
+
109
+
110
def safe_json_dumps(obj: Any) -> str:
    """Serialise ``obj`` to a JSON string, never raising on bad input.

    Keys are sorted and non-ASCII characters are emitted as-is.

    Returns:
        The JSON text, or ``str(obj)`` when ``json.dumps`` raises
        ``TypeError`` or ``ValueError``.
    """
    try:
        rendered = json.dumps(obj, ensure_ascii=False, sort_keys=True)
    except (TypeError, ValueError):
        # Not JSON-serialisable: fall back to the plain string form.
        rendered = str(obj)
    return rendered
116
+
117
+
118
def deep_compare_objects(obj1: Any, obj2: Any, tolerance: float = 0.01) -> float:
    """
    Deep comparison of objects with similarity score.

    Scoring, in order of precedence:

    * ``1.0`` when ``obj1 == obj2``.
    * Different types: ``0.95`` if both spell the same boolean state,
      ``0.9`` if both are numerically within ``tolerance``, the string
      similarity of ``str(obj1)``/``str(obj2)`` if either is a string,
      else ``0.0``.
    * Same type: strings use string similarity; booleans that differ score
      ``0.0``; numbers score ``1.0`` within ``tolerance`` or a partial score
      based on relative difference; lists and dicts are compared
      recursively; anything else falls back to string similarity.

    Args:
        obj1: First object.
        obj2: Second object.
        tolerance: Absolute tolerance used for numeric comparisons.

    Returns:
        float: Similarity score between 0.0 and 1.0
    """
    # Exact match
    if obj1 == obj2:
        return 1.0

    # Type compatibility checks
    if type(obj1) != type(obj2):
        # Try semantic equivalence for common type mismatches. The boolean
        # check does set-membership lookups, which raise TypeError for
        # unhashable operands (list/dict); those are simply "not boolean".
        try:
            boolean_match = is_semantically_equivalent_boolean(obj1, obj2)
        except TypeError:
            boolean_match = False
        if boolean_match:
            return 0.95

        if compare_numeric_with_tolerance(obj1, obj2, tolerance):
            return 0.9

        # String comparison if one is string
        if isinstance(obj1, str) or isinstance(obj2, str):
            return calculate_string_similarity(str(obj1), str(obj2))

        return 0.0

    # Same type comparisons
    if isinstance(obj1, str):
        return calculate_string_similarity(obj1, obj2)

    # bool must be tested before int/float: bool is a subclass of int, so a
    # numeric check placed first would swallow booleans and let a large
    # tolerance treat True and False as equal.
    if isinstance(obj1, bool):
        return 1.0 if obj1 == obj2 else 0.0

    if isinstance(obj1, (int, float)):
        if compare_numeric_with_tolerance(obj1, obj2, tolerance):
            return 1.0
        # Partial score based on relative difference
        try:
            max_val = max(abs(obj1), abs(obj2), 1)  # Avoid division by zero
            diff_ratio = abs(obj1 - obj2) / max_val
            return max(0.0, 1.0 - diff_ratio)
        except (TypeError, ZeroDivisionError):
            return 0.0

    if isinstance(obj1, list):
        return _compare_lists(obj1, obj2, tolerance)

    if isinstance(obj1, dict):
        return _compare_dicts(obj1, obj2, tolerance)

    # For other types, fall back to string comparison
    return calculate_string_similarity(str(obj1), str(obj2))
171
+
172
+
173
def _compare_lists(list1: List, list2: List, tolerance: float) -> float:
    """Score the similarity of two lists position by position.

    Two empty lists score ``1.0``; one empty list scores ``0.0``. Otherwise
    elements are paired by index over the shorter length, their
    ``deep_compare_objects`` scores are averaged, and, when the lengths
    differ, the average is scaled by ``shorter / longer``.
    """
    shorter, longer = sorted((len(list1), len(list2)))

    if shorter == 0:
        # Both empty -> identical; exactly one empty -> nothing in common.
        return 1.0 if longer == 0 else 0.0

    # zip stops at the shorter list, i.e. the overlapping positions only.
    pair_scores = [
        deep_compare_objects(left, right, tolerance)
        for left, right in zip(list1, list2)
    ]
    mean_score = sum(pair_scores) / len(pair_scores)

    if shorter == longer:
        return mean_score

    # Penalise the unmatched tail of the longer list.
    return mean_score * (shorter / longer)
203
+
204
+
205
def _compare_dicts(dict1: Dict, dict2: Dict, tolerance: float) -> float:
    """Score the similarity of two dictionaries.

    Two empty dicts score ``1.0``; one empty dict scores ``0.0``. Otherwise
    the values under keys present in both dicts are compared with
    ``deep_compare_objects``, averaged, and the average is scaled by the
    fraction of all keys that are shared.
    """
    if not dict1 or not dict2:
        return 1.0 if (not dict1 and not dict2) else 0.0

    shared_keys = set(dict1.keys()) & set(dict2.keys())
    total_key_count = len(set(dict1.keys()) | set(dict2.keys()))

    value_scores = [
        deep_compare_objects(dict1[key], dict2[key], tolerance)
        for key in shared_keys
    ]
    # No shared keys means there is nothing to average.
    mean_score = sum(value_scores) / len(value_scores) if value_scores else 0.0

    # Keys present on only one side reduce the score proportionally.
    return mean_score * (len(shared_keys) / total_key_count)
231
+
232
+
233
def sanitize_parameter_name(name: str) -> str:
    """Normalise a parameter name so equivalent spellings compare equal.

    The name is lower-cased, one leading ``param_``/``parameter_``/``arg_``/
    ``argument_`` prefix and one trailing ``_param``/``_parameter``/``_arg``/
    ``_argument`` suffix are removed, and a handful of whole-name
    abbreviations are expanded.
    """
    expansions = {
        "id": "identifier",
        "num": "number",
        "qty": "quantity",
        "amt": "amount",
        "desc": "description",
    }

    without_prefix = re.sub(
        r"^(param_|parameter_|arg_|argument_)", "", name.lower()
    )
    core = re.sub(r"(_param|_parameter|_arg|_argument)$", "", without_prefix)

    # Only an exact whole-name abbreviation is expanded; others pass through.
    return expansions.get(core, core)
253
+
254
+
255
def validate_tool_call_structure(tool_call: Dict[str, Any]) -> List[str]:
    """
    Inspect a tool call and report every structural problem found.

    A structural failure at the top level (not a dict, no ``function`` key,
    or a non-dict ``function`` value) is reported on its own. Otherwise the
    function ``name`` and, if present, ``arguments`` are each checked.

    Returns:
        List of validation error messages (empty when the call is valid)
    """
    # Top-level shape: each failure here makes further checks meaningless.
    if not isinstance(tool_call, dict):
        return ["Tool call must be a dictionary"]

    if "function" not in tool_call:
        return ["Missing 'function' key in tool call"]

    fn_spec = tool_call["function"]
    if not isinstance(fn_spec, dict):
        return ["'function' value must be a dictionary"]

    problems = []

    # Function name: must exist, be a string, and be non-blank.
    if "name" in fn_spec:
        fn_name = fn_spec["name"]
        if not isinstance(fn_name, str):
            problems.append("Function 'name' must be a string")
        elif not fn_name.strip():
            problems.append("Function 'name' cannot be empty")
    else:
        problems.append("Missing 'name' in function data")

    # Arguments are optional.
    if "arguments" not in fn_spec:
        return problems

    raw_args = fn_spec["arguments"]
    if isinstance(raw_args, dict):
        return problems

    if not isinstance(raw_args, str):
        problems.append("Function 'arguments' must be a dictionary or JSON string")
        return problems

    # String arguments must at least parse as JSON.
    try:
        json.loads(raw_args)
    except json.JSONDecodeError:
        problems.append("Function 'arguments' string is not valid JSON")

    return problems
@@ -0,0 +1,33 @@
1
# Canonical string identifiers for the function-calling metrics, followed by
# lists that group those identifiers by metric category.

# --- General (whole tool call) metrics ---
METRIC_GENERAL_HALLUCINATION_CHECK = "general_hallucination_check"
METRIC_GENERAL_VALUE_FORMAT_ALIGNMENT = "general_value_format_alignment"

# --- Function selection metrics ---
METRIC_FUNCTION_SELECTION_APPROPRIATENESS = "function_selection_appropriateness"
METRIC_AGENTIC_CONSTRAINTS_SATISFACTION = "agentic_constraints_satisfaction"

# --- Per-parameter metrics ---
METRIC_PARAMETER_VALUE_FORMAT_ALIGNMENT = "parameter_value_format_alignment"
METRIC_PARAMETER_HALLUCINATION_CHECK = "parameter_hallucination_check"

# --- Trajectory metrics ---
METRIC_TRAJECTORY_OBJECTIVE_SATISFACTION = "trajectory_objective_satisfaction"

# --- Category -> metric names ---
GENERAL_METRICS = [
    METRIC_GENERAL_HALLUCINATION_CHECK,
    METRIC_GENERAL_VALUE_FORMAT_ALIGNMENT,
]
FUNCTION_SELECTION_METRICS = [
    METRIC_FUNCTION_SELECTION_APPROPRIATENESS,
    METRIC_AGENTIC_CONSTRAINTS_SATISFACTION,
]
PARAMETER_METRICS = [
    METRIC_PARAMETER_VALUE_FORMAT_ALIGNMENT,
    METRIC_PARAMETER_HALLUCINATION_CHECK,
]
TRAJECTORY_METRICS = [
    METRIC_TRAJECTORY_OBJECTIVE_SATISFACTION,
]
@@ -0,0 +1,31 @@
1
+ """Function calling metrics."""
2
+ from .function_call import GeneralMetricsPrompt, get_general_metrics_prompt
3
+ from .function_selection import (
4
+ FunctionSelectionPrompt,
5
+ )
6
+ from .loader import (
7
+ load_prompts_from_jsonl,
8
+ load_prompts_from_list,
9
+ load_prompts_from_metrics,
10
+ PromptKind,
11
+ )
12
+ from .parameter import ParameterMetricsPrompt, get_parameter_metrics_prompt
13
+ from .trajectory import (
14
+ get_trajectory_reflection_prompt,
15
+ TrajectoryReflectionPrompt,
16
+ )
17
+
18
+
19
# Public names of this package, grouped by the submodule they are imported from.
__all__ = [
    # .function_call
    "get_general_metrics_prompt",
    "GeneralMetricsPrompt",
    # .function_selection
    "FunctionSelectionPrompt",
    # .parameter
    "get_parameter_metrics_prompt",
    "ParameterMetricsPrompt",
    # .trajectory
    "get_trajectory_reflection_prompt",
    "TrajectoryReflectionPrompt",
    # .loader
    "load_prompts_from_jsonl",
    "load_prompts_from_list",
    "load_prompts_from_metrics",
    "PromptKind",
]
@@ -0,0 +1,26 @@
1
+ from abc import ABC
2
+
3
+ from llmevalkit.metrics import Metric, MetricPrompt
4
+
5
+
6
class FunctionMetricsPrompt(MetricPrompt, ABC):
    """
    Common abstract parent for the function-calling metric prompts.

    Concrete subclasses are expected to set two class attributes:
      - system_template: str
      - user_template: str
    """

    system_template: str
    user_template: str

    def __init__(self, metric: Metric, task_description: str) -> None:
        """
        Forward the class-level templates and default system kwargs to MetricPrompt.

        Args:
            metric: Metric being evaluated; its ``to_jsonschema()`` result is
                supplied as the ``metric_jsonschema`` system default.
            task_description: Text supplied as the ``task_description``
                system default.
        """
        # Templates are read from the (sub)class before the defaults are
        # built, matching the order in which the values were evaluated
        # when they were passed inline.
        system_tpl = self.system_template
        user_tpl = self.user_template
        system_defaults = {
            "task_description": task_description,
            "metric_jsonschema": metric.to_jsonschema(),
        }
        super().__init__(
            metric=metric,
            system_template=system_tpl,
            user_template=user_tpl,
            system_kwargs_defaults=system_defaults,
        )
@@ -0,0 +1,4 @@
1
+ """General function-call metrics."""
2
+ from .general import GeneralMetricsPrompt, get_general_metrics_prompt
3
+
4
# Names re-exported from the .general submodule.
__all__ = [
    "GeneralMetricsPrompt",
    "get_general_metrics_prompt",
]
@@ -0,0 +1,46 @@
1
+ from typing import Any, Dict, List, Union
2
+ from llmevalkit.function_calling.metrics.base import FunctionMetricsPrompt
3
+
4
# System-prompt template. The double-brace placeholders are filled in later
# (presumably by the prompt class's template rendering - confirm against
# MetricPrompt).
_general_system = (
    "### Task Description and Role:\n\n{{ task_description }}\n\n"
    "Your output must conform to the following JSON schema, "
    "in the same order as the fields appear in the schema:\n"
    "{{ metric_jsonschema }}"
)

# User-prompt template: conversation, tool specification and the proposed
# tool call, followed by the output-format reminder.
_general_user: str = (
    "Conversation context:\n{{ conversation_context }}\n\n"
    "Tool Specification:\n{{ tool_inventory }}\n\n"
    "Proposed tool call:\n{{ tool_call }}\n\n"
    "Return a JSON object as specified in the system prompt. "
    "You MUST keep the same order of fields in the JSON object "
    "as provided in the JSON schema and examples."
)
20
+
21
+
22
class GeneralMetricsPrompt(FunctionMetricsPrompt):
    """Metric prompt for the general semantic checks of a tool call.

    Adds no behaviour of its own; it only binds the module-level general
    templates to the class attributes the base class expects.
    """

    user_template = _general_user
    system_template = _general_system
27
+
28
+
29
def get_general_metrics_prompt(
    prompt: GeneralMetricsPrompt,
    conversation_context: Union[str, List[Dict[str, str]]],
    tool_inventory: List[Dict[str, Any]],
    tool_call: Dict[str, Any],
) -> List[Dict[str, str]]:
    """
    Assemble the chat messages for a general semantic evaluation.

    Args:
        prompt: Prompt object whose ``build_messages`` does the rendering.
        conversation_context: Conversation so far, as text or message dicts.
        tool_inventory: Tool specifications shown to the evaluator.
        tool_call: The proposed tool call under evaluation.

    Returns:
        Whatever ``prompt.build_messages`` returns; expected to be the list
        of chat messages (system -> [few-shot] -> user).
    """
    template_values = dict(
        conversation_context=conversation_context,
        tool_inventory=tool_inventory,
        tool_call=tool_call,
    )
    return prompt.build_messages(user_kwargs=template_values)