ibm-watsonx-gov 1.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353) hide show
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cp313-win_amd64.pyd +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cp313-win_amd64.pyd +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cp313-win_amd64.pyd +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cp313-win_amd64.pyd +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cp313-win_amd64.pyd +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cp313-win_amd64.pyd +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cp313-win_amd64.pyd +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cp313-win_amd64.pyd +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +4 -0
@@ -0,0 +1,129 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from typing import Annotated, Callable, Dict, Optional, Union
11
+
12
+ from pydantic import Field, model_validator
13
+ from typing_extensions import Self
14
+
15
+ from ibm_watsonx_gov.entities.base_classes import BaseConfiguration
16
+ from ibm_watsonx_gov.entities.enums import TaskType
17
+ from ibm_watsonx_gov.entities.llm_judge import LLMJudge
18
+ from ibm_watsonx_gov.entities.locale import Locale
19
+
20
+
21
class GenAIConfiguration(BaseConfiguration):
    """
    Defines the GenAIConfiguration class.

    This is used to specify the fields mapping details in the data and other configuration parameters needed for evaluation.

    Examples:
        1. Create configuration with default parameters
            .. code-block:: python

                configuration = GenAIConfiguration()

        2. Create configuration with parameters
            .. code-block:: python

                configuration = GenAIConfiguration(input_fields=["input"],
                                                   output_fields=["output"])

        3. Create configuration with dict parameters
            .. code-block:: python

                config = {"input_fields": ["input"],
                          "output_fields": ["output"],
                          "context_fields": ["contexts"],
                          "reference_fields": ["reference"]}
                configuration = GenAIConfiguration(**config)
    """

    # --- Task and data-column mapping ---------------------------------------------------------
    task_type: Annotated[
        TaskType | None,
        Field(
            title="Task Type",
            description="The generative task type. Default value is None.",
            default=None,
            examples=[TaskType.RAG],
        ),
    ]
    input_fields: Annotated[
        list[str],
        Field(
            title="Input Fields",
            description="The list of model input fields in the data. Default value is ['input_text'].",
            examples=[["question"]],
            default=["input_text"],
        ),
    ]
    context_fields: Annotated[
        list[str],
        Field(
            title="Context Fields",
            description="The list of context fields in the input fields. Default value is ['context'].",
            default=["context"],
            examples=[["context1", "context2"]],
        ),
    ]
    output_fields: Annotated[
        list[str],
        Field(
            title="Output Fields",
            description="The list of model output fields in the data. Default value is ['generated_text'].",
            default=["generated_text"],
            examples=[["output"]],
        ),
    ]
    reference_fields: Annotated[
        list[str],
        Field(
            title="Reference Fields",
            description="The list of reference fields in the data. Default value is ['ground_truth'].",
            default=["ground_truth"],
            examples=[["reference"]],
        ),
    ]
    locale: Annotated[
        Locale | None,
        Field(
            title="Locale",
            description="The language locale of the input, output and reference fields in the data.",
            default=None,
        ),
    ]

    # --- Tool calling -------------------------------------------------------------------------
    tools: Annotated[
        Union[list[Callable], list[Dict]],
        Field(
            title="Tools",
            description="The list of tools used by the LLM.",
            default=[],
            examples=[["function1", "function2"]],
        ),
    ]
    tool_calls_field: Annotated[
        Optional[str],
        Field(
            title="Tool Calls Field",
            description="The tool calls field in the input fields. Default value is 'tool_calls'.",
            default="tool_calls",
            examples=["tool_calls"],
        ),
    ]
    available_tools_field: Annotated[
        Optional[str],
        Field(
            title="Available Tools Field",
            description="The tool inventory field in the data. Default value is 'available_tools'.",
            default="available_tools",
            examples=["available_tools"],
        ),
    ]

    # --- LLM-as-judge and prompt --------------------------------------------------------------
    llm_judge: Annotated[
        LLMJudge | None,
        Field(
            title="LLM Judge",
            description="LLM as Judge Model details.",
            default=None,
        ),
    ]
    prompt_field: Annotated[
        Optional[str],
        Field(
            title="Model Prompt Field",
            description="The prompt field in the input fields. Default value is 'model_prompt'.",
            default="model_prompt",
            examples=["model_prompt"],
        ),
    ]

    # --- Span attribute mapping ---------------------------------------------------------------
    start_time_field: Annotated[
        Optional[str],
        Field(
            title="Span Start Time Field",
            description="The start time field in span attributes.",
            default=None,
            examples=["start_time"],
        ),
    ]
    end_time_field: Annotated[
        Optional[str],
        Field(
            title="Span End Time Field",
            description="The end time field in span attributes.",
            default=None,
            examples=["end_time"],
        ),
    ]
    model_usage_detail_fields: Annotated[
        Optional[list[str]],
        Field(
            title="Model Usage Detail Field",
            description="The model usage detail field in span attributes. This field should provide information on model name, input_token_count and output_token_count",
            default=[],
        ),
    ]
    input_token_count_fields: Annotated[
        Optional[list[str]],
        Field(
            title="Input Token Count Field",
            description="The input token count field in span attributes.",
            default=[],
            examples=[["prompt_tokens"]],
        ),
    ]
    output_token_count_fields: Annotated[
        Optional[list[str]],
        Field(
            title="Output Token Count Field",
            description="The output token count field in span attributes.",
            default=["completion_tokens"],
            examples=[["completion_tokens"]],
        ),
    ]
    status_field: Annotated[
        Optional[str],
        Field(
            title="Span Status Field",
            description="The status field in span attributes.",
            default="status",
            examples=["status"],
        ),
    ]
    user_id_field: Annotated[
        Optional[str],
        Field(
            title="User Id Field",
            description="The user id field in span attributes.",
            default="user_id",
            examples=["user_id"],
        ),
    ]

    @model_validator(mode="after")
    def validate_fields(self) -> Self:
        """
        Check cross-field requirements once the individual fields are populated.

        For the RAG task type both ``input_fields`` and ``context_fields`` must be non-empty.

        Returns:
            Self: The validated configuration instance.

        Raises:
            ValueError: If the task type is RAG and ``input_fields`` or ``context_fields`` is empty.
        """
        is_rag = self.task_type == TaskType.RAG
        if is_rag and not (self.input_fields and self.context_fields):
            raise ValueError(
                "input_fields and context_fields are required for RAG task type.")

        return self
@@ -0,0 +1,173 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from typing import Annotated, List, Optional, Tuple
11
+
12
+ from pydantic import BaseModel, ConfigDict, Field, PositiveInt, field_validator
13
+
14
+ from ibm_watsonx_gov.entities.credentials import WxGovConsoleCredentials
15
+ from ibm_watsonx_gov.entities.foundation_model import FoundationModel
16
+
17
+
18
class WxGovConsoleConfiguration(BaseModel):
    """
    Settings used to store model risk evaluation results in the watsonx Governance Console.

    The configuration pairs the console identifier of the target model with the
    credentials required to authenticate against the console.

    Examples:
        1. Create configuration with explicit credentials:
            .. code-block:: python

                credentials = WxGovConsoleCredentials(
                    url="https://governance-console.example.com",
                    username="admin",
                    password="securepassword",
                    api_key="optional-api-key"
                )
                configuration = WxGovConsoleConfiguration(
                    model_id="model-12345",
                    credentials=credentials
                )
    """
    # Clearing the protected namespaces allows a field named ``model_id``;
    # pydantic otherwise reserves the ``model_`` prefix.
    model_config = ConfigDict(protected_namespaces=())

    # Identifier of the model inside the watsonx Governance Console.
    model_id: Annotated[
        str,
        Field(description="The watsonx Governance Console identifier of the model to store the model risk result."),
    ]
    # Credentials used to connect to the console.
    credentials: Annotated[
        WxGovConsoleCredentials,
        Field(description="The watsonx Governance Console credentials."),
    ]
53
+
54
+
55
class ModelRiskConfiguration(BaseModel):
    """
    Configuration for a model risk evaluation.

    Groups the foundation model details, the risk dimensions to evaluate, the
    sampling limit, the percentile thresholds and the optional reporting targets
    (PDF report path and watsonx Governance Console).

    Examples:
        1. Create a basic configuration:
            .. code-block:: python

                model_details = WxAIFoundationModel(
                    model_name="mymodel_flan",
                    model_id="ibm/granite-3-3-8b-instruct",
                    project_id="project_id")

                model_config = ModelRiskConfiguration(
                    model_details=model_details,
                    risk_dimensions=["hallucination"],
                    max_sample_size=500,
                    thresholds=(20, 80),
                    pdf_report_output_path="/reports"
                )

        2. Include watsonx Governance Console integration:
            .. code-block:: python

                model_details = WxAIFoundationModel(
                    model_name="mymodel_flan",
                    model_id="ibm/granite-3-3-8b-instruct",
                    project_id="project_id")

                wx_gc_credentials = WxGovConsoleCredentials(
                    url="https://governance.example.com",
                    username="admin",
                    password="securepass",
                    api_key="console API key"
                )

                wx_config = WxGovConsoleConfiguration(
                    model_id="model-abc123",
                    credentials=wx_gc_credentials
                )

                model_config = ModelRiskConfiguration(
                    model_details=model_details,
                    risk_dimensions=["hallucination"],
                    max_sample_size=500,
                    thresholds=(20, 80),
                    wx_gc_configuration=wx_config,
                    pdf_report_output_path="/reports"
                )

    Validators:
        - `thresholds`: Ensures that the threshold values are between 0 and 100, and that the lower value is less than the upper value.
    """
    # Clearing the protected namespaces allows fields with the ``model_``
    # prefix (``model_details``), which pydantic otherwise reserves.
    model_config = ConfigDict(protected_namespaces=())

    model_details: Annotated[
        FoundationModel,
        Field(title="Foundation Model Details",
              description="The details of the foundation model being evaluated."),
    ]
    risk_dimensions: Annotated[
        Optional[List[str]],
        Field(title="Risk Dimensions",
              description="A list of risk categories to be evaluated for the model. These could include hallucination, jailbreaking etc.",
              default=None,
              examples=[["hallucination", "jailbreaking", "harmful-code-generation"]]),
    ]
    max_sample_size: Annotated[
        Optional[PositiveInt],
        Field(title="Maximum Sample Size",
              description="The maximum number of samples to be used during the evaluation process. Must be a positive integer.",
              default=None,
              examples=[50]),
    ]
    wx_gc_configuration: Annotated[
        Optional[WxGovConsoleConfiguration],
        Field(title="watsonx Governance Console Configuration",
              description="Optional configuration for storing results in watsonx Governance Console.",
              default=None),
    ]
    pdf_report_output_path: Annotated[
        Optional[str],
        Field(title="PDF Report Output Path",
              description="The output file path to store the model risk evaluation PDF report.",
              default=None,
              examples=["/reports/"]),
    ]
    thresholds: Annotated[
        Optional[Tuple[int, int]],
        Field(title="Performance Thresholds",
              description="A tuple representing the percentile-based threshold configuration used for categorizing LLM performance. The first element is the lower percentile threshold, and the second is the upper percentile threshold",
              default=(25, 75),
              examples=[(25, 75)]),
    ]

    @field_validator("thresholds")
    @classmethod
    def validate_thresholds(cls, v):
        """
        Validate the ``(lower, upper)`` percentile thresholds.

        Args:
            v: The thresholds tuple, or None when no thresholds are given.

        Returns:
            The unchanged value when it is None or satisfies
            ``0 <= lower < upper <= 100``.

        Raises:
            ValueError: If a threshold is outside 0-100 or the first value is
                not strictly less than the second.
        """
        # None is an accepted value and needs no range check.
        if v is None:
            return v

        lower, upper = v
        ordered_and_in_range = 0 <= lower < upper <= 100
        if not ordered_and_in_range:
            raise ValueError(
                "Thresholds must be between 0 and 100, and the first must be less than the second.")
        return v
@@ -0,0 +1,20 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+
11
+ from ibm_watsonx_gov.entities.base_classes import BaseConfiguration
12
+ from ibm_watsonx_gov.entities.enums import InputDataType, ProblemType
13
+
14
+
15
class PredictiveAIConfiguration(BaseConfiguration):
    """
    Configuration for predictive AI use cases.

    Extends ``BaseConfiguration`` with the problem type, the input data type and
    the names of the feature, categorical and text fields.
    """
    # Required settings: no default is provided for these three fields.
    problem_type: ProblemType
    input_data_type: InputDataType
    feature_fields: list[str]

    # Optional field-name lists; both default to an empty list.
    categorical_fields: list[str] = []
    text_fields: list[str] = []
@@ -0,0 +1,8 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
@@ -0,0 +1,209 @@
1
+
2
+ # ----------------------------------------------------------------------------------------------------
3
+ # IBM Confidential
4
+ # Licensed Materials - Property of IBM
5
+ # 5737-H76, 5900-A3Q
6
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
7
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
8
+ # GSA ADPSchedule Contract with IBM Corp.
9
+ # ----------------------------------------------------------------------------------------------------
10
+
11
+
12
+ from json import loads
13
+ from typing import Annotated, Optional
14
+
15
+ from pydantic import BaseModel, Field, TypeAdapter, field_serializer
16
+
17
+ from ibm_watsonx_gov.config.agentic_ai_configuration import \
18
+ AgenticAIConfiguration
19
+ from ibm_watsonx_gov.entities.enums import MetricGroup
20
+ from ibm_watsonx_gov.entities.foundation_model import FoundationModelInfo
21
+ from ibm_watsonx_gov.entities.metric import GenAIMetric, Mapping
22
+ from ibm_watsonx_gov.metrics import METRICS_UNION
23
+
24
+
25
class MetricsConfiguration(BaseModel):
    """
    The class representing the metrics to be computed and the configuration details required for them.

    Examples:
        1. Create MetricsConfiguration with default agentic ai configuration
            .. code-block:: python

                metrics_configuration = MetricsConfiguration(metrics=[ContextRelevanceMetric()],
                                                             metric_groups=[MetricGroup.RETRIEVAL_QUALITY])

        2. Create MetricsConfiguration by specifying agentic ai configuration
            .. code-block:: python

                config = {
                    "input_fields": ["input"],
                    "context_fields": ["contexts"]
                }
                metrics_configuration = MetricsConfiguration(configuration=AgenticAIConfiguration(**config),
                                                             metrics=[ContextRelevanceMetric()],
                                                             metric_groups=[MetricGroup.RETRIEVAL_QUALITY])
    """
    configuration: Annotated[AgenticAIConfiguration,
                             Field(title="Metrics configuration",
                                   description="The configuration of the metrics to compute. The configuration contains the fields names to be read when computing the metrics.",
                                   default=AgenticAIConfiguration())]
    metrics: Annotated[Optional[list[GenAIMetric]],
                       Field(title="Metrics",
                             description="The list of metrics to compute.",
                             default=[])]
    metric_groups: Annotated[Optional[list[MetricGroup]],
                             Field(title="Metric Groups",
                                   description="The list of metric groups to compute.",
                                   default=[])]

    @classmethod
    def model_validate(cls, obj, **kwargs):
        """
        Validate ``obj`` into a MetricsConfiguration.

        When ``obj`` is a mapping with a non-null ``metrics`` entry, every
        element is first validated against ``METRICS_UNION`` before the regular
        model validation runs.

        Args:
            obj: The object to validate, typically a dict.
            **kwargs: Forwarded unchanged to ``BaseModel.model_validate``.

        Returns:
            MetricsConfiguration: The validated instance.
        """
        # Imported locally under an alias because this module already binds
        # the name ``Mapping`` to the metric mapping entity.
        from collections.abc import Mapping as AbcMapping

        if isinstance(obj, AbcMapping) and obj.get("metrics") is not None:
            # Build the adapter once instead of once per metric.
            adapter = TypeAdapter(METRICS_UNION)
            # Work on a shallow copy so the caller's mapping is not mutated.
            obj = {**obj,
                   "metrics": [adapter.validate_python(m) for m in obj["metrics"]]}
        return super().model_validate(obj, **kwargs)

    @field_serializer("metrics", when_used="json")
    def metrics_serializer(self, metrics: Optional[list[GenAIMetric]]):
        """
        Serialize the metrics for JSON output by dumping each metric individually.

        Args:
            metrics: The metrics of this configuration; may be None because the
                field is optional.

        Returns:
            A list with the JSON-mode dump of every metric, or None when no
            metrics list is set.
        """
        # The field is Optional; iterating None would raise a TypeError.
        if metrics is None:
            return None
        return [metric.model_dump(mode="json") for metric in metrics]
70
+
71
+
72
class Node(BaseModel):
    """
    The class representing a node in an agentic application.

    Examples:
        1. Create Node with metrics configuration and default agentic ai configuration
            .. code-block:: python

                metrics_configurations = [MetricsConfiguration(metrics=[ContextRelevanceMetric()],
                                                               metric_groups=[MetricGroup.RETRIEVAL_QUALITY])]
                node = Node(name="Retrieval Node",
                            metrics_configurations=metrics_configurations)

        2. Create Node with metrics configuration and specifying agentic ai configuration
            .. code-block:: python

                node_config = {"input_fields": ["input"],
                               "output_fields": ["output"],
                               "context_fields": ["contexts"],
                               "reference_fields": ["reference"]}
                metrics_configurations = [MetricsConfiguration(configuration=AgenticAIConfiguration(**node_config),
                                                               metrics=[ContextRelevanceMetric()],
                                                               metric_groups=[MetricGroup.RETRIEVAL_QUALITY])]
                node = Node(name="Retrieval Node",
                            metrics_configurations=metrics_configurations)
    """
    name: Annotated[str,
                    Field(title="Name",
                          description="The name of the node.")]
    func_name: Annotated[Optional[str],
                         Field(title="Node function name",
                               description="The name of the node function.",
                               default=None)]
    metrics_configurations: Annotated[list[MetricsConfiguration],
                                      Field(title="Metrics configuration",
                                            description="The list of metrics and their configuration details.",
                                            default=[])]
    foundation_models: Annotated[
        list[FoundationModelInfo],
        Field(
            description="The Foundation models invoked by the node",
            default=[],
        ),
    ]

    @classmethod
    def model_validate(cls, obj, **kwargs):
        """
        Validate ``obj`` into a Node.

        When ``obj`` is a mapping with a non-null ``metrics_configurations``
        entry, every element is first passed through
        ``MetricsConfiguration.model_validate`` before the regular model
        validation runs.

        Args:
            obj: The object to validate, typically a dict.
            **kwargs: Forwarded unchanged to ``BaseModel.model_validate``.

        Returns:
            Node: The validated instance.
        """
        # Imported locally under an alias because this module already binds
        # the name ``Mapping`` to the metric mapping entity.
        from collections.abc import Mapping as AbcMapping

        # A null entry is left as-is so the regular model validation reports
        # it, instead of failing here with a TypeError.
        if isinstance(obj, AbcMapping) and obj.get("metrics_configurations") is not None:
            # Replace the entry on a shallow copy so the caller's mapping
            # keeps its original list.
            obj = {**obj,
                   "metrics_configurations": [
                       MetricsConfiguration.model_validate(m)
                       for m in obj["metrics_configurations"]]}
        return super().model_validate(obj, **kwargs)
123
+
124
+
125
class AgenticApp(BaseModel):
    """
    The configuration class representing an agentic application.
    An agent is composed of a set of nodes.
    The metrics to be computed at the agent or message level should be specified in the metrics_configuration and the metrics to be computed for the node level should be specified in the nodes list.

    Examples:
        1. Create AgenticApp with agent level metrics configuration.
            .. code-block:: python

                # Below example provides the agent configuration to compute the AnswerRelevanceMetric and all the metrics in Content Safety group on agent or message level.
                agentic_app = AgenticApp(name="Agentic App",
                                         metrics_configuration=MetricsConfiguration(metrics=[AnswerRelevanceMetric()],
                                                                                    metric_groups=[MetricGroup.CONTENT_SAFETY]))
                agentic_evaluator = AgenticEvaluator(agentic_app=agentic_app)
                ...

        2. Create AgenticApp with agent and node level metrics configuration and default agentic ai configuration for metrics.
            .. code-block:: python

                # Below example provides the node configuration to compute the ContextRelevanceMetric and all the metrics in Retrieval Quality group.
                nodes = [Node(name="Retrieval Node",
                              metrics_configurations=[MetricsConfiguration(metrics=[ContextRelevanceMetric()],
                                                                           metric_groups=[MetricGroup.RETRIEVAL_QUALITY])])]

                # Below example provides the agent configuration to compute the AnswerRelevanceMetric and all the metrics in Content Safety group on agent or message level.
                agentic_app = AgenticApp(name="Agentic App",
                                         metrics_configuration=MetricsConfiguration(metrics=[AnswerRelevanceMetric()],
                                                                                    metric_groups=[MetricGroup.CONTENT_SAFETY]),
                                         nodes=nodes)
                agentic_evaluator = AgenticEvaluator(agentic_app=agentic_app)
                ...

        3. Create AgenticApp with agent and node level metrics configuration and with agentic ai configuration for metrics.
            .. code-block:: python

                # Below example provides the node configuration to compute the ContextRelevanceMetric and all the metrics in Retrieval Quality group.
                node_fields_config = {
                    "input_fields": ["input"],
                    "context_fields": ["web_context"]
                }
                nodes = [Node(name="Retrieval Node",
                              metrics_configurations=[MetricsConfiguration(configuration=AgenticAIConfiguration(**node_fields_config),
                                                                           metrics=[ContextRelevanceMetric()],
                                                                           metric_groups=[MetricGroup.RETRIEVAL_QUALITY])])]

                # Below example provides the agent configuration to compute the AnswerRelevanceMetric and all the metrics in Content Safety group on agent or message level.
                agent_fields_config = {
                    "input_fields": ["input"],
                    "output_fields": ["output"]
                }
                agentic_app = AgenticApp(name="Agentic App",
                                         metrics_configuration=MetricsConfiguration(configuration=AgenticAIConfiguration(**agent_fields_config),
                                                                                    metrics=[AnswerRelevanceMetric()],
                                                                                    metric_groups=[MetricGroup.CONTENT_SAFETY]),
                                         nodes=nodes)
                agentic_evaluator = AgenticEvaluator(agentic_app=agentic_app)
                ...
    """
    name: Annotated[str, Field(title="Agentic application name",
                               description="The name of the agentic application.",
                               default="Agentic App")]
    message_io_mapping: Annotated[Optional[Mapping],
                                  Field(title="Message IO mapping",
                                        description="The message input and output mapping.",
                                        default=None)]
    metrics_configuration: Annotated[Optional[MetricsConfiguration],
                                     Field(title="Metrics configuration",
                                           description="The list of metrics to be computed on the agentic application and their configuration details.",
                                           default=MetricsConfiguration())]
    nodes: Annotated[Optional[list[Node]],
                     Field(title="Node details",
                           description="The nodes details.",
                           default=[])]

    @classmethod
    def model_validate_json(cls, json_data, **kwargs):
        """
        Build an AgenticApp from its JSON representation.

        The JSON document is parsed with ``json.loads``. A non-null
        ``metrics_configuration`` entry is validated through
        ``MetricsConfiguration.model_validate`` and every element of a non-null
        ``nodes`` entry through ``Node.model_validate``. The resulting data is
        then passed to ``cls.model_validate``.

        Args:
            json_data: The JSON document accepted by ``json.loads``.
            **kwargs: Forwarded unchanged to ``cls.model_validate``.

        Returns:
            AgenticApp: The validated instance.
        """
        data = loads(json_data)
        # Only JSON objects carry the nested entries; any other JSON value is
        # handed to the regular validation, which reports the error.
        if isinstance(data, dict):
            # Both fields are Optional, so a JSON null must be passed through
            # untouched instead of being validated or iterated.
            if data.get("metrics_configuration") is not None:
                data["metrics_configuration"] = MetricsConfiguration.model_validate(
                    data["metrics_configuration"])
            if data.get("nodes") is not None:
                data["nodes"] = [Node.model_validate(node)
                                 for node in data["nodes"]]
        return cls.model_validate(data, **kwargs)