ibm-watsonx-gov 1.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353) hide show
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cp313-win_amd64.pyd +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cp313-win_amd64.pyd +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cp313-win_amd64.pyd +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cp313-win_amd64.pyd +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cp313-win_amd64.pyd +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cp313-win_amd64.pyd +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cp313-win_amd64.pyd +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cp313-win_amd64.pyd +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +4 -0
@@ -0,0 +1,254 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from typing import Annotated, Literal
11
+
12
+ import pandas as pd
13
+ from lazy_imports import LazyModule, load
14
+ from pydantic import Field, model_validator
15
+ from typing_extensions import Self
16
+
17
+ from ibm_watsonx_gov.config import AgenticAIConfiguration, GenAIConfiguration
18
+ from ibm_watsonx_gov.entities.base_classes import Error
19
+ from ibm_watsonx_gov.entities.enums import MetricGroup, TaskType
20
+ from ibm_watsonx_gov.entities.evaluation_result import (AggregateMetricResult,
21
+ RecordMetricResult)
22
+ from ibm_watsonx_gov.entities.llm_judge import LLMJudge
23
+ from ibm_watsonx_gov.entities.metric import GenAIMetric
24
+ from ibm_watsonx_gov.entities.metric_threshold import MetricThreshold
25
+ from ibm_watsonx_gov.providers.detectors_provider import DetectorsProvider
26
+ from ibm_watsonx_gov.utils.async_util import run_in_event_loop
27
+ from ibm_watsonx_gov.utils.gov_sdk_logger import GovSDKLogger
28
+ from ibm_watsonx_gov.utils.validation_util import (validate_context,
29
+ validate_input,
30
+ validate_llm_as_judge,
31
+ validate_output,
32
+ validate_small_model_method,
33
+ validate_unitxt_method)
34
+
35
# Canonical metric name; used as the default of ``FaithfulnessMetric.name``.
FAITHFULNESS = "faithfulness"

# Method names handed to the validation helpers as the set of methods known
# for this metric.
unitxt_methods = [
    "token_k_precision",
    "sentence_bert_mini_lm",
    "llm_as_judge",
    "granite_guardian",
    "faithfulness_model",
]

logger = GovSDKLogger.get_logger(__name__)

# UnitxtProvider is resolved through ``lazy_imports`` instead of a plain
# top-level import.
# NOTE(review): presumably this defers/isolates the heavy unitxt dependency -
# confirm against the lazy_imports documentation.
unitxt_provider = LazyModule(
    "from ibm_watsonx_gov.providers.unitxt_provider import UnitxtProvider",
    name="lazy_unitxt_provider",
)
load(unitxt_provider)
UnitxtProvider = unitxt_provider.UnitxtProvider
52
+
53
+
54
class FaithfulnessMetric(GenAIMetric):
    """
    Defines the Faithfulness metric class.

    The faithfulness metric can be computed using the below methods:

    1. token_k_precision (default)
    2. sentence_bert_mini_lm
    3. llm_as_judge
    4. granite_guardian
    5. faithfulness_model

    Examples:
        1. Create Faithfulness metric with default parameters.
            .. code-block:: python

                metric = FaithfulnessMetric()
                result = MetricsEvaluator().evaluate(data={"input_text": "...", "context": "...", "generated_text": "..."},
                                                    metrics=[metric])
                # A list of contexts can also be passed as shown below
                result = MetricsEvaluator().evaluate(data={"input_text": "...", "context": ["...", "..."], "generated_text": "..."},
                                                    metrics=[metric])

        2. Create Faithfulness metric with a custom threshold and method.
            .. code-block:: python

                thresholds = [MetricThreshold(type="lower_limit", value=0.5)]
                method = "sentence_bert_mini_lm"
                metric = FaithfulnessMetric(method=method, thresholds=thresholds)

        3. Create Faithfulness metric with llm_as_judge method.
            .. code-block:: python

                # Define LLM Judge using watsonx.ai
                # To use other frameworks and models as llm_judge, see :module:`ibm_watsonx_gov.entities.foundation_model`
                llm_judge = LLMJudge(model=WxAIFoundationModel(
                    model_id="ibm/granite-3-3-8b-instruct",
                    project_id="<PROJECT_ID>"
                ))
                metric = FaithfulnessMetric(llm_judge=llm_judge)

        4. Create Faithfulness metric with granite_guardian method.
            .. code-block:: python

                metric = FaithfulnessMetric(method="granite_guardian")

        5. Create Faithfulness metric with faithfulness_model method. Currently available only in On-Prem version.
            .. code-block:: python

                metric = FaithfulnessMetric(method="faithfulness_model")
    """
    name: Annotated[Literal["faithfulness"],
                    Field(title="Name",
                          description="The faithfulness metric name.",
                          default=FAITHFULNESS, frozen=True)]
    display_name: Annotated[Literal["Faithfulness"],
                            Field(title="Display Name",
                                  description="The faithfulness metric display name.",
                                  default="Faithfulness", frozen=True)]
    tasks: Annotated[list[TaskType],
                     Field(title="Tasks",
                           description="The list of supported tasks.",
                           default=[TaskType.RAG])]
    thresholds: Annotated[list[MetricThreshold],
                          Field(title="Thresholds",
                                description="The metric thresholds.",
                                default=[MetricThreshold(type="lower_limit", value=0.7)])]
    method: Annotated[Literal["token_k_precision", "sentence_bert_mini_lm", "llm_as_judge", "granite_guardian", "faithfulness_model"],
                      Field(title="Method",
                            description="The method used to compute the metric. This field is optional and when `llm_judge` is provided, the method would be set to `llm_as_judge`. The `faithfulness_model` method is currently available only in On-Prem version.",
                            default="token_k_precision")]
    group: Annotated[MetricGroup,
                     Field(title="Group",
                           description="The metric group.",
                           default=MetricGroup.ANSWER_QUALITY, frozen=True)]
    llm_judge: Annotated[LLMJudge | None,
                         Field(title="LLM Judge",
                               description="The LLM judge used to compute the metric.",
                               default=None)]

    @model_validator(mode="after")
    def set_llm_judge_default_method(self) -> Self:
        """Force ``method`` to ``llm_as_judge`` whenever an LLM judge is supplied."""
        if self.llm_judge:
            self.method = "llm_as_judge"
        return self

    def evaluate(self,
                 data: pd.DataFrame,
                 configuration: GenAIConfiguration | AgenticAIConfiguration,
                 **kwargs) -> AggregateMetricResult | None:
        """
        Synchronous entry point for computing the metric.

        Delegates to :meth:`evaluate_async` through ``run_in_event_loop`` and
        returns whatever that coroutine returns.

        Args:
            data: The records to evaluate.
            configuration: The evaluation configuration naming the input,
                output, context and record id fields.
            **kwargs: Forwarded unchanged to :meth:`evaluate_async`.

        Returns:
            The aggregated metric result, or ``None`` when validation failed
            and ``ignore_validation_errors`` was set.
        """
        # If ran in sync mode, block until it is done
        return run_in_event_loop(
            self.evaluate_async,
            data=data,
            configuration=configuration,
            **kwargs,
        )

    def __is_supported(self, **kwargs):
        """
        Tell whether the environment of the given ``api_client`` is supported.

        The result is passed to ``validate_small_model_method``.

        Returns:
            A truthy value when the client's region is ``"ypqa"`` or the client
            reports ``is_cpd``; ``False`` when no ``api_client`` was provided.
        """
        # Currently supported only in CPD and ypqa
        api_client = kwargs.get("api_client")
        if api_client is None:
            # Without a client the environment cannot be determined. Report it
            # as unsupported instead of failing with an AttributeError that
            # would bypass the ValueError-based validation handling.
            return False
        return api_client.credentials.region == "ypqa" or api_client.is_cpd

    async def evaluate_async(self,
                             data: pd.DataFrame,
                             configuration: GenAIConfiguration | AgenticAIConfiguration,
                             **kwargs) -> AggregateMetricResult | None:
        """
        Compute the faithfulness metric for the given records.

        Records where any input or output field is missing, or where all
        context fields are missing, are not sent to the provider. They are
        reported as record level results with ``value=None`` and a
        ``BAD_REQUEST`` error.

        Args:
            data: The records to evaluate.
            configuration: The evaluation configuration naming the input,
                output, context and record id fields.
            **kwargs: Extra options. ``ignore_validation_errors`` and
                ``api_client`` are read here; everything is forwarded to the
                provider.

        Returns:
            The aggregated metric result. ``None`` when validation failed and
            ``ignore_validation_errors`` is truthy, or when ``data`` yields no
            records at all.

        Raises:
            ValueError: When validation fails and ``ignore_validation_errors``
                is not set.
        """
        data_cols = data.columns.to_list()

        try:
            validate_input(data_cols, configuration)
            validate_output(data_cols, configuration)
            validate_context(data_cols, configuration)
            validate_unitxt_method(self.name, self.method, unitxt_methods)
            validate_llm_as_judge(self.name, self.method,
                                  self.llm_judge, configuration.llm_judge)
            validate_small_model_method(
                self.name, self.method, self.__is_supported(**kwargs), unitxt_methods)
        except ValueError as ve:
            if kwargs.get("ignore_validation_errors"):
                message = f"Skipping '{self.name}' computation because the validation failed. Details: {str(ve)}"
                logger.warning(message)
                return None
            # Bare raise keeps the original traceback intact.
            raise

        # Separate the data into a dataframe with no None values and a dataframe with None values
        input_output_has_none = data[configuration.input_fields +
                                     configuration.output_fields].isna().any(axis=1)
        all_contexts_none = data[configuration.context_fields].isna().all(
            axis=1)
        mask_has_none = input_output_has_none | all_contexts_none
        df_with_none = data[mask_has_none]
        df_without_none = data[~mask_has_none]

        # Compute the metrics only for the dataframe with no None values
        aggregated_metric_result = None
        if not df_without_none.empty:
            if self.method in ["granite_guardian", "faithfulness_model"]:
                # These two methods are computed by the detectors provider.
                kwargs["detector_params"] = {
                    "method": self.method, "threshold": 0.001}
                provider = DetectorsProvider(configuration=configuration,
                                             metric_name=self.name,
                                             metric_display_name=self.display_name,
                                             metric_method=self.method,
                                             metric_group=self.group,
                                             thresholds=self.thresholds,
                                             **kwargs)
            else:
                provider = UnitxtProvider(configuration=configuration,
                                          metric_name=self.name,
                                          metric_display_name=self.display_name,
                                          metric_method=self.method,
                                          metric_prefix="metrics.rag.external_rag",
                                          metric_group=self.group,
                                          llm_judge=self.llm_judge,
                                          thresholds=self.thresholds,
                                          **kwargs)

            aggregated_metric_result = await provider.evaluate_async(data=df_without_none)

        # Update the metric result with record level metrics results for the records with missing values
        if not df_with_none.empty:
            # Create None results for records with missing values
            none_results = [
                RecordMetricResult(
                    name=self.name,
                    display_name=self.display_name,
                    method=self.method,
                    group=self.group,
                    value=None,
                    record_id=row[configuration.record_id_field],
                    thresholds=self.thresholds,
                    errors=[Error(
                        code="BAD_REQUEST", message_en="The value of required fields input, output or context is None.")]
                )
                for _, row in df_with_none.iterrows()
            ]

            # Merge the results
            if aggregated_metric_result:
                all_record_results = aggregated_metric_result.record_level_metrics + none_results
                aggregated_metric_result.record_level_metrics = all_record_results
                aggregated_metric_result.total_records = len(
                    all_record_results)
            else:
                # Nothing could be computed: report only the failed records.
                aggregated_metric_result = AggregateMetricResult(
                    name=self.name,
                    display_name=self.display_name,
                    method=self.method,
                    group=self.group,
                    value=None,
                    total_records=len(none_results),
                    record_level_metrics=none_results,
                    min=None,
                    max=None,
                    mean=None,
                    thresholds=self.thresholds
                )

        return aggregated_metric_result
@@ -0,0 +1,16 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from ibm_watsonx_gov.metrics.hap.hap_metric import HAPMetric
11
+ from ibm_watsonx_gov.metrics.hap.input_hap_metric import InputHAPMetric
12
+ from ibm_watsonx_gov.metrics.hap.output_hap_metric import OutputHAPMetric
13
+
14
+ __all__ = ["HAPMetric", "InputHAPMetric", "OutputHAPMetric"]
15
+
16
+ # Made with Bob
@@ -0,0 +1,58 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from functools import partial
11
+ from typing import Callable, Optional
12
+
13
+ from wrapt import decorator
14
+
15
+ from ibm_watsonx_gov.config.agentic_ai_configuration import \
16
+ AgenticAIConfiguration
17
+ from ibm_watsonx_gov.entities.enums import EvaluatorFields
18
+ from ibm_watsonx_gov.entities.metric import GenAIMetric
19
+ from ibm_watsonx_gov.metrics.base_metric_decorator import BaseMetricDecorator
20
+ from ibm_watsonx_gov.metrics.hap.hap_metric import HAPMetric
21
+
22
+
23
class HAPDecorator(BaseMetricDecorator):
    """Provides the ``evaluate_hap`` decorator for computing the HAP metric on an agentic node."""

    def evaluate_hap(self, func: Optional[Callable] = None,
                     *,
                     configuration: Optional[AgenticAIConfiguration] = None,
                     metrics: Optional[list[GenAIMetric]] = None,
                     ) -> Callable:
        """
        An evaluation decorator for computing hap metric on an agentic node.

        The decorator can be applied bare (``func`` is passed directly) or with
        keyword arguments only (``func`` is ``None``). In the second form a
        partial of this method bound to the given keyword arguments is returned,
        and that partial is then called with the function to wrap.

        Args:
            func: The node function to wrap, or ``None`` when only keyword
                arguments are supplied.
            configuration: Passed through unchanged to ``compute_helper``.
            metrics: The metrics to compute. When omitted or empty, a single
                default ``HAPMetric()`` is used.

        Returns:
            The wrapped function, or a partial waiting for ``func``.

        Raises:
            Exception: Raised when the wrapped function is called and metric
                validation or computation fails. The original error is chained
                as the cause.
        """
        if func is None:
            return partial(self.evaluate_hap, configuration=configuration, metrics=metrics)

        # ``None`` and an empty list both fall back to the default HAP metric.
        if not metrics:
            metrics = [HAPMetric()]

        @decorator
        def wrapper(func, instance, args, kwargs):

            try:
                # A one-element tuple needs the trailing comma; ``(HAPMetric)``
                # is just the class itself in parentheses.
                self.validate(func=func, metrics=metrics,
                              valid_metric_types=(HAPMetric,))

                # Only the input fields are handed to the metric computation;
                # no output fields are requested.
                metric_inputs = [EvaluatorFields.INPUT_FIELDS]
                original_result = self.compute_helper(func=func, args=args, kwargs=kwargs,
                                                      configuration=configuration,
                                                      metrics=metrics,
                                                      metric_inputs=metric_inputs,
                                                      metric_outputs=[])

                return original_result
            except Exception as ex:
                raise Exception(
                    f"There was an error while evaluating hap metric on {func.__name__},") from ex

        return wrapper(func)
@@ -0,0 +1,98 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from typing import Annotated, Literal
11
+
12
+ import pandas as pd
13
+ from pydantic import Field
14
+
15
+ from ibm_watsonx_gov.config.gen_ai_configuration import GenAIConfiguration
16
+ from ibm_watsonx_gov.entities.enums import MetricGroup, TaskType
17
+ from ibm_watsonx_gov.entities.evaluation_result import AggregateMetricResult
18
+ from ibm_watsonx_gov.entities.metric import GenAIMetric
19
+ from ibm_watsonx_gov.entities.metric_threshold import MetricThreshold
20
+ from ibm_watsonx_gov.providers.detectors_provider import DetectorsProvider
21
+ from ibm_watsonx_gov.utils.async_util import run_in_event_loop
22
+ from ibm_watsonx_gov.utils.validation_util import validate_input
23
+
24
+ HAP = "hap"
25
+
26
+
27
class HAPMetric(GenAIMetric):
    """
    Defines the HAP metric class.

    The HAP metric measures if there is any toxic content that contains hate, abuse, or profanity in the input.
    It is computed using the hap model.

    Examples:
        1. Create HAP metric with default parameters and compute using metrics evaluator.
            .. code-block:: python

                metric = HAPMetric()
                result = MetricsEvaluator().evaluate(data={"input_text": "..."}, metrics=[metric])

        2. Create HAP metric with a custom threshold.
            .. code-block:: python

                threshold = MetricThreshold(type="lower_limit", value=0.5)
                metric = HAPMetric(thresholds=[threshold])
    """
    name: Annotated[Literal["hap"],
                    Field(title="Name",
                          description="The hap metric name.",
                          default=HAP, frozen=True)]
    display_name: Annotated[Literal["HAP"],
                            Field(title="Display Name",
                                  description="The hap metric display name.",
                                  default="HAP", frozen=True)]
    tasks: Annotated[list[TaskType],
                     Field(title="Tasks",
                           description="The list of supported tasks.",
                           default=TaskType.values(), frozen=True)]
    thresholds: Annotated[list[MetricThreshold],
                          Field(title="Thresholds",
                                description="The metric thresholds.",
                                default=[MetricThreshold(type="upper_limit", value=0.1)])]
    group: Annotated[MetricGroup,
                     Field(title="Group",
                           description="The metric group.",
                           default=MetricGroup.CONTENT_SAFETY, frozen=True)]

    async def evaluate_async(
        self,
        data: pd.DataFrame,
        configuration: GenAIConfiguration,
        **kwargs
    ) -> AggregateMetricResult:
        """
        Compute the HAP metric asynchronously using ``DetectorsProvider``.

        Args:
            data: The records to evaluate. Its column names are validated
                against ``configuration`` before the provider is called.
            configuration: The evaluation configuration passed to the provider.
            **kwargs: Extra keyword arguments forwarded to ``DetectorsProvider``.

        Returns:
            The result returned by the provider's ``evaluate_async``.
        """
        validate_input(data.columns.to_list(), configuration)
        provider = DetectorsProvider(configuration=configuration,
                                     metric_name=self.name,
                                     metric_display_name=self.display_name,
                                     metric_method=self.method,
                                     metric_group=self.group,
                                     thresholds=self.thresholds,
                                     **kwargs)
        aggregated_metric_result = await provider.evaluate_async(data=data)
        return aggregated_metric_result

    def evaluate(
        self,
        data: pd.DataFrame | dict,
        configuration: GenAIConfiguration,
        **kwargs,
    ):
        """
        Synchronous entry point that delegates to ``evaluate_async``.

        Args:
            data: The records to evaluate.
            configuration: The evaluation configuration.
            **kwargs: Extra keyword arguments forwarded to ``evaluate_async``.

        Returns:
            Whatever ``run_in_event_loop`` returns for the ``evaluate_async``
            call (presumably the awaited result; confirm against the helper).
        """
        # NOTE(review): ``evaluate_async`` reads ``data.columns``, so a plain dict
        # presumably has to be converted to a DataFrame before this point - confirm.
        # If ran in sync mode, block until it is done
        return run_in_event_loop(
            self.evaluate_async,
            data=data,
            configuration=configuration,
            **kwargs,
        )
@@ -0,0 +1,104 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from typing import Annotated, Literal
11
+
12
+ import pandas as pd
13
+ from pydantic import Field
14
+
15
+ from ibm_watsonx_gov.config.gen_ai_configuration import GenAIConfiguration
16
+ from ibm_watsonx_gov.entities.enums import MetricGroup, TaskType
17
+ from ibm_watsonx_gov.entities.evaluation_result import AggregateMetricResult
18
+ from ibm_watsonx_gov.entities.metric import GenAIMetric
19
+ from ibm_watsonx_gov.entities.metric_threshold import MetricThreshold
20
+ from ibm_watsonx_gov.providers.detectors_provider import DetectorsProvider
21
+ from ibm_watsonx_gov.utils.async_util import run_in_event_loop
22
+ from ibm_watsonx_gov.utils.validation_util import validate_input
23
+
24
+ INPUT_HAP = "input_hap"
25
+
26
+
27
class InputHAPMetric(GenAIMetric):
    """
    Input HAP metric.

    Reports whether the input contains toxic content, i.e. hate, abuse, or profanity.
    The value is computed by running the hap model on the input data.

    Examples:
        1. Default parameters, computed through the metrics evaluator.
            .. code-block:: python

                metric = InputHAPMetric()
                result = MetricsEvaluator().evaluate(data={"input_text": "..."}, metrics=[metric])

        2. Custom threshold.
            .. code-block:: python

                threshold = MetricThreshold(type="lower_limit", value=0.5)
                metric = InputHAPMetric(thresholds=[threshold])
    """
    name: Annotated[
        Literal["input_hap"],
        Field(default=INPUT_HAP,
              frozen=True,
              title="Name",
              description="The input hap metric name."),
    ]
    display_name: Annotated[
        Literal["Input HAP"],
        Field(default="Input HAP",
              frozen=True,
              title="Display Name",
              description="The input hap metric display name."),
    ]
    tasks: Annotated[
        list[TaskType],
        Field(default=TaskType.values(),
              frozen=True,
              title="Tasks",
              description="The list of supported tasks."),
    ]
    thresholds: Annotated[
        list[MetricThreshold],
        Field(default=[MetricThreshold(type="upper_limit", value=0.1)],
              title="Thresholds",
              description="The metric thresholds."),
    ]
    group: Annotated[
        MetricGroup,
        Field(default=MetricGroup.CONTENT_SAFETY,
              frozen=True,
              title="Group",
              description="The metric group."),
    ]

    async def evaluate_async(
        self,
        data: pd.DataFrame,
        configuration: GenAIConfiguration,
        **kwargs
    ) -> AggregateMetricResult:
        """
        Compute the Input HAP metric asynchronously using ``DetectorsProvider``.

        Args:
            data: The records to evaluate; its column names are validated
                against ``configuration`` first.
            configuration: The evaluation configuration passed to the provider.
            **kwargs: Extra keyword arguments forwarded to ``DetectorsProvider``.

        Returns:
            The provider's result, with the aggregate and every record level
            entry renamed to this metric's name.
        """
        column_names = data.columns.to_list()
        validate_input(column_names, configuration)

        # The provider is created with the literal name "hap" rather than
        # self.name; the results are relabelled below.
        detector = DetectorsProvider(configuration=configuration,
                                     metric_name="hap",
                                     metric_display_name=self.display_name,
                                     metric_method=self.method,
                                     metric_group=self.group,
                                     thresholds=self.thresholds,
                                     **kwargs)
        result = await detector.evaluate_async(data=data)

        # Report the metric as input_hap at both aggregate and record level.
        result.name = self.name
        for record_result in result.record_level_metrics:
            record_result.name = self.name
        return result

    def evaluate(
        self,
        data: pd.DataFrame | dict,
        configuration: GenAIConfiguration,
        **kwargs,
    ):
        """
        Synchronous entry point that delegates to ``evaluate_async``.

        The call is handed to ``run_in_event_loop`` together with the same
        ``data``, ``configuration`` and extra keyword arguments.
        """
        # Sync mode: wait for the async evaluation to finish.
        outcome = run_in_event_loop(
            self.evaluate_async,
            data=data,
            configuration=configuration,
            **kwargs,
        )
        return outcome
103
+
104
+ # Made with Bob
@@ -0,0 +1,110 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from typing import Annotated, Literal
11
+
12
+ import pandas as pd
13
+ from pydantic import Field
14
+
15
+ from ibm_watsonx_gov.config.gen_ai_configuration import GenAIConfiguration
16
+ from ibm_watsonx_gov.entities.enums import MetricGroup, TaskType
17
+ from ibm_watsonx_gov.entities.evaluation_result import AggregateMetricResult
18
+ from ibm_watsonx_gov.entities.metric import GenAIMetric
19
+ from ibm_watsonx_gov.entities.metric_threshold import MetricThreshold
20
+ from ibm_watsonx_gov.providers.detectors_provider import DetectorsProvider
21
+ from ibm_watsonx_gov.utils.async_util import run_in_event_loop
22
+ from ibm_watsonx_gov.utils.validation_util import validate_output
23
+
24
+ OUTPUT_HAP = "output_hap"
25
+
26
+
27
class OutputHAPMetric(GenAIMetric):
    """
    Output HAP metric.

    Reports whether the output contains toxic content, i.e. hate, abuse, or profanity.
    The value is computed by running the hap model on the output data.

    Examples:
        1. Default parameters, computed through the metrics evaluator.
            .. code-block:: python

                metric = OutputHAPMetric()
                result = MetricsEvaluator().evaluate(data={"generated_text": "..."}, metrics=[metric])

        2. Custom threshold.
            .. code-block:: python

                threshold = MetricThreshold(type="lower_limit", value=0.5)
                metric = OutputHAPMetric(thresholds=[threshold])
    """
    name: Annotated[
        Literal["output_hap"],
        Field(default=OUTPUT_HAP,
              frozen=True,
              title="Name",
              description="The output hap metric name."),
    ]
    display_name: Annotated[
        Literal["Output HAP"],
        Field(default="Output HAP",
              frozen=True,
              title="Display Name",
              description="The output hap metric display name."),
    ]
    tasks: Annotated[
        list[TaskType],
        Field(default=TaskType.values(),
              frozen=True,
              title="Tasks",
              description="The list of supported tasks."),
    ]
    thresholds: Annotated[
        list[MetricThreshold],
        Field(default=[MetricThreshold(type="upper_limit", value=0.1)],
              title="Thresholds",
              description="The metric thresholds."),
    ]
    group: Annotated[
        MetricGroup,
        Field(default=MetricGroup.CONTENT_SAFETY,
              frozen=True,
              title="Group",
              description="The metric group."),
    ]

    async def evaluate_async(
        self,
        data: pd.DataFrame,
        configuration: GenAIConfiguration,
        **kwargs
    ) -> AggregateMetricResult:
        """
        Compute the Output HAP metric asynchronously using ``DetectorsProvider``.

        Args:
            data: The records to evaluate; its column names are validated
                against ``configuration`` first.
            configuration: The evaluation configuration. It is deep-copied
                before being adjusted for the provider.
            **kwargs: Extra keyword arguments forwarded to ``DetectorsProvider``.

        Returns:
            The provider's result, with the aggregate and every record level
            entry renamed to this metric's name.
        """
        column_names = data.columns.to_list()
        validate_output(column_names, configuration)

        # Hand the provider a deep copy whose input_fields are the configured
        # output_fields, so that DetectorsProvider processes the output data.
        detector_config = configuration.model_copy(deep=True)
        detector_config.input_fields = configuration.output_fields

        # The provider is created with the literal name "hap" rather than
        # self.name; the results are relabelled below.
        detector = DetectorsProvider(configuration=detector_config,
                                     metric_name="hap",
                                     metric_display_name=self.display_name,
                                     metric_method=self.method,
                                     metric_group=self.group,
                                     thresholds=self.thresholds,
                                     **kwargs)
        result = await detector.evaluate_async(data=data)

        # Report the metric as output_hap at both aggregate and record level.
        result.name = self.name
        for record_result in result.record_level_metrics:
            record_result.name = self.name
        return result

    def evaluate(
        self,
        data: pd.DataFrame | dict,
        configuration: GenAIConfiguration,
        **kwargs,
    ):
        """
        Synchronous entry point that delegates to ``evaluate_async``.

        The call is handed to ``run_in_event_loop`` together with the same
        ``data``, ``configuration`` and extra keyword arguments.
        """
        # Sync mode: wait for the async evaluation to finish.
        outcome = run_in_event_loop(
            self.evaluate_async,
            data=data,
            configuration=configuration,
            **kwargs,
        )
        return outcome
109
+
110
+ # Made with Bob
@@ -0,0 +1,8 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------