ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353) hide show
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
@@ -0,0 +1,8 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
@@ -0,0 +1,346 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ import copy
11
+ from collections import Counter, defaultdict
12
+ from typing import List
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ from ibm_watsonx_gov.entities.agentic_app import Node
17
+ from ibm_watsonx_gov.entities.agentic_evaluation_result import \
18
+ AgenticEvaluationResult
19
+ from ibm_watsonx_gov.entities.enums import MetricGroup, MetricValueType
20
+ from ibm_watsonx_gov.entities.evaluation_result import (
21
+ AgentMetricResult, AggregateAgentMetricResult, RecordMetricResult)
22
+ from ibm_watsonx_gov.entities.metric_threshold import MetricThreshold
23
+ from ibm_watsonx_gov.metrics.llm_validation.llm_validation_metric import \
24
+ LLMValidationMetric
25
+ from ibm_watsonx_gov.utils.gov_sdk_logger import GovSDKLogger
26
+
27
+ logger = GovSDKLogger.get_logger(__name__)
28
+
29
+
30
def get_aggregated_thresholds(metric_results: List[AgentMetricResult]) -> List[MetricThreshold]:
    """
    Pick the thresholds that apply to a group of results for the same metric.

    The thresholds of the first result are used for the whole group, provided every
    other result carries the same set of thresholds (compared as sets, so ordering
    and duplicates are ignored). On any mismatch a warning is logged and nothing is
    aggregated.

    Parameters:
        metric_results (List[AgentMetricResult]): Results to inspect; may be empty.

    Returns:
        List[MetricThreshold]: An empty list when there are no results or the thresholds
        differ between results; otherwise the thresholds object of the first result.
    """
    if not metric_results:
        return []

    head, *rest = metric_results

    # A lone result has nothing to be compared against: hand back its thresholds as-is.
    if not rest:
        return head.thresholds

    reference = set(head.thresholds)
    mismatch = any(reference != set(other.thresholds) for other in rest)
    if mismatch:
        logger.warning(
            f"Did not get matching thresholds for {head.name} metric. Not aggregating.")
        return []

    return head.thresholds
59
+
60
+
61
def __get_aggregation_result(metric_results: List[AgentMetricResult]) -> AggregateAgentMetricResult | None:
    """
    Collapse a group of results for one metric into a single aggregate result.

    Descriptive fields (name, value type, display name, method, provider, node name,
    applies_to and group) are copied from the first result of the group. Statistics
    are computed over the results whose ``value`` is not None.

    Parameters:
        metric_results (List[AgentMetricResult]): Results belonging to the same metric.

    Returns:
        AggregateAgentMetricResult | None: None when ``metric_results`` is empty.
        Otherwise an aggregate where ``value`` is the mean, ``min``/``max`` are the
        extremes, ``count`` is the number of results in the group (including those
        without a value), ``percentiles`` maps "25", "50", "75", "90", "95" and "99"
        to the corresponding percentile, and ``individual_results`` is the input list.
        ``value``, ``min`` and ``max`` are None when no result has a value, and
        ``percentiles`` is None when fewer than two results have a value.
    """
    # Honour the declared "| None" return instead of failing on metric_results[0];
    # callers already skip falsy aggregates.
    if not metric_results:
        return None

    values = [r.value for r in metric_results if r.value is not None]

    # NOTE(review): label frequencies are not part of the aggregate built below, so
    # they are not computed here - confirm whether AggregateAgentMetricResult should
    # carry label counts for categorical metrics.
    mean, min_val, max_val, percentiles = None, None, None, None
    if values:
        mean = sum(values) / len(values)
        min_val = min(values)
        max_val = max(values)
        if len(values) > 1:
            # Calculate all percentiles in a single call
            percentile_keys = ("25", "50", "75", "90", "95", "99")
            percentile_values = np.percentile(
                values, [25, 50, 75, 90, 95, 99])
            percentiles = dict(zip(percentile_keys, percentile_values))

    combined_thresholds = get_aggregated_thresholds(
        metric_results=metric_results)
    first_metric_result = metric_results[0]
    return AggregateAgentMetricResult(name=first_metric_result.name,
                                      value_type=first_metric_result.value_type,
                                      display_name=first_metric_result.display_name,
                                      thresholds=combined_thresholds,
                                      method=first_metric_result.method,
                                      provider=first_metric_result.provider,
                                      node_name=first_metric_result.node_name,
                                      applies_to=first_metric_result.applies_to,
                                      group=first_metric_result.group,
                                      value=mean,
                                      min=min_val,
                                      max=max_val,
                                      count=len(metric_results),
                                      percentiles=percentiles,
                                      individual_results=metric_results)
110
+
111
+
112
def __compute_aggregated_metrics_results(metrics_result: List[AgentMetricResult],
                                         nodes: List[Node],
                                         include_individual_results: bool = True) -> List[AggregateAgentMetricResult]:
    """
    Aggregate raw metric results at node, message and conversation level.

    The results are first grouped, then each group family is aggregated by its
    dedicated helper.

    Parameters:
        metrics_result (List[AgentMetricResult]): The raw metric results to aggregate.
        nodes (List[Node]): The nodes, forwarded to the node level aggregation.
        include_individual_results (bool): Forwarded to the node level aggregation.

    Returns:
        List[AggregateAgentMetricResult]: Node level aggregates first, followed by the
        message level and then the conversation level aggregates.
    """
    node_groups, message_groups, conversation_groups = __get_grouped_metrics_result(
        metrics_result)

    node_level = __get_aggregated_node_metrics(
        include_individual_results, nodes, node_groups)
    message_level = __get_aggregated_metrics(message_groups)
    conversation_level = __get_aggregated_metrics(conversation_groups)

    return [*node_level, *message_level, *conversation_level]
128
+
129
+
130
def __get_aggregated_metrics(message_results):
    """
    Aggregate message or conversation level metrics.

    Parameters:
        message_results: Mapping whose values are the lists of results to aggregate,
            one list per metric key.

    Returns:
        The truthy aggregates, in the iteration order of the mapping; groups whose
        aggregation yields a falsy result are left out.
    """
    candidates = (__get_aggregation_result(group)
                  for group in message_results.values())
    return [aggregate for aggregate in candidates if aggregate]
139
+
140
+
141
def __get_grouped_metrics_result(metrics_result):
    """
    Group the metrics results based on node and message, and derive conversation totals.

    Results are keyed by "<name>_<method>" when the result has a method, otherwise by
    the metric name alone.

    Parameters:
        metrics_result: Iterable of metric results exposing ``name``, ``method``,
            ``applies_to``, ``node_name``, ``value`` and ``conversation_id``.

    Returns:
        A tuple of three plain dicts:
        - node results: node name -> metric key -> list of results with applies_to "node"
        - message results: metric key -> list of results with applies_to "message"
        - conversation results: metric name -> list of AgentMetricResult, one per
          conversation, holding the sum of the message level "duration", "cost",
          "input_token_count" and "output_token_count" values of that conversation.
        Results with any other ``applies_to`` are ignored.
    """
    # Message level metrics that are summed up per conversation.
    summed_per_conversation = (
        "duration", "cost", "input_token_count", "output_token_count")

    nodes_result_map, message_result_map = {}, {}
    conversation_metrics = defaultdict(lambda: defaultdict(float))

    for mr in metrics_result:
        key = (mr.name + "_" + mr.method) if mr.method else mr.name
        if mr.applies_to == "node":
            nodes_result_map.setdefault(
                mr.node_name, {}).setdefault(key, []).append(mr)
        elif mr.applies_to == "message":
            message_result_map.setdefault(key, []).append(mr)
            # A result may carry no value; skip it so that the running total does
            # not fail with a TypeError on "float += None".
            if key in summed_per_conversation and mr.value is not None:
                conversation_metrics[mr.conversation_id][key] += mr.value

    conversation_result_map = defaultdict(list)
    for conversation_id, metric_value in conversation_metrics.items():
        for metric, value in metric_value.items():
            group = MetricGroup.PERFORMANCE.value if metric == "duration" else MetricGroup.USAGE.value
            conversation_result_map[metric].append(AgentMetricResult(name=metric,
                                                                     value=value,
                                                                     display_name=metric,
                                                                     group=group,
                                                                     message_id=None,
                                                                     applies_to="conversation",
                                                                     conversation_id=conversation_id))

    return nodes_result_map, message_result_map, dict(conversation_result_map)
178
+
179
+
180
def __get_aggregated_node_metrics(include_individual_results, nodes, nodes_results):
    """
    Aggregate node level metrics.

    Args:
        include_individual_results: Forwarded to the LLM validation metric aggregation.
        nodes: Nodes whose metrics configurations provide the metric objects.
        nodes_results: Mapping of node name -> metric key -> list of results.

    Returns:
        list: The truthy aggregated results, in iteration order of ``nodes_results``.
    """
    # Index the configured metric objects by node name and metric id.
    metrics_by_node = {
        node.name: {metric.id: metric
                    for configuration in node.metrics_configurations
                    for metric in configuration.metrics}
        for node in nodes
    }

    collected = []
    for node_name, grouped_results in nodes_results.items():
        configured = metrics_by_node.get(node_name, {})
        for metric_key, results in grouped_results.items():
            metric = configured.get(metric_key)
            if isinstance(metric, LLMValidationMetric):
                # This metric aggregates its own record level results.
                summary = __get_llm_validation_metric_aggregation_result(
                    include_individual_results, results, metric)
            else:
                summary = __get_aggregation_result(results)
            if summary:
                collected.append(summary)
    return collected
208
+
209
+
210
def __get_llm_validation_metric_aggregation_result(include_individual_results, values, metric_obj):
    """
    Aggregate the results of an LLM validation metric for one node.

    The agent results are converted to RecordMetricResult objects, handed to the
    metric for aggregation, and the outcome is converted back to agent results.

    Args:
        include_individual_results: When true, the aggregate carries a deep copy of the
            individual agent results; otherwise an empty list.
        values: The AgentMetricResult objects of one metric on one node.
        metric_obj: The metric object providing
            ``get_aggregated_results_from_individual_results``.

    Returns:
        An AggregateAgentMetricResult, or the falsy value returned by the metric when
        it produced no aggregate.
    """
    record_level_metrics = [RecordMetricResult(
        **v.__dict__, record_id=v.message_id) for v in values]
    aggregated_result = metric_obj.get_aggregated_results_from_individual_results(
        record_level_metrics)

    # Check before touching any attribute of the aggregate, so that a missing
    # aggregate is handed back to the caller instead of raising AttributeError.
    if not aggregated_result:
        return aggregated_result

    # convert updated record results to AgentMetricResult; the record level
    # fields take precedence over the original agent result fields
    agent_individual_results = [
        AgentMetricResult(**{**agent_result.__dict__, **record_result.__dict__})
        for record_result, agent_result in zip(aggregated_result.record_level_metrics, values)
    ]

    # convert AggregateMetricResult to AggregateAgentMetricResult
    mv = values[0]

    # Calculate percentiles if we have enough data points
    percentiles = None
    if len(agent_individual_results) > 1:
        # Extract values for percentile calculation
        valid_values = [
            r.value for r in agent_individual_results if r.value is not None]
        if valid_values and all(isinstance(v, (int, float)) for v in valid_values):
            # Calculate all percentiles in a single call
            percentile_values = np.percentile(
                valid_values, [25, 50, 75, 90, 95, 99])
            percentiles = dict(
                zip(("25", "50", "75", "90", "95", "99"), percentile_values))

    return AggregateAgentMetricResult(
        name=mv.name,
        method=mv.method,
        provider=mv.provider,
        node_name=mv.node_name,
        applies_to=mv.applies_to,
        group=mv.group,
        value=aggregated_result.mean,
        min=aggregated_result.min,
        max=aggregated_result.max,
        count=aggregated_result.total_records,
        percentiles=percentiles,
        individual_results=copy.deepcopy(
            agent_individual_results) if include_individual_results else [],
        additional_info=copy.deepcopy(
            aggregated_result.additional_info)
    )
268
+
269
+
270
def get_agentic_evaluation_result(metrics_result: list[AgentMetricResult], nodes: list[Node] | None = None) -> AgenticEvaluationResult:
    """
    Build the agentic evaluation result from individual metric results.

    Args:
        metrics_result: The individual metric results to aggregate.
        nodes: The nodes whose metric configurations are used for node level
            aggregation. Defaults to no nodes.

    Returns:
        AgenticEvaluationResult: Holds the aggregated metric results together with the
            individual results collected from those aggregates.
    """
    # A None default replaces the former shared mutable list default.
    aggregated_metrics_results = __compute_aggregated_metrics_results(
        metrics_result, [] if nodes is None else nodes)

    # The returned individual results are taken from the aggregates, not from the input.
    individual_results = []
    for amr in aggregated_metrics_results:
        individual_results.extend(amr.individual_results)

    return AgenticEvaluationResult(metrics_results=individual_results,
                                   aggregated_metrics_results=aggregated_metrics_results)
279
+
280
+
281
def get_summaries(individual_metric_values: list):
    """
    Calculates statistical summaries for a list of numeric metric values.

    Args:
        individual_metric_values (list): A list of numeric values representing individual
            metrics. May contain None values which will be filtered out. None itself is
            treated like an empty list.

    Returns:
        dict: A dictionary containing the following statistical summaries:
            - "metric_value" (float): Mean of the values (same as "mean")
            - "mean" (float): Arithmetic mean of the values
            - "min" (float): Minimum value in the dataset
            - "max" (float): Maximum value in the dataset
            - "std" (float): Standard deviation of the values
            - "percentiles" (dict): Dictionary containing percentile values with keys:
                - "25": 25th percentile of the values
                - "50": 50th percentile (median) of the values
                - "75": 75th percentile of the values
                - "90": 90th percentile of the values
                - "95": 95th percentile of the values
                - "99": 99th percentile of the values

        If input is None, empty or contains only None values, returns:
        {"metric_value": 0, "mean": 0, "min": 0, "max": 0, "std": 0,
         "percentiles": {"25": 0, "50": 0, "75": 0, "90": 0, "95": 0, "99": 0}}
    """
    # Normalise None before iterating; checking after the comprehension is too late.
    if individual_metric_values is None:
        individual_metric_values = []
    values = [ele for ele in individual_metric_values if ele is not None]

    if len(values) == 0:
        return {
            "metric_value": 0,
            "mean": 0,
            "min": 0,
            "max": 0,
            "std": 0,
            "percentiles": {
                "25": 0,
                "50": 0,
                "75": 0,
                "90": 0,
                "95": 0,
                "99": 0
            }
        }

    # The mean is reported under two keys, compute it once.
    mean = np.mean(values).item()
    # Calculate all percentiles in a single call
    percentile_values = np.percentile(values, [25, 50, 75, 90, 95, 99])

    return {
        "metric_value": mean,
        "mean": mean,
        "min": np.min(values).item(),
        "max": np.max(values).item(),
        "std": np.std(values).item(),
        "percentiles": {
            "25": percentile_values[0].item(),
            "50": percentile_values[1].item(),
            "75": percentile_values[2].item(),
            "90": percentile_values[3].item(),
            "95": percentile_values[4].item(),
            "99": percentile_values[5].item()
        }
    }
@@ -0,0 +1,62 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+
11
+ import asyncio
12
+ from typing import Any, Awaitable, Iterable
13
+
14
+
15
def run_in_event_loop(task, *args, **kwargs):
    """
    Run the given async task in an event loop, safely handling loop reuse.

    Args:
        task: Callable returning an awaitable when called with ``*args`` and ``**kwargs``.
        *args: Positional arguments passed to ``task``.
        **kwargs: Keyword arguments passed to ``task``.

    Returns:
        The result of the awaitable returned by ``task``.

    Raises:
        Any exception raised by the task is propagated unchanged.
    """
    # Only the loop lookup is guarded. A RuntimeError raised by the task itself
    # must not be mistaken for "no running loop", which would run the task twice.
    try:
        event_loop = asyncio.get_running_loop()
    except RuntimeError:
        event_loop = None

    if event_loop is not None:
        # Use existing event loop and wait for the task to be executed.
        import nest_asyncio
        nest_asyncio.apply()
        return event_loop.run_until_complete(task(*args, **kwargs))

    # No running loop, create one and close it when done
    event_loop = asyncio.new_event_loop()
    try:
        return event_loop.run_until_complete(task(*args, **kwargs))
    finally:
        event_loop.close()
30
+
31
+
32
async def gather_with_concurrency(
    coros: Iterable[Awaitable],
    return_exceptions: bool = False,
    max_concurrency: int = 10,
) -> Any:
    """
    Await all the given awaitables, with at most ``max_concurrency`` of them awaited at once.

    Args:
        coros: The awaitables to run.
        return_exceptions: Passed to ``asyncio.gather``.
        max_concurrency: Initial value of the semaphore limiting concurrent awaits.

    Returns:
        The value returned by ``asyncio.gather`` for the wrapped tasks.

    Raises:
        Exception: Re-raised from ``asyncio.gather`` after every task has been cancelled.
    """
    limiter = asyncio.Semaphore(max_concurrency)

    async def run_limited(awaitable):
        # Hold one semaphore slot for the duration of the await.
        await limiter.acquire()
        try:
            return await awaitable
        finally:
            limiter.release()

    scheduled = []
    for awaitable in coros:
        scheduled.append(asyncio.create_task(run_limited(awaitable)))

    try:
        outcome = await asyncio.gather(
            *scheduled, return_exceptions=return_exceptions)
    except Exception as error:
        # Do not leave the remaining tasks running once the gather has failed.
        for pending in scheduled:
            pending.cancel()
        raise error
    return outcome
50
+
51
+
52
def start_event_loop_run_func(func, data):
    """
    Call ``func(data)`` with a fresh event loop installed as the current event loop.

    Used as a wrapper to start the event loop in the thread, as unitxt LiteLLMInference
    fails without it. The loop is closed and unset again afterwards, whether or not
    ``func`` raises.

    Args:
        func: Callable taking a single argument.
        data: The argument passed to ``func``.

    Returns:
        Whatever ``func(data)`` returns.
    """
    thread_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(thread_loop)
    try:
        outcome = func(data)
    finally:
        thread_loop.close()
        asyncio.set_event_loop(None)
    return outcome
@@ -0,0 +1,144 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ import json
11
+
12
+ from ibm_watsonx_gov.utils.gov_sdk_logger import GovSDKLogger
13
+ from ibm_watsonx_gov.utils.rest_util import RestUtil
14
+
15
+
16
class Authenticator:
    """
    Helper class to authenticate with IBM Cloud and CPD
    """

    def __init__(self, credentials: dict, use_cpd: bool, use_ssl: bool) -> None:
        """
        Initialize the authenticator object

        Args:
            credentials (dict): A dictionary containing the necessary credentials for authentication.
                IBM Cloud authentication reads "apikey" and "iam_url"; CPD authentication reads
                "url", "username" and either "password" or "api_key".
            use_cpd (bool): A boolean indicating whether to authenticate with CPD or IBM Cloud.
            use_ssl (bool): Passed as the ``verify`` argument of the token request.
        """
        self.__credentials: dict[str, str] = credentials
        self.__use_cpd: bool = use_cpd
        self.__iam_token: str | None = None
        self.__use_ssl: bool = use_ssl
        self.logger = GovSDKLogger.get_logger(__name__)

    def authenticate(self) -> str:
        """
        Function to complete the authentication flow with either IBM Cloud or CPD based
        on the configuration. This will set self.__iam_token and return the token to the user

        Returns:
            str: The token obtained from the authentication endpoint.

        Raises:
            Exception: If the request fails or the response cannot be parsed.
        """
        self.logger.info("Authenticating the client")
        if self.__use_cpd:
            self.logger.info("Authenticating the client with CPD")
            self.__iam_token = self.__get_iam_token_cpd()
        else:
            self.logger.info("Authenticating the client with ibm cloud")
            self.__iam_token = self.__get_iam_token_cloud()

        self.logger.info("Client authenticated successfully")
        return self.__iam_token

    def get_iam_token(self) -> str:
        """
        This function retrieves an IAM token from the instance variables. If the token does not exist, it raises an exception.

        Returns:
            str: IAM token

        Raises:
            Exception: If no token has been stored yet.
        """
        if not self.__iam_token:
            message = "Not authenticated yet."
            self.logger.error(message)
            raise Exception(message)
        return self.__iam_token

    def __get_iam_token_cloud(self) -> str:
        """
        Method to authenticate the client with ibm cloud.

        Posts the api key to ``<iam_url>/identity/token``.

        Returns:
            str: The "access_token" field of the JSON response.

        Raises:
            Exception: If the request fails or the response cannot be parsed.
        """
        self.logger.info("Authenticating using cloud credentials")

        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Accept": "application/json",
        }

        data = {
            "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
            "response_type": "cloud_iam",
            "apikey": self.__credentials["apikey"],
        }

        try:
            response = RestUtil.request_with_retry().post(
                url=f"{self.__credentials['iam_url']}/identity/token",
                data=data,
                headers=headers,
                allow_redirects=True,
                verify=self.__use_ssl,
            )
            response.raise_for_status()
        except Exception as e:
            message = f"Failed to authenticate. {e}"
            self.logger.error(message)
            # Chain the cause so the original traceback is not lost.
            raise Exception(message) from e

        try:
            json_response = response.json()
            return json_response["access_token"]
        except Exception as e:
            message = f"Failed to parse authentication response. {e}"
            self.logger.error(message)
            raise Exception(message) from e

    def __get_iam_token_cpd(self) -> str:
        """
        Method to authenticate the client with CPD.

        Posts the username and either the password or the api key as JSON to
        ``<url>/icp4d-api/v1/authorize``.

        Returns:
            str: The "token" field of the JSON response.

        Raises:
            Exception: If the request fails or the response cannot be parsed.
        """
        self.logger.info("Authenticating using cpd credentials")

        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

        data = {
            "username": self.__credentials["username"],
        }

        # Check if the authentication is done using password or api key and add it to the payload.
        # The password takes precedence when both are present.
        if "password" in self.__credentials.keys():
            data["password"] = self.__credentials["password"]
        elif "api_key" in self.__credentials.keys():
            data["api_key"] = self.__credentials["api_key"]

        try:
            response = RestUtil.request_with_retry().post(
                url=f"{self.__credentials['url']}/icp4d-api/v1/authorize",
                data=json.dumps(data).encode("utf-8"),
                headers=headers,
                allow_redirects=True,
                verify=self.__use_ssl,
            )
            response.raise_for_status()
        except Exception as e:
            message = f"Failed to authenticate. {e}"
            self.logger.error(message)
            # Chain the cause so the original traceback is not lost.
            raise Exception(message) from e

        try:
            json_response = response.json()
            return json_response["token"]
        except Exception as e:
            message = f"Failed to parse authentication response. {e}"
            self.logger.error(message)
            raise Exception(message) from e
@@ -0,0 +1,15 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+
11
+ from typing import Literal
12
+
13
# Literal type whose only allowed value is the string "ootb".
OOTB_TYPE = Literal["ootb"]
# Literal type whose only allowed value is the string "custom".
CUSTOM_TYPE = Literal["custom"]
# Literal type allowing the strings "numeric" and "categorical".
METRIC_VALUE_TYPES = Literal["numeric", "categorical"]
@@ -0,0 +1,40 @@
1
+
2
+ # ----------------------------------------------------------------------------------------------------
3
+ # IBM Confidential
4
+ # Licensed Materials - Property of IBM
5
+ # 5737-H76, 5900-A3Q
6
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
7
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
8
+ # GSA ADPSchedule Contract with IBM Corp.
9
+ # ----------------------------------------------------------------------------------------------------
10
+
11
+ import sys
12
+ from http import HTTPStatus
13
+
14
+ from ibm_watsonx_gov.utils.gov_sdk_logger import GovSDKLogger
15
+
16
# Module-level logger; ClientError writes a debug record to it on construction.
logger = GovSDKLogger.get_logger(__name__)
17
+
18
+
19
class ClientError(Exception):
    """
    Base error of the client, carrying a code, a message and an optional reason.

    Args:
        code: The error code, rendered first in the string form.
        message: The error message.
        reason: Optional further detail; rendered on a second line when provided.
    """

    def __init__(self, code, message, reason=None):
        self.code = code
        self.message = message
        self.reason = reason
        logger.debug(str(self))

    def __str__(self):
        text = str(self.code) + ": " + str(self.message)
        # Show the reason whenever one was given. This used to depend on whether
        # some exception happened to be in flight (sys.exc_info()), which hid
        # supplied reasons and could print "Reason: None".
        if self.reason is not None:
            text += '\nReason: ' + str(self.reason)
        return text
30
+
31
+
32
class AuthorizationError(ClientError, ValueError):
    """Client error that is also a ValueError; takes the same arguments as ClientError."""

    def __init__(self, code, message, reason=None):
        # ClientError is the next class in the method resolution order.
        super().__init__(code=code, message=message, reason=reason)
35
+
36
+
37
class UnsupportedOperationError(ClientError, ValueError):
    """Client error that is also a ValueError, with the code fixed to HTTPStatus.NOT_IMPLEMENTED."""

    def __init__(self, message, reason=None):
        # ClientError is the next class in the method resolution order.
        super().__init__(code=HTTPStatus.NOT_IMPLEMENTED,
                         message=message,
                         reason=reason)
@@ -0,0 +1,39 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ import logging
11
+
12
+
13
class GovSDKLogger:
    """
    Logger for watsonx governance SDK
    """
    DEFAULT_LOG_LEVEL = logging.WARNING

    @staticmethod
    def get_logger(name):
        """
        Function that returns a logger object.

        The logger never propagates to its parent. The first time a name is requested,
        the logger gets the default level and a stream handler with the SDK format;
        later requests return the logger unchanged.

        Params:
            name (str): The name of the logger.
        Returns:
            logging.Logger: A logger object
        """
        sdk_logger = logging.getLogger(name)
        # Propagation is switched off before the handler check, so the check
        # only looks at this logger's own handlers.
        sdk_logger.propagate = False
        if sdk_logger.hasHandlers():
            return sdk_logger

        sdk_logger.setLevel(GovSDKLogger.DEFAULT_LOG_LEVEL)
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(logging.Formatter(
            "[%(asctime)s]-[%(name)s]-[ %(levelname)s ]-[Line %(lineno)d] ~~> %(message)s"
        ))
        sdk_logger.addHandler(stream_handler)
        return sdk_logger