ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353) hide show
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
@@ -0,0 +1,440 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from typing import Dict, List, Optional
11
+
12
+ import pandas as pd
13
+
14
+
15
def mapping_to_df(mapping_data: Dict, execution_count: Optional[int] = 0) -> pd.DataFrame:
    """
    Flatten nested mapping data into a single-row pandas DataFrame.

    Top-level entries whose value is not a dict become a column named after
    the key and keep their value unchanged. Nested dicts are flattened up to
    two further levels, producing columns named ``span->attr`` or
    ``span->attr->path``; those values are resolved through
    ``_get_list_value`` so that list values contribute a single element.

    Args:
        mapping_data: Mapping to flatten (up to three levels of nesting).
        execution_count: Index used to pick an element when a nested value
            is a list. ``None`` is treated as ``0``.

    Returns:
        A DataFrame containing exactly one row with the flattened columns.
    """
    # The signature allows None, but the list lookup compares this value
    # against a list length, so normalise it once up front.
    index = 0 if execution_count is None else execution_count

    row = {}
    for span, lvl1 in mapping_data.items():
        if not isinstance(lvl1, dict):
            # Scalar (or list) at the top level: stored untouched.
            row[span] = lvl1
            continue

        for attr, lvl2 in lvl1.items():
            if not isinstance(lvl2, dict):
                row[f"{span}->{attr}"] = _get_list_value(lvl2, index)
                continue

            for path, value in lvl2.items():
                row[f"{span}->{attr}->{path}"] = _get_list_value(value, index)

    return pd.DataFrame([row])
39
+
40
+
41
+ def _get_list_value(value, execution_count):
42
+ """Helper function to safely get value from list"""
43
+ if isinstance(value, list):
44
+ if execution_count < len(value):
45
+ return value[execution_count]
46
+ return value[0] if value else None
47
+ return value
48
+
49
+
50
# Dotted trace names that are targeted for usage tracking, grouped by provider.
# NOTE(review): how these names are matched is not visible in this part of the
# file — presumably they are compared against span names; confirm with callers.
TARGETED_USAGE_TRACE_NAMES = [
    # OpenAI
    "openai.embeddings",
    "ChatOpenAI.chat",
    "OpenAI.completion",
    # IBM
    "ChatWatsonx.chat",
    "WatsonxLLM.completion",
    "WatsonxChatModel.chat",  # wxo span
    # Azure
    "AzureChatOpenAI.chat",
    "AzureOpenAI.completion",
    # AWS
    "ChatBedrock.chat",
    "ChatBedrockConverse.chat",
    # Google
    "ChatVertexAI.chat",
    "VertexAI.completion",
    # Anthropic
    "ChatAnthropic.chat",
    "ChatAnthropicMessages.chat",
    # TODO: Add attributes for other frameworks as well.
]
73
# One million. The cost tables in this module are quoted per 1M tokens.
# NOTE(review): presumably used as the divisor when converting token counts
# to cost — confirm against the code that consumes the tables.
ONE_M = 1000000

# Date when pricing was last verified.
# NOTE(review): confirm this date is kept in step with the tables below
# whenever entries are added or changed.
LAST_UPDATED = "2025-01-21"
76
+
77
# ref: https://platform.openai.com/docs/pricing
# Maps an OpenAI model name to its {"input": ..., "output": ...} cost per
# 1M tokens. Embedding models carry an output cost of 0.0.
# NOTE(review): currency is not stated here — presumably USD; confirm.
OPENAI_COSTS = {  # Costs per 1M tokens
    # GPT-5 Series
    "gpt-5": {"input": 2.50, "output": 20.0},
    "gpt-5.1": {"input": 2.50, "output": 20.0},
    "gpt-5-mini": {"input": 0.45, "output": 3.60},
    "gpt-5-nano": {"input": 0.05, "output": 0.40},
    "gpt-5-pro": {"input": 15.0, "output": 120.0},

    # GPT-4.1 Series
    "gpt-4.1": {"input": 3.50, "output": 14.0},
    "gpt-4.1-mini": {"input": 0.70, "output": 2.80},
    "gpt-4.1-nano": {"input": 0.20, "output": 0.80},

    # GPT-4o Series
    "gpt-4o": {"input": 2.5, "output": 10.0},
    "gpt-4o-2024-05-13": {"input": 5.0, "output": 15.0},
    "gpt-4o-2024-08-06": {"input": 3.75, "output": 15.0},
    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
    "gpt-4o-mini-2024-07-18": {"input": 0.30, "output": 1.20},
    "chatgpt-4o-latest": {"input": 5.0, "output": 15.0},

    # GPT-4 Series (Legacy)
    "gpt-4": {"input": 30.0, "output": 60.0},
    "gpt-4-turbo": {"input": 10.0, "output": 30.0},
    "gpt-4-turbo-2024-04-09": {"input": 10.0, "output": 30.0},
    "gpt-4-0125-preview": {"input": 10.0, "output": 30.0},
    "gpt-4-1106-preview": {"input": 10.0, "output": 30.0},
    "gpt-4-0613": {"input": 30.0, "output": 60.0},
    "gpt-4-32k": {"input": 60.0, "output": 120.0},

    # O-Series (Reasoning)
    "o1": {"input": 15.0, "output": 60.0},
    "o1-pro": {"input": 150.0, "output": 600.0},
    "o1-mini": {"input": 1.10, "output": 4.40},
    "o3": {"input": 3.50, "output": 14.0},
    "o3-pro": {"input": 20.0, "output": 80.0},
    "o3-mini": {"input": 1.10, "output": 4.40},
    "o4-mini": {"input": 2.0, "output": 8.0},
    "o3-deep-research": {"input": 10.0, "output": 40.0},
    "o4-mini-deep-research": {"input": 2.0, "output": 8.0},

    # GPT-3.5 Series
    "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
    "gpt-3.5-turbo-0125": {"input": 0.50, "output": 1.50},
    "gpt-3.5-turbo-1106": {"input": 1.0, "output": 2.0},
    "gpt-3.5-turbo-instruct": {"input": 1.50, "output": 2.0},

    # Base Models
    "davinci-002": {"input": 2.0, "output": 2.0},
    "babbage-002": {"input": 0.40, "output": 0.40},

    # Embeddings
    "text-embedding-3-large": {"input": 0.13, "output": 0.0},
    "text-embedding-3-small": {"input": 0.02, "output": 0.0},
    "text-embedding-ada-002": {"input": 0.10, "output": 0.0},
}
134
+
135
# ref: https://docs.anthropic.com/en/docs/about-claude/models/overview#model-pricing
# Maps a Claude model identifier to its {"input": ..., "output": ...} cost per
# 1M tokens. The same model appears under several identifier spellings
# (plain, "anthropic.<name>-vN:0", and "<name>@<date>").
# NOTE(review): the alternate spellings presumably correspond to other hosting
# platforms' model ids — confirm. How the "-large" tier key is selected is not
# visible in this part of the file.
ANTHROPIC_COSTS = {  # Costs per 1M tokens
    # Opus 4 Series
    "claude-opus-4-1": {"input": 15.0, "output": 75.0},
    "claude-opus-4-0": {"input": 15.0, "output": 75.0},
    "claude-opus-4-20250514": {"input": 15.0, "output": 75.0},
    "anthropic.claude-opus-4-20250514-v1:0": {"input": 15.0, "output": 75.0},
    "claude-opus-4@20250514": {"input": 15.0, "output": 75.0},

    # Sonnet 4.5 Series (tiered pricing based on prompt size)
    "claude-sonnet-4-5": {"input": 3.0, "output": 15.0},  # ≤200K tokens
    "claude-sonnet-4-5-large": {"input": 6.0, "output": 22.50},  # >200K tokens

    # Sonnet 4 Series
    "claude-sonnet-4-0": {"input": 3.0, "output": 15.0},
    "claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0},
    "anthropic.claude-sonnet-4-20250514-v1:0": {"input": 3.0, "output": 15.0},
    "claude-sonnet-4@20250514": {"input": 3.0, "output": 15.0},

    # Sonnet 3.7 Series
    "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0},
    "claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0},
    "anthropic.claude-3-7-sonnet-20250219-v1:0": {"input": 3.0, "output": 15.0},
    "claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0},

    # Sonnet 3.5 Series
    "claude-3-5-sonnet-latest": {"input": 3.0, "output": 15.0},
    "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
    "anthropic.claude-3-5-sonnet-20241022-v2:0": {"input": 3.0, "output": 15.0},
    "claude-3-5-sonnet-v2@20241022": {"input": 3.0, "output": 15.0},

    # Haiku 4.5 Series
    "claude-haiku-4-5": {"input": 1.0, "output": 5.0},

    # Haiku 3.5 Series
    "claude-3-5-haiku-latest": {"input": 0.80, "output": 4.0},
    "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.0},
    "anthropic.claude-3-5-haiku-20241022-v1:0": {"input": 0.80, "output": 4.0},
    "claude-3-5-haiku@20241022": {"input": 0.80, "output": 4.0},

    # Haiku 3 Series
    "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
    "anthropic.claude-3-haiku-20240307-v1:0": {"input": 0.25, "output": 1.25},
    "claude-3-haiku@20240307": {"input": 0.25, "output": 1.25},
}
180
+
181
+ # ref: https://cloud.google.com/vertex-ai/generative-ai/pricing
182
# Pricing table for Google Gemini models.
#
# Each key is a lower-case model identifier; each value holds the "input" and
# "output" price per 1M tokens.
#
# NOTE(review): the "-large" keys model the >200K-token price tier. calculate_cost
# does an exact-key lookup on the reported model name, so these tiers only apply
# when the caller reports usage under the "-large" name.
GOOGLE_COSTS = {  # Costs per 1M tokens
    # Gemini 3 Series
    "gemini-3-pro-preview": {"input": 2.0, "output": 12.0},  # ≤200K tokens
    # >200K tokens
    "gemini-3-pro-preview-large": {"input": 4.0, "output": 18.0},

    # Gemini 2.5 Series
    "gemini-2.5-pro": {"input": 1.25, "output": 10.0},  # ≤200K tokens
    "gemini-2.5-pro-large": {"input": 2.50, "output": 15.0},  # >200K tokens
    "gemini-2.5-flash": {"input": 0.30, "output": 2.50},
    "gemini-2.5-flash-lite": {"input": 0.1, "output": 0.4},

    # Gemini 2.0 Series
    "gemini-2.0-flash-001": {"input": 0.15, "output": 0.6},
    "gemini-2.0-flash-lite-001": {"input": 0.075, "output": 0.3},
}
198
+
199
+ # ref: https://mistral.ai/pricing#api-pricing
200
+ # ref: https://aws.amazon.com/bedrock/pricing
201
+ # ref: https://cloud.google.com/vertex-ai/generative-ai/pricing
202
# Pricing table for Mistral AI models.
#
# Each key is a lower-case model identifier; each value holds the "input" and
# "output" price per 1M tokens. Identifiers appear in several spellings
# ("...-latest", "mistral.<name>-v<x>:<y>", "mistralai/<name>@001") and the
# prices differ between spellings of the same family (e.g. the two
# "mistral.mistral-large-*" entries), so entries must not be assumed
# interchangeable. Embedding models carry an "output" price of 0.0.
MISTRAL_COSTS = {  # Costs per 1M tokens
    # Pixtral Series (Vision)
    "pixtral-large-latest": {"input": 2.0, "output": 6.0},
    "mistral.pixtral-large-2502-v1:0": {"input": 2.0, "output": 6.0},
    "pixtral-12b": {"input": 0.15, "output": 0.15},

    # Mistral Large Series
    "mistral-large-latest": {"input": 2.0, "output": 6.0},
    "mistral.mistral-large-2407-v1:0": {"input": 2.0, "output": 6.0},
    "mistralai/mistral-large-2411@001": {"input": 2.0, "output": 6.0},
    "mistral.mistral-large-2402-v1:0": {"input": 4.0, "output": 12.0},

    # Mistral Medium Series
    "mistral-medium-3": {"input": 0.4, "output": 2.0},
    "mistral-medium-latest": {"input": 0.4, "output": 2.0},

    # Mistral Small Series
    "mistral-small-3-2": {"input": 0.1, "output": 0.3},
    "mistral-small-latest": {"input": 0.1, "output": 0.3},
    "mistralai/mistral-small-2503@001": {"input": 0.1, "output": 0.3},
    "mistral.mistral-small-2402-v1:0": {"input": 1.0, "output": 3.0},

    # Magistral Series (Reasoning)
    "magistral-medium-latest": {"input": 2.0, "output": 5.0},
    "magistral-small-latest": {"input": 0.5, "output": 1.5},

    # Devstral Series (Coding)
    "devstral-medium-2507": {"input": 0.4, "output": 2.0},
    "devstral-small-2507": {"input": 0.1, "output": 0.3},

    # Codestral Series
    "codestral-latest": {"input": 0.3, "output": 0.9},

    # Ministral Series (Edge)
    "ministral-8b-latest": {"input": 0.1, "output": 0.1},
    "ministral-3b-latest": {"input": 0.04, "output": 0.04},

    # Open Mistral Series
    "open-mistral-7b": {"input": 0.25, "output": 0.25},
    "mistral.mistral-7b-instruct-v0:2": {"input": 0.15, "output": 0.2},
    "open-mistral-nemo": {"input": 0.15, "output": 0.15},

    # Open Mixtral Series
    "open-mixtral-8x7b": {"input": 0.7, "output": 0.7},
    "mistral.mixtral-8x7b-instruct-v0:1": {"input": 0.45, "output": 0.7},
    "open-mixtral-8x22b": {"input": 2.0, "output": 6.0},

    # Embeddings
    "mistral-embed": {"input": 0.1, "output": 0.0},
    "codestral-embed-2505": {"input": 0.15, "output": 0.0},
}
253
+
254
+ # ref: https://aws.amazon.com/bedrock/pricing
255
# Pricing table for Cohere Command models.
#
# Each key is a lower-case model identifier; each value holds the "input" and
# "output" price per 1M tokens. Every model is listed twice: once under its
# short name and once under a "cohere."-prefixed identifier.
# All prices are floats so cost arithmetic stays type-consistent with the
# other provider tables.
COHERE_COSTS = {  # Costs per 1M tokens
    "command-r": {"input": 0.5, "output": 1.5},
    "cohere.command-r-v1:0": {"input": 0.5, "output": 1.5},
    "command-r-plus": {"input": 3.0, "output": 15.0},
    "cohere.command-r-plus-v1:0": {"input": 3.0, "output": 15.0},
    "command-light": {"input": 0.3, "output": 0.6},
    "cohere.command-light-text-v14": {"input": 0.3, "output": 0.6},
    "command": {"input": 1.0, "output": 2.0},
    "cohere.command-text-v14": {"input": 1.0, "output": 2.0},
}
265
+
266
+ # ref: https://www.ai21.com/pricing
267
+ # ref: https://aws.amazon.com/bedrock/pricing
268
+ # ref: https://cloud.google.com/vertex-ai/generative-ai/pricing
269
# Pricing table for AI21 Jamba models.
#
# Each key is a lower-case model identifier; each value holds the "input" and
# "output" price per 1M tokens. Each model is listed under three identifier
# spellings (short name, "ai21."-prefixed "-v1:0" name, "ai21/...@001" name)
# with identical prices.
AI21_COSTS = {  # Costs per 1M tokens
    "jamba-large": {"input": 2.0, "output": 8.0},
    "ai21.jamba-1-5-large-v1:0": {"input": 2.0, "output": 8.0},
    "ai21/jamba-1.5-large@001": {"input": 2.0, "output": 8.0},
    "jamba-mini": {"input": 0.2, "output": 0.4},
    "ai21.jamba-1-5-mini-v1:0": {"input": 0.2, "output": 0.4},
    "ai21/jamba-1.5-mini@001": {"input": 0.2, "output": 0.4},
}
277
+
278
+ # ref: https://www.ibm.com/products/watsonx-ai/pricing
279
# Pricing table for models listed under IBM watsonx.
#
# Each key is a lower-case "<provider>/<model>" identifier; each value holds
# the "input" and "output" price per 1M tokens. Embedding and retrieval models
# carry an "output" price of 0.0.
IBM_WATSONX_COSTS = {  # Costs per 1M tokens
    # IBM Granite Series
    "ibm/granite-4-h-small": {"input": 0.06, "output": 0.25},
    "ibm/granite-vision-3-2-2b": {"input": 0.10, "output": 0.10},
    "ibm/granite-3-2b-instruct": {"input": 0.10, "output": 0.10},
    "ibm/granite-3-8b-instruct": {"input": 0.20, "output": 0.20},
    "ibm/granite-guardian-3-8b": {"input": 0.20, "output": 0.20},
    "ibm/granite-8b-code-instruct": {"input": 0.20, "output": 0.20},
    "ibm/granite-8b-japanese": {"input": 0.60, "output": 0.60},
    "ibm/granite-3-2-8b-instruct": {"input": 0.20, "output": 0.20},
    "ibm/granite-timeseries-ttm-r2": {"input": 0.38, "output": 0.38},

    # Meta Llama Series
    "meta-llama/llama-3-2-1b-instruct": {"input": 0.1, "output": 0.1},
    "meta-llama/llama-3-2-3b-instruct": {"input": 0.15, "output": 0.15},
    "meta-llama/llama-3-2-11b-vision-instruct": {"input": 0.35, "output": 0.35},
    "meta-llama/llama-3-2-90b-vision-instruct": {"input": 2.0, "output": 2.0},
    "meta-llama/llama-3-3-70b-instruct": {"input": 0.71, "output": 0.71},
    "meta-llama/llama-3-405b-instruct": {"input": 5.0, "output": 16.0},
    "meta-llama/llama-4-maverick-17b-128e-instruct-fp": {"input": 0.35, "output": 1.4},
    "meta-llama/llama-guard-3-11b-vision": {"input": 0.35, "output": 0.35},

    # Mistral Series on Watsonx
    "mistralai/mistral-small-3-1-24b-instruct-2503": {"input": 0.1, "output": 0.3},
    "mistralai/mistral-medium-2505": {"input": 3.0, "output": 10.0},

    # Other Models
    "core42/jais-13b-chat": {"input": 1.8, "output": 1.8},
    "sdaia/allam-1-13b-instruct": {"input": 1.8, "output": 1.8},

    # IBM Granite Embeddings
    "ibm/granite-embedding-107m-multilingual": {"input": 0.10, "output": 0.0},
    "ibm/granite-embedding-278m-multilingual": {"input": 0.10, "output": 0.0},

    # IBM Slate Retrieval Models
    "ibm/slate-125m-english-rtrvr": {"input": 0.10, "output": 0.0},
    "ibm/slate-125m-english-rtrvr-v2": {"input": 0.10, "output": 0.0},
    "ibm/slate-30m-english-rtrvr": {"input": 0.10, "output": 0.0},
    "ibm/slate-30m-english-rtrvr-v2": {"input": 0.10, "output": 0.0},

    # Third-Party Embeddings
    "intfloat/multilingual-e5-large": {"input": 0.10, "output": 0.0},
    "sentence-transformers/all-minilm-l12-v2": {"input": 0.10, "output": 0.0},
    "sentence-transformers/all-minilm-l6-v2": {"input": 0.10, "output": 0.0},

    # OpenAI
    "openai/gpt-oss-120b": {"input": 0.15, "output": 0.60}
}
327
+
328
+ # ref: https://aws.amazon.com/bedrock/pricing/ (DeepSeek tab)
329
# Pricing table for DeepSeek models.
#
# Each key is a lower-case model identifier; each value holds the "input" and
# "output" price per 1M tokens.
DEEPSEEK_COSTS = {  # Costs per 1M tokens
    # DeepSeek models (Standard Tier - AWS Bedrock)
    "deepseek-r1": {"input": 1.35, "output": 5.40},
    "deepseek-v3.1": {"input": 0.58, "output": 1.68},
}
334
+
335
+ # ref: https://docs.x.ai/docs/models
336
# Pricing table for xAI Grok models.
#
# Each key is a lower-case model identifier; each value holds the "input" and
# "output" price per 1M tokens, except for the image-generation entry at the
# end (see the note there).
XAI_COSTS = {  # Costs per 1M tokens
    # Grok 4.1 Series
    "grok-4-1-fast-reasoning": {"input": 0.20, "output": 0.50},
    "grok-4-1-fast-non-reasoning": {"input": 0.20, "output": 0.50},

    # Grok 4 Series
    "grok-4-fast-reasoning": {"input": 0.20, "output": 0.50},
    "grok-4-fast-non-reasoning": {"input": 0.20, "output": 0.50},
    "grok-4-0709": {"input": 3.0, "output": 15.0},

    # Grok 3 Series
    "grok-3": {"input": 3.0, "output": 15.0},
    "grok-3-mini": {"input": 0.30, "output": 0.50},

    # Grok 2 Series
    # Both us-east-1 and eu-west-1
    "grok-2-vision-1212": {"input": 2.0, "output": 10.0},

    # Grok Code
    "grok-code-fast-1": {"input": 0.20, "output": 1.50},

    # Image Generation (per image)
    # NOTE(review): this price is per image, not per 1M tokens. calculate_cost
    # scales every "output" price by completion tokens / 1M, so it will not
    # yield a per-image charge for this model — confirm how image usage is
    # meant to be reported.
    "grok-2-image-1212": {"input": 0.0, "output": 0.07},  # $0.07 per image
}
360
+
361
+ # ref: https://docs.perplexity.ai/getting-started/pricing
362
# Pricing table for Perplexity Sonar models.
#
# Each key is a lower-case model identifier; each value holds the "input" and
# "output" price per 1M tokens. Only these two token prices are stored; the
# extra per-citation / per-search / per-reasoning-token charges mentioned for
# "sonar-deep-research" below have no field in this table.
PERPLEXITY_COSTS = {  # Costs per 1M tokens
    # Sonar Models
    "sonar": {"input": 1.0, "output": 1.0},
    "sonar-pro": {"input": 3.0, "output": 15.0},
    "sonar-reasoning": {"input": 1.0, "output": 5.0},
    "sonar-reasoning-pro": {"input": 2.0, "output": 8.0},
    # Plus $2/1M citation tokens, $5/1K search queries, $3/1M reasoning tokens
    "sonar-deep-research": {"input": 2.0, "output": 8.0},
}
371
+
372
+ # ref: https://groq.com/pricing
373
# Pricing table for models served by Groq.
#
# Each key is a lower-case model identifier; each value holds the "input" and
# "output" price per 1M tokens.
GROQ_COSTS = {  # Costs per 1M tokens
    # GPT OSS Series
    "gpt-oss-20b": {"input": 0.075, "output": 0.30},
    "gpt-oss-safeguard-20b": {"input": 0.075, "output": 0.30},
    "gpt-oss-120b": {"input": 0.15, "output": 0.60},

    # Kimi Series
    "kimi-k2-0905-1t": {"input": 1.0, "output": 3.0},

    # Llama 4 Series
    "llama-4-scout-17b-16e": {"input": 0.11, "output": 0.34},
    "llama-4-maverick-17b-128e": {"input": 0.20, "output": 0.60},
    "llama-guard-4-12b": {"input": 0.20, "output": 0.20},

    # Qwen Series
    "qwen3-32b": {"input": 0.29, "output": 0.59},

    # Llama 3.3 Series
    "llama-3.3-70b-versatile": {"input": 0.59, "output": 0.79},

    # Llama 3.1 Series
    "llama-3.1-8b-instant": {"input": 0.05, "output": 0.08},
}
396
+
397
+ # Combined metadata for backward compatibility
398
# Single flat lookup table merging every provider table, kept for backward
# compatibility. Tables are merged left to right, so if two providers ever
# declare the same key the right-most table's entry wins.
COST_METADATA = (
    OPENAI_COSTS
    | ANTHROPIC_COSTS
    | GOOGLE_COSTS
    | MISTRAL_COSTS
    | COHERE_COSTS
    | AI21_COSTS
    | IBM_WATSONX_COSTS
    | DEEPSEEK_COSTS
    | XAI_COSTS
    | PERPLEXITY_COSTS
    | GROQ_COSTS
)
411
+
412
+
413
def calculate_cost(usage_data: List[dict]) -> float:
    """Calculate the total cost for a list of per-model usage records.

    Each record is a dict of the form::

        {"model": ..., "total_prompt_tokens": ..., "total_completion_tokens": ...}

    The model name is lower-cased and looked up in ``COST_METADATA``, whose
    prices are expressed per 1M tokens.

    Args:
        usage_data: Usage records. ``"model"`` is required; token counts that
            are missing or ``None`` are treated as 0.

    Returns:
        The summed input + output cost as a float. If pricing is unavailable
        for the model of *any* record, ``0.0`` is returned for the whole list.

    Raises:
        KeyError: If a record has no ``"model"`` key.
    """
    total_cost = 0.0

    for data in usage_data:
        model_pricing = COST_METADATA.get(data["model"].lower())
        if model_pricing is None:
            # NOTE(review): a single unpriced model zeroes the whole total,
            # including costs already accumulated for priced models. This is
            # the existing contract and is kept as-is; confirm whether the
            # unknown record should be skipped instead.
            return 0.0

        # `or 0` also covers records where the key is present but None.
        prompt_tokens = data.get("total_prompt_tokens") or 0
        completion_tokens = data.get("total_completion_tokens") or 0

        # Prices are per 1M tokens.
        input_cost = (prompt_tokens / ONE_M) * model_pricing["input"]
        output_cost = (completion_tokens / ONE_M) * model_pricing["output"]
        total_cost += input_cost + output_cost

    return total_cost
@@ -0,0 +1,8 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
@@ -0,0 +1,60 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from functools import partial
11
+ from typing import Callable, Optional
12
+
13
+ from wrapt import decorator
14
+
15
+ from ibm_watsonx_gov.config.agentic_ai_configuration import \
16
+ AgenticAIConfiguration
17
+ from ibm_watsonx_gov.entities.enums import EvaluatorFields
18
+ from ibm_watsonx_gov.entities.metric import GenAIMetric
19
+ from ibm_watsonx_gov.metrics.base_metric_decorator import BaseMetricDecorator
20
+ from ibm_watsonx_gov.metrics.violence.violence_metric import ViolenceMetric
21
+
22
+
23
class ViolenceDecorator(BaseMetricDecorator):
    """Provides the ``evaluate_violence`` decorator for agentic nodes."""

    def evaluate_violence(self,
                          func: Optional[Callable] = None,
                          *,
                          configuration: Optional[AgenticAIConfiguration] = None,
                          metrics: Optional[list[GenAIMetric]] = None
                          ) -> Callable:
        """
        An evaluation decorator for computing violence on an agentic node via granite guardian.

        Works both bare (``func`` is passed directly) and parameterised (called
        with keyword arguments only, in which case a partial is returned that
        waits for the function to decorate).

        Args:
            func: The node function to wrap, or None when the decorator is
                invoked with keyword arguments only.
            configuration: Optional configuration forwarded to ``compute_helper``.
            metrics: Metrics to compute. When empty or None, a single default
                ``ViolenceMetric()`` is used.

        Returns:
            The wrapped function, or a partial of this method when ``func`` is None.

        Raises:
            Exception: Raised from the wrapped call when validation or metric
                computation fails; the original error is chained as the cause.
        """
        if func is None:
            return partial(self.evaluate_violence, configuration=configuration, metrics=metrics)

        # A None default (instead of a shared mutable list) falls through to
        # the same per-call default as an empty list did.
        if not metrics:
            metrics = [ViolenceMetric()]

        @decorator
        def wrapper(func, instance, args, kwargs):

            try:
                # NOTE(review): `(ViolenceMetric)` is the bare class, not a
                # 1-tuple; left unchanged because how `validate` consumes it
                # is not visible here.
                self.validate(func=func, metrics=metrics,
                              valid_metric_types=(ViolenceMetric))

                # Only the input fields are evaluated; no output fields are passed.
                metric_inputs = [EvaluatorFields.INPUT_FIELDS]

                original_result = self.compute_helper(func=func, args=args, kwargs=kwargs,
                                                      configuration=configuration,
                                                      metrics=metrics,
                                                      metric_inputs=metric_inputs,
                                                      metric_outputs=[])

                return original_result
            except Exception as ex:
                raise Exception(
                    f"There was an error while evaluating violence on {func.__name__},") from ex

        return wrapper(func)
@@ -0,0 +1,103 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ from typing import Annotated, Literal
11
+
12
+ import pandas as pd
13
+ from pydantic import Field
14
+
15
+ from ibm_watsonx_gov.config.gen_ai_configuration import GenAIConfiguration
16
+ from ibm_watsonx_gov.entities.enums import MetricGroup, TaskType
17
+ from ibm_watsonx_gov.entities.evaluation_result import AggregateMetricResult
18
+ from ibm_watsonx_gov.entities.metric import GenAIMetric
19
+ from ibm_watsonx_gov.entities.metric_threshold import MetricThreshold
20
+ from ibm_watsonx_gov.providers.detectors_provider import DetectorsProvider
21
+ from ibm_watsonx_gov.utils.async_util import run_in_event_loop
22
+ from ibm_watsonx_gov.utils.validation_util import validate_input
23
+
24
# Metric name; also sent to the detector as the "risk_name" parameter.
VIOLENCE = "violence"


class ViolenceMetric(GenAIMetric):
    """
    Defines the Violence metric class.

    The Violence metric measures the risk of content promoting or describing physical harm to individuals or groups. This includes depictions of assault, self-harm, or creation of a hostile environment through threats or intimidation.
    It is computed using the granite guardian model.

    Examples:
        1. Create Violence metric with default parameters and compute using metrics evaluator.
            .. code-block:: python

                metric = ViolenceMetric()
                result = MetricsEvaluator().evaluate(data={"input_text": "..."}, metrics=[metric])

        2. Create Violence metric with a custom threshold.
            .. code-block:: python

                threshold = MetricThreshold(type="lower_limit", value=0.5)
                metric = ViolenceMetric(thresholds=[threshold])
    """
    name: Annotated[Literal["violence"],
                    Field(title="Name",
                          description="The violence metric name.",
                          default=VIOLENCE, frozen=True)]
    display_name: Annotated[Literal["Violence"],
                            Field(title="Display Name",
                                  description="The violence metric display name.",
                                  default="Violence", frozen=True)]
    method: Annotated[Literal["granite_guardian"],
                      Field(title="Method",
                            description="The method used to compute violence metric.",
                            default="granite_guardian")]
    tasks: Annotated[list[TaskType],
                     Field(title="Tasks",
                           description="The list of supported tasks.",
                           default=TaskType.values(), frozen=True)]
    thresholds: Annotated[list[MetricThreshold],
                          Field(title="Thresholds",
                                description="The metric thresholds.",
                                default=[MetricThreshold(type="upper_limit", value=0.5)])]
    group: Annotated[MetricGroup,
                     Field(title="Group",
                           description="The metric group.",
                           default=MetricGroup.CONTENT_SAFETY, frozen=True)]

    async def evaluate_async(
        self,
        data: pd.DataFrame | dict,
        configuration: GenAIConfiguration,
        **kwargs
    ) -> list[AggregateMetricResult]:
        """
        Compute the violence metric asynchronously through the detectors provider.

        Args:
            data: The records to evaluate. The column names are read through
                ``data.columns``, so although ``dict`` is accepted by the
                annotation, an object without ``.columns`` raises
                ``AttributeError`` here.
            configuration: The configuration used for input validation and
                passed to the provider.
            **kwargs: Extra keyword arguments forwarded to ``DetectorsProvider``.
                Any caller-supplied ``detector_params`` is overwritten.

        Returns:
            The result of ``DetectorsProvider.evaluate_async`` for ``data``.
        """
        validate_input(data.columns.to_list(), configuration)
        # Always target the violence risk, replacing any caller-provided value.
        kwargs["detector_params"] = {"risk_name": VIOLENCE}
        provider = DetectorsProvider(configuration=configuration,
                                     metric_name=self.name,
                                     metric_display_name=self.display_name,
                                     metric_method=self.method,
                                     metric_group=self.group,
                                     thresholds=self.thresholds,
                                     **kwargs)
        aggregated_metric_result = await provider.evaluate_async(data=data)
        return aggregated_metric_result

    def evaluate(
        self,
        data: pd.DataFrame | dict,
        configuration: GenAIConfiguration,
        **kwargs,
    ):
        """
        Synchronous entry point: runs ``evaluate_async`` via ``run_in_event_loop``.

        Args:
            data: The records to evaluate (see ``evaluate_async``).
            configuration: The configuration passed through to ``evaluate_async``.
            **kwargs: Extra keyword arguments passed through to ``evaluate_async``.

        Returns:
            Whatever ``run_in_event_loop`` returns for the coroutine —
            presumably the list of aggregate metric results; confirm against
            ``run_in_event_loop``.
        """
        # If ran in sync mode, block until it is done
        return run_in_event_loop(
            self.evaluate_async,
            data=data,
            configuration=configuration,
            **kwargs,
        )
@@ -0,0 +1,9 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # OCO Source Materials
4
+ # 5900-A3Q, 5737-H76
5
+ # Copyright IBM Corp. 2025
6
+ # The source code for this program is not published or other-wise divested of its trade
7
+ # secrets, irrespective of what has been deposited with the U.S.Copyright Office.
8
+ # ----------------------------------------------------------------------------------------------------
9
+ from .prompt_evaluator import PromptEvaluator
@@ -0,0 +1,8 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # OCO Source Materials
4
+ # 5900-A3Q, 5737-H76
5
+ # Copyright IBM Corp. 2025
6
+ # The source code for this program is not published or other-wise divested of its trade
7
+ # secrets, irrespective of what has been deposited with the U.S.Copyright Office.
8
+ # ----------------------------------------------------------------------------------------------------