ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353) hide show
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
@@ -0,0 +1,554 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # OCO Source Materials
4
+ # 5900-A3Q, 5737-H76
5
+ # Copyright IBM Corp. 2025
6
+ # The source code for this program is not published or other-wise divested of its trade
7
+ # secrets, irrespective of what has been deposited with the U.S.Copyright Office.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ import calendar
11
+ import time
12
+ from urllib.parse import urlparse, urlunparse
13
+
14
+ import pandas as pd
15
+ from ibm_watsonx_ai.foundation_models.prompts.prompt_template import (
16
+ DetachedPromptTemplate, PromptTemplate)
17
+
18
+ from ibm_watsonx_gov.entities.container import (BaseMonitor, ProjectContainer,
19
+ SpaceContainer)
20
+ from ibm_watsonx_gov.entities.credentials import Credentials
21
+ from ibm_watsonx_gov.entities.enums import (ContainerType, EvaluationStage,
22
+ TaskType)
23
+ from ibm_watsonx_gov.entities.monitor import (GenerativeAIQualityMonitor,
24
+ QualityMonitor)
25
+ from ibm_watsonx_gov.entities.prompt_setup import PromptSetup
26
+ from ibm_watsonx_gov.prompt_evaluator.impl.pta_lifecycle_evaluator import \
27
+ PTALifecycleEvaluator
28
+ from ibm_watsonx_gov.utils.url_mapping import WOS_URL_MAPPING
29
+
30
+
31
+ class PromptEvaluatorImpl:
32
+ DEFAULT_MODEL_ID = "ibm/granite-3-2-8b-instruct"
33
+ DEFAULT_PROMPT_NAME = "Insurance RAG ChatBot Prompt"
34
+ DEFAULT_PROMPT_URL = "https://us-south.ml.cloud.ibm.com/ml/v1/deployments/insurance_test_deployment/text/generation?version=2021-05-01"
35
+ DEFAULT_APPROACH_VERSION = "0.0.1"
36
+
37
def __init__(self, credentials: Credentials | None = None):
    """Initialise the evaluator with credentials and a lifecycle evaluator.

    Args:
        credentials: Credentials to use. When omitted (or falsy), they are
            obtained from ``Credentials.create_from_env()`` instead.
    """
    self.credentials = (
        credentials if credentials else Credentials.create_from_env()
    )

    # The other methods of this class delegate their setup/evaluate/get
    # calls to this evaluator instance.
    self.__pta_evaluator = PTALifecycleEvaluator()
44
+
45
def __get_credentials_dict(self) -> dict[str, any]:
    """Build the credentials dictionary passed to the lifecycle evaluator.

    Returns:
        When ``self.credentials.version`` is set (CPD), a dict with the keys
        ``url``, ``username`` and ``api_key``. Otherwise (cloud), a dict with
        ``iam_url`` (looked up in ``WOS_URL_MAPPING`` by the credentials url)
        and ``apikey``.

    Raises:
        Exception: If the cloud credentials have no ``service_instance_id``,
            or if the credentials url has no entry in ``WOS_URL_MAPPING``.
    """
    creds = self.credentials

    if creds.version:
        # A version on the credentials means CPD is used.
        return {
            "url": creds.url,
            "username": creds.username,
            "api_key": creds.api_key,
        }

    # No version: use cloud.
    if not creds.service_instance_id:
        raise Exception("Missing service_instance_id from the credentials")

    # Fail with a clear message instead of an AttributeError on None when the
    # url has no mapping entry.
    url_mapping = WOS_URL_MAPPING.get(creds.url)
    if url_mapping is None:
        raise Exception(
            f"No IAM url mapping found for the credentials url {creds.url}"
        )

    return {
        "iam_url": url_mapping.iam_url,
        "apikey": creds.api_key,
    }
61
+
62
def e2e_prompt_evaluation(
    self,
    config: dict[str, any],
    input_file_path: str = None,
):
    """Set up and optionally evaluate a prompt for development and production.

    The simplified ``config`` is parsed first. If the parsed config has no
    ``prompt_template_id`` and the development stage is among its setup
    stages, the development stage is set up from the prompt template and the
    resulting prompt template id is read back from the evaluator; otherwise
    the id from the parsed config is used. If the production stage is among
    the setup stages, it is then set up with that prompt template id.
    Each stage is evaluated only when ``input_file_path`` is given.

    Args:
        config: Simplified configuration dictionary.
        input_file_path: Optional path of a CSV file with the evaluation
            data. When omitted, stages are set up but not evaluated.

    Raises:
        Exception: If the CSV file cannot be read. The original error is
            chained as the cause.
    """
    parsed_config = self.__parse_simplified_config_dict(config)

    input_df = None
    if input_file_path:
        try:
            input_df = pd.read_csv(input_file_path)
        except Exception as e:
            # Chain explicitly so the underlying read error stays attached.
            raise Exception(
                f"Failed to open the file {input_file_path}. {e}") from e

    needs_development_setup = (
        not parsed_config["prompt_template_id"]
        and EvaluationStage.DEVELOPMENT in parsed_config["setup_stages"]
    )

    if needs_development_setup:
        self.__pta_evaluator.setup(
            configuration=parsed_config["configuration"],
            setup_stages=[EvaluationStage.DEVELOPMENT],
            prompt_template=parsed_config["prompt_template"],
        )

        if input_file_path:
            self.__pta_evaluator.evaluate(
                input_df=input_df,
                evaluation_stages=[EvaluationStage.DEVELOPMENT],
            )

        # The development setup produced the prompt template; reuse its id.
        prompt_template_id = self.__pta_evaluator.get_prompt_template_id()
    else:
        prompt_template_id = parsed_config["prompt_template_id"]

    if EvaluationStage.PRODUCTION in parsed_config["setup_stages"]:
        self.__pta_evaluator.setup(
            configuration=parsed_config["configuration"],
            setup_stages=[EvaluationStage.PRODUCTION],
            prompt_template_id=prompt_template_id,
            prompt_template=parsed_config["prompt_template"],
        )

        if input_file_path:
            self.__pta_evaluator.evaluate(
                input_df=input_df,
                evaluation_stages=[EvaluationStage.PRODUCTION],
            )
106
+
107
def evaluate_risk(
    self,
    containers: list[ProjectContainer | SpaceContainer],
    evaluation_stages: list[EvaluationStage],
    input_file_path: str,
    prompt_setup: PromptSetup,
    prompt_template: PromptTemplate | DetachedPromptTemplate = None,
    prompt_template_id: str = None,
):
    """Set up the given stages and evaluate them against a CSV file.

    A per-stage configuration is built from every container (keyed by
    ``container.stage.value``), combined with the common configuration
    derived from ``self.credentials``, and passed to the lifecycle
    evaluator's ``setup``. The CSV file is then read and passed to
    ``evaluate``.

    Args:
        containers: Project/space containers describing each stage. A later
            container with the same stage replaces an earlier one.
        evaluation_stages: Stages to set up and evaluate.
        input_file_path: Path of the CSV file with the evaluation data.
        prompt_setup: Prompt setup whose fields are copied into every
            stage's ``prompt_setup`` section.
        prompt_template: Prompt template to evaluate.
        prompt_template_id: Id of an existing prompt template.

    Raises:
        Exception: If neither ``prompt_template`` nor ``prompt_template_id``
            is given, or if the CSV file cannot be read (the original error
            is chained as the cause).
    """
    if prompt_template is None and prompt_template_id is None:
        raise Exception(
            "Please provide Either prompt_template or prompt_template_id"
        )

    prompt_setup_base = {
        "label_column": prompt_setup.label_column,
        "context_fields": prompt_setup.context_fields,
        "question_field": prompt_setup.question_field,
        "problem_type": prompt_setup.task_type.value,
        "input_data_type": prompt_setup.input_data_type.value,
        "prediction_field": prompt_setup.prediction_field,
    }

    stages = {}

    for container in containers:
        # Each stage gets its own (shallow) copy of the shared prompt setup.
        stage_setup = prompt_setup_base.copy()
        stage = {"prompt_setup": stage_setup}
        stage_setup["operational_space_id"] = container.stage.value

        if container.monitors:
            monitors = {}
            for monitor in container.monitors:
                # Only non-empty thresholds/parameters are included.
                monitor_config = {}
                monitors[monitor.monitor_name] = monitor_config
                if monitor.thresholds:
                    monitor_config["thresholds"] = monitor.thresholds
                if monitor.parameters:
                    monitor_config["parameters"] = monitor.parameters

            stage_setup["monitors"] = monitors

        if container.stage == EvaluationStage.DEVELOPMENT:
            stage["project_id"] = container.container_id

        elif container.stage in [
            EvaluationStage.PRE_PRODUCTION,
            EvaluationStage.PRODUCTION,
        ]:
            if container.container_type == ContainerType.PROJECT:
                stage["project_id"] = container.container_id
            else:
                stage["space_id"] = container.container_id
                stage["space_deployment"] = {
                    "serving_name": container.serving_name,
                    "base_model_id": container.base_model_id,
                    "description": container.description,
                    "name": container.name,
                    "version_date": container.version_date,
                }

        stages[container.stage.value] = stage

    pta_evaluator_config = {
        "common_configurations": {
            "credentials": self.__get_credentials_dict(),
            "use_cpd": self.credentials.version is not None,
            "use_ssl": not self.credentials.disable_ssl,
            # A placeholder id is sent when no service instance id is set.
            "service_instance_id": (
                self.credentials.service_instance_id
                if self.credentials.service_instance_id
                else "00000000-0000-0000-0000-000000000000"
            ),
        },
        **stages,
    }

    self.__pta_evaluator.setup(
        configuration=pta_evaluator_config,
        setup_stages=evaluation_stages,
        prompt_template=prompt_template,
        prompt_template_id=prompt_template_id,
    )

    try:
        input_df = pd.read_csv(input_file_path)
    except Exception as e:
        # Chain explicitly so the underlying read error stays attached.
        raise Exception(
            f"Failed to open the file {input_file_path}. {e}") from e

    self.__pta_evaluator.evaluate(
        input_df=input_df,
        evaluation_stages=evaluation_stages,
    )
202
+
203
def get_monitor_metrics(
    self,
    monitor: BaseMonitor,
    evaluation_stage: EvaluationStage = EvaluationStage.DEVELOPMENT,
    show_table: bool = False,
) -> dict[str, any]:
    """Return the metrics the evaluator holds for one monitor.

    This is a thin delegate around the wrapped evaluator's
    ``get_metrics_from_monitor_list``.

    Args:
        monitor: Monitor whose ``monitor_name`` selects the metrics.
        evaluation_stage: Stage to query. Defaults to the development stage.
        show_table: Forwarded unchanged to the evaluator.

    Returns:
        Whatever the evaluator returns for this lookup.
    """
    fetch_metrics = self.__pta_evaluator.get_metrics_from_monitor_list
    monitor_name = monitor.monitor_name
    return fetch_metrics(
        stage=evaluation_stage,
        monitor_name=monitor_name,
        show_table=show_table,
    )
214
+
215
def get_dataset_records(
    self,
    dataset_type: str,
    evaluation_stage: EvaluationStage = EvaluationStage.DEVELOPMENT,
    show_table: bool = False,
) -> dict[str, any]:
    """Return the records of one monitor data set for the given stage.

    This is a thin delegate around the wrapped evaluator's
    ``get_monitor_data_set_records``.

    Args:
        dataset_type: Passed to the evaluator as ``data_set_type``.
        evaluation_stage: Stage to query. Defaults to the development stage.
        show_table: Forwarded unchanged to the evaluator.

    Returns:
        Whatever the evaluator returns for this lookup.
    """
    fetch_records = self.__pta_evaluator.get_monitor_data_set_records
    return fetch_records(
        stage=evaluation_stage,
        data_set_type=dataset_type,
        show_table=show_table,
    )
226
+
227
+ def __get_setup_stages_from_config(self, config: dict[str, any]):
228
+ evaluation_stages = []
229
+
230
+ if "development_project_id" in config.keys():
231
+ evaluation_stages.append(EvaluationStage.DEVELOPMENT)
232
+
233
+ if "production_space_id" in config.keys():
234
+ evaluation_stages.append(EvaluationStage.PRODUCTION)
235
+
236
+ return evaluation_stages
237
+
238
+ def __parse_prompt_config_from_dict(self, config: dict[str, any]):
239
+ if "prompt_template" in config.keys():
240
+ return self.__build_prompt_template_object(
241
+ config.get("prompt_template")
242
+ )
243
+
244
+ if "detached_prompt_template" in config.keys():
245
+ return self.__build_detached_prompt_template_object(
246
+ config.get("detached_prompt_template")
247
+ )
248
+
249
+ return None
250
+
251
def __parse_simplified_config_dict(
    self,
    config: dict[str, any]
):
    """Expand the simplified ``config`` into evaluator setup arguments.

    Args:
        config: Simplified configuration dictionary.

    Returns:
        dict with the keys ``configuration``, ``setup_stages``,
        ``prompt_template`` and ``prompt_template_id``. The id is read from
        ``prompt_template_id`` and falls back to
        ``detached_prompt_template_id`` only when the first key is absent.
    """
    configuration = self.__build_pta_evaluator_config(config)
    setup_stages = self.__get_setup_stages_from_config(config)
    prompt_template = self.__parse_prompt_config_from_dict(config)

    detached_template_id = config.get("detached_prompt_template_id")
    prompt_template_id = config.get("prompt_template_id", detached_template_id)

    return {
        "configuration": configuration,
        "setup_stages": setup_stages,
        "prompt_template": prompt_template,
        "prompt_template_id": prompt_template_id,
    }
261
+
262
+ def __build_prompt_template_object(
263
+ self, prompt_template_dict: dict[str, any]
264
+ ) -> PromptTemplate:
265
+ required_fields = [
266
+ "input_text",
267
+ "input_variables",
268
+ "task_ids",
269
+ ]
270
+ missing_fields = required_fields - prompt_template_dict.keys()
271
+
272
+ if len(missing_fields) > 0:
273
+ raise Exception(f"Missing required values: {missing_fields}")
274
+
275
+ return PromptTemplate(
276
+ name=prompt_template_dict.get("name", self.DEFAULT_PROMPT_NAME),
277
+ description=prompt_template_dict.get("description", ""),
278
+ model_id=prompt_template_dict.get(
279
+ "model_id", self.DEFAULT_MODEL_ID),
280
+ input_text=prompt_template_dict.get("input_text", ""),
281
+ input_variables=prompt_template_dict.get("input_variables", []),
282
+ task_ids=prompt_template_dict.get("task_ids", []),
283
+ )
284
+
285
+ def __build_detached_prompt_template_object(
286
+ self, detached_prompt_dict: dict[str, any]
287
+ ) -> DetachedPromptTemplate:
288
+ required_fields = [
289
+ "input_text",
290
+ "input_variables",
291
+ "task_ids",
292
+ ]
293
+ missing_fields = required_fields - detached_prompt_dict.keys()
294
+
295
+ if len(missing_fields) > 0:
296
+ raise Exception(f"Missing required values: {missing_fields}")
297
+
298
+ return DetachedPromptTemplate(
299
+ name=detached_prompt_dict.get("name", self.DEFAULT_PROMPT_NAME),
300
+ model_id=detached_prompt_dict.get(
301
+ "model_id", self.DEFAULT_MODEL_ID),
302
+ input_text=detached_prompt_dict.get("input_text", ""),
303
+ input_variables=detached_prompt_dict.get("input_variables", []),
304
+ detached_prompt_id=detached_prompt_dict.get(
305
+ "detached_prompt_id", ""),
306
+ detached_model_id=detached_prompt_dict.get(
307
+ "detached_model_id", self.DEFAULT_MODEL_ID),
308
+ detached_model_provider=detached_prompt_dict.get(
309
+ "detached_model_provider", ""
310
+ ),
311
+ detached_model_name=detached_prompt_dict.get(
312
+ "detached_model_name", ""),
313
+ detached_model_url=detached_prompt_dict.get(
314
+ "detached_model_url", ""),
315
+ detached_prompt_url=detached_prompt_dict.get(
316
+ "detached_prompt_url", self.DEFAULT_PROMPT_NAME),
317
+ detached_prompt_additional_information=detached_prompt_dict.get(
318
+ "detached_prompt_additional_information", None
319
+ ),
320
+ task_ids=detached_prompt_dict.get("task_ids", []),
321
+ )
322
+
323
+ def __build_base_prompt_setup(self, input_prompt_setup: dict[str, any]):
324
+ problem_type = input_prompt_setup.get("problem_type", None)
325
+
326
+ if not problem_type:
327
+ raise Exception("Missing `problem_type` from the configuration")
328
+
329
+ prompt_setup = {}
330
+ prompt_setup["problem_type"] = problem_type
331
+
332
+ prompt_setup["input_data_type"] = input_prompt_setup.get(
333
+ "input_data_type", "unstructured_text"
334
+ )
335
+ prompt_setup["prediction_field"] = input_prompt_setup.get(
336
+ "prediction_field", "generated_text"
337
+ )
338
+
339
+ # Use the default values based on the problem type
340
+ if problem_type == TaskType.RAG.value:
341
+ prompt_setup["question_field"] = input_prompt_setup.get(
342
+ "question_field", "question"
343
+ )
344
+ prompt_setup["label_column"] = input_prompt_setup.get(
345
+ "label_column", "answer"
346
+ )
347
+ prompt_setup["context_fields"] = input_prompt_setup.get(
348
+ "context_fields")
349
+
350
+ elif problem_type == TaskType.SUMMARIZATION.value:
351
+ prompt_setup["label_column"] = input_prompt_setup.get(
352
+ "label_column", "ground_truth"
353
+ )
354
+
355
+ elif problem_type == TaskType.QA.value:
356
+ prompt_setup["label_column"] = input_prompt_setup.get(
357
+ "label_column", "answers"
358
+ )
359
+
360
+ elif problem_type == TaskType.GENERATION.value:
361
+ prompt_setup["label_column"] = input_prompt_setup.get(
362
+ "label_column", "reference"
363
+ )
364
+
365
+ elif problem_type == TaskType.EXTRACTION.value:
366
+ prompt_setup["label_column"] = input_prompt_setup.get(
367
+ "label_column", "answer"
368
+ )
369
+
370
+ elif problem_type == TaskType.CLASSIFICATION.value:
371
+ prompt_setup["label_column"] = input_prompt_setup.get(
372
+ "label_column", "class_name"
373
+ )
374
+
375
+ else:
376
+ raise Exception(
377
+ f"unsupported `problem_type`: {problem_type}. Supported values: {TaskType.values()}"
378
+ )
379
+
380
+ return prompt_setup
381
+
382
+ def __convert_monitor_to_dict(
383
+ self, monitors_list: list[BaseMonitor], task_type: TaskType
384
+ ):
385
+ if not monitors_list:
386
+ if task_type == TaskType.CLASSIFICATION.value:
387
+ monitors_list = [QualityMonitor()]
388
+ if task_type == TaskType.SUMMARIZATION.value:
389
+ monitors_list = [GenerativeAIQualityMonitor(
390
+ parameters={
391
+ "metrics_configuration": {
392
+ "bleu": {},
393
+ "cosine_similarity": {},
394
+ "hap_score": {},
395
+ "jaccard_similarity": {},
396
+ "meteor": {},
397
+ "normalized_f1": {},
398
+ "normalized_precision": {},
399
+ "normalized_recall": {},
400
+ "rouge_score": {},
401
+ "sari": {},
402
+ "pii": {},
403
+ },
404
+ }
405
+ )]
406
+ else:
407
+ monitors_list = [GenerativeAIQualityMonitor()]
408
+
409
+ monitors = {}
410
+ for monitor in monitors_list:
411
+ monitors[monitor.monitor_name] = {}
412
+ if monitor.thresholds:
413
+ monitors[monitor.monitor_name]["thresholds"] = monitor.thresholds
414
+ if monitor.parameters:
415
+ monitors[monitor.monitor_name]["parameters"] = monitor.parameters
416
+
417
+ return monitors
418
+
419
+ def __get_pta_evaluator_urls(self) -> dict[str, any]:
420
+ """Helper to get the urls for pta"""
421
+
422
+ # Using cloud
423
+ if self.credentials.version is None:
424
+ url_map = WOS_URL_MAPPING.get(self.credentials.url)
425
+
426
+ # remove 'api' netloc from dataplatform url
427
+ parsed_dai_url = urlparse(url_map.dai_url)
428
+ updated_netlock = parsed_dai_url.netloc.replace("api.", "", 1)
429
+ platform_url = urlunparse(
430
+ parsed_dai_url._replace(netloc=updated_netlock))
431
+
432
+ return {
433
+ "wml_url": url_map.wml_url,
434
+ "platform_url": platform_url,
435
+ "wos_url": url_map.wxg_url,
436
+ "dataplatform_url": url_map.dai_url,
437
+ }
438
+
439
+ # using CPD
440
+ return {
441
+ "wml_url": self.credentials.url,
442
+ "platform_url": self.credentials.url,
443
+ "wos_url": self.credentials.url,
444
+ "dataplatform_url": self.credentials.url,
445
+ }
446
+
447
def __build_pta_evaluator_config(self, config: dict[str, any]):
    """Build the evaluator configuration from the simplified ``config``.

    A ``development`` stage is added when ``development_project_id`` is
    present and a ``production`` stage when ``production_space_id`` is
    present. Missing ``space_deployment`` entries are filled with generated
    or default values. AI use case details are collected only when
    ``ai_usecase_id`` is present.

    The caller's ``config`` is left untouched: ``space_deployment`` is copied
    before defaults are written into it. Previously the defaults were written
    into the caller's own dictionary.

    Args:
        config: Simplified configuration dictionary.

    Returns:
        dict with ``common_configurations`` plus one entry per configured
        stage.

    Raises:
        Exception: Propagated from the prompt setup builder when
            ``problem_type`` is missing or unsupported.
    """
    input_prompt_setup = config.get("prompt_setup", {})

    base_prompt_setup = self.__build_base_prompt_setup(
        input_prompt_setup=input_prompt_setup
    )
    task_type = input_prompt_setup.get("problem_type")

    stages = {}

    if "development_project_id" in config:
        development_prompt_setup = base_prompt_setup.copy()
        development_prompt_setup["operational_space_id"] = "development"
        development_prompt_setup["monitors"] = self.__convert_monitor_to_dict(
            monitors_list=config.get("development_monitors"),
            task_type=task_type,
        )

        stages["development"] = {
            "prompt_setup": development_prompt_setup,
            "project_id": config.get("development_project_id"),
        }

    if "production_space_id" in config:
        production_prompt_setup = base_prompt_setup.copy()
        production_prompt_setup["operational_space_id"] = "production"
        production_prompt_setup["monitors"] = self.__convert_monitor_to_dict(
            monitors_list=config.get("production_monitors"),
            task_type=task_type,
        )

        # Copy so the defaults filled in below never leak into the caller's
        # configuration; `or {}` also tolerates an explicit None.
        space_deployment = dict(config.get("space_deployment") or {})

        if "serving_name" not in space_deployment:
            space_deployment["serving_name"] = (
                f"deployment_{calendar.timegm(time.gmtime())}"
            )

        # A missing or empty base model falls back to the model of whichever
        # prompt template is configured, then to the class default.
        if not space_deployment.get("base_model_id"):
            template_key = (
                "prompt_template"
                if "prompt_template" in config
                else "detached_prompt_template"
            )
            space_deployment["base_model_id"] = config.get(template_key, {}).get(
                "model_id", self.DEFAULT_MODEL_ID
            )

        if "description" not in space_deployment:
            space_deployment["description"] = (
                f"production space {calendar.timegm(time.gmtime())}"
            )

        space_deployment.setdefault("name", "Production Space")
        space_deployment.setdefault("version_date", "2024-12-18")

        stages["production"] = {
            "space_deployment": space_deployment,
            "prompt_setup": production_prompt_setup,
            "space_id": config.get("production_space_id"),
        }

    # Only parse model usecase details if ai_usecase_id is provided
    usecase_details = None
    if "ai_usecase_id" in config:
        usecase_keys = (
            "ai_usecase_id",
            "catalog_id",
            "approach_version",
            "approach_id",
        )
        usecase_details = {
            key: config[key] for key in usecase_keys if key in config
        }

        # Set default approach_version
        if "approach_version" not in usecase_details:
            usecase_details["approach_version"] = self.DEFAULT_APPROACH_VERSION

    config_urls = self.__get_pta_evaluator_urls()

    pta_config = {
        "common_configurations": {
            "credentials": self.__get_credentials_dict(),
            "use_cpd": self.credentials.version is not None,
            "use_ssl": not self.credentials.disable_ssl,
            "service_instance_id": (
                self.credentials.service_instance_id
                if self.credentials.service_instance_id
                else "00000000-0000-0000-0000-000000000000"
            ),
            "ai_usecase": usecase_details,
            **config_urls,
        },
        **stages,
    }

    return pta_config
549
+
550
def get_prompt_template_id(
    self,
    environment: EvaluationStage = EvaluationStage.DEVELOPMENT,
) -> str:
    """Return the prompt template id the evaluator holds for a stage.

    Args:
        environment: Stage to look up, forwarded to the evaluator as
            ``stage``. Defaults to the development stage.

    Returns:
        Whatever the wrapped evaluator's ``get_prompt_template_id`` returns.
    """
    lookup_template_id = self.__pta_evaluator.get_prompt_template_id
    return lookup_template_id(stage=environment)