ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353) hide show
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
@@ -0,0 +1,656 @@
1
+ import os
2
+ from typing import Any, Dict, List, Optional, Type, TypeVar, Union
3
+
4
+ try:
5
+ from ibm_watsonx_ai import Credentials
6
+ from ibm_watsonx_ai.foundation_models import ModelInference
7
+ except ImportError as e:
8
+ raise ImportError(
9
+ "Please install the ibm-watsonx-ai package: pip install 'llmevalkit[ibm_watsonx_ai]'"
10
+ ) from e
11
+
12
+ from llmevalkit.llm.base import Hook, LLMClient, register_llm
13
+ from llmevalkit.llm.output_parser import ValidatingLLMClient
14
+ from llmevalkit.llm.types import GenerationMode, LLMResponse, ParameterMapper
15
+ from pydantic import BaseModel
16
+
17
+ from ..consts import WX_API_KEY, WX_PROJECT_ID, WX_SPACE_ID, WX_URL
18
+
19
# Type variable bounded by WatsonxLLMClient. The bound is a forward reference
# (string) because the class is defined further down in this module.
T = TypeVar("T", bound="WatsonxLLMClient")

# Accepted ways to describe an output schema: a plain dict, a pydantic
# BaseModel subclass, or any other class.
# NOTE(review): the dict form is presumably a JSON schema -- confirm against callers.
SchemaType = Union[
    Dict[str, Any],
    Type["BaseModel"],
    Type,
]
21
+
22
+ # -------------------------------------------------------------------
23
+ # 1. Non-validating Watsonx wrapper
24
+ # -------------------------------------------------------------------
25
+
26
+
27
+ @register_llm("watsonx")
28
+ class WatsonxLLMClient(LLMClient):
29
+ """
30
+ Adapter for IBM watsonx.ai Foundation Model (via ibm_watsonx_ai.foundation_models.ModelInference).
31
+
32
+ Supports:
33
+ - text: sync generation (ModelInference.generate)
34
+ - chat: sync chat (ModelInference.chat)
35
+ - text_async: async generation (ModelInference.agenerate)
36
+ - chat_async: async chat (ModelInference.achat)
37
+ """
38
+
39
def __init__(
    self,
    model_name: Optional[str] = None,
    api_key: Optional[str] = None,
    project_id: Optional[str] = None,
    space_id: Optional[str] = None,
    deployment_id: Optional[str] = None,
    url: Optional[str] = None,
    hooks: Optional[List[Hook]] = None,
    model_id: Optional[str] = None,
    **model_kwargs: Any,
) -> None:
    """
    Initialize the Watsonx client.

    Any of ``url``, ``api_key``, ``project_id`` and ``space_id`` that is not
    passed explicitly is looked up in the environment variable named by
    WX_URL, WX_API_KEY, WX_PROJECT_ID and WX_SPACE_ID respectively.

    Args:
        model_name: Identifier of the watsonx model (e.g., "meta-llama/llama-3-3-70b-instruct").
        api_key: (Optional) Your IBM Cloud API Key for watsonx.ai.
        project_id: (Optional) watsonx project ID. Takes precedence over space_id.
        space_id: (Optional) watsonx space ID. Only consulted when no project ID
            is available.
        deployment_id: (Optional) watsonx deployment ID.
        url: (Optional) Base URL for the watsonx endpoint (e.g., "https://us-south.ml.cloud.ibm.com").
        hooks: Optional observability hooks.
        model_id: (Optional) Alternative spelling of model_name; when given it
            overrides model_name.
        model_kwargs: Additional keyword args passed to ModelInference constructor.

    Raises:
        EnvironmentError: If the URL or API key cannot be resolved, or if
            neither a project ID nor a space ID can be resolved.
        ValueError: If neither model_name nor model_id is provided.
    """
    self.model_name = model_name
    self._model_kwargs = model_kwargs

    url = url or os.getenv(WX_URL)
    if not url:
        raise EnvironmentError(
            f"Missing API URL; please set the '{WX_URL}' environment variable."
        )

    api_key = api_key or os.getenv(WX_API_KEY)
    if not api_key:
        raise EnvironmentError(
            f"Missing API key; please set the '{WX_API_KEY}' environment variable."
        )

    project_id = project_id or os.getenv(WX_PROJECT_ID)
    if not project_id:
        # No project available: fall back to a deployment space. Only fail
        # when the space ID is missing as well (previously the error was
        # raised right after the environment lookup, so a space ID coming
        # from the environment could never be used).
        space_id = space_id or os.getenv(WX_SPACE_ID)
        if not space_id:
            raise EnvironmentError(
                f"Missing project ID; please set the '{WX_PROJECT_ID}' or '{WX_SPACE_ID}' environment variable."
            )

    creds = Credentials(api_key=api_key, url=url)

    if model_id:
        # If model_id is provided, use it as the model_name
        model_name = model_id

    self.model_name = model_name
    self.model_id = model_name

    if not model_name:
        raise ValueError("model_name or model_id must be provided")

    # Assemble provider_kwargs for LLMClient base class
    provider_kwargs: Dict[str, Any] = {
        "model_id": model_name,
        "credentials": creds,
    }
    # A project ID wins over a space ID; only one of the two is forwarded.
    if project_id:
        provider_kwargs["project_id"] = project_id
    elif space_id:
        provider_kwargs["space_id"] = space_id

    if deployment_id:
        provider_kwargs["deployment_id"] = deployment_id

    # Pass through any additional ModelInference args (params, verify,
    # validate, etc.). dict.update means a key supplied here replaces one
    # assembled above (e.g. "credentials").
    provider_kwargs.update(model_kwargs)

    # Hand the assembled kwargs to the LLMClient base class, asking it to
    # create the underlying client itself (client=None, client_needs_init=True).
    super().__init__(
        client=None, client_needs_init=True, hooks=hooks, **provider_kwargs
    )
122
+
123
@classmethod
def provider_class(cls) -> Type:
    """
    Return the SDK class this adapter wraps.

    For watsonx.ai that is ``ibm_watsonx_ai.foundation_models.ModelInference``.
    """
    sdk_client_cls = ModelInference
    return sdk_client_cls
129
+
130
def _register_methods(self) -> None:
    """
    Declare which ModelInference method serves each generation mode.

    Mode          Method       Third argument
    text          generate     prompt
    text_async    agenerate    prompt
    chat          chat         messages
    chat_async    achat        messages
    """
    # (generation mode, ModelInference method name, third set_method_config argument)
    # NOTE(review): the third value presumably names the input argument of
    # the SDK method -- confirm against LLMClient.set_method_config.
    method_table = (
        (GenerationMode.TEXT, "generate", "prompt"),
        (GenerationMode.TEXT_ASYNC, "agenerate", "prompt"),
        (GenerationMode.CHAT, "chat", "messages"),
        (GenerationMode.CHAT_ASYNC, "achat", "messages"),
    )
    for generation_mode, method_name, input_name in method_table:
        self.set_method_config(generation_mode.value, method_name, input_name)
145
+
146
def _setup_parameter_mapper(self) -> None:
    """Create the ParameterMapper holding the watsonx text and chat parameter names."""
    mapper = ParameterMapper()
    self._parameter_mapper = mapper

    # Text generation parameters (based on TextGenParameters):
    # (generic name, watsonx name)
    text_parameter_names = (
        ("temperature", "temperature"),
        ("top_p", "top_p"),
        ("top_k", "top_k"),
        ("max_tokens", "max_new_tokens"),
        ("min_tokens", "min_new_tokens"),
        ("repetition_penalty", "repetition_penalty"),
        ("seed", "random_seed"),
        ("stop_sequences", "stop_sequences"),
        ("timeout", "time_limit"),
        ("decoding_method", "decoding_method"),
    )
    for generic_name, watsonx_name in text_parameter_names:
        mapper.set_text_mapping(generic_name, watsonx_name)

    # Chat parameters (based on TextChatParameters):
    # (generic name, watsonx name)
    chat_parameter_names = (
        ("temperature", "temperature"),
        ("top_p", "top_p"),
        ("max_tokens", "max_tokens"),
        ("frequency_penalty", "frequency_penalty"),
        ("presence_penalty", "presence_penalty"),
        ("seed", "seed"),
        ("stop_sequences", "stop"),
        ("timeout", "time_limit"),
        ("logprobs", "logprobs"),
        ("top_logprobs", "top_logprobs"),
    )
    for generic_name, watsonx_name in chat_parameter_names:
        mapper.set_chat_mapping(generic_name, watsonx_name)

    # Custom transform for the "echo" parameter.
    # NOTE(review): the value is only forwarded (as "include_stop_sequence")
    # in text modes and only when its string form contains "stop"; confirm
    # this is the intended handling of "echo".
    def transform_echo_text_mode(value, mode):
        if mode not in ("text", "text_async"):
            return {}
        if "stop" not in str(value).lower():
            return {}
        return {"include_stop_sequence": value}

    mapper.set_custom_transform("echo", transform_echo_text_mode)
194
+
195
+ def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
196
+ """
197
+ Extract the generated text and tool calls from a watsonx response.
198
+
199
+ - For text generation: raw['results'][0]['generated_text']
200
+ - For chat: raw['choices'][0]['message']['content']
201
+ """
202
+ content = ""
203
+ tool_calls = []
204
+
205
+ # Text‐generation style
206
+ if isinstance(raw, dict) and "results" in raw:
207
+ results = raw["results"]
208
+ if isinstance(results, list) and results:
209
+ first = results[0]
210
+ content = first.get("generated_text", "")
211
+
212
+ # Chat style
213
+ elif isinstance(raw, dict) and "choices" in raw:
214
+ choices = raw["choices"]
215
+ if isinstance(choices, list) and choices:
216
+ first = choices[0]
217
+ msg = first.get("message")
218
+ if isinstance(msg, dict):
219
+ content = msg.get("content", "")
220
+ # Extract tool calls if present
221
+ if "tool_calls" in msg and msg["tool_calls"]:
222
+ tool_calls = []
223
+ for tool_call in msg["tool_calls"]:
224
+ tool_call_dict = {
225
+ "id": tool_call.get("id"),
226
+ "type": tool_call.get("type", "function"),
227
+ "function": {
228
+ "name": tool_call.get("function", {}).get("name"),
229
+ "arguments": tool_call.get("function", {}).get(
230
+ "arguments"
231
+ ),
232
+ },
233
+ }
234
+ tool_calls.append(tool_call_dict)
235
+ elif "text" in first:
236
+ content = first["text"]
237
+
238
+ if not content and not tool_calls:
239
+ raise ValueError(f"Unexpected watsonx response format: {raw!r}")
240
+
241
+ # Return LLMResponse if tool calls exist, otherwise just content
242
+ if tool_calls:
243
+ return LLMResponse(content=content, tool_calls=tool_calls)
244
+ return content
245
+
246
def generate(
    self,
    prompt: Union[str, List[Dict[str, Any]]],
    mode: Union[str, GenerationMode] = GenerationMode.CHAT,
    generation_args: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """
    Synchronous generation override for WatsonX.

    - In 'chat' mode a string prompt is wrapped into a single user message;
      a list prompt is passed through unchanged.
    - In 'text' mode a list of message dicts is flattened into one string by
      joining the non-empty "content" values with newlines; a string prompt
      is passed through unchanged.
    - Generation parameters are gathered under the ``params`` keyword that
      is forwarded to the parent implementation.

    Args:
        prompt: Prompt string or list of chat message dicts.
        mode: 'text' or 'chat', as a string (case-insensitive) or a
            GenerationMode member.
        generation_args: Optional GenerationArgs; when given (and a
            parameter mapper is configured) it is mapped to watsonx
            parameter names and merged over any caller-supplied ``params``.
        **kwargs: Extra keyword arguments forwarded to the parent
            ``generate``. A ``params`` dict is copied before merging, so
            the caller's object is never modified; ``params=None`` is
            treated as "no params".

    Returns:
        Whatever the parent ``generate`` returns for the parsed response.

    Raises:
        KeyError: If ``mode`` is neither 'text' nor 'chat'.
        TypeError: If, in chat mode, ``prompt`` is neither str nor list.
    """
    mode_str = mode.value if isinstance(mode, GenerationMode) else mode
    mode_str = mode_str.lower()

    if mode_str not in ("text", "chat"):
        raise KeyError(
            f"Unsupported mode '{mode_str}' for WatsonxLLMClient.generate"
        )

    # Normalize prompt format based on mode
    if mode_str == GenerationMode.CHAT.value:
        # Chat mode expects a list of messages
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]
        elif not isinstance(prompt, list):
            raise TypeError(
                "For chat mode, prompt must be a string or List[Dict[str,str]]"
            )
    elif mode_str == GenerationMode.TEXT.value:
        # Text mode expects a string prompt
        if isinstance(prompt, list):
            # Convert messages to a simple string
            prompt = "\n".join(
                [msg.get("content", "")
                 for msg in prompt if msg.get("content")]
            )

    # Take the caller's params without aliasing them: merging mapped
    # arguments into the caller's own dict would leak settings into later
    # calls that reuse the same dict. Non-dict params objects are passed
    # through untouched.
    params = kwargs.pop("params", None)
    if params is None:
        params = {}
    elif isinstance(params, dict):
        params = dict(params)

    # Map generation_args to WatsonX parameters if provided
    if generation_args and self._parameter_mapper:
        from llmevalkit.llm.types import GenerationArgs

        if isinstance(generation_args, GenerationArgs):
            mapped_args = self._parameter_mapper.map_args(
                generation_args, mode_str)
            # Mapped arguments win over caller-supplied params
            params.update(mapped_args)

    watsonx_kwargs = {}
    # Only forward params when there is something to send
    if params:
        watsonx_kwargs["params"] = params

    # Add any other kwargs that aren't generation parameters
    watsonx_kwargs.update(kwargs)

    return super().generate(prompt=prompt, mode=mode_str, **watsonx_kwargs)
312
+
313
async def generate_async(
    self,
    prompt: Union[str, List[Dict[str, Any]]],
    mode: Union[str, GenerationMode] = GenerationMode.CHAT_ASYNC,
    generation_args: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """
    Asynchronous generation override for WatsonX.

    - In 'chat_async' mode a string prompt is wrapped into a single user
      message; a list prompt is passed through unchanged.
    - In 'text_async' mode a list of message dicts is flattened into one
      string by joining the non-empty "content" values with newlines; a
      string prompt is passed through unchanged.
    - Generation parameters are gathered under the ``params`` keyword that
      is forwarded to the parent implementation.

    Args:
        prompt: Prompt string or list of chat message dicts.
        mode: 'text_async' or 'chat_async', as a string (case-insensitive)
            or a GenerationMode member.
        generation_args: Optional GenerationArgs; when given (and a
            parameter mapper is configured) it is mapped to watsonx
            parameter names and merged over any caller-supplied ``params``.
        **kwargs: Extra keyword arguments forwarded to the parent
            ``generate_async``. A ``params`` dict is copied before merging,
            so the caller's object is never modified; ``params=None`` is
            treated as "no params".

    Returns:
        Whatever the parent ``generate_async`` returns for the parsed
        response.

    Raises:
        KeyError: If ``mode`` is neither 'text_async' nor 'chat_async'.
        TypeError: If, in chat mode, ``prompt`` is neither str nor list.
    """
    mode_str = mode.value if isinstance(mode, GenerationMode) else mode
    mode_str = mode_str.lower()

    if mode_str not in ("text_async", "chat_async"):
        raise KeyError(
            f"Unsupported mode '{mode_str}' for WatsonxLLMClient.generate_async"
        )

    if mode_str == GenerationMode.CHAT_ASYNC.value:
        # Chat mode expects a list of messages
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]
        elif not isinstance(prompt, list):
            raise TypeError(
                "For chat_async mode, prompt must be a string or List[Dict[str,str]]"
            )
    elif mode_str == GenerationMode.TEXT_ASYNC.value:
        # Text mode expects a string prompt
        if isinstance(prompt, list):
            # Convert messages to a simple string
            prompt = "\n".join(
                [msg.get("content", "")
                 for msg in prompt if msg.get("content")]
            )

    # Take the caller's params without aliasing them: merging mapped
    # arguments into the caller's own dict would leak settings into later
    # calls that reuse the same dict. Non-dict params objects are passed
    # through untouched.
    params = kwargs.pop("params", None)
    if params is None:
        params = {}
    elif isinstance(params, dict):
        params = dict(params)

    # Map generation_args to WatsonX parameters if provided
    if generation_args and self._parameter_mapper:
        from llmevalkit.llm.types import GenerationArgs

        if isinstance(generation_args, GenerationArgs):
            mapped_args = self._parameter_mapper.map_args(
                generation_args, mode_str)
            # Mapped arguments win over caller-supplied params
            params.update(mapped_args)

    watsonx_kwargs = {}
    # Only forward params when there is something to send
    if params:
        watsonx_kwargs["params"] = params

    # Add any other kwargs that aren't generation parameters
    watsonx_kwargs.update(kwargs)

    return await super().generate_async(
        prompt=prompt, mode=mode_str, **watsonx_kwargs
    )
380
+
381
+
382
+ # -------------------------------------------------------------------
383
+ # 2. Validating Watsonx wrapper
384
+ # -------------------------------------------------------------------
385
+
386
+
387
+ @register_llm("watsonx.output_val")
388
+ class WatsonxLLMClientOutputVal(ValidatingLLMClient):
389
+ """
390
+ Validating adapter for IBM watsonx.ai Foundation Model.
391
+
392
+ Extends ValidatingLLMClient to enforce output structure (via JSON Schema,
393
+ Pydantic models, or simple Python types) on all generate calls,
394
+ with retries and batch support (sync & async).
395
+ """
396
+
397
+ def __init__(
398
+ self,
399
+ model_id: str,
400
+ api_key: Optional[str] = None,
401
+ project_id: Optional[str] = None,
402
+ space_id: Optional[str] = None,
403
+ username: Optional[str] = None,
404
+ version: Optional[str] = None,
405
+ instance_id: Optional[str] = None,
406
+ password: Optional[str] = None,
407
+ url: Optional[str] = "https://us-south.ml.cloud.ibm.com",
408
+ hooks: Optional[List[Hook]] = None,
409
+ **model_kwargs: Any,
410
+ ) -> None:
411
+ """
412
+ Initialize a Watsonx client with output validation.
413
+
+ Builds a Credentials object from the credential arguments and hands it,
+ together with the model/project/space identifiers and model_kwargs, to
+ the parent initializer as provider keyword arguments.
+
414
+ Args:
415
+ model_id: Identifier of the watsonx model.
416
+ api_key: Your IBM Cloud API Key. Added to the credentials only when not None.
417
+ project_id: (Optional) watsonx project ID.
+ space_id: (Optional) watsonx space ID; forwarded as a provider keyword
+ argument only when not None.
+ username: (Optional) Username placed into the credentials.
+ version: (Optional) Version placed into the credentials when not None.
+ NOTE(review): presumably identifies a non-cloud (software)
+ deployment -- confirm.
+ instance_id: (Optional) Instance ID placed into the credentials.
+ password: (Optional) Password; only used when api_key is None.
418
+ url: (Optional) Base URL for the watsonx endpoint. Defaults to
+ "https://us-south.ml.cloud.ibm.com".
419
+ hooks: Optional observability hooks.
420
+ model_kwargs: Additional arguments passed to the ModelInference constructor.
+ They are also kept on the instance as ``_model_kwargs``.
421
+ """
422
+ self.model_id = model_id
423
+ self._model_kwargs = model_kwargs
424
+
425
+ # Assemble the Credentials keyword arguments; optional fields are added conditionally.
+ creds_args = {"url": url}
426
+ if api_key is not None:
427
+ creds_args["api_key"] = api_key
428
+ if version is not None:
429
+ creds_args["version"] = version
430
+ creds_args["instance_id"] = instance_id
431
+ creds_args["username"] = username
432
+ if api_key is None and password is not None:
433
+ creds_args["password"] = password
434
+
435
+ creds = Credentials(**creds_args)
436
+ provider_kwargs: Dict[str, Any] = {
437
+ "model_id": model_id,
438
+ "credentials": creds,
439
+ }
440
+ if project_id is not None:
441
+ provider_kwargs["project_id"] = project_id
442
+ if space_id is not None:
443
+ provider_kwargs["space_id"] = space_id
444
+
445
+ # Caller-supplied model_kwargs are merged last, so they override the keys set above.
+ provider_kwargs.update(model_kwargs)
446
+
447
+ super().__init__(
448
+ client=None, client_needs_init=True, hooks=hooks, **provider_kwargs
449
+ )
450
+
451
@classmethod
def provider_class(cls) -> Type:
    """
    Identify the SDK class wrapped by this validating client.

    Returns:
        ``ModelInference`` itself (the class object, not an instance).
    """
    wrapped_sdk_class = ModelInference
    return wrapped_sdk_class
457
+
458
def _register_methods(self) -> None:
    """
    Register the ModelInference method used for each generation mode.

    Each row is (generation mode, ModelInference method name, third
    argument given to ``set_method_config`` -- presumably the name of the
    prompt argument of that method):

    - text        -> generate   / "prompt"
    - text_async  -> agenerate  / "prompt"
    - chat        -> chat       / "messages"
    - chat_async  -> achat      / "messages"
    """
    registrations = [
        (GenerationMode.TEXT, "generate", "prompt"),
        (GenerationMode.TEXT_ASYNC, "agenerate", "prompt"),
        (GenerationMode.CHAT, "chat", "messages"),
        (GenerationMode.CHAT_ASYNC, "achat", "messages"),
    ]
    for mode_member, method_name, prompt_argument in registrations:
        self.set_method_config(
            mode_member.value, method_name, prompt_argument)
473
+
474
def _setup_parameter_mapper(self) -> None:
    """
    Build the ParameterMapper for the validating IBM WatsonX client.

    Registers (generic name, watsonx name) pairs for text generation and
    for chat, plus a custom transform for the "echo" argument. Unlike the
    non-validating client's setup, no "decoding_method" text mapping is
    registered here.
    """
    mapper = ParameterMapper()
    self._parameter_mapper = mapper

    # Text generation parameters (based on TextGenParameters)
    for generic_name, watsonx_name in (
        ("temperature", "temperature"),
        ("top_p", "top_p"),
        ("top_k", "top_k"),
        ("max_tokens", "max_new_tokens"),
        ("min_tokens", "min_new_tokens"),
        ("repetition_penalty", "repetition_penalty"),
        ("seed", "random_seed"),
        ("stop_sequences", "stop_sequences"),
        ("timeout", "time_limit"),
    ):
        mapper.set_text_mapping(generic_name, watsonx_name)

    # Chat parameters (based on TextChatParameters)
    for generic_name, watsonx_name in (
        ("temperature", "temperature"),
        ("top_p", "top_p"),
        ("max_tokens", "max_tokens"),
        ("frequency_penalty", "frequency_penalty"),
        ("presence_penalty", "presence_penalty"),
        ("seed", "seed"),
        ("stop_sequences", "stop"),
        ("timeout", "time_limit"),
        ("logprobs", "logprobs"),
        ("top_logprobs", "top_logprobs"),
    ):
        mapper.set_chat_mapping(generic_name, watsonx_name)

    def transform_echo_text_mode(value, mode):
        # Only the text modes ever produce a parameter for "echo", and only
        # when the value's string form mentions "stop".
        is_text_mode = mode in ("text", "text_async")
        if is_text_mode and "stop" in str(value).lower():
            return {"include_stop_sequence": value}
        return {}

    mapper.set_custom_transform("echo", transform_echo_text_mode)
518
+
519
+ def _parse_llm_response(self, raw: Any) -> str:
520
+ """
521
+ Extract the assistant-generated text from a watsonx response.
522
+
523
+ Same logic as non-validating client.
524
+ """
525
+ if isinstance(raw, dict) and "results" in raw:
526
+ results = raw["results"]
527
+ if isinstance(results, list) and results:
528
+ first = results[0]
529
+ return first.get("generated_text", "")
530
+ if isinstance(raw, dict) and "choices" in raw:
531
+ choices = raw["choices"]
532
+ if isinstance(choices, list) and choices:
533
+ first = choices[0]
534
+ msg = first.get("message")
535
+ if isinstance(msg, dict) and "content" in msg:
536
+ return msg["content"]
537
+ if "text" in first:
538
+ return first["text"]
539
+ raise ValueError(f"Unexpected watsonx response format: {raw!r}")
540
+
541
def generate(
    self,
    prompt: Union[str, List[Dict[str, Any]]],
    *,
    schema: SchemaType,
    retries: int = 3,
    generation_args: Optional[Any] = None,
    **kwargs: Any,
) -> Any:
    """
    Synchronous chat generation with validation + retries.

    The call always runs in "chat" mode; a string prompt is wrapped into a
    single user message.

    Args:
        prompt: Either a string or a list of chat messages.
        schema: JSON Schema dict, Pydantic model class, or built-in Python type.
        retries: Maximum attempts (including the first).
        generation_args: GenerationArgs to map to provider parameters; the
            mapped values are merged over any caller-supplied ``params``.
        **kwargs: Passed to the underlying ModelInference call (e.g., temperature).
            A ``params`` dict is copied before merging, so the caller's
            object is never modified; ``params=None`` is treated as
            "no params".

    Returns:
        Whatever the parent ``generate`` returns.
    """
    mode = "chat"

    # Normalize prompt to chat-messages
    if isinstance(prompt, str):
        prompt = [{"role": "user", "content": prompt}]

    # Take the caller's params without aliasing them: merging mapped
    # arguments into the caller's own dict would leak settings into later
    # calls that reuse the same dict. Non-dict params objects are passed
    # through untouched.
    params = kwargs.pop("params", None)
    if params is None:
        params = {}
    elif isinstance(params, dict):
        params = dict(params)

    # Map generation_args to WatsonX parameters if provided
    if generation_args and self._parameter_mapper:
        from llmevalkit.llm.types import GenerationArgs

        if isinstance(generation_args, GenerationArgs):
            mapped_args = self._parameter_mapper.map_args(
                generation_args, mode)
            # Mapped arguments win over caller-supplied params
            params.update(mapped_args)

    watsonx_kwargs = {}
    # Only forward params when there is something to send
    if params:
        watsonx_kwargs["params"] = params

    # Add any other kwargs that aren't generation parameters
    watsonx_kwargs.update(kwargs)

    # Merged through a dict literal so that later entries override earlier
    # ones: stored model kwargs first, then per-call kwargs.
    return super().generate(
        **{
            "prompt": prompt,
            "schema": schema,
            "retries": retries,
            "mode": mode,
            **self._model_kwargs,
            **watsonx_kwargs,
        }
    )
599
+
600
async def generate_async(
    self,
    prompt: Union[str, List[Dict[str, Any]]],
    *,
    schema: SchemaType,
    retries: int = 3,
    generation_args: Optional[Any] = None,
    **kwargs: Any,
) -> Any:
    """
    Asynchronous chat generation with validation + retries.

    The call always runs in "chat_async" mode; a string prompt is wrapped
    into a single user message.

    Args:
        prompt: Either a string or a list of chat messages.
        schema: JSON Schema dict, Pydantic model class, or built-in Python type.
        retries: Maximum attempts.
        generation_args: GenerationArgs to map to provider parameters; the
            mapped values are merged over any caller-supplied ``params``.
        **kwargs: Passed to the underlying ModelInference call. A ``params``
            dict is copied before merging, so the caller's object is never
            modified; ``params=None`` is treated as "no params".

    Returns:
        Whatever the parent ``generate_async`` returns.
    """
    mode = "chat_async"

    if isinstance(prompt, str):
        prompt = [{"role": "user", "content": prompt}]

    # Take the caller's params without aliasing them: merging mapped
    # arguments into the caller's own dict would leak settings into later
    # calls that reuse the same dict. Non-dict params objects are passed
    # through untouched.
    params = kwargs.pop("params", None)
    if params is None:
        params = {}
    elif isinstance(params, dict):
        params = dict(params)

    # Map generation_args to WatsonX parameters if provided
    if generation_args and self._parameter_mapper:
        from llmevalkit.llm.types import GenerationArgs

        if isinstance(generation_args, GenerationArgs):
            mapped_args = self._parameter_mapper.map_args(
                generation_args, mode)
            # Mapped arguments win over caller-supplied params
            params.update(mapped_args)

    watsonx_kwargs = {}
    # Only forward params when there is something to send
    if params:
        watsonx_kwargs["params"] = params

    # Add any other kwargs that aren't generation parameters
    watsonx_kwargs.update(kwargs)

    # Merged through a dict literal so that later entries override earlier
    # ones: stored model kwargs first, then per-call kwargs.
    return await super().generate_async(
        **{
            "prompt": prompt,
            "schema": schema,
            "retries": retries,
            "mode": mode,
            **self._model_kwargs,
            **watsonx_kwargs,
        }
    )