ibm-watsonx-gov 1.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353)
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cp313-win_amd64.pyd +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cp313-win_amd64.pyd +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cp313-win_amd64.pyd +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cp313-win_amd64.pyd +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cp313-win_amd64.pyd +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cp313-win_amd64.pyd +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cp313-win_amd64.pyd +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cp313-win_amd64.pyd +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +4 -0
@@ -0,0 +1,596 @@
1
+ from __future__ import annotations
2
+ import json
3
+ from typing import (
4
+ Any,
5
+ Dict,
6
+ List,
7
+ Optional,
8
+ Union,
9
+ Literal,
10
+ )
11
+ from types import NoneType
12
+
13
+ from pydantic import BaseModel, Field, model_validator, ValidationError
14
+
15
+ from llmevalkit.metrics import MetricRunResult
16
+
17
+ # ----------------------------------------------------------------------
18
+ # 1. Function-Call Metric Models
19
+ # ----------------------------------------------------------------------
20
+
21
+
22
class FunctionCallMetric(BaseModel):
    """
    Definition of one function-call metric.

    Holds the metric's name, a description of the task it evaluates, the
    JSON Schema describing its output, and an optional list of examples.
    """

    # Required fields (an Ellipsis default marks them as mandatory).
    name: str = Field(
        default=...,
        description="Name of the metric (e.g. 'function_selection').",
    )
    task_description: str = Field(
        default=...,
        description="Description of the task this metric is evaluating.",
    )
    jsonschema: Dict[str, Any] = Field(
        default=...,
        description="JSON Schema dict for this metric's output.",
    )

    # Optional: each example dict may only use the keys 'user_kwargs' and 'output'.
    examples: Optional[List[Dict[Literal["user_kwargs", "output"], Any]]] = Field(
        default=None,
        description=(
            "List of example inputs and outputs for this metric; each example "
            "is a dict with 'user_kwargs' and 'output' keys."
        ),
    )
43
+
44
+
45
+ # ----------------------------------------------------------------------
46
+ # 2. Static-Check Models (Optional)
47
+ # ----------------------------------------------------------------------
48
+
49
+
50
class StaticMetricResult(BaseModel):
    """
    Outcome of one static (schema-based) validation check.

    Carries the check's description and pass/fail flag, plus an optional
    failure explanation and an optional corrected tool call.
    """

    # Required fields.
    description: str = Field(
        default=...,
        description="Human-readable description of this static validation check.",
    )
    valid: bool = Field(
        default=...,
        description="True if this static check passed; False otherwise.",
    )

    # Optional fields, both defaulting to None.
    explanation: Optional[str] = Field(
        default=None,
        description=(
            "If valid==False, a detailed explanation of why the check "
            "failed; otherwise None."
        ),
    )
    correction: Optional[Dict[str, Any]] = Field(
        default=None,
        description=(
            "If type conversion was successful, contains the corrected "
            "tool call; otherwise None."
        ),
    )
75
+
76
+
77
class StaticResult(BaseModel):
    """
    Combined static (schema-based) check results for a single function call.

    Pairs the per-check results with one overall pass/fail decision. The
    decision is stored as given; this model does not compute it.
    """

    metrics: Dict[str, StaticMetricResult] = Field(
        default=...,
        description="Mapping from each static-check name to its StaticMetricResult.",
    )
    final_decision: bool = Field(
        default=...,
        description=(
            "Overall outcome: False if any metric.valid is False; "
            "True only if all pass."
        ),
    )
92
+
93
+
94
+ # ----------------------------------------------------------------------
95
+ # 3. Semantic Metric Result Models
96
+ # ----------------------------------------------------------------------
97
+
98
+
99
class SemanticMetricResult(BaseModel):
    """
    Wraps a single metric evaluation result returned by MetricRunner.

    Besides the stored fields, two read-only properties are provided:
    ``output_value`` (the numeric 'output' entry of the raw response, if
    any) and ``normalized_output`` (that value rescaled using the schema's
    minimum/maximum for 'output').
    """

    metric_name: str = Field(
        ..., description="Identifier (name) of the evaluated metric."
    )
    jsonschema: Dict[str, Any] = Field(
        ..., description="JSON Schema dict that was used to validate output."
    )
    prompt: Union[str, List[Dict[str, str]]] = Field(
        ...,
        description=(
            "The actual prompt sent to the LLM—either a plain string "
            "or a list of {'role','content'} messages."
        ),
    )
    raw_response: Any = Field(
        ..., description="Raw response returned by the LLM client."
    )
    numeric_thresholds_checks: Dict[str, bool] = Field(
        ...,
        description=(
            "For every numeric field in the metric, a boolean indicating "
            "whether the parsed value fell within its [low, high] thresholds."
        ),
    )
    is_important: bool = Field(
        ...,
        description=(
            "True if the metric's confidence field met its importance threshold; "
            "False otherwise."
        ),
    )
    importance_reason: Optional[str] = Field(
        None,
        description=(
            "If is_important==False, a textual reason (e.g. 'confidence too low'); "
            "otherwise None."
        ),
    )
    error: Optional[str] = Field(
        None,
        description=(
            "Error message if prompt generation or parsing failed; " "otherwise None."
        ),
    )
    is_correct: bool = Field(
        ...,
        description=(
            "True if both importance and the metric's primary value field "
            "fell within thresholds; False otherwise."
        ),
    )
    correctness_reason: Optional[str] = Field(
        None,
        description=(
            "If is_correct==False, a textual reason why the value or confidence "
            "fell outside thresholds; otherwise None."
        ),
    )
    is_issue: bool = Field(
        ...,
        description=(
            "True if is_correct==False and is_important==True; False otherwise."
        ),
    )

    @classmethod
    def from_runner(cls, rr: MetricRunResult) -> "SemanticMetricResult":
        """
        Construct from an internal MetricRunResult instance.

        The runner result is dumped to a plain dict and passed as keyword
        arguments to this model's constructor.
        """
        return cls(**rr.model_dump())

    @property
    def output_value(self) -> Optional[float]:
        """
        Convenience accessor for the metric's primary 'output' numeric field.

        Returns:
            The 'output' entry of ``raw_response`` as a float when it is an
            int or float; None when the response is empty, has no usable
            'output' entry, or is not a mapping-like object at all.
        """
        raw = self.raw_response
        # raw_response is typed Any, so it may be a plain string or list.
        # Only objects exposing a callable .get() can carry an 'output' key;
        # anything else yields None instead of raising AttributeError.
        getter = getattr(raw, "get", None)
        if not raw or not callable(getter):
            return None

        value = getter("output")
        if isinstance(value, (int, float)):
            return float(value)
        return None

    @property
    def normalized_output(self) -> Optional[float]:
        """
        Linearly scale 'output' into [0,1] according to its schema min/max.

        The bounds are read from ``jsonschema['properties']['output']`` and
        default to 0.0 (minimum) and 1.0 (maximum) when absent.

        Returns:
            ``(output - minimum) / (maximum - minimum)``, or None when there
            is no numeric output or the two bounds are equal.
        """
        out = self.output_value
        subs = self.jsonschema.get("properties", {}).get("output", {})
        low = subs.get("minimum", 0.0)
        high = subs.get("maximum", 1.0)
        # Equal bounds would divide by zero, so they count as "no score".
        if out is None or high == low:
            return None
        return (out - low) / (high - low)
203
+
204
+
205
class SemanticCategoryResult(BaseModel):
    """
    Bundle of SemanticMetricResult objects belonging to one category.

    Categories listed for this model:
      - general
      - function_selection
      - parameter
    """

    # Per-metric results, keyed by metric name.
    metrics: Optional[Dict[str, SemanticMetricResult]] = Field(
        default=None,
        description="Mapping metric_name -> SemanticMetricResult for this category.",
    )
    # Category-level average; filled in by from_results().
    avg_score: Optional[float] = Field(
        default=None,
        description=(
            "Average of the 'output' values across all metrics whose "
            "confidence was within thresholds (is_important==True)."
        ),
    )

    @classmethod
    def from_results(cls, results: List[MetricRunResult]) -> "SemanticCategoryResult":
        """
        Assemble a category result from raw MetricRunResult objects.

        Each run result is converted with SemanticMetricResult.from_runner
        and stored under its ``metric_name``. ``avg_score`` is the mean of
        ``normalized_output`` over the metrics that have a normalized value
        and are flagged ``is_important``; it is None when no metric
        qualifies.
        """
        per_metric: Dict[str, SemanticMetricResult] = {}
        for run in results:
            per_metric[run.metric_name] = SemanticMetricResult.from_runner(run)

        # Only metrics with a usable normalized value AND is_important count.
        scores: List[float] = [
            score
            for metric in per_metric.values()
            if (score := metric.normalized_output) is not None and metric.is_important
        ]

        if scores:
            mean_score = sum(scores) / len(scores)
        else:
            mean_score = None
        return cls(metrics=per_metric, avg_score=mean_score)
244
+
245
+
246
class SemanticResult(BaseModel):
    """
    Semantic metric results for one function call, grouped by category.

    Every field is optional and defaults to None.
    """

    # Category: general tool-call metrics.
    general: Optional[SemanticCategoryResult] = Field(
        default=None,
        description="Results of general tool-call metrics, if any; otherwise None.",
    )
    # Category: function-selection metrics.
    function_selection: Optional[SemanticCategoryResult] = Field(
        default=None,
        description="Results of function-selection metrics, if any; otherwise None.",
    )
    # Category: parameter metrics, one SemanticCategoryResult per parameter name.
    parameter: Optional[Dict[str, SemanticCategoryResult]] = Field(
        default=None,
        description=(
            "Parameter-level results, keyed by parameter name, each with its metrics."
        ),
    )
    # Transformation checks, one TransformResult per parameter name.
    transform: Optional[Dict[str, TransformResult]] = Field(
        default=None,
        description=(
            "Optional per-parameter transformation results: "
            "mapping parameter_name -> TransformResult."
        ),
    )
272
+
273
+
274
+ # ----------------------------------------------------------------------
275
+ # 4. Transformation Result Model
276
+ # ----------------------------------------------------------------------
277
+
278
+
279
class TransformResult(BaseModel):
    """
    Result of unit-extraction and code-based transformation checks for one parameter.

    ``units``, ``generated_code``, ``execution_success`` and ``correct`` are
    required; ``execution_output``, ``correction`` and ``error`` default to
    None.
    """

    units: Dict[str, Any] = Field(
        ...,
        description=(
            "Extracted unit info: keys 'user_units', 'user_value', and 'spec_units'."
        ),
    )
    generated_code: str = Field(
        ...,
        description="The Python code snippet returned by the LLM for unit conversion.",
    )
    execution_success: bool = Field(
        ...,
        description="True if generated_code executed without error and matched values.",
    )
    # Note the asymmetry spelled out in the description: a transformation that
    # was never executed is still reported as correct=True.
    correct: bool = Field(
        ...,
        description=(
            "False if execution_success is True but the transformation "
            "was incorrect; True if the transformation was correct or was not executed."
        ),
    )
    execution_output: Any = Field(
        None, description="The actual output of executing the transformation code."
    )
    correction: Optional[str] = Field(
        None,
        # Typo fix: the description previously read "succedded".
        description="Correction explanation if execution succeeded but the transformation was incorrect.",
    )
    error: Optional[str] = Field(
        None,
        description=(
            "Error message if code generation or execution failed; " "otherwise None."
        ),
    )
318
+
319
+
320
+ # ----------------------------------------------------------------------
321
+ # 5. Pipeline I/O Models
322
+ # ----------------------------------------------------------------------
323
+
324
+
325
class FunctionCallInput(BaseModel):
    """
    Inputs handed to the function-calling pipeline; all three fields are required.
    """

    # Conversation so far: plain text or a list of message dicts.
    conversation_context: Union[str, List[Dict]] = Field(
        ...,
        description=(
            "Either a single user text string or a list of chat messages "
            "with {'role','content'}."
        ),
    )
    # NOTE: spelled "tools_inventory" here, whereas TrajectoryPipelineInput
    # uses "tool_inventory".
    tools_inventory: List[ToolSpec] = Field(
        ...,
        description=(
            "List of available tools; each entry must at least include "
            "'name' and argument schema."
        ),
    )
    # The tool call under evaluation.
    tool_call: ToolCall = Field(
        ...,
        description=(
            "Proposed function call dict: {\n"
            " 'name': '<function_name>',\n"
            " 'args': {<param>:<value>, ...}\n"
            "}."
        ),
    )
353
+
354
+
355
class TrajectoryPipelineInput(BaseModel):
    """
    Inputs handed to the trajectory evaluation pipeline; both fields are required.
    """

    # Trajectory as plain text or as a list of message dicts.
    trajectory: Union[str, List[Dict]] = Field(
        ...,
        description=(
            "The full conversation trajectory, including all user, assistant, and tool messages."
        ),
    )
    # Tool specifications available during the trajectory.
    tool_inventory: List[ToolSpec] = Field(
        ...,
        description=(
            "List of available tools; each entry must at least include 'name' and argument schema."
        ),
    )
368
+
369
+
370
class TrajectoryPipelineResult(BaseModel):
    """
    Output of the trajectory evaluation pipeline: the inputs plus an optional result.
    """

    # Required: the inputs this result was produced from.
    inputs: TrajectoryPipelineInput = Field(
        ...,
        description="Echo of the pipeline inputs.",
    )
    # Optional: evaluation outcome; defaults to None.
    result: Optional[SemanticCategoryResult] = Field(
        default=None,
        description="Trajectory evaluation results, if any; otherwise None.",
    )
381
+
382
+
383
class PipelineResult(BaseModel):
    """
    Final output of the function-calling pipeline for one tool call.

    ``overall_valid`` and ``overall_avg_score`` are overwritten by the
    ``compute_overall`` validator after field validation, so whatever the
    caller passes for them is replaced by the derived values.
    """

    inputs: FunctionCallInput = Field(..., description="Echo of the pipeline inputs.")
    static: Optional[StaticResult] = Field(
        None, description="Static schema-validation results, if enabled."
    )
    semantic: SemanticResult = Field(
        ..., description="All semantic metric results by category."
    )
    overall_valid: bool = Field(
        ...,
        description=(
            "True if all semantic metrics passed (is_correct==True) "
            "and, if present, all transformations succeeded."
        ),
    )
    overall_avg_score: Optional[float] = Field(
        None,
        description=(
            "Average of the three category avg_scores "
            "(general, function_selection, parameter) where available."
        ),
    )

    @model_validator(mode="after")
    def compute_overall(self):
        """
        Derive ``overall_valid`` and ``overall_avg_score`` after validation.

        ``overall_valid`` starts from ``static.final_decision`` when a static
        result is present (True otherwise) and is forced to False when:
          • any semantic metric (general, function_selection, or any
            per-parameter category) has ``is_correct`` False, or
          • any transform result has ``correct`` False.

        ``overall_avg_score`` is the mean of the available category
        averages: general, function_selection, and the mean of the
        per-parameter ``avg_score`` values. It is None when no category
        average is available.

        Returns:
            The model instance itself, with both derived fields set.
        """
        # Static validation, when present, supplies the starting verdict.
        ok = self.static.final_decision if self.static else True

        semantic = self.semantic
        if semantic:
            top_level = (semantic.general, semantic.function_selection)
            per_param = list(semantic.parameter.values()) if semantic.parameter else []

            # semantic checks: one pass over every category instead of three
            # copy-pasted loops.
            for cat in (*top_level, *per_param):
                if cat and cat.metrics:
                    if not all(m.is_correct for m in cat.metrics.values()):
                        ok = False

            # transformation checks (if any) use the `correct` flag, not
            # `execution_success`.
            if semantic.transform:
                if not all(tr.correct for tr in semantic.transform.values()):
                    ok = False

            # compute overall_avg_score from category averages
            cat_avgs: List[float] = [
                cat.avg_score
                for cat in top_level
                if cat and cat.avg_score is not None
            ]
            # for parameters, average the per-param avg_scores; guard against
            # a missing category the same way the metric check above does.
            param_avgs = [
                cat.avg_score
                for cat in per_param
                if cat and cat.avg_score is not None
            ]
            if param_avgs:
                cat_avgs.append(sum(param_avgs) / len(param_avgs))

            self.overall_avg_score = sum(cat_avgs) / len(cat_avgs) if cat_avgs else None

        self.overall_valid = ok
        return self
469
+
470
+
471
+ # ----------------------------------------------------------------------
472
+ # 6. API Specification & Call Models
473
+ # ----------------------------------------------------------------------
474
+
475
+
476
# Map primitive spec-types to Python types (optional helper)
# Keys are lower-case type-name strings; several aliases share one Python type
# (e.g. "integer"/"int"/"long" -> int, "dict"/"hashmap"/"object" -> dict).
# NOTE(review): "any" maps to str rather than a catch-all type -- confirm
# this is intended by the code that consumes this table.
SPEC_TYPES: Dict[str, Any] = {
    "any": str,
    "array": list,
    "bigint": int,
    "boolean": bool,
    "byte": int,
    "char": str,
    "dict": dict,
    "double": float,
    "float": float,
    "hashtable": dict,
    "hashmap": dict,
    "integer": int,
    "int": int,
    "list": list,
    "long": int,
    "number": float,
    "null": NoneType,
    "object": dict,
    "string": str,
    "tuple": tuple,
    "uint": int,
    "ulong": int,
    "unsigned": int,
    "void": NoneType,
}
503
+
504
+
505
class FunctionDefinition(BaseModel):
    """
    OpenAI-style function definition: a name, an optional description,
    and a parameters schema.
    """

    # Required function name.
    name: str = Field(
        ...,
        description="Function name as expected by the LLM.",
    )
    # Optional free-text description; defaults to None.
    description: Optional[str] = Field(
        default=None,
        description="Human-readable description of the function.",
    )
    # Required parameters schema, typed here as a plain dict.
    parameters: Dict[str, Any] = Field(
        ...,
        description=(
            "JSON-Schema object describing all parameters; either a dict "
            "or a FunctionParameter model."
        ),
    )
521
+
522
+
523
class ToolSpec(BaseModel):
    """
    Tool specification in the OpenAI function-calling shape:
    a fixed ``type`` of "function" plus the wrapped function definition.
    """

    # Only the literal "function" is accepted; it is also the default.
    type: Literal["function"] = Field(
        default="function",
        description="Must be 'function' for OpenAI function-calling.",
    )
    # Required definition of the function this tool exposes.
    function: FunctionDefinition = Field(
        ...,
        description="Underlying function definition or raw dict.",
    )
535
+
536
+
537
class ToolFunctionCall(BaseModel):
    """
    Parsed representation of an LLM's function call response.

    ``arguments`` holds the raw payload (JSON text, a dict, or None);
    ``parsed_arguments`` is populated from it by the ``_parse_arguments``
    validator once field validation has finished.
    """

    name: str = Field(..., description="Name of the function the LLM chose to call.")
    arguments: str | dict | None = Field(
        ..., description="JSON-encoded string or dictionary of the call's arguments."
    )
    parsed_arguments: Dict[str, Any] = Field(
        default_factory=dict,
        description="Parsed JSON arguments, populated after validation.",
    )

    @model_validator(mode="after")
    def _parse_arguments(self):
        """
        Populate ``parsed_arguments`` from ``arguments``.

        - None or an empty string yields an empty dict.
        - A dict is used as-is.
        - A string is decoded as JSON; a decoded ``null`` is treated like
          missing arguments (empty dict), and any other non-object result is
          rejected so ``parsed_arguments`` is always a dict.

        Raises:
            ValueError: if the string is not valid JSON, does not decode to
                a JSON object, or ``arguments`` has an unsupported type.
                Raising ValueError from a validator is the mechanism pydantic
                documents for signalling failure; pydantic reports it to the
                caller as a ValidationError.
        """
        raw = self.arguments

        if raw is None or raw == "":
            self.parsed_arguments = {}
            return self

        if isinstance(raw, dict):
            self.parsed_arguments = raw
            return self

        if not isinstance(raw, str):
            # Defensive: field validation should already have excluded this.
            raise ValueError(
                "Failed to parse arguments: "
                f"Unsupported type for arguments: {type(raw).__name__}"
            )

        try:
            decoded = json.loads(raw)
        except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
            raise ValueError(f"Failed to parse arguments: {exc}") from exc

        if decoded is None:
            decoded = {}
        if not isinstance(decoded, dict):
            raise ValueError(
                "Failed to parse arguments: expected a JSON object, "
                f"got {type(decoded).__name__}"
            )

        self.parsed_arguments = decoded
        return self
579
+
580
+
581
+ class ToolCall(BaseModel):
582
+ """
583
+ Full OpenAI function call object (for v1 function-calling API).
584
+ """
585
+
586
+ id: Optional[str] = Field(
587
+ None,
588
+ description=("Optional unique identifier for this function call."),
589
+ )
590
+ type: Literal["function"] = Field(
591
+ "function",
592
+ description="Must be 'function' for OpenAI function calls.",
593
+ )
594
+ function: ToolFunctionCall = Field(
595
+ ..., description="Nested function name+arguments object or raw dict."
596
+ )