ibm-watsonx-gov 1.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353) hide show
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cp313-win_amd64.pyd +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cp313-win_amd64.pyd +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cp313-win_amd64.pyd +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cp313-win_amd64.pyd +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cp313-win_amd64.pyd +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cp313-win_amd64.pyd +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cp313-win_amd64.pyd +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cp313-win_amd64.pyd +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +4 -0
@@ -0,0 +1,444 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ import json
11
+ import uuid
12
+ from collections import Counter
13
+ from datetime import datetime
14
+ from typing import Annotated, Any, Self
15
+
16
+ import pandas as pd
17
+ from pydantic import BaseModel, Field, model_validator
18
+
19
+ from ibm_watsonx_gov.entities.base_classes import BaseMetricResult
20
+ from ibm_watsonx_gov.entities.enums import (CategoryClassificationType,
21
+ MetricValueType)
22
+ from ibm_watsonx_gov.entities.metric import Mapping
23
+
24
# Names of the agentic result component levels.
# NOTE(review): presumably the granularities at which agentic results are
# reported; the usages are outside this part of the file — confirm there.
AGENTIC_RESULT_COMPONENTS = ["conversation", "message", "node"]
25
+
26
+
27
class RecordMetricResult(BaseMetricResult):
    """Metric result for a single record.

    Adds the identity of the record (and, optionally, its timestamp) to the
    fields inherited from ``BaseMetricResult``.
    """

    # Required identifier of the record this result belongs to.
    record_id: Annotated[
        str,
        Field(
            examples=["record1"],
            description="The record identifier.",
        ),
    ]
    # Optional timestamp of the record, kept as a string; defaults to None.
    record_timestamp: Annotated[
        str | None,
        Field(
            default=None,
            examples=["2025-01-01T00:00:00.000000Z"],
            description="The record timestamp.",
        ),
    ]
32
+
33
+
34
class ToolMetricResult(RecordMetricResult):
    """Record-level metric result computed for one tool.

    Instances compare and order by the tuple
    ``(record_id, tool_name, execution_count, name, method, value,
    record_timestamp)``.

    Note:
        Ordering relies on plain tuple comparison, so it raises ``TypeError``
        when the first differing position holds values that Python cannot
        order against each other (for example ``None`` versus a string in
        ``record_timestamp``).
    """

    tool_name: Annotated[str, Field(
        title="Tool Name", description="Name of the tool for which this result is computed.")]
    execution_count: Annotated[int, Field(
        title="Execution count", description="The execution count for this tool name.", gt=0, default=1)]

    def _comparison_key(self) -> tuple:
        """Return the tuple of fields used by every comparison operator."""
        return (self.record_id, self.tool_name, self.execution_count,
                self.name, self.method, self.value, self.record_timestamp)

    def __eq__(self, other: Any) -> bool:
        """Return True when ``other`` is a ToolMetricResult with an equal key."""
        if not isinstance(other, ToolMetricResult):
            return False

        return self._comparison_key() == other._comparison_key()

    # The ordering operators return ``NotImplemented`` (instead of raising
    # ``NotImplementedError``) for foreign operands. That is the protocol the
    # Python data model defines: it lets the interpreter try the reflected
    # operation on the other operand and raise ``TypeError`` only if that
    # fails as well.

    def __lt__(self, other: Any) -> bool:
        """Return True when this result's key sorts before ``other``'s."""
        if not isinstance(other, ToolMetricResult):
            return NotImplemented

        return self._comparison_key() < other._comparison_key()

    def __gt__(self, other: Any) -> bool:
        """Return True when this result's key sorts after ``other``'s."""
        if not isinstance(other, ToolMetricResult):
            return NotImplemented

        return self._comparison_key() > other._comparison_key()

    def __le__(self, other: Any) -> bool:
        """Return True when this result's key sorts before or equal to ``other``'s."""
        if not isinstance(other, ToolMetricResult):
            return NotImplemented

        return self._comparison_key() <= other._comparison_key()

    def __ge__(self, other: Any) -> bool:
        """Return True when this result's key sorts after or equal to ``other``'s."""
        if not isinstance(other, ToolMetricResult):
            return NotImplemented

        return self._comparison_key() >= other._comparison_key()
79
+
80
+
81
class AggregateMetricResult(BaseMetricResult):
    """Aggregated view of one metric over a collection of record-level results."""

    min: float | None = None
    max: float | None = None
    mean: float | None = None
    total_records: int
    labels_count: dict | None = None
    record_level_metrics: list[RecordMetricResult] = []

    @staticmethod
    def create(results: list[RecordMetricResult]) -> Self | None:
        """Build an aggregate from record-level results of a single metric.

        Args:
            results: The record-level results to aggregate. Descriptive fields
                (name, display name, method, group, provider, value type and
                thresholds) are copied from the first element.

        Returns:
            ``None`` when ``results`` is empty. Otherwise an
            ``AggregateMetricResult`` whose ``min``/``max``/``mean`` are
            computed over the non-None record values (``value`` equals the
            mean), and whose ``labels_count`` counts the non-None labels.
            Each statistic stays ``None`` when there is nothing to compute
            it from.
        """
        if not results:
            return None

        # Records without a value / label simply do not take part in the
        # corresponding statistic.
        scores = [item.value for item in results if item.value is not None]
        seen_labels = [item.label for item in results if item.label is not None]

        if scores:
            average = sum(scores) / len(scores)
            lowest = min(scores)
            highest = max(scores)
        else:
            average = lowest = highest = None

        label_histogram = dict(Counter(seen_labels)) if seen_labels else None

        # The first record supplies the metric's descriptive metadata.
        template = results[0]
        return AggregateMetricResult(
            name=template.name,
            display_name=template.display_name,
            method=template.method,
            group=template.group,
            provider=template.provider,
            value=average,
            value_type=template.value_type,
            labels_count=label_histogram,
            total_records=len(results),
            record_level_metrics=results,
            min=lowest,
            max=highest,
            mean=average,
            thresholds=template.thresholds,
        )
131
+
132
+
133
class MetricsEvaluationResult(BaseModel):
    """
    Container for the aggregated results of a metrics evaluation.

    Holds one aggregate result per evaluated metric and provides helpers to
    export the results as a json string, a pandas dataframe or a list of
    record level dictionaries.
    """
    metrics_result: list[AggregateMetricResult]

    def to_json(self, indent: int | None = None, **kwargs):
        """
        Transform the metrics evaluation result to a json.
        The kwargs are passed to the model_dump_json method of pydantic model. All the arguments supported by pydantic model_dump_json can be passed.
        Keys holding None or an empty dict are removed from the generated json.

        Args:
            indent (int, optional): The indentation level for the json. Defaults to None.

        Returns:
            string of the result json.
        """
        if kwargs.get("exclude_unset") is None:
            kwargs["exclude_unset"] = True
        # Use a default instead of a hard-coded keyword so that a caller
        # supplied exclude_none does not fail with a duplicate keyword error.
        kwargs.setdefault("exclude_none", True)

        serialized = self.model_dump_json(
            exclude={
                "metrics_result": {
                    "__all__": {
                        "record_level_metrics": {
                            "__all__": {"provider", "name", "method", "group", "display_name"}
                        }
                    }
                }
            },
            **kwargs,
        )
        # remove_empty_dicts works on python containers, not on the json
        # string, so the payload is parsed, cleaned and serialized again.
        cleaned = self.remove_empty_dicts(json.loads(serialized))
        return json.dumps(
            cleaned,
            indent=indent,
            # Compact separators mirror the non-indented pydantic output.
            separators=(",", ":") if indent is None else None,
            ensure_ascii=False,
        )

    def to_df(self, data: pd.DataFrame | None = None, include_additional_info: bool = False) -> pd.DataFrame:
        """
        Transform the metrics evaluation result to a dataframe.

        Args:
            data (pd.DataFrame): the input dataframe, when passed will be concatenated to the metrics result
            include_additional_info (bool): whether to include additional info in the metrics result
        Returns:
            pd.DataFrame: new dataframe of the input and the evaluated metrics
        """
        values_dict: dict[str, list[float | str | bool | None]] = {}
        for result in self.metrics_result:
            metric_key = f"{result.name}.{result.method}" if result.method else result.name
            records = result.record_level_metrics

            values_dict[metric_key] = [
                record_metric.value for record_metric in records]

            if include_additional_info and records:
                # Display full evidence in the df
                pd.set_option('display.max_colwidth', None)
                # The first record decides which additional columns are created.
                additional_info = records[0].additional_info
                if additional_info:
                    for k in additional_info:
                        # Records without the key (or without any additional
                        # info) contribute None instead of raising.
                        values_dict[f"{metric_key}.{k}"] = [
                            (record_metric.additional_info or {}).get(k)
                            for record_metric in records
                        ]
                if records[0].evidences:
                    # One entry per record, so that each row shows its own
                    # evidences rather than the ones of the first record.
                    values_dict[f"{metric_key}.evidences"] = [
                        json.dumps(record_metric.evidences)
                        if record_metric.evidences else None
                        for record_metric in records
                    ]
        if data is None:
            return pd.DataFrame.from_dict(values_dict)
        return pd.concat([data, pd.DataFrame.from_dict(values_dict)], axis=1)

    def remove_empty_dicts(self, d):
        """
        Post-process the response to remove empty dicts.

        Recursively drops dictionary entries whose value is None or an empty
        dict. Lists are traversed so that dictionaries nested inside them are
        cleaned as well; list items themselves are never dropped. Any other
        value is returned unchanged.
        """
        if isinstance(d, dict):
            return {k: self.remove_empty_dicts(v) for k, v in d.items() if v not in ({}, None)}
        if isinstance(d, list):
            return [self.remove_empty_dicts(item) for item in d]
        return d

    def to_dict(self) -> list[dict]:
        """
        Transform the metrics evaluation result to a list of dict containing the record level metrics.
        """
        result = []
        for aggregate_metric_result in self.metrics_result:
            for record_level_metric_result in aggregate_metric_result.record_level_metrics:
                result.append(
                    record_level_metric_result.model_dump(exclude_none=True))
        return self.remove_empty_dicts(result)
215
+
216
+
217
class AgentMetricResult(BaseMetricResult):
    """
    This is the data model for metric results in the agentic app.
    It stores evaluation results for conversations, messages and nodes.
    """
    id: Annotated[str, Field(
        description="The unique identifier for the metric result record. UUID.",
        default_factory=lambda: str(uuid.uuid4()))]

    ts: Annotated[datetime, Field(
        description="The timestamp when the metric was recorded.",
        default_factory=datetime.now)]

    applies_to: Annotated[str, Field(
        description="The type of component the metric result applies to.",
        examples=AGENTIC_RESULT_COMPONENTS
    )]

    message_id: Annotated[str | None, Field(
        description="The ID of the message being evaluated.")]

    message_timestamp: Annotated[datetime | None, Field(
        description="The timestamp of the message being evaluated.", default=None)]

    conversation_id: Annotated[str | None, Field(
        description="The ID of the conversation containing the message.", default=None)]

    node_name: Annotated[str | None, Field(
        description="The name of the node being evaluated.", default=None)]

    execution_count: Annotated[int | None, Field(
        title="Execution count", description="The execution count of the node in a message.", default=None)]

    execution_order: Annotated[int | None, Field(
        title="Execution order", description="The execution order number in the sequence of nodes executed in a message.", default=None)]

    is_violated: Annotated[int | None, Field(
        title="Is Violated", description="Indicates whether the metric threshold is violated or not. For numeric metric, "
        "its set to 1 if the metric value violates the defined threshold lower or upper limit and 0 otherwise. "
        "For categorical metric, its set to 1 if the metric value belongs to unfavourable category and 0 otherwise.", default=None)]

    @model_validator(mode="after")
    def validate_is_violated(self) -> Any:
        """
        Populate is_violated once the model is built.

        The violation check only runs when the result carries a value or a
        label; otherwise is_violated keeps the value it was given.
        """
        if self.value is not None or self.label is not None:
            self.is_violated = self.check_violated_record()
        return self

    def check_violated_record(self) -> int | None:
        """
        Helper to check if a metric value violates any of the defined thresholds.

        Returns:
            int|None: For a numeric metric with thresholds, 1 if the value is below a
            lower limit or above an upper limit and 0 otherwise; None when there is
            no value to compare. For a categorical metric, 1 if the label belongs to
            the unfavourable categories and 0 otherwise. None for any other case
            (unsupported value_type, or numeric metric without thresholds).

        """

        if self.value_type == MetricValueType.NUMERIC.value and self.thresholds:
            # The validator also runs when only the label is set; without a
            # value there is nothing to compare against the thresholds.
            if self.value is None:
                return None
            for threshold in self.thresholds:
                if threshold.type == "lower_limit" and self.value < threshold.value:
                    return 1
                elif threshold.type == "upper_limit" and self.value > threshold.value:
                    return 1
            return 0

        elif self.value_type == MetricValueType.CATEGORICAL.value:
            if self.category_classification:
                unfavourable_categories = self.category_classification.get(
                    CategoryClassificationType.UNFAVOURABLE.value, [])
                if self.label in unfavourable_categories:
                    return 1
            return 0
        else:
            return None
292
+
293
+
294
class AggregateAgentMetricResult(BaseMetricResult):
    """
    Aggregated view of the metric results computed for an agentic app.

    Carries summary statistics for numeric metrics, a frequency distribution
    for categorical metrics, the individual results the aggregation is based
    on and the number of those results that violated their thresholds.
    """
    min: Annotated[
        float | None,
        Field(description="The minimum value of the metric. Applicable for numeric metric types.",
              default=None),
    ]
    max: Annotated[
        float | None,
        Field(description="The maximum value of the metric. Applicable for numeric metric types.",
              default=None),
    ]
    mean: Annotated[
        float | None,
        Field(description="The mean value of the metric. Applicable for numeric metric types.",
              default=None),
    ]
    percentiles: Annotated[
        dict[str, float] | None,
        Field(description="Dictionary of percentile values (25th, 50th, 75th, 90th, 95th, 99th) of the metric. Applicable for numeric metric types.",
              default=None),
    ]
    unique: Annotated[
        int | None,
        Field(description="The distinct count of the string values found. Applicable for categorical metric types.",
              default=None),
    ]
    value: Annotated[
        float | dict[str, int] | None,
        Field(description="The value of the metric. Defaults to mean for numeric metric types. For categorical metric types, this has the frequency distribution of non-null categories.",
              default=None),
    ]
    count: Annotated[
        int | None,
        Field(description="The count for metric results used for aggregation.",
              default=None),
    ]
    node_name: Annotated[
        str | None,
        Field(description="The name of the node being evaluated.",
              default=None),
    ]
    applies_to: Annotated[
        str,
        Field(description="The type of component the metric result applies to.",
              examples=AGENTIC_RESULT_COMPONENTS),
    ]
    individual_results: Annotated[
        list[AgentMetricResult],
        Field(description="The list individual metric results.",
              default=[]),
    ]

    violations_count: Annotated[
        int | None,
        Field(description="The count of records that violated the defined thresholds.",
              default=None),
    ]

    @model_validator(mode="after")
    def validate_violations_count(self) -> Any:
        """
        Derive violations_count from the individual results.

        The count is only computed when at least one individual result has
        is_violated set; otherwise violations_count is left untouched.
        """
        records = self.individual_results
        has_verdict = bool(records) and not all(
            record.is_violated is None for record in records)
        if has_verdict:
            violated = [record for record in records
                        if record.is_violated == 1]
            self.violations_count = len(violated)
        return self
329
+
330
+
331
class MessageData(BaseModel):
    """
    The model class to capture the message input output data for an agent.

    All fields except num_loops are declared without a default, so they must
    be provided explicitly (None is accepted where the type allows it).
    """
    message_id: Annotated[str | None,
                          Field(title="Message ID",
                                description="The ID of the message.")]
    message_timestamp: Annotated[datetime | None,
                                 Field(title="Message timestamp",
                                       description="The timestamp of the message in ISO format. The end timestamp of the message processing is considered as the message timestamp.")]
    conversation_id: Annotated[str | None,
                               Field(title="Conversation ID",
                                     description="The ID of the conversation containing the message.")]
    start_time: Annotated[str | None,
                          Field(title="Start time",
                                description="The message execution start time in ISO format.")]
    end_time: Annotated[str | None,
                        Field(title="End time",
                              description="The message excution end time in ISO format.")]
    input: Annotated[dict | str | None,
                     Field(title="Input",
                           description="The message input data.")]
    # The title was previously "Input", copied from the field above, which
    # mislabelled this field in the generated schema.
    output: Annotated[dict | str | None,
                      Field(title="Output",
                            description="The message output data.")]
    num_loops: Annotated[int,
                         Field(title="Number of Loops",
                               description="The number of loops occurred in the agent while generating the output.",
                               default=0)]
360
+
361
+
362
class NodeData(BaseModel):
    """
    The model class to capture the node input output data of a langgraph agent.

    Fields declared without a default must be provided explicitly (None is
    accepted where the type allows it).
    """
    message_id: Annotated[str | None,
                          Field(title="Message ID",
                                description="The ID of the message.")]
    message_timestamp: Annotated[datetime | None,
                                 Field(title="Message timestamp",
                                       description="The timestamp of the message in ISO format. The end timestamp of the message processing is considered as the message timestamp.")]
    conversation_id: Annotated[str | None,
                               Field(title="Conversation ID",
                                     description="The ID of the conversation containing the message.")]
    node_name: Annotated[str | None,
                         Field(title="Node name",
                               description="The name of the node.")]
    start_time: Annotated[str | None,
                          Field(title="Start time",
                                description="The node execution start time in ISO format.")]
    end_time: Annotated[str | None,
                        Field(title="End time",
                              description="The node execution end time in ISO format.")]
    input: Annotated[dict | str | None,
                     Field(title="Input",
                           description="The node input data.")]
    # The title was previously "Input", copied from the field above, which
    # mislabelled this field in the generated schema.
    output: Annotated[dict | str | None,
                      Field(title="Output",
                            description="The node output data.")]
    execution_order: Annotated[int,
                               Field(title="Execution Order",
                                     description="The execution order of the node in the langgraph.",
                                     default=0)]
    execution_count: Annotated[int,
                               Field(title="Execution Count",
                                     description="The execution count of the node in the langgraph.",
                                     default=0)]
    # default_factory generates a fresh UUID per instance. A plain
    # default=str(uuid.uuid4()) is evaluated once when the class is defined,
    # which made every NodeData share the same "unique" identifier.
    node_txn_id: Annotated[str,
                           Field(title="Node transaction id",
                                 description="Unique identifier of the object.",
                                 default_factory=lambda: str(uuid.uuid4()))
                           ]
    node_txn_timestamp: Annotated[str,
                                  Field(title="Node transaction timestamp",
                                        description="The node transaction timestamp. The end timestamp of the node execution is considered as the node transaction timestamp.")]
406
+
407
+
408
class MetricMapping(BaseModel):
    """
    The metric mapping data

    Describes a single metric (name and optional method), the component it
    applies to and the data mapping used to read the values needed to
    compute it.
    """
    name: Annotated[str,
                    Field(title="Name",
                          description="The name of the metric.")]
    method: Annotated[str | None,
                      Field(title="Method",
                            description="The method used to compute the metric.",
                            default=None)]
    # A single Field call: wrapping it in a second Field() passed the inner
    # FieldInfo as the positional default, so the default was a FieldInfo
    # object instead of "message" and the title/description/examples were lost.
    applies_to: Annotated[str,
                          Field(title="Applies to",
                                description="The tag to indicate for which the metric is applied to. Used for agentic application metric computation.",
                                examples=["message",
                                          "conversation", "sub_agent"],
                                default="message")]
    mapping: Annotated[Mapping | None,
                       Field(title="Mapping",
                             description="The data mapping details for the metric which are used to read the values needed to compute the metric.",
                             default=None)]
429
+
430
+
431
class MetricsMappingData(BaseModel):
    """
    The model class to capture the metrics mappings and the span data.

    The fields below are declared without defaults, so each of them has to be
    supplied when the model is created.
    """
    # Identifier of the message the mappings and span data belong to.
    message_id: Annotated[str,
                          Field(title="Message ID",
                                description="The ID of the message.")]
    # One MetricMapping entry per metric to be computed.
    metric_mappings: Annotated[list[MetricMapping],
                               Field(title="Metric Mapping",
                                     description="The list of metric mappings.")]
    # Span data as a dict; the example shows a nesting of span name ->
    # attribute name -> json path -> value (NOTE(review): confirm the exact
    # schema against the producer of this data).
    data: Annotated[dict,
                    Field(title="Data",
                          description="The span data used for metrics computation.",
                          examples=[{"LangGraph.workflow": {"traceloop.entity.output": {"$.outputs.generated_text": "The response"}}}])]