ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353) hide show
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
@@ -0,0 +1,1074 @@
1
+ # ----------------------------------------------------------------------------------------------------
2
+ # IBM Confidential
3
+ # Licensed Materials - Property of IBM
4
+ # 5737-H76, 5900-A3Q
5
+ # © Copyright IBM Corp. 2025 All Rights Reserved.
6
+ # US Government Users Restricted Rights - Use, duplication or disclosure restricted by
7
+ # GSA ADPSchedule Contract with IBM Corp.
8
+ # ----------------------------------------------------------------------------------------------------
9
+
10
+ import json
11
+ import uuid
12
+ from collections import defaultdict
13
+ from datetime import datetime
14
+ from pathlib import Path
15
+ from typing import Any, Dict, Generator, List, Optional, Tuple
16
+
17
+ from jsonpath_ng import parse as parse_jsonpath
18
+
19
+ from ibm_watsonx_gov.clients.api_client import APIClient
20
+ from ibm_watsonx_gov.config.agentic_ai_configuration import \
21
+ AgenticAIConfiguration
22
+ from ibm_watsonx_gov.entities.agentic_app import (AgenticApp,
23
+ MetricsConfiguration, Node)
24
+ from ibm_watsonx_gov.entities.enums import (EvaluatorFields, MessageStatus,
25
+ MetricGroup)
26
+ from ibm_watsonx_gov.entities.evaluation_result import (AgentMetricResult,
27
+ MessageData,
28
+ MetricMapping,
29
+ MetricsMappingData,
30
+ NodeData)
31
+ from ibm_watsonx_gov.entities.foundation_model import FoundationModelInfo
32
+ from ibm_watsonx_gov.entities.metric import Mapping, MappingItem
33
+ from ibm_watsonx_gov.entities.utils import \
34
+ build_configuration_from_metric_mappings
35
+ from ibm_watsonx_gov.evaluators.impl.evaluate_metrics_impl import \
36
+ _evaluate_metrics_async
37
+ from ibm_watsonx_gov.metrics.utils import (COST_METADATA, ONE_M,
38
+ TARGETED_USAGE_TRACE_NAMES,
39
+ mapping_to_df)
40
+ from ibm_watsonx_gov.traces.span_node import SpanNode
41
+ from ibm_watsonx_gov.traces.span_util import (get_attributes,
42
+ get_span_nodes_from_json)
43
+ from ibm_watsonx_gov.utils.async_util import (gather_with_concurrency,
44
+ run_in_event_loop)
45
+ from ibm_watsonx_gov.utils.python_utils import add_if_unique
46
+
47
# The protobuf Span/Status types are only needed when OTEL trace payloads are
# processed; tolerate the opentelemetry dependency being absent at import time.
try:
    from opentelemetry.proto.trace.v1.trace_pb2 import Span, Status
except ImportError:
    # Was a bare `except:` — only a failed import should be ignored here, not
    # SystemExit/KeyboardInterrupt or unrelated errors raised during import.
    pass
51
+
52
+
53
# Map OTEL protobuf span status codes onto the SDK's MessageStatus enum.
STATUS_MAP = {
    Status.STATUS_CODE_OK: MessageStatus.SUCCESSFUL,
    Status.STATUS_CODE_ERROR: MessageStatus.FAILURE,
    Status.STATUS_CODE_UNSET: MessageStatus.UNKNOWN,
}
58
+
59
+
60
+ class TraceUtils:
61
+
62
@staticmethod
def build_span_trees(spans: list[dict], agentic_app: AgenticApp | None = None) -> List[SpanNode]:
    """Assemble flat span payloads into a forest of ``SpanNode`` trees.

    Each entry in ``spans`` is parsed via ``get_span_nodes_from_json``; the
    resulting nodes are then linked parent-to-child. A node becomes a root
    when it has no parent span id, or when its parent was never exported
    (orphan spans are promoted to roots rather than dropped).
    """
    nodes_by_key: dict[bytes, SpanNode] = {}
    for payload in spans:
        nodes_by_key.update(get_span_nodes_from_json(payload, agentic_app))

    roots: list[SpanNode] = []
    for node in nodes_by_key.values():
        parent_span_id = node.span.parent_span_id
        if not parent_span_id:
            # No parent recorded -> genuine root span.
            roots.append(node)
            continue
        # span_ids can repeat across traces, so parents are looked up with a
        # composite key of trace_id + parent_span_id.
        parent = nodes_by_key.get(node.span.trace_id + parent_span_id)
        if parent is None:
            # Orphan span: parent not found, treat it as a root.
            roots.append(node)
        else:
            parent.add_child(node)

    return roots
86
+
87
+ @staticmethod
88
+ def convert_array_value(array_obj: Dict) -> List:
89
+ """Convert OTEL array value to Python list"""
90
+ return [
91
+ item.get("stringValue")
92
+ or int(item.get("intValue", ""))
93
+ or float(item.get("doubleValue", ""))
94
+ or bool(item.get("boolValue", ""))
95
+ for item in array_obj.get("values", [])
96
+ ]
97
+
98
+ @staticmethod
99
+ def stream_trace_data(file_path: Path) -> Generator:
100
+ """Generator that yields spans one at a time."""
101
+ with open(file_path) as f:
102
+ for line in f:
103
+ try:
104
+ yield json.loads(line)
105
+ except json.JSONDecodeError as e:
106
+ print(f"Failed to parse line: {line}\nError: {e}")
107
+
108
@staticmethod
def __extract_usage_meta_data(attributes: dict) -> dict:
    """Collect token-usage metadata for usage metrics from span attributes.

    Returns an empty dict when the span carries no ``gen_ai.request.model``
    attribute; otherwise returns a dict with ``cost``, ``input_token_count``
    and ``output_token_count`` entries built from the gen_ai usage attributes
    (missing token counts default to 0).
    """
    usage: dict = {}
    model = attributes.get("gen_ai.request.model")
    if not model:
        # Without a model name no usage/cost metrics can be attributed.
        return usage

    prompt_tokens = attributes.get("gen_ai.usage.prompt_tokens", 0)
    completion_tokens = attributes.get("gen_ai.usage.completion_tokens", 0)

    usage["cost"] = {
        "model": model,
        "total_prompt_tokens": prompt_tokens,
        "total_completion_tokens": completion_tokens,
        "total_tokens": attributes.get("llm.usage.total_tokens", 0),
    }
    usage["input_token_count"] = prompt_tokens
    usage["output_token_count"] = completion_tokens
    return usage
132
+
133
@staticmethod
def calculate_cost(usage_data: List[dict]) -> float:
    """Compute the total LLM cost for a list of usage records.

    Each record must provide ``model``, ``total_prompt_tokens`` and
    ``total_completion_tokens``; pricing comes from ``COST_METADATA`` and is
    expressed per one million tokens (``ONE_M``).

    Previously a single model missing from ``COST_METADATA`` made the whole
    call return ``0``, discarding cost already accumulated for priced models.
    Unpriced models are now skipped instead (best effort), in the spirit of
    the originally intended — but softened — "pricing not available" error.
    """
    total_cost = 0.0

    for usage in usage_data:
        model = usage["model"].lower()

        model_pricing = COST_METADATA.get(model)
        if model_pricing is None:
            # No pricing for this model; skip it rather than zeroing the
            # totals computed so far.
            continue

        # Pricing is per 1M tokens.
        input_cost = (usage["total_prompt_tokens"] / ONE_M) * model_pricing["input"]
        output_cost = (usage["total_completion_tokens"] / ONE_M) * model_pricing["output"]
        total_cost += input_cost + output_cost

    return total_cost
157
+
158
@staticmethod
async def compute_metrics_from_trace_async(span_tree: SpanNode, api_client: APIClient = None, **kwargs) -> tuple[list[AgentMetricResult], list[Node], list]:
    """Compute message-level and node-level metric results for a span tree.

    Returns ``(metric_results, nodes, edges)`` where metric_results holds the
    message (interaction) level results followed by the node level results.
    NOTE(review): ``edges`` is currently always returned empty.
    """
    edges: list = []
    results: list = []

    # Interaction (message) level metrics first.
    message_metrics = await TraceUtils.__compute_message_level_metrics(
        span_tree, api_client, **kwargs)
    results.extend(message_metrics)

    # Node level metrics, plus per-run metadata gathered while traversing.
    node_metrics, nodes, run_metadata = await TraceUtils.__compute_node_level_metrics(
        span_tree, api_client, **kwargs)
    results.extend(node_metrics)

    # Attach the foundation models observed for each node, when recorded.
    for graph_node in nodes:
        node_meta = run_metadata.get(graph_node.name)
        if node_meta is not None:
            graph_node.foundation_models = list(node_meta["foundation_models"])

    return results, nodes, edges
177
+
178
+ @staticmethod
179
+ def compute_metrics_from_trace(span_tree: SpanNode, api_client: APIClient = None) -> tuple[
180
+ list[AgentMetricResult], list[Node], list]:
181
+ return run_in_event_loop(
182
+ TraceUtils.compute_metrics_from_trace_async, span_tree, api_client)
183
+
184
+ @staticmethod
185
+ async def __compute_node_level_metrics(span_tree: SpanNode, api_client: APIClient | None, **kwargs):
186
+ metric_results = []
187
+ trace_metadata = defaultdict(list)
188
+ experiment_run_metadata = defaultdict(lambda: defaultdict(set))
189
+ nodes_list = []
190
+ node_stack = list(span_tree.children)
191
+ child_stack = list()
192
+ node_execution_count = {}
193
+ while node_stack or child_stack:
194
+ is_parent = not child_stack
195
+ node = child_stack.pop() if child_stack else node_stack.pop()
196
+ if is_parent:
197
+ parent_span: Span = node.span
198
+ node_name, metrics_config_from_decorators, code_id, events, execution_order = None, [], "", [], None
199
+ data = {}
200
+ # inputs = get_nested_attribute_values(
201
+ # [node], "traceloop.entity.input")
202
+ # outputs = get_nested_attribute_values(
203
+ # [node], "traceloop.entity.output")
204
+ span: Span = node.span
205
+ attributes = get_attributes(span.attributes)
206
+ if is_parent:
207
+ node_name = attributes.get("traceloop.entity.name")
208
+ code_id = attributes.get("gen_ai.runnable.code_id")
209
+ execution_order = int(attributes.get("traceloop.association.properties.langgraph_step")) if attributes.get(
210
+ "traceloop.association.properties.langgraph_step") else None
211
+ for key in ("traceloop.entity.input", "traceloop.entity.output"):
212
+ try:
213
+ attr_value = attributes.get(key)
214
+ content = attr_value if isinstance(
215
+ attr_value, dict) else json.loads(attr_value)
216
+
217
+ inputs_outputs = content.get(
218
+ "inputs" if key.endswith("input") else "outputs")
219
+ if isinstance(inputs_outputs, str):
220
+ inputs_outputs = json.loads(inputs_outputs)
221
+ if data:
222
+ data.update(inputs_outputs)
223
+ else:
224
+ data = inputs_outputs
225
+ except (json.JSONDecodeError, AttributeError) as e:
226
+ raise Exception(
227
+ "Unable to parse json string") from e
228
+
229
+ if attributes.get("wxgov.config.metrics"):
230
+ metrics_config_from_decorators.append(
231
+ json.loads(attributes.get("wxgov.config.metrics")))
232
+
233
+ if span.events:
234
+ events.extend(span.events)
235
+
236
+ if (not node_name) or (node_name == "__start__"):
237
+ continue
238
+
239
+ if span.name in TARGETED_USAGE_TRACE_NAMES:
240
+ # Extract required details to calculate usage metrics from each span
241
+ for k, v in TraceUtils.__extract_usage_meta_data(attributes).items():
242
+ trace_metadata[k].append(v)
243
+
244
+ for k, v in TraceUtils.__get_run_metadata_from_span(attributes).items():
245
+ experiment_run_metadata[node_name][k].add(v)
246
+
247
+ child_stack.extend(node.children)
248
+
249
+ if not child_stack:
250
+ metrics_to_compute, all_metrics_config = TraceUtils.__get_metrics_to_compute(
251
+ span_tree.get_nodes_configuration(), node_name, metrics_config_from_decorators)
252
+
253
+ add_if_unique(Node(name=node_name, func_name=code_id.split(":")[-1] if code_id else node_name, metrics_configurations=all_metrics_config), nodes_list,
254
+ ["name", "func_name"])
255
+
256
+ if node_name in node_execution_count:
257
+ node_execution_count[node_name] += node_execution_count.get(
258
+ node_name)
259
+ else:
260
+ node_execution_count[node_name] = 1
261
+
262
+ coros = []
263
+ for mc in metrics_to_compute:
264
+ coros.append(_evaluate_metrics_async(
265
+ configuration=mc.configuration,
266
+ data=data,
267
+ metrics=mc.metrics,
268
+ metric_groups=mc.metric_groups,
269
+ api_client=api_client,
270
+ **kwargs))
271
+
272
+ results = await gather_with_concurrency(coros, max_concurrency=kwargs.get("max_concurrency", 10))
273
+ for metric_result in results:
274
+ for mr in metric_result.to_dict():
275
+ node_result = {
276
+ "applies_to": "node",
277
+ "message_id": span_tree.get_message_id(),
278
+ "node_name": node_name,
279
+ "conversation_id": span_tree.get_conversation_id(),
280
+ "execution_count": node_execution_count.get(node_name),
281
+ "execution_order": execution_order,
282
+ **mr
283
+ }
284
+ metric_results.append(AgentMetricResult(**node_result))
285
+
286
+ # Add node latency metric result
287
+ metric_results.append(AgentMetricResult(name="latency",
288
+ display_name="Latency",
289
+ value=(int(
290
+ parent_span.end_time_unix_nano) - int(parent_span.start_time_unix_nano))/1e9,
291
+ group=MetricGroup.PERFORMANCE,
292
+ applies_to="node",
293
+ message_id=span_tree.get_message_id(),
294
+ conversation_id=span_tree.get_conversation_id(),
295
+ node_name=node_name,
296
+ execution_count=node_execution_count.get(
297
+ node_name),
298
+ execution_order=execution_order))
299
+
300
+ # Get the node level metrics computed online during graph invocation from events
301
+ metric_results.extend(TraceUtils.__get_metrics_results_from_events(
302
+ events=events,
303
+ message_id=span_tree.get_message_id(),
304
+ conversation_id=span_tree.get_conversation_id(),
305
+ node_name=node_name,
306
+ execution_count=node_execution_count.get(node_name),
307
+ execution_order=execution_order))
308
+
309
+ metric_results.extend(
310
+ TraceUtils.__compute_usage_metrics_from_trace_metadata(trace_metadata, span_tree.get_message_id(), span_tree.get_conversation_id()))
311
+
312
+ return metric_results, nodes_list, experiment_run_metadata
313
+
314
+ @staticmethod
315
+ async def __compute_message_level_metrics(span_tree: SpanNode, api_client: APIClient | None, **kwargs) -> list[AgentMetricResult]:
316
+ metric_results = []
317
+ span = span_tree.span
318
+ metric_results.append(AgentMetricResult(name="duration",
319
+ display_name="Duration",
320
+ value=(int(
321
+ span.end_time_unix_nano) - int(span.start_time_unix_nano))/1000000000,
322
+ group=MetricGroup.PERFORMANCE,
323
+ applies_to="message",
324
+ message_id=span_tree.get_message_id(),
325
+ conversation_id=span_tree.get_conversation_id()))
326
+
327
+ if not span_tree.agentic_app:
328
+ return metric_results
329
+
330
+ data = TraceUtils.__get_data_from_default_mapping(span_tree)
331
+
332
+ metric_result = await _evaluate_metrics_async(configuration=span_tree.agentic_app.metrics_configuration.configuration,
333
+ data=data,
334
+ metrics=span_tree.agentic_app.metrics_configuration.metrics,
335
+ metric_groups=span_tree.agentic_app.metrics_configuration.metric_groups,
336
+ api_client=api_client,
337
+ **kwargs)
338
+ metric_result = metric_result.to_dict()
339
+ for mr in metric_result:
340
+ node_result = {
341
+ "applies_to": "message",
342
+ "message_id": span_tree.get_message_id(),
343
+ "conversation_id": span_tree.get_conversation_id(),
344
+ **mr
345
+ }
346
+
347
+ metric_results.append(AgentMetricResult(**node_result))
348
+
349
+ return metric_results
350
+
351
+ @staticmethod
352
+ def __get_data_from_default_mapping(span_tree: SpanNode) -> Dict[str, Any]:
353
+ data = {}
354
+
355
+ span = span_tree.span
356
+ attrs = get_attributes(
357
+ span.attributes, ["traceloop.entity.input", "traceloop.entity.output"])
358
+ inputs = attrs.get("traceloop.entity.input", "{}")
359
+ if isinstance(inputs, str):
360
+ inputs = json.loads(inputs).get("inputs", {})
361
+ elif isinstance(inputs, dict):
362
+ inputs = inputs.get("inputs", {})
363
+
364
+ if "messages" in inputs:
365
+ for message in reversed(inputs["messages"]):
366
+ if "kwargs" in message and "type" in message["kwargs"] and message["kwargs"]["type"].upper() == "HUMAN":
367
+ data["input_text"] = message["kwargs"]["content"]
368
+ break
369
+ else:
370
+ data.update(inputs)
371
+
372
+ outputs = attrs.get("traceloop.entity.output", "{}")
373
+ if isinstance(outputs, str):
374
+ outputs = json.loads(outputs).get("outputs", {})
375
+ elif isinstance(outputs, dict):
376
+ outputs = outputs.get("outputs", {})
377
+
378
+ if "messages" in outputs:
379
+ # The messages is a list depicting the history of messages with the agent.
380
+ # It need NOT be the whole list of messages in the conversation though.
381
+ # We will traverse the list from the end to find the human input of the message,
382
+ # and the AI output.
383
+
384
+ # If there was no input_text so far, find first human message
385
+ if "input_text" not in data:
386
+ for message in reversed(outputs["messages"]):
387
+ if "kwargs" in message and "type" in message["kwargs"] and message["kwargs"]["type"].upper() == "HUMAN":
388
+ data["input_text"] = message["kwargs"]["content"]
389
+ break
390
+
391
+ # Find last AI message
392
+ for message in reversed(outputs["messages"]):
393
+ if "kwargs" in message and "type" in message["kwargs"] and message["kwargs"]["type"].upper() == "AI":
394
+ data["generated_text"] = message["kwargs"]["content"]
395
+ break
396
+ else:
397
+ data.update(outputs)
398
+
399
+ mapping = EvaluatorFields.get_default_fields_mapping()
400
+ data.update(
401
+ {mapping[EvaluatorFields.STATUS_FIELD]: span_tree.get_message_status()})
402
+
403
+ return data
404
+
405
+ @staticmethod
406
+ def __get_metrics_to_compute(nodes_config, node_name, metrics_configurations):
407
+ metrics_to_compute, all_metrics_config = [], []
408
+
409
+ if nodes_config.get(node_name):
410
+ metrics_config = nodes_config.get(node_name)
411
+ for mc in metrics_config:
412
+ mc_obj = MetricsConfiguration(configuration=mc.configuration,
413
+ metrics=mc.metrics,
414
+ metric_groups=mc.metric_groups)
415
+ metrics_to_compute.append(mc_obj)
416
+ all_metrics_config.append(mc_obj)
417
+
418
+ for mc in metrics_configurations:
419
+ mc_obj = MetricsConfiguration.model_validate(
420
+ mc.get("metrics_configuration"))
421
+
422
+ all_metrics_config.append(mc_obj)
423
+ if mc.get("compute_real_time") == "false":
424
+ metrics_to_compute.append(mc_obj)
425
+
426
+ return metrics_to_compute, all_metrics_config
427
+
428
+ @staticmethod
429
+ def __get_metrics_results_from_events(events, message_id, conversation_id, node_name, execution_count, execution_order):
430
+ results = []
431
+ if not events:
432
+ return results
433
+
434
+ for event in events:
435
+ for attr in event.attributes:
436
+ if attr.key == "attr_wxgov.result.metric":
437
+ val = attr.value.string_value
438
+ if val:
439
+ mr = json.loads(val)
440
+ mr.update({
441
+ "node_name": node_name,
442
+ "message_id": message_id,
443
+ "conversation_id": conversation_id,
444
+ "execution_count": execution_count,
445
+ "execution_order": execution_order
446
+ })
447
+ results.append(AgentMetricResult(**mr))
448
+
449
+ return results
450
+
451
+ @staticmethod
452
+ def __compute_usage_metrics_from_trace_metadata(trace_metadata: dict, message_id: str, conversation_id: str) -> list:
453
+ metrics_result = []
454
+
455
+ for metric, data in trace_metadata.items():
456
+ if metric == "cost":
457
+ metric_value = TraceUtils.calculate_cost(data)
458
+ elif metric == "input_token_count":
459
+ metric_value = sum(data)
460
+ elif metric == "output_token_count":
461
+ metric_value = sum(data)
462
+ else:
463
+ continue
464
+ agent_mr = {
465
+ "name": metric,
466
+ "value": metric_value,
467
+ "display_name": metric,
468
+ "message_id": message_id,
469
+ "applies_to": "message",
470
+ "conversation_id": conversation_id,
471
+ "group": MetricGroup.USAGE.value
472
+ }
473
+
474
+ metrics_result.append(AgentMetricResult(**agent_mr))
475
+
476
+ return metrics_result
477
+
478
+ @staticmethod
479
+ def __get_run_metadata_from_span(attributes: dict) -> dict:
480
+ """
481
+ Extract run specific metadata from traces
482
+ 1. Foundation model involved in run
483
+ 2. Tools involved in run
484
+ """
485
+ metadata = {}
486
+ provider = attributes.get(
487
+ "traceloop.association.properties.ls_provider", attributes.get("gen_ai.system"))
488
+ llm_type = attributes.get("llm.request.type")
489
+ model_name = attributes.get("gen_ai.request.model")
490
+
491
+ if model_name:
492
+ metadata["foundation_models"] = FoundationModelInfo(
493
+ model_name=model_name, provider=provider, type=llm_type
494
+ )
495
+
496
+ return metadata
497
+
498
+ @staticmethod
499
+ async def __process_span_and_extract_data(span_tree: SpanNode,
500
+ metric_mappings: List[MetricMapping],
501
+ target_component_mapping: List[MappingItem],
502
+ message_io_mapping: Optional[Mapping],
503
+ **kwargs) -> Tuple[MessageData, Dict[str, List[NodeData]], MetricsMappingData, Dict[str, Node], Dict]:
504
+ """
505
+ Extract and process span tree data to generate metrics, node information, and mapping data.
506
+
507
+ This method traverses a span tree extracting:
508
+ - Node information and I/O data
509
+ - Experiment run metadata
510
+ - Metric mapping data
511
+ - Application I/O data
512
+ """
513
+ root_span = span_tree.span
514
+ conversation_id = str(span_tree.get_conversation_id())
515
+ message_id = str(span_tree.get_message_id())
516
+
517
+ app_io_start_time = TraceUtils._timestamp_to_iso(
518
+ root_span.start_time_unix_nano)
519
+ app_io_end_time = TraceUtils._timestamp_to_iso(
520
+ root_span.end_time_unix_nano)
521
+
522
+ app_io_data = TraceUtils._extract_app_io_from_attributes(
523
+ root_span.attributes, message_io_mapping)
524
+
525
+ # Initialize data structures
526
+ experiment_run_metadata = defaultdict(lambda: defaultdict(set))
527
+ nodes_list = []
528
+ node_execution_count = {}
529
+ nodes_data: Dict[str, List[NodeData]] = {}
530
+
531
+ # Build quick index for span name to mapping items lookup
532
+ span_mapping_items = defaultdict(list)
533
+ metrics_with_mapping = dict()
534
+ for metric_mapping in metric_mappings:
535
+ metrics_with_mapping[metric_mapping.name] = False
536
+ if metric_mapping.mapping:
537
+ metrics_with_mapping[metric_mapping.name] = True
538
+ for mapping_item in metric_mapping.mapping.items:
539
+ if mapping_item.span_name and (mapping_item not in span_mapping_items[mapping_item.span_name]):
540
+ span_mapping_items[mapping_item.span_name].append(
541
+ mapping_item)
542
+
543
+ for mapping_item in target_component_mapping:
544
+ if mapping_item.span_name:
545
+ span_mapping_items[mapping_item.span_name].append(
546
+ mapping_item)
547
+
548
+ metric_map_data = defaultdict(
549
+ lambda: defaultdict(lambda: defaultdict(list)))
550
+
551
+ # Process span tree using iterative DFS
552
+ TraceUtils._process_span_tree(
553
+ span_tree=span_tree,
554
+ root_span=root_span,
555
+ conversation_id=conversation_id,
556
+ message_id=message_id,
557
+ app_io_data=app_io_data,
558
+ span_mapping_items=span_mapping_items,
559
+ experiment_run_metadata=experiment_run_metadata,
560
+ nodes_list=nodes_list,
561
+ node_execution_count=node_execution_count,
562
+ nodes_data=nodes_data,
563
+ metric_map_data=metric_map_data,
564
+ metrics_with_mapping=metrics_with_mapping,
565
+ )
566
+
567
+ # Prepare message data
568
+ messages_data = MessageData(
569
+ message_id=message_id,
570
+ message_timestamp=app_io_end_time,
571
+ conversation_id=conversation_id,
572
+ start_time=app_io_start_time,
573
+ end_time=app_io_end_time,
574
+ input=TraceUtils._string_to_bytes(app_io_data["input"]),
575
+ output=TraceUtils._string_to_bytes(app_io_data["output"]),
576
+ num_loops=sum(node_execution_count.values()) -
577
+ len(node_execution_count)
578
+ )
579
+
580
+ metric_mapping_data = MetricsMappingData(
581
+ message_id=message_id,
582
+ metric_mappings=metric_mappings,
583
+ data=metric_map_data
584
+ )
585
+
586
+ return (
587
+ messages_data,
588
+ nodes_data,
589
+ metric_mapping_data,
590
+ nodes_list,
591
+ experiment_run_metadata,
592
+ )
593
+
594
+ @staticmethod
595
+ def _timestamp_to_iso(timestamp_ns: int) -> str:
596
+ """Convert nanosecond timestamp to ISO format string."""
597
+ return datetime.fromtimestamp(timestamp_ns / 1e9).isoformat()
598
+
599
+ @staticmethod
600
+ def _iso_to_timestamp(iso_str: str) -> int:
601
+ """Convert ISO format string to nanosecond timestamp."""
602
+ dt = datetime.fromisoformat(iso_str)
603
+ return int(dt.timestamp() * 1e9)
604
+
605
+ @staticmethod
606
+ def _extract_app_io_from_attributes(attributes: List, message_io_mapping: Optional[Mapping]) -> Tuple[Optional[str], Optional[str]]:
607
+ """
608
+ Extract application input and output from span attributes.
609
+ """
610
+ app_input = None
611
+ app_output = None
612
+ input_key = "traceloop.entity.input"
613
+ output_key = "traceloop.entity.output"
614
+ input_json_path, output_json_path = None, None
615
+
616
+ # If message_io_mapping is provided, use it to extract the input and output from the attributes
617
+ if message_io_mapping is not None:
618
+ for item in message_io_mapping.items:
619
+ if item.type_ == "input":
620
+ input_key = item.attribute_name if item.attribute_name else input_key
621
+ input_json_path = item.json_path
622
+ elif item.type_ == "output":
623
+ output_key = item.attribute_name if item.attribute_name else output_key
624
+ output_json_path = item.json_path
625
+
626
+ for attribute in attributes:
627
+ att_key = attribute.key
628
+ att_val = attribute.value.string_value
629
+
630
+ if att_key == input_key:
631
+ if input_json_path:
632
+ app_input = TraceUtils._extract_with_jsonpath(
633
+ json.loads(att_val), input_json_path)
634
+ else:
635
+ app_input = TraceUtils._safe_json_dumps(att_val)
636
+ elif att_key == output_key:
637
+ if output_json_path:
638
+ app_output = TraceUtils._extract_with_jsonpath(
639
+ json.loads(att_val), output_json_path)
640
+ else:
641
+ app_output = TraceUtils._safe_json_dumps(att_val)
642
+
643
+ return {"input": app_input, "output": app_output}
644
+
645
+ @staticmethod
646
+ def _safe_json_dumps(value: str) -> str:
647
+ """
648
+ Safely JSON dump a string value only if it's not already JSON-formatted.
649
+ """
650
+ if value and '\\"' not in value:
651
+ try:
652
+ return json.dumps(value)
653
+ except (TypeError, ValueError):
654
+ return value
655
+ return value
656
+
657
+ @staticmethod
658
+ def _string_to_bytes(text: Optional[str]) -> Optional[bytes]:
659
+ """Convert string to bytes if not None."""
660
+ return bytes(text, "utf-8") if text is not None else None
661
+
662
    @staticmethod
    def _process_span_tree(span_tree: SpanNode, root_span: Span, conversation_id: str, message_id: str,
                           app_io_data: Dict, span_mapping_items: defaultdict[str, list[MappingItem]], experiment_run_metadata: defaultdict[str, defaultdict[str, set]],
                           nodes_list: List[Node], node_execution_count: Dict[str, int], nodes_data: Dict[str, List[NodeData]], metric_map_data: defaultdict,
                           metrics_with_mapping: dict) -> None:
        """
        Process the span tree using iterative depth-first search in correct order.

        Mutates the passed-in accumulators (nodes_list, nodes_data,
        node_execution_count, metric_map_data, experiment_run_metadata,
        app_io_data) in place; nothing is returned.
        """
        current_parent_context = TraceUtils._initialize_parent_context(
            span_tree)
        root_span_status = root_span.status.code

        # Process root span attributes for message I/O data
        TraceUtils._process_span_attributes(
            current_span=root_span,
            is_parent=True,
            parent_context=current_parent_context,
            span_mapping_items=span_mapping_items,
            metric_map_data=metric_map_data,
            experiment_run_metadata=experiment_run_metadata,
            metrics_with_mapping=metrics_with_mapping,
        )

        # Reverse the initial children to process in correct order
        node_stack: List[SpanNode] = list(reversed(span_tree.children))
        child_stack: List[SpanNode] = []
        while node_stack or child_stack:
            # An empty child stack means the next popped span starts a new
            # node (parent) rather than being one of its descendants.
            is_parent = not child_stack
            node = child_stack.pop() if child_stack else node_stack.pop()
            current_span = node.span

            if not current_span.name:
                # No data to extract from current span
                continue
            if is_parent:
                # Fresh per-node context; child spans below reuse it.
                current_parent_context = TraceUtils._initialize_parent_context(
                    node)

            # Process span attributes for node I/O data and metric mappings
            TraceUtils._process_span_attributes(
                current_span=current_span,
                is_parent=is_parent,
                parent_context=current_parent_context,
                span_mapping_items=span_mapping_items,
                metric_map_data=metric_map_data,
                experiment_run_metadata=experiment_run_metadata,
                metrics_with_mapping=metrics_with_mapping,
            )

            if current_parent_context.get("name") == "__start__":
                if app_io_data["input"] is None:
                    # Reading the application input from `__start__` node
                    app_io_data["input"] = current_parent_context["input"]
                # No data to extract from current span
                continue

            # Add children to stack for processing
            child_stack.extend(node.children)

            # All node span process completed when all children are processed
            if not child_stack:
                TraceUtils._finalize_node_processing(
                    parent_context=current_parent_context,
                    conversation_id=conversation_id,
                    message_id=message_id,
                    node_execution_count=node_execution_count,
                    nodes_list=nodes_list,
                    nodes_data=nodes_data,
                )

        # If status is extracted from default paths
        if metrics_with_mapping.get("status") is False:
            # Once process all child spans, finalize the message status:
            # keep a FAILURE recorded during traversal, otherwise fall back
            # to the root span's mapped status code.
            metric_map_data["status"] = metric_map_data["status"] if metric_map_data["status"] else STATUS_MAP[root_span_status]
736
+
737
+ @staticmethod
738
+ def _initialize_parent_context(node: SpanNode) -> Dict:
739
+ """
740
+ Initialize context for a parent node.
741
+ """
742
+ parent_span = node.span
743
+ return {
744
+ "span": parent_span,
745
+ "txn_id": str(uuid.uuid4()),
746
+ "execution_order": None,
747
+ "name": None,
748
+ "input": None,
749
+ "output": None,
750
+ "metrics_config": [],
751
+ "code_id": "",
752
+ "start_time": TraceUtils._timestamp_to_iso(parent_span.start_time_unix_nano),
753
+ "end_time": TraceUtils._timestamp_to_iso(parent_span.end_time_unix_nano)
754
+ }
755
+
756
    @staticmethod
    def _process_span_attributes(current_span: Span, is_parent: bool, parent_context: Dict, span_mapping_items: defaultdict[str, list[MappingItem]], metric_map_data: defaultdict,
                                 experiment_run_metadata: defaultdict, metrics_with_mapping: dict
                                 ) -> None:
        """
        Process attributes of the current span for I/O data and metric mappings.

        Runs for every span (parents and children) because foundation
        model details must be harvested from all of them; parent-only
        extraction (name, I/O, timing) happens under `is_parent`.
        Mutates parent_context, metric_map_data and
        experiment_run_metadata in place.
        """
        has_metric_mapping = current_span.name in span_mapping_items
        attributes = get_attributes(current_span.attributes)

        if is_parent:
            TraceUtils._process_parent_attribute(
                attributes, parent_context)
            # Extract required details to calculate duration metrics from each parent span
            if any(
                metrics_with_mapping.get(metric) is False
                for metric in ("duration", "latency")
            ):
                # Process only non `__start__` span
                if "__start__" not in current_span.name:
                    # Initialize span start end time
                    if current_span.name not in metric_map_data:
                        metric_map_data[current_span.name]["start_time"] = []
                        metric_map_data[current_span.name]["end_time"] = []

                    metric_map_data[current_span.name]["start_time"].append(TraceUtils._iso_to_timestamp(
                        parent_context["start_time"]))
                    metric_map_data[current_span.name]["end_time"].append(TraceUtils._iso_to_timestamp(
                        parent_context["end_time"]))

        if has_metric_mapping:
            TraceUtils._process_metric_mapping(
                current_span.name, attributes,
                span_mapping_items[current_span.name],
                metric_map_data,
            )

        # Extract required details to calculate usage and duration metrics from each span, in case mapping is not provided in metric configuration
        if current_span.name in TARGETED_USAGE_TRACE_NAMES:
            # NOTE(review): ["cost"] raises KeyError when the span lacks
            # "gen_ai.request.model" (extractor then returns {}) — confirm
            # targeted usage spans always carry a model name.
            cost_meta_data = TraceUtils.__extract_usage_meta_data(
                attributes)["cost"]
            # Aggregate total input and output token
            model_key = cost_meta_data["model"]
            inner_map = metric_map_data.get(current_span.name)
            if inner_map and model_key in inner_map:
                # Fold previously recorded token totals for this model
                # into the current span's numbers.
                prev_cost_meta_data = metric_map_data[current_span.name][model_key]["model_usage_details"]
                cost_meta_data["total_prompt_tokens"] += prev_cost_meta_data.get(
                    "total_prompt_tokens", 0)
                cost_meta_data["total_completion_tokens"] += prev_cost_meta_data.get(
                    "total_completion_tokens", 0)

            # Cost
            if metrics_with_mapping.get("cost") is False:
                metric_map_data[current_span.name][model_key]["model_usage_details"] = cost_meta_data
            # Token count
            if metrics_with_mapping.get("input_token_count") is False:
                metric_map_data[current_span.name][model_key]["prompt_tokens_count"] = cost_meta_data["total_prompt_tokens"]
            if metrics_with_mapping.get("output_token_count") is False:
                metric_map_data[current_span.name][model_key]["completion_tokens_count"] = cost_meta_data["total_completion_tokens"]

        # Extract FM details to store it node details
        for k, v in TraceUtils.__get_run_metadata_from_span(attributes).items():
            experiment_run_metadata[parent_context.get("name")][k].add(v)

        # Extract failed status if any
        if metrics_with_mapping.get("status") is False:
            if current_span.status.code == Status.STATUS_CODE_ERROR:
                metric_map_data["status"] = MessageStatus.FAILURE
825
+
826
+ @staticmethod
827
+ def _process_parent_attribute(attributes: dict, parent_context: Dict) -> None:
828
+ """
829
+ Process an attribute for a parent node.
830
+ """
831
+ parent_context["name"] = attributes.get("traceloop.entity.name")
832
+ parent_context["code_id"] = attributes.get("gen_ai.runnable.code_id")
833
+ parent_context["execution_order"] = int(attributes.get("traceloop.association.properties.langgraph_step")) if attributes.get(
834
+ "traceloop.association.properties.langgraph_step") else None
835
+ parent_context["input"] = TraceUtils._safe_json_dumps(
836
+ attributes.get("traceloop.entity.input"))
837
+ parent_context["output"] = TraceUtils._safe_json_dumps(
838
+ attributes.get("traceloop.entity.output"))
839
+
840
+ @staticmethod
841
+ def _process_metric_mapping(span_name: str, attribute: dict, mapping_items: List[MappingItem], metric_map_data: defaultdict
842
+ ) -> None:
843
+ """
844
+ Process metric mapping for a span attribute.
845
+ """
846
+ for mapping_item in mapping_items:
847
+ try:
848
+ content = attribute.get(mapping_item.attribute_name)
849
+ content = TraceUtils._parse_nested_json_fields(content)
850
+ if mapping_item.json_path:
851
+ extracted_value = TraceUtils._extract_with_jsonpath(
852
+ content, mapping_item.json_path)
853
+ else:
854
+ extracted_value = content
855
+ except (json.JSONDecodeError, AttributeError):
856
+ # Fallback to string value if JSON parsing fails
857
+ extracted_value = attribute.get(mapping_item.attribute_name)
858
+
859
+ if mapping_item.type_ == "target_component":
860
+ metric_map_data[span_name][mapping_item.attribute_name][mapping_item.json_path] = extracted_value
861
+ else:
862
+ metric_map_data[span_name][mapping_item.attribute_name][mapping_item.json_path].append(
863
+ extracted_value)
864
+
865
+ @staticmethod
866
+ def _parse_nested_json_fields(content) -> Dict:
867
+ """
868
+ Recursively parse a value that might be a JSON string.
869
+ """
870
+ if isinstance(content, str):
871
+ try:
872
+ # Try to parse as JSON
873
+ parsed = json.loads(content)
874
+ # Recursively parse the result in case it contains more JSON strings
875
+ return TraceUtils._parse_nested_json_fields(parsed)
876
+ except (json.JSONDecodeError, ValueError):
877
+ # Not a JSON string, return as-is
878
+ return content
879
+ elif isinstance(content, dict):
880
+ # Recursively parse all values in the dictionary
881
+ return {k: TraceUtils._parse_nested_json_fields(v) for k, v in content.items()}
882
+ elif isinstance(content, list):
883
+ # Recursively parse all items in the list
884
+ return [TraceUtils._parse_nested_json_fields(item) for item in content]
885
+ else:
886
+ # Return other types as-is (int, float, bool, None, etc.)
887
+ return content
888
+
889
+ @staticmethod
890
+ def _extract_with_jsonpath(content: Dict, json_path: str) -> Any:
891
+ """
892
+ Extract value from content using JSONPath expression.
893
+ """
894
+ try:
895
+ jsonpath_expr = parse_jsonpath(json_path)
896
+ matches = [match.value for match in jsonpath_expr.find(content)]
897
+
898
+ if matches:
899
+ return matches[0] if len(matches) == 1 else matches
900
+ return None
901
+ except Exception:
902
+ return None
903
+
904
+ @staticmethod
905
+ def _finalize_node_processing(parent_context: Dict, conversation_id: str, message_id: str, node_execution_count: Dict[str, int],
906
+ nodes_list: List[Node], nodes_data: Dict[str, List[NodeData]]) -> None:
907
+ """
908
+ Finalize processing for a completed node.
909
+ """
910
+ node_name = parent_context["name"]
911
+
912
+ # Update execution count
913
+ node_execution_count[node_name] = node_execution_count.get(
914
+ node_name, 0) + 1
915
+
916
+ # Add unique node to nodes list
917
+ func_name = parent_context["code_id"].split(
918
+ ":")[-1] if parent_context["code_id"] else node_name
919
+ add_if_unique(
920
+ Node(
921
+ name=node_name,
922
+ func_name=func_name,
923
+ ),
924
+ nodes_list,
925
+ ["name", "func_name"]
926
+ )
927
+
928
+ # Add node I/O data
929
+ if node_name not in nodes_data:
930
+ nodes_data[node_name] = []
931
+
932
+ nodes_data[node_name].append(NodeData(
933
+ message_id=message_id,
934
+ message_timestamp=parent_context["end_time"],
935
+ conversation_id=conversation_id,
936
+ node_name=node_name,
937
+ start_time=parent_context["start_time"],
938
+ end_time=parent_context["end_time"],
939
+ input=TraceUtils._string_to_bytes(parent_context["input"]),
940
+ output=TraceUtils._string_to_bytes(parent_context["output"]),
941
+ execution_order=parent_context["execution_order"],
942
+ execution_count=node_execution_count[node_name],
943
+ node_txn_id=parent_context["txn_id"],
944
+ node_txn_timestamp=parent_context["end_time"]
945
+ ))
946
+
947
    @staticmethod
    async def __compute_metrics_from_maps(metrics_configuration: MetricsConfiguration,
                                          mapping_data: Dict,
                                          api_client: APIClient,
                                          message_id: str,
                                          conversation_id: str,
                                          message_timestamp: str,
                                          nodes_data: Dict[str, List[NodeData]],
                                          **kwargs) -> List[AgentMetricResult]:
        """
        Process all configured metrics by:
        1. Extracting required data from mapping data
        2. Computing metrics asynchronously

        Coroutines are scheduled in order and `execution_map` keys
        (1-based, parallel to the gathered results) carry the per-metric
        context (applies_to, target node, execution count/order) used when
        assembling the AgentMetricResult objects.
        """
        metric_results = []
        coros = []
        execution_map = defaultdict(lambda: defaultdict())
        metric_count = 0
        # Message-level frame built once; node-level frames are rebuilt
        # per execution inside the loop below.
        msg_data = mapping_to_df(mapping_data)
        for metric in metrics_configuration.metrics:
            target_component = None
            if metric.target_component:
                if metric.target_component.type == "mapping":
                    # Resolve the target component from the extracted
                    # mapping data (span -> attribute -> json path).
                    target_component = mapping_data[metric.target_component.value.span_name][
                        metric.target_component.value.attribute_name][metric.target_component.value.json_path]
                else:
                    target_component = metric.target_component.value
            configuration = AgenticAIConfiguration(
                **build_configuration_from_metric_mappings(metric, target_component))
            if metric.applies_to == "message":
                coros.append(_evaluate_metrics_async(
                    configuration=configuration,
                    data=msg_data,
                    metrics=[metric],
                    api_client=api_client,
                    **kwargs))
                metric_count += 1
                execution_map[metric_count]["applies_to"] = metric.applies_to
            else:  # Node level
                node_data_list = nodes_data.get(target_component)
                if node_data_list is None:
                    # Skip this metric if the target component doesn't exist in nodes_data
                    continue
                for i in range(len(node_data_list)):
                    coros.append(_evaluate_metrics_async(
                        configuration=configuration,
                        # Extract data specific to execution order <i>
                        data=mapping_to_df(mapping_data, i),
                        metrics=[metric],
                        api_client=api_client,
                        **kwargs))
                    metric_count += 1
                    execution_map[metric_count]["target_component"] = target_component
                    execution_map[metric_count]["applies_to"] = metric.applies_to
                    execution_map[metric_count]["execution_count"] = node_data_list[i].execution_count
                    execution_map[metric_count]["execution_order"] = node_data_list[i].execution_order

        results = await gather_with_concurrency(coros, max_concurrency=kwargs.get("max_concurrency", 10))
        # Results arrive in scheduling order; i matches execution_map keys.
        for i, result in enumerate(results, start=1):
            for mr in result.to_dict():
                result = {
                    "applies_to": execution_map[i].get("applies_to"),
                    "message_id": message_id,
                    "conversation_id": conversation_id,
                    "message_timestamp": message_timestamp,
                    **mr
                }
                if execution_map[i].get("target_component"):
                    # Node-level result: attach node identity and ordering.
                    result.update({
                        "node_name": execution_map[i].get("target_component"),
                        "execution_count": execution_map[i].get("execution_count"),
                        "execution_order": execution_map[i].get("execution_order"),
                    })
                metric_results.append(AgentMetricResult(**result))

        return metric_results
1023
+
1024
+ @staticmethod
1025
+ async def compute_metrics_from_trace_async_v2(span_tree: SpanNode,
1026
+ metrics_configuration: MetricsConfiguration,
1027
+ message_io_mapping: Mapping | None = None,
1028
+ api_client: APIClient | None = None,
1029
+ **kwargs
1030
+ ) -> Tuple[List[AgentMetricResult], MessageData, List[NodeData], MetricsMappingData, List[Node]]:
1031
+ """
1032
+ Process span tree data to compute comprehensive metrics and extract execution artifacts.
1033
+
1034
+ This method orchestrates the end-to-end metrics computation pipeline by:
1035
+ 1. Extracting and processing raw data from span traces
1036
+ 2. Computing metrics from the extracted trace data
1037
+ 3. Calculating additional metrics based on mapping configurations
1038
+ """
1039
+
1040
+ # Assuming both the message and node level mappings are available in `agentic_app.metrics_configuration`
1041
+ metric_mappings = []
1042
+ target_component_mapping = []
1043
+ for m in metrics_configuration.metrics:
1044
+ metric_mappings.append(MetricMapping(
1045
+ name=m.name, method=m.method, applies_to=m.applies_to, mapping=m.mapping))
1046
+ if m.target_component and m.target_component.type == "mapping":
1047
+ target_component_mapping.append(m.target_component.value)
1048
+
1049
+ # Extract and process core data components from span tree
1050
+ (
1051
+ message_data, nodes_data, metric_mapping_data,
1052
+ nodes, experiment_run_metadata) = await TraceUtils.__process_span_and_extract_data(span_tree,
1053
+ metric_mappings,
1054
+ target_component_mapping,
1055
+ message_io_mapping,
1056
+ **kwargs)
1057
+
1058
+ # Compute metrics using mapping configurations
1059
+ metric_results = await TraceUtils.__compute_metrics_from_maps(metrics_configuration=metrics_configuration,
1060
+ mapping_data=metric_mapping_data.data,
1061
+ api_client=api_client,
1062
+ message_id=message_data.message_id,
1063
+ conversation_id=message_data.conversation_id,
1064
+ message_timestamp=message_data.message_timestamp,
1065
+ nodes_data=nodes_data,
1066
+ **kwargs)
1067
+
1068
+ # Add foundation model details to node
1069
+ for node in nodes:
1070
+ if node.name in experiment_run_metadata:
1071
+ node.foundation_models = list(
1072
+ experiment_run_metadata[node.name]["foundation_models"])
1073
+
1074
+ return metric_results, message_data, [item for sublist in nodes_data.values() for item in sublist], metric_mapping_data, nodes