ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353):
  1. ibm_watsonx_gov/__init__.py +8 -0
  2. ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
  3. ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
  4. ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
  5. ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
  6. ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
  7. ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
  8. ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
  9. ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
  10. ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
  11. ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
  12. ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
  13. ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
  14. ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
  15. ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
  16. ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
  17. ibm_watsonx_gov/clients/__init__.py +0 -0
  18. ibm_watsonx_gov/clients/api_client.py +99 -0
  19. ibm_watsonx_gov/clients/segment_client.py +46 -0
  20. ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
  21. ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
  22. ibm_watsonx_gov/config/__init__.py +14 -0
  23. ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
  24. ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
  25. ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
  26. ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
  27. ibm_watsonx_gov/entities/__init__.py +8 -0
  28. ibm_watsonx_gov/entities/agentic_app.py +209 -0
  29. ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
  30. ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
  31. ibm_watsonx_gov/entities/ai_experiment.py +419 -0
  32. ibm_watsonx_gov/entities/base_classes.py +134 -0
  33. ibm_watsonx_gov/entities/container.py +54 -0
  34. ibm_watsonx_gov/entities/credentials.py +633 -0
  35. ibm_watsonx_gov/entities/criteria.py +508 -0
  36. ibm_watsonx_gov/entities/enums.py +274 -0
  37. ibm_watsonx_gov/entities/evaluation_result.py +444 -0
  38. ibm_watsonx_gov/entities/foundation_model.py +490 -0
  39. ibm_watsonx_gov/entities/llm_judge.py +44 -0
  40. ibm_watsonx_gov/entities/locale.py +17 -0
  41. ibm_watsonx_gov/entities/mapping.py +49 -0
  42. ibm_watsonx_gov/entities/metric.py +211 -0
  43. ibm_watsonx_gov/entities/metric_threshold.py +36 -0
  44. ibm_watsonx_gov/entities/model_provider.py +329 -0
  45. ibm_watsonx_gov/entities/model_risk_result.py +43 -0
  46. ibm_watsonx_gov/entities/monitor.py +71 -0
  47. ibm_watsonx_gov/entities/prompt_setup.py +40 -0
  48. ibm_watsonx_gov/entities/state.py +22 -0
  49. ibm_watsonx_gov/entities/utils.py +99 -0
  50. ibm_watsonx_gov/evaluators/__init__.py +26 -0
  51. ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
  52. ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
  53. ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
  54. ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
  55. ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
  56. ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
  57. ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
  58. ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
  59. ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
  60. ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
  61. ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
  62. ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
  63. ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
  64. ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
  65. ibm_watsonx_gov/metrics/__init__.py +74 -0
  66. ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
  67. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
  68. ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
  69. ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
  70. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
  71. ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
  72. ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
  73. ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
  74. ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
  75. ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
  76. ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
  77. ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
  78. ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
  79. ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
  80. ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
  81. ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
  82. ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
  83. ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
  84. ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
  85. ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
  86. ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
  87. ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
  88. ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
  89. ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
  90. ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
  91. ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
  92. ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
  93. ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
  94. ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
  95. ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
  96. ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
  97. ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
  98. ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
  99. ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
  100. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
  101. ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
  102. ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
  103. ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
  104. ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
  105. ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
  106. ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
  107. ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
  108. ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
  109. ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
  110. ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
  111. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
  112. ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
  113. ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
  114. ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
  115. ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
  116. ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
  117. ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
  118. ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
  119. ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
  120. ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
  121. ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
  122. ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
  123. ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
  124. ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
  125. ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
  126. ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
  127. ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
  128. ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
  129. ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
  130. ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
  131. ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
  132. ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
  133. ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
  134. ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
  135. ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
  136. ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
  137. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
  138. ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
  139. ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
  140. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
  141. ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
  142. ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
  143. ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
  144. ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
  145. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
  146. ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
  147. ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
  148. ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
  149. ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
  150. ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
  151. ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
  152. ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
  153. ibm_watsonx_gov/metrics/status/__init__.py +0 -0
  154. ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
  155. ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
  156. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
  157. ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
  158. ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
  159. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
  160. ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
  161. ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
  162. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
  163. ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
  164. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
  165. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
  166. ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
  167. ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
  168. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
  169. ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
  170. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
  171. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
  172. ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
  173. ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
  174. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
  175. ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
  176. ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
  177. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
  178. ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
  179. ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
  180. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
  181. ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
  182. ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
  183. ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
  184. ibm_watsonx_gov/metrics/utils.py +440 -0
  185. ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
  186. ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
  187. ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
  188. ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
  189. ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
  190. ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
  191. ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
  192. ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
  193. ibm_watsonx_gov/providers/__init__.py +8 -0
  194. ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
  195. ibm_watsonx_gov/providers/detectors_provider.py +415 -0
  196. ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
  197. ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
  198. ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
  199. ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
  200. ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
  201. ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
  202. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
  203. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
  204. ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
  205. ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
  206. ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
  207. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
  208. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
  209. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
  210. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
  211. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
  212. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
  213. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
  214. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
  215. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
  216. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
  217. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
  218. ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
  219. ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
  220. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
  221. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
  222. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
  223. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
  224. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
  225. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
  226. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
  227. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
  228. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
  229. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
  230. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
  231. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
  232. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
  233. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
  234. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
  235. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
  236. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
  237. ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
  238. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
  239. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
  240. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
  241. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
  242. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
  243. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
  244. ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
  245. ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
  246. ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
  247. ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
  248. ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
  249. ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
  250. ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
  251. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
  252. ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
  253. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
  254. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
  255. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
  256. ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
  257. ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
  258. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
  259. ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
  260. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
  261. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
  262. ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
  263. ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
  264. ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
  265. ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
  266. ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
  267. ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
  268. ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
  269. ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
  270. ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
  271. ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
  272. ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
  273. ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
  274. ibm_watsonx_gov/tools/__init__.py +10 -0
  275. ibm_watsonx_gov/tools/clients/__init__.py +11 -0
  276. ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
  277. ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
  278. ibm_watsonx_gov/tools/core/__init__.py +8 -0
  279. ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
  280. ibm_watsonx_gov/tools/entities/__init__.py +8 -0
  281. ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
  282. ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
  283. ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
  284. ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
  285. ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
  286. ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
  287. ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
  288. ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
  289. ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
  290. ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
  291. ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
  292. ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
  293. ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
  294. ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
  295. ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
  296. ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
  297. ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
  298. ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
  299. ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
  300. ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
  301. ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
  302. ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
  303. ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
  304. ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
  305. ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
  306. ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
  307. ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
  308. ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
  309. ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
  310. ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
  311. ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
  312. ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
  313. ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
  314. ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
  315. ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
  316. ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
  317. ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
  318. ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
  319. ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
  320. ibm_watsonx_gov/tools/utils/__init__.py +14 -0
  321. ibm_watsonx_gov/tools/utils/constants.py +69 -0
  322. ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
  323. ibm_watsonx_gov/tools/utils/environment.py +108 -0
  324. ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
  325. ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
  326. ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
  327. ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
  328. ibm_watsonx_gov/traces/__init__.py +8 -0
  329. ibm_watsonx_gov/traces/span_exporter.py +195 -0
  330. ibm_watsonx_gov/traces/span_node.py +251 -0
  331. ibm_watsonx_gov/traces/span_util.py +153 -0
  332. ibm_watsonx_gov/traces/trace_utils.py +1074 -0
  333. ibm_watsonx_gov/utils/__init__.py +8 -0
  334. ibm_watsonx_gov/utils/aggregation_util.py +346 -0
  335. ibm_watsonx_gov/utils/async_util.py +62 -0
  336. ibm_watsonx_gov/utils/authenticator.py +144 -0
  337. ibm_watsonx_gov/utils/constants.py +15 -0
  338. ibm_watsonx_gov/utils/errors.py +40 -0
  339. ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
  340. ibm_watsonx_gov/utils/insights_generator.py +1285 -0
  341. ibm_watsonx_gov/utils/python_utils.py +425 -0
  342. ibm_watsonx_gov/utils/rest_util.py +73 -0
  343. ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
  344. ibm_watsonx_gov/utils/singleton_meta.py +25 -0
  345. ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
  346. ibm_watsonx_gov/utils/validation_util.py +126 -0
  347. ibm_watsonx_gov/visualizations/__init__.py +13 -0
  348. ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
  349. ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
  350. ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
  351. ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
  352. ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
  353. ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
@@ -0,0 +1,306 @@
1
+ # Function Call Comparison Framework
2
+
3
+ A framework for evaluating and comparing function call predictions against ground truth data. This module provides multiple comparison strategies to handle different evaluation scenarios, from exact matching to semantic analysis using LLM judges.
4
+
5
+ ## Overview
6
+
7
+ The Function Call Comparison Framework enables precise evaluation of tool calls in LLM applications. It supports various comparison strategies and provides detailed analysis of both function names and parameters.
8
+
9
+ ## Features
10
+
11
+ - **Multiple Comparison Strategies**: Exact match, fuzzy string matching, LLM-based semantic analysis, and code-based programmatic evaluation
12
+ - **Parameter Analysis**: Detailed comparison of individual parameters with type normalization and default value handling
13
+ - **Tool Specification Support**: Integration with OpenAI-format tool specifications for context-aware comparisons
14
+ - **Async Support**: Full asynchronous operation for high-performance evaluations
15
+ - **Custom Instructions**: Specialized evaluation logic for domain-specific requirements
16
+ - **Batch Processing**: Efficient comparison of multiple tool calls
17
+ - **Comprehensive Reporting**: Detailed results with explanations and confidence scores
18
+
19
+ ## Comparison Strategies
20
+
21
+ ### 1. Exact Match
22
+ Performs precise structural comparison with optional type normalization.
23
+
24
+ ```python
25
+ from llmevalkit.function_calling.comparison import ComparisonStrategy, ComparisonConfig, ComparisonPipeline
26
+
27
+ config = ComparisonConfig(strategy=ComparisonStrategy.EXACT_MATCH)
28
+ pipeline = ComparisonPipeline(config=config)
29
+ ```
30
+
31
+ ### 2. Fuzzy String Matching
32
+ Uses string similarity algorithms for near-match detection.
33
+
34
+ ```python
35
+ config = ComparisonConfig(
36
+ strategy=ComparisonStrategy.FUZZY_STRING,
37
+ string_similarity_threshold=0.8
38
+ )
39
+ ```
40
+
41
+ ### 3. LLM Judge
42
+ Employs language models for semantic understanding and context-aware evaluation.
43
+
44
+ ```python
45
+ from llmevalkit.llm import get_llm
46
+
47
+ llm_client = get_llm("watsonx.output_val")(model_name="meta-llama/llama-3-3-70b-instruct")
48
+ config = ComparisonConfig(strategy=ComparisonStrategy.LLM_JUDGE)
49
+ pipeline = ComparisonPipeline(config=config, llm_client=llm_client)
50
+ ```
51
+
52
+ ### 4. Code Agent
53
+ Uses programmatic analysis through code generation and execution for complex evaluations.
54
+
55
+ ```python
56
+ config = ComparisonConfig(strategy=ComparisonStrategy.CODE_AGENT)
57
+ pipeline = ComparisonPipeline(config=config, llm_client=llm_client)
58
+ ```
59
+
60
+ ### 5. Hybrid Strategy
61
+ Combines multiple strategies and selects the best result based on confidence scores.
62
+
63
+ ```python
64
+ config = ComparisonConfig(strategy=ComparisonStrategy.HYBRID)
65
+ pipeline = ComparisonPipeline(config=config, llm_client=llm_client)
66
+ ```
67
+
68
+ ## Basic Usage
69
+
70
+ ### Simple Comparison
71
+
72
+ ```python
73
+ import json
74
+ from llmevalkit.function_calling.comparison import ComparisonStrategy, ComparisonConfig, ComparisonPipeline
75
+
76
+ # Configure comparison strategy
77
+ config = ComparisonConfig(strategy=ComparisonStrategy.EXACT_MATCH)
78
+ pipeline = ComparisonPipeline(config=config)
79
+
80
+ # Define tool calls
81
+ predicted_call = {
82
+ "function": {
83
+ "name": "send_email",
84
+ "arguments": {
85
+ "to": "user@example.com",
86
+ "subject": "Project Update",
87
+ "body": "The project is complete."
88
+ }
89
+ }
90
+ }
91
+
92
+ ground_truth_call = {
93
+ "function": {
94
+ "name": "send_email",
95
+ "arguments": {
96
+ "to": "user@example.com",
97
+ "subject": "Project Update",
98
+ "body": "The project is complete."
99
+ }
100
+ }
101
+ }
102
+
103
+ # Perform comparison
104
+ result = pipeline.compare(predicted_call, ground_truth_call)
105
+
106
+ print(f"Overall Score: {result.overall_score}")
107
+ print(f"Function Match: {result.function_name_match}")
108
+ print(f"Parameters Evaluated: {len(result.parameter_results)}")
109
+ ```
110
+
111
+ ### Advanced LLM-Based Comparison
112
+
113
+ ```python
114
+ from llmevalkit.llm import get_llm
115
+ from llmevalkit.function_calling.comparison import ComparisonStrategy, ComparisonConfig, ComparisonPipeline
116
+
117
+ # Initialize LLM client
118
+ llm_client = get_llm("watsonx.output_val")(model_name="meta-llama/llama-3-3-70b-instruct")
119
+
120
+ # Configure LLM Judge strategy
121
+ config = ComparisonConfig(
122
+ strategy=ComparisonStrategy.LLM_JUDGE,
123
+ string_similarity_threshold=0.8,
124
+ numeric_tolerance=0.01
125
+ )
126
+
127
+ pipeline = ComparisonPipeline(config=config, llm_client=llm_client)
128
+
129
+ # Tool specification for context
130
+ tool_spec = {
131
+ "type": "function",
132
+ "function": {
133
+ "name": "book_flight",
134
+ "description": "Book a flight for passengers",
135
+ "parameters": {
136
+ "type": "object",
137
+ "properties": {
138
+ "departure_city": {"type": "string"},
139
+ "arrival_city": {"type": "string"},
140
+ "departure_date": {"type": "string", "format": "date"},
141
+ "passenger_count": {"type": "integer"}
142
+ },
143
+ "required": ["departure_city", "arrival_city", "departure_date"]
144
+ }
145
+ }
146
+ }
147
+
148
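+ # Semantic comparison with context (run inside an async function; predicted_call and ground_truth_call are tool-call dicts in the format shown under "Simple Comparison")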
149
+ result = await pipeline.compare_async(
150
+ predicted_call=predicted_call,
151
+ ground_truth_call=ground_truth_call,
152
+ tool_specs=[tool_spec]
153
+ )
154
+ ```
155
+
156
+ ## Custom Instructions
157
+
158
+ For specialized evaluation scenarios, provide custom instructions to guide the comparison logic:
159
+
160
+ ```python
161
+ custom_instructions = """
162
+ When comparing dates, treat relative terms like "tomorrow" and "yesterday"
163
+ as equivalent to their absolute date representations. Consider timezone
164
+ context when evaluating time-sensitive parameters.
165
+ """
166
+
167
+ config = ComparisonConfig(strategy=ComparisonStrategy.LLM_JUDGE)
168
+ pipeline = ComparisonPipeline(config=config, llm_client=llm_client)
169
+
170
+ result = await pipeline.compare_async(
171
+ predicted_call=predicted_call,
172
+ ground_truth_call=ground_truth_call,
173
+ custom_instructions=custom_instructions
174
+ )
175
+ ```
176
+
177
+ ## Batch Processing
178
+
179
+ Process multiple comparisons efficiently:
180
+
181
+ ```python
182
+ comparisons = [
183
+ {
184
+ "predicted_call": predicted_call_1,
185
+ "ground_truth_call": ground_truth_call_1
186
+ },
187
+ {
188
+ "predicted_call": predicted_call_2,
189
+ "ground_truth_call": ground_truth_call_2
190
+ }
191
+ ]
192
+
193
+ results = await pipeline.batch_compare_async(comparisons)
194
+ summary = pipeline.get_comparison_summary(results)
195
+
196
+ print(f"Average Score: {summary['average_score']}")
197
+ print(f"Success Rate: {summary['success_rate']}")
198
+ ```
199
+
200
+ ## Configuration Options
201
+
202
+ ### ComparisonConfig Parameters
203
+
204
+ - `strategy`: Comparison strategy to use (a `ComparisonStrategy` member such as `EXACT_MATCH`, `FUZZY_STRING`, or `LLM_JUDGE`)
205
+ - `string_similarity_threshold`: Threshold for fuzzy matching (0.0-1.0)
206
+ - `numeric_tolerance`: Tolerance for numeric comparisons
207
+ - `normalize_types`: Enable type normalization (string "123" ↔ int 123)
208
+ - `weight_function_name`: Weight given to function name matching
209
+ - `weight_parameters`: Weight given to parameter matching
210
+ - `parameter_weights`: Custom weights for specific parameters
211
+
212
+ ### Strategy-Specific Configuration
213
+
214
+ ```python
215
+ config = ComparisonConfig(
216
+ strategy=ComparisonStrategy.FUZZY_STRING,
217
+ string_similarity_threshold=0.8,
218
+ numeric_tolerance=0.01,
219
+ normalize_types=True,
220
+ strategy_config={
221
+ "enable_semantic_matching": True,
222
+ "case_sensitive": False
223
+ }
224
+ )
225
+ ```
226
+
227
+ ## Result Analysis
228
+
229
+ ### ToolCallComparisonResult
230
+
231
+ The main result object containing:
232
+
233
+ - `overall_score`: Combined score (0.0-1.0)
234
+ - `function_name_match`: Boolean function name match result
235
+ - `function_name_score`: Function name similarity score
236
+ - `parameter_results`: List of individual parameter comparisons
237
+ - `overall_explanation`: Human-readable explanation
238
+ - `strategy_used`: Strategy or strategies employed
239
+ - `metadata`: Additional evaluation metadata
240
+
241
+ ### ParameterComparisonResult
242
+
243
+ Individual parameter comparison details:
244
+
245
+ - `parameter_name`: Name of the compared parameter
246
+ - `predicted_value`: Value from predicted call
247
+ - `ground_truth_value`: Value from ground truth
248
+ - `score`: Parameter similarity score (0.0-1.0)
249
+ - `is_match`: Boolean match result
250
+ - `explanation`: Detailed comparison explanation
251
+ - `confidence`: Confidence in the evaluation
252
+
253
+ ## Error Handling
254
+
255
+ The framework provides robust error handling with fallback strategies:
256
+
257
+ ```python
258
+ try:
259
+ result = await pipeline.compare_async(predicted_call, ground_truth_call)
260
+ except Exception as e:
261
+ print(f"Comparison failed: {e}")
262
+ # The framework automatically falls back to simpler strategies when possible, so an exception reaching this point means no fallback succeeded
263
+ ```
264
+
265
+ ## Installation Requirements
266
+
267
+ ### Core Dependencies
268
+ ```bash
269
+ pip install llmevalkit
270
+ ```
271
+
272
+ ### Optional Dependencies
273
+
274
+ For Code Agent functionality:
275
+ ```bash
276
+ pip install langgraph langchain-core langchain-experimental
277
+ ```
278
+
279
+ For specific LLM providers:
280
+ ```bash
281
+ pip install ibm-watsonx-ai # For IBM watsonx.ai models
282
+ pip install openai # For OpenAI models
283
+ ```
284
+
285
+ ## Examples
286
+
287
+ See the `examples/function_calling/comparison/` directory for comprehensive examples:
288
+
289
+ - `basic_examples.py`: Core functionality demonstrations
290
+ - `llm_judge_examples.py`: LLM-based semantic comparisons
291
+ - `custom_instructions_examples.py`: Specialized evaluation scenarios
292
+ - `custom_schema_examples.py`: Custom response format examples
293
+ - `code_agent_demo.py`: Programmatic analysis examples
294
+
295
+ ## Best Practices
296
+
297
+ 1. **Choose Appropriate Strategy**: Use exact match for structured data, LLM judge for semantic understanding
298
+ 2. **Provide Tool Specifications**: Include tool specs for better context and default value handling
299
+ 3. **Use Custom Instructions**: Provide domain-specific guidance for specialized scenarios
300
+ 4. **Batch Processing**: Use async batch operations for large-scale evaluations
301
+ 5. **Error Handling**: Implement proper error handling and fallback strategies
302
+ 6. **Performance Monitoring**: Monitor LLM API usage and response times in production
303
+
304
+ ## Contributing
305
+
306
+ This module is part of the LLMEvalKit framework. For contributions and issues, please refer to the main project repository.
@@ -0,0 +1,89 @@
1
+ from .types import (
2
+ ComparisonStrategy,
3
+ ComparisonConfig,
4
+ ParameterComparisonResult,
5
+ BulkParameterComparisonResult,
6
+ ToolCallComparisonResult,
7
+ ParameterStatus,
8
+ ToolSpecFunction,
9
+ ToolSpecParameter,
10
+ )
11
+
12
+ from .pipeline import ComparisonPipeline
13
+
14
+ from .comparators.base import BaseComparator
15
+ from .comparators.exact_match import ExactMatchComparator
16
+ from .comparators.fuzzy_string import FuzzyStringComparator
17
+ from .comparators.llm_judge import LLMJudgeComparator
18
+ from .comparators.hybrid import HybridComparator
19
+
20
# Code Agent Comparator (optional: requires LangGraph and LangChain extras)
try:
    from .comparators.code_agent import CodeAgentComparator

    _code_agent_available = True
except ImportError:
    _code_agent_available = False

    class CodeAgentComparator:
        """Stand-in used when the optional Code Agent dependencies are missing.

        Keeps ``CodeAgentComparator`` importable from this package so that
        importing the package itself never fails; instantiating the stand-in
        raises ``ImportError`` with installation instructions.

        Raises:
            ImportError: Always, on instantiation.
        """

        def __init__(self, *args, **kwargs):
            # Accept arbitrary arguments: with a bare ``__init__(self)`` any
            # caller passing constructor arguments would get a confusing
            # TypeError instead of the actionable ImportError below.
            raise ImportError(
                "Code Agent dependencies not available. Install: pip install langgraph langchain-core langchain-experimental"
            )
34
+
35
+
36
+ from .utils import (
37
+ calculate_string_similarity,
38
+ deep_compare_objects,
39
+ validate_tool_call_structure,
40
+ )
41
+
42
# Public API of the comparison package. Every name listed here must be bound
# in this module, otherwise ``from <package> import *`` raises AttributeError.
__all__ = [
    # Core types
    "ComparisonStrategy",
    "ComparisonConfig",
    "ParameterComparisonResult",
    "BulkParameterComparisonResult",
    "ToolCallComparisonResult",
    "ParameterStatus",
    "ToolSpecFunction",
    "ToolSpecParameter",
    # Main pipeline
    "ComparisonPipeline",
    # Comparators
    "BaseComparator",
    "ExactMatchComparator",
    "FuzzyStringComparator",
    "LLMJudgeComparator",
    "HybridComparator",
    "CodeAgentComparator",
    # Utilities
    "calculate_string_similarity",
    "deep_compare_objects",
    "validate_tool_call_structure",
    # NOTE: "ComparisonTester", "TestDataGenerator" and "MockLLMClient" used to
    # be listed under a "Testing" group, but this module never imports or
    # defines them, so they were removed to keep star-imports working.
]
70
+
71
+
72
+ # Quick usage example
73
def quick_compare(
    predicted_call, ground_truth_call, strategy=ComparisonStrategy.EXACT_MATCH
):
    """
    Compare one predicted tool call against its ground truth in a single call.

    Builds a fresh ``ComparisonPipeline`` whose ``ComparisonConfig`` sets only
    the strategy, then runs the pipeline's synchronous ``compare``.

    Args:
        predicted_call: The predicted tool call
        ground_truth_call: The ground truth tool call
        strategy: ``ComparisonStrategy`` member selecting the comparison
            method; defaults to ``ComparisonStrategy.EXACT_MATCH``

    Returns:
        ToolCallComparisonResult: Comparison result
    """
    # NOTE(review): no llm_client is handed to the pipeline here, so strategies
    # that need one (e.g. LLM_JUDGE) presumably cannot be used through this
    # helper - confirm against ComparisonPipeline.
    return ComparisonPipeline(config=ComparisonConfig(strategy=strategy)).compare(
        predicted_call, ground_truth_call
    )
@@ -0,0 +1,30 @@
1
+ from .base import BaseComparator
2
+ from .exact_match import ExactMatchComparator
3
+ from .fuzzy_string import FuzzyStringComparator
4
+ from .llm_judge import LLMJudgeComparator
5
+ from .hybrid import HybridComparator
6
+
7
# Optional code agent comparator (requires LangGraph and LangChain extras)
try:
    from .code_agent import CodeAgentComparator

    _code_agent_available = True
except ImportError:
    _code_agent_available = False

    class CodeAgentComparator:
        """Stand-in used when the optional Code Agent dependencies are missing.

        Keeps ``CodeAgentComparator`` importable from this sub-package so that
        importing it never fails; instantiating the stand-in raises
        ``ImportError`` with installation instructions.

        Raises:
            ImportError: Always, on instantiation.
        """

        def __init__(self, *args, **kwargs):
            # Accept arbitrary arguments: with a bare ``__init__(self)`` any
            # caller passing constructor arguments would get a confusing
            # TypeError instead of the actionable ImportError below.
            raise ImportError(
                "Code Agent dependencies not available. Install: pip install langgraph langchain-core langchain-experimental"
            )
21
+
22
+
23
# Names re-exported by the comparators sub-package.
__all__ = [
    # Abstract base shared by all comparators
    "BaseComparator",
    # Concrete comparators
    "ExactMatchComparator",
    "FuzzyStringComparator",
    "LLMJudgeComparator",
    # Bound to a placeholder class when its optional dependencies are missing
    "CodeAgentComparator",
    "HybridComparator",
]