ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_gov/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
- ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
- ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
- ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
- ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
- ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
- ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
- ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
- ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
- ibm_watsonx_gov/clients/__init__.py +0 -0
- ibm_watsonx_gov/clients/api_client.py +99 -0
- ibm_watsonx_gov/clients/segment_client.py +46 -0
- ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
- ibm_watsonx_gov/config/__init__.py +14 -0
- ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
- ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
- ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
- ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
- ibm_watsonx_gov/entities/__init__.py +8 -0
- ibm_watsonx_gov/entities/agentic_app.py +209 -0
- ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
- ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
- ibm_watsonx_gov/entities/ai_experiment.py +419 -0
- ibm_watsonx_gov/entities/base_classes.py +134 -0
- ibm_watsonx_gov/entities/container.py +54 -0
- ibm_watsonx_gov/entities/credentials.py +633 -0
- ibm_watsonx_gov/entities/criteria.py +508 -0
- ibm_watsonx_gov/entities/enums.py +274 -0
- ibm_watsonx_gov/entities/evaluation_result.py +444 -0
- ibm_watsonx_gov/entities/foundation_model.py +490 -0
- ibm_watsonx_gov/entities/llm_judge.py +44 -0
- ibm_watsonx_gov/entities/locale.py +17 -0
- ibm_watsonx_gov/entities/mapping.py +49 -0
- ibm_watsonx_gov/entities/metric.py +211 -0
- ibm_watsonx_gov/entities/metric_threshold.py +36 -0
- ibm_watsonx_gov/entities/model_provider.py +329 -0
- ibm_watsonx_gov/entities/model_risk_result.py +43 -0
- ibm_watsonx_gov/entities/monitor.py +71 -0
- ibm_watsonx_gov/entities/prompt_setup.py +40 -0
- ibm_watsonx_gov/entities/state.py +22 -0
- ibm_watsonx_gov/entities/utils.py +99 -0
- ibm_watsonx_gov/evaluators/__init__.py +26 -0
- ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
- ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
- ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
- ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
- ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
- ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
- ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
- ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
- ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
- ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
- ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
- ibm_watsonx_gov/metrics/__init__.py +74 -0
- ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
- ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
- ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
- ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
- ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
- ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
- ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
- ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
- ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
- ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
- ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
- ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
- ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
- ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
- ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
- ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
- ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
- ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
- ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
- ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
- ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
- ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
- ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
- ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
- ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
- ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
- ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
- ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
- ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
- ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
- ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
- ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
- ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
- ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
- ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
- ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
- ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
- ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
- ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
- ibm_watsonx_gov/metrics/status/__init__.py +0 -0
- ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
- ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
- ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
- ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
- ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
- ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
- ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
- ibm_watsonx_gov/metrics/utils.py +440 -0
- ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
- ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
- ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
- ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
- ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
- ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
- ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
- ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
- ibm_watsonx_gov/providers/__init__.py +8 -0
- ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/detectors_provider.py +415 -0
- ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
- ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
- ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
- ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
- ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
- ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
- ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/__init__.py +10 -0
- ibm_watsonx_gov/tools/clients/__init__.py +11 -0
- ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
- ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
- ibm_watsonx_gov/tools/core/__init__.py +8 -0
- ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
- ibm_watsonx_gov/tools/entities/__init__.py +8 -0
- ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
- ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
- ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
- ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
- ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
- ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
- ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
- ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
- ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
- ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
- ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
- ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
- ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
- ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
- ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
- ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
- ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
- ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
- ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
- ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
- ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
- ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
- ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
- ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
- ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
- ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
- ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
- ibm_watsonx_gov/tools/utils/__init__.py +14 -0
- ibm_watsonx_gov/tools/utils/constants.py +69 -0
- ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
- ibm_watsonx_gov/tools/utils/environment.py +108 -0
- ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
- ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
- ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
- ibm_watsonx_gov/traces/__init__.py +8 -0
- ibm_watsonx_gov/traces/span_exporter.py +195 -0
- ibm_watsonx_gov/traces/span_node.py +251 -0
- ibm_watsonx_gov/traces/span_util.py +153 -0
- ibm_watsonx_gov/traces/trace_utils.py +1074 -0
- ibm_watsonx_gov/utils/__init__.py +8 -0
- ibm_watsonx_gov/utils/aggregation_util.py +346 -0
- ibm_watsonx_gov/utils/async_util.py +62 -0
- ibm_watsonx_gov/utils/authenticator.py +144 -0
- ibm_watsonx_gov/utils/constants.py +15 -0
- ibm_watsonx_gov/utils/errors.py +40 -0
- ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
- ibm_watsonx_gov/utils/insights_generator.py +1285 -0
- ibm_watsonx_gov/utils/python_utils.py +425 -0
- ibm_watsonx_gov/utils/rest_util.py +73 -0
- ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
- ibm_watsonx_gov/utils/singleton_meta.py +25 -0
- ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/utils/validation_util.py +126 -0
- ibm_watsonx_gov/visualizations/__init__.py +13 -0
- ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
- ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
- ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
- ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
- ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
- ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
|
@@ -0,0 +1,1304 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
import ast
|
|
10
|
+
|
|
11
|
+
import ipywidgets as widgets
|
|
12
|
+
import matplotlib.pyplot as plt
|
|
13
|
+
import pandas as pd
|
|
14
|
+
from IPython.display import HTML, display
|
|
15
|
+
from itables.widget import ITable
|
|
16
|
+
from matplotlib.axes import Axes
|
|
17
|
+
from matplotlib_venn import venn2, venn2_circles, venn3, venn3_circles
|
|
18
|
+
from matplotlib_venn.layout.venn2 import \
|
|
19
|
+
DefaultLayoutAlgorithm as Venn2DefaultLayoutAlgorithm
|
|
20
|
+
from matplotlib_venn.layout.venn3 import \
|
|
21
|
+
DefaultLayoutAlgorithm as Venn3DefaultLayoutAlgorithm
|
|
22
|
+
|
|
23
|
+
from ibm_watsonx_gov.config.gen_ai_configuration import GenAIConfiguration
|
|
24
|
+
from ibm_watsonx_gov.entities.enums import TaskType
|
|
25
|
+
from ibm_watsonx_gov.entities.metric import GenAIMetric
|
|
26
|
+
from ibm_watsonx_gov.utils.gov_sdk_logger import GovSDKLogger
|
|
27
|
+
|
|
28
|
+
from .metric_descriptions import metric_description_mapping
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ModelInsights():
    """
    Class to display venn diagrams using metric violations
    NOTE: For venn diagram interactivity, `ipympl` (jupyter-matplotlib) Jupyter extension needs to be installed
    """
    # Color constants used to style the circles
    PURPLE = "#8A3FFC"
    CYAN = "#1192E8"
    TEAL = "#009D9A"
    # Palette applied to the venn circles, in order
    COLORS = [PURPLE, CYAN, TEAL]

    # general constants
    # Upper bound of 3 matches matplotlib-venn, which only draws 2- or 3-set diagrams
    MAX_METRIC_GROUP_SIZE = 3
    # NOTE(review): presumably the number of metrics pre-selected for display by default — confirm against the selection UI
    DEFAULT_SELECTED_METRICS_COUNT = 9
def __init__(
        self,
        configuration: GenAIConfiguration,
        metrics: list[GenAIMetric],
) -> None:
    """
    ModelInsights construction. This will parse and validate the configuration

    Notes:
        - The visualization and interactivity features in the module are not supported
        by the jupyter notebook within VS Code. It is recommended to use Jupyter notebook
        or Jupyter lab from the web browser to take advantage of the features of this module
        - Supported task types: 'question_answering', 'classification', 'summarization',
        'generation', 'extraction', 'retrieval_augmented_generation'

    Args:
        configuration (GenAIConfiguration): Metric evaluation configuration
        metrics (list[GenAIMetric]): List of metrics to visualize
    """
    self.logger = GovSDKLogger.get_logger(__name__)
    self.configuration: GenAIConfiguration = configuration
    self.metrics: list[GenAIMetric] = metrics
    # Flattened {metric_id: {"type": ..., "threshold": ...}} lookup built from the metric objects
    self.metric_config = self.__parse_metrics_object(self.metrics)
    # Metrics dataframe; None until supplied later
    self.df: pd.DataFrame | None = None
    # NOTE(review): initialized (and reset in __reset_state) as a dict despite the
    # "set" name — annotation corrected to dict; confirm intended value type
    self.violation_sets: dict = {}
    self.violations = pd.DataFrame()
    self.config_metric_ids = []
    # Venn-diagram interactivity bookkeeping (see __reset_state)
    self.selected_patch_id = None
    self.venn_diagram_callback_id = None
    # Output widgets the visualizations render into
    self.violation_summary_and_table_output = widgets.Output()
    self.faithfulness_attributions_output = widgets.Output()
    self.metric_groups = []

    # Inject the custom CSS needed by the HTML components
    self.__init_stylesheet()
|
+
def __parse_metrics_object(self, metrics: list[GenAIMetric]):
    """
    Flatten the metric objects into a ``{metric_id: {"type", "threshold"}}`` mapping.

    The metric id is ``"<name>.<method>"`` when the metric has a method,
    otherwise just ``"<name>"``. When a metric carries several thresholds,
    each iteration overwrites the previous entry, so only the last
    threshold of that metric is retained.
    """
    threshold_map = {}
    for current_metric in metrics:
        metric_id = (
            f"{current_metric.name}.{current_metric.method}"
            if current_metric.method
            else current_metric.name
        )
        for limit in current_metric.thresholds:
            # Last threshold wins for a given metric id
            threshold_map[metric_id] = {
                "type": limit.type,
                "threshold": limit.value,
            }
    return threshold_map
+
def __reset_state(self):
    """
    Helper to reset the object state.

    Restores every piece of per-run visualization state — violation data,
    derived metric groupings, and venn-diagram selection bookkeeping — to
    its initial empty value.
    """
    # Venn-diagram selection / callback bookkeeping
    self.selected_patch_id = None
    self.venn_diagram_callback_id = None
    # Violation data computed from the metrics dataframe
    self.violation_sets = {}
    self.violations = pd.DataFrame()
    # Metric grouping / configuration derived state
    self.config_metric_ids = []
    self.metric_groups = []
+
def __init_stylesheet(self):
    """
    Helper to initialize all needed custom css for the html components

    Builds a <style> element and injects it into the notebook output via
    IPython's ``display``.

    Raises:
        Exception: if the css could not be injected into the notebook output.
    """
    styles = HTML(
        """
        <style>
        .reset_input_style > input {
            border: unset !important;
            background: unset !important;
        }

        .violations_table td {
            white-space: nowrap; text-overflow:ellipsis; overflow: hidden; max-width:1px;
        }

        .tooltip {
            position: relative;
        }
        .tooltip .tooltiptext {
            visibility: hidden;
            width: 120px;
            background-color: #555;
            color: #fff;
            text-align: center;
            border-radius: 6px;
            padding: 5px 0;
            position: absolute;
            z-index: 1;
            bottom: 125%;
            left: 50%;
            margin-left: -60px;
            opacity: 0;
            transition: opacity 0.3s;
        }
        .tooltip .tooltiptext::after {
            content: "";
            position: absolute;
            top: 100%;
            left: 50%;
            margin-left: -5px;
            border-width: 5px;
            border-style: solid;
            border-color: #555 transparent transparent transparent;
        }
        .tooltip:hover .tooltiptext {
            visibility: visible;
            opacity: 1;
        }
        mark:hover {
            background-color: white !important;
        }
        </style>
        """
    )

    try:
        display(styles)
    except Exception as e:
        message = f"Failed to inject css styling. {e}"
        self.logger.error(message)
        # BUG FIX: the original `raise (message)` raised a plain str, which is
        # itself a TypeError in Python 3 ("exceptions must derive from
        # BaseException"). Wrap the message in an Exception (matching the
        # style used in __process_df) and chain the original cause.
        raise Exception(message) from e
+
def __process_df(self, metric_df: pd.DataFrame):
    """
    Parse the metrics dataframe based on the provided configuration.

    Validates that the columns required by the task type exist, computes the
    set of violated record indices per metric id (based on each metric's
    thresholds), accumulates them into ``self.violations`` /
    ``self.violation_sets`` / ``self.config_metric_ids``, and finally derives
    the default metric grouping used by the venn diagrams.

    Args:
        metric_df: dataframe with one column per computed metric id plus the
            configured input/output/context columns.

    Raises:
        Exception: if required columns are missing, or a metric's thresholds
            configuration is invalid (duplicated threshold type).
    """
    self.logger.info(
        f"processing the input metrics dataframe with {metric_df.columns}")

    # Check if the required columns exist based on the task_type
    required_columns = []
    if self.configuration.task_type == TaskType.RAG.value:
        required_columns.extend(
            [*self.configuration.output_fields, *self.configuration.input_fields,
             *self.configuration.context_fields]
        )
    else:
        self.logger.info(
            f"Dataframe columns were not validated for task_type: '{self.configuration.task_type}'"
        )

    missing_columns = set(required_columns) - set(metric_df.columns)
    if len(missing_columns) > 0:
        message = f"Missing columns from the dataframe. {missing_columns}"
        self.logger.error(message)
        raise Exception(message)

    for metric in self.metrics:
        metric_id = f"{metric.name}.{metric.method}" if metric.method else metric.name
        self.logger.info(
            f"metric_id: {metric_id}, config: {metric.thresholds}")

        if metric_id not in metric_df.columns:
            self.logger.warning(
                f"metric_id {metric_id} is not present in the dataframe"
            )
            continue

        if len(metric.thresholds) == 1:
            # Single threshold: a record violates when it falls on the wrong
            # side of the limit.
            if metric.thresholds[0].type == "lower_limit":
                violated_records = metric_df[metric_df[metric_id]
                                             < metric.thresholds[0].value]
            else:
                violated_records = metric_df[metric_df[metric_id]
                                             > metric.thresholds[0].value]
        else:
            # Two thresholds: expect exactly one lower and one upper limit.
            lower_limit = None
            upper_limit = None

            for threshold in metric.thresholds:
                if threshold.type == "lower_limit":
                    lower_limit = threshold.value
                else:
                    upper_limit = threshold.value

            if lower_limit is None or upper_limit is None:
                message = f"Invalid metrics thresholds. duplicated threshold type. {metric.thresholds}"
                self.logger.error(message)
                raise Exception(message)

            # BUG FIX: a record violates a [lower, upper] range when it falls
            # OUTSIDE the range. The previous `&` (above upper AND below
            # lower) can never be satisfied, so range violations were always
            # reported as empty.
            violated_records = metric_df[(metric_df[metric_id] > upper_limit) | (
                metric_df[metric_id] < lower_limit)]

        self.violation_sets[metric_id] = set(violated_records.index)
        self.violations = pd.concat(
            [self.violations, violated_records])

        # metric_id is guaranteed to be a dataframe column at this point, so
        # the violation count is just the size of the violation set (the
        # previous `in violated_records.keys()` guard was always true).
        self.config_metric_ids.append(
            {
                "metric_id": metric_id,
                "violation_count": len(self.violation_sets[metric_id]),
            }
        )

    # Compute the default metric grouping
    self.df = metric_df
    self.__find_metric_grouping()

    self.logger.info(
        f"Finished processing input dataframe. {self.config_metric_ids}"
    )
def __metric_overlaps(self, metric_id: str, config_filter=None):
    """
    Check for violations overlap between metrics.

    Returns a list of the provided metric id followed by the (up to) two
    metric ids with the largest violation overlap, or ``None`` when the
    metric has no recorded violations. Metrics already placed in a group
    (``self.metric_groups``) and metrics excluded by ``config_filter`` are
    not considered.

    Args:
        metric_id: the metric id to find overlaps for.
        config_filter: optional collection of metric ids to restrict the
            comparison to.
    """
    self.logger.info(
        f"getting metric overlap for metric_id {metric_id}. filters: {config_filter}"
    )

    if metric_id not in self.violation_sets:
        # no violations for this metric id, we can skip it
        # BUG FIX: the message was missing the f-string prefix, so the
        # literal text "{metric_id}" was logged.
        self.logger.info(f"No violations for {metric_id}. Skipping")
        return

    intersections = []  # list to store a tuple of metric id and intersection size
    current_set = self.violation_sets[metric_id]
    for violation, v in self.violation_sets.items():
        if violation == metric_id:
            # skip comparing to self
            continue
        if config_filter is not None and violation not in config_filter:
            # skip comparing with metrics that are not selected
            continue

        # skip metric ids that are already part of an existing group
        # (replaces the previous nested index loops with an equivalent any())
        if any(violation in group for group in self.metric_groups):
            continue

        intersections.append((violation, len(v.intersection(current_set))))

    # sort the metrics by the size of the intersection
    intersections = sorted(intersections, key=lambda x: x[1], reverse=True)
    self.logger.info(
        f"sorted overlaps with metric_id {metric_id} = {intersections}"
    )

    # return a list of the current metric id and the top two metrics by intersection size
    return [metric_id] + [
        intersections[i][0] for i in range(min(len(intersections), 2))
    ]
def __find_metric_grouping(self, config_filter=None):
    """
    Build the metric groups used for generating the venn diagrams.

    Metrics are visited in descending order of violation count; each metric
    not yet assigned to a group is grouped with the metrics that overlap it
    the most. Grouping stops as soon as a zero-violation metric is reached
    (the list is sorted, so the rest have none either) or once
    ``MAX_METRIC_GROUP_SIZE`` groups exist.
    """
    self.logger.info(
        f"building metric grouping. filter {config_filter}")

    # Visit metrics from most violated to least violated
    by_violations = sorted(
        self.config_metric_ids, key=lambda d: d["violation_count"], reverse=True
    )

    # Track metric ids that already ended up in some group
    used_metric_ids = []
    for entry in by_violations:
        self.logger.info(
            f"Checking metric grouping for {entry}")

        # Sorted order means the first zero-violation entry implies all the
        # remaining metrics have no violations either.
        if entry["violation_count"] == 0:
            self.logger.info(
                "Metric does not have any violation -- metric grouping is done"
            )
            break

        metric_id = entry["metric_id"]

        # Skip metrics that were pulled into an earlier group
        if metric_id in used_metric_ids:
            self.logger.info("Metric already used. skipping")
            continue

        # Skip metrics excluded by the selection filter
        if config_filter is not None and metric_id not in config_filter:
            self.logger.info(
                "Metric is not included in the filter. skipping")
            continue

        # Group this metric with the unused metrics overlapping it the most
        group = self.__metric_overlaps(metric_id, config_filter)
        self.metric_groups.append(group)

        # Mark every member of the new group as used
        used_metric_ids.extend(group)

        # Stop once we reach the configured number of groups
        if len(self.metric_groups) == self.MAX_METRIC_GROUP_SIZE:
            self.logger.info(
                f"Reached the maximum group size: {self.MAX_METRIC_GROUP_SIZE} -- metric grouping is done"
            )
            break

    self.logger.info(
        f"Finished finding metric grouping. metric groups: {self.metric_groups}"
    )
def __is_in_circle(
    self,
    circle_center_x: float,
    circle_center_y: float,
    circle_r: float,
    x: float,
    y: float,
):
    """
    Return True when the point (x, y) lies inside or on the circle centered
    at (circle_center_x, circle_center_y) with radius circle_r.
    """
    # Compare squared distances (no sqrt needed); returning the comparison
    # directly replaces the redundant `if ...: return True else: return False`
    # and avoids recomputing the deltas.
    dx = x - circle_center_x
    dy = y - circle_center_y
    return dx * dx + dy * dy <= circle_r * circle_r
def render_venn_diagrams(self, group_index=None, filters=None):
    """
    Function to render multiple interactive venn diagrams.

    Args:
        group_index: optional index into self.metric_groups; when given, a
            single diagram is rendered (subject to ``filters``).
        filters: optional dict of metric_id -> bool; only metric ids mapped
            to True are included in the rendered diagram.
    """
    self.logger.info(
        f"Rendering venn diagrams. group_index: {group_index}, filters: {filters}"
    )

    # Reset the context of matplotlib, this ensures we start with an empty figure
    plt.clf()
    plt.close("all")

    # If we have the group index, we need to check if at least one item is selected in the filters
    if group_index is not None:
        num_of_diagrams = 1 if any(list(filters.values())) else 0
    else:
        # Check how many venn diagrams (plots) to draw
        num_of_diagrams = len(self.metric_groups)

    self.logger.info(
        f"Number of venn diagrams to render is {num_of_diagrams}")

    if num_of_diagrams == 0:
        self.logger.warning("No venn diagrams to render.")
        print("There are no diagrams to display.")
        return

    # Set up the diagrams layout
    # align diagrams horizontally
    fig, axes = plt.subplots(1, num_of_diagrams)
    plt.tight_layout()

    # Each entry is a (axes, venn diagram object, list of metric id labels) tuple
    diagram_list = []

    # 2 or more venn diagrams
    if num_of_diagrams > 1:
        fig.set_figwidth(fig.get_figwidth() * num_of_diagrams * 0.8)
        for i in range(num_of_diagrams):
            self.logger.info(
                f"building venn diagram #{i} out of {num_of_diagrams}"
            )
            # set the config for each of the filters
            metric_filters = {}
            for metric in self.metric_groups[i]:
                metric_filters[metric] = True
            diagram_list.append(
                (
                    axes[i],
                    self.__build_venn(filters=metric_filters, ax=axes[i]),
                    self.metric_groups[i],
                )
            )

    # One venn diagram only
    elif num_of_diagrams == 1:
        metric_filters = {}

        # Check if the metric id filter is provided, otherwise use all metrics in the group
        if filters is not None:
            for metric_id, is_used in filters.items():
                if is_used is True:
                    metric_filters[metric_id] = is_used
        else:
            for metric in self.metric_groups[
                0 if group_index is None else group_index
            ]:
                metric_filters[metric] = True

        diagram_list.append(
            (
                axes,
                self.__build_venn(filters=metric_filters, ax=axes),
                list(metric_filters.keys()),
            )
        )

    @self.violation_summary_and_table_output.capture()
    def venn_callback(event):
        """
        On click handler for venn diagrams. This will determine which venn diagram got clicked
        and update the violation summary and table to reflect the patch that got selected
        """
        self.logger.info(f"Handling venn diagram click event: {event}")
        self.logger.info(f"Diagrams to be processed: {diagram_list}")
        self.logger.info(
            f"Selected patch_id: {self.selected_patch_id}")
        # Start by clearing the UI, this includes the violation summary and violation table
        self.violation_summary_and_table_output.clear_output()
        self.faithfulness_attributions_output.clear_output()

        # Check if we have a selected patch already and update the style
        if self.selected_patch_id is not None:
            # go over all the venn diagrams and reset the patch opacity
            for ax, venn, _ in diagram_list:
                for patch in venn.patches:
                    if patch is not None:
                        patch.set_alpha(0.25)

        # Identify the clicked diagram, set the patch opacity, and determine which records to display
        for ax, venn, labels in diagram_list:

            # If the event is not in this venn diagram, skip to the next one
            if not ax.in_axes(event):
                continue

            # Determine which circles are located on the clicked coordinates, this ensures we consider
            # the intersection between circles
            clicked_metric_ids = {}  # dict to store which metric ids got clicked
            for i in range(len(venn.centers)):
                if i >= len(labels):
                    # padding circle with no matching label (single-set diagram)
                    clicked_metric_ids[""] = False
                    break
                clicked_metric_ids[labels[i]] = self.__is_in_circle(
                    venn.centers[i].x,
                    venn.centers[i].y,
                    venn.radii[i],
                    event.xdata,
                    event.ydata,
                )

            # Determine the patch id: one '0'/'1' per circle, e.g. "11" is
            # the intersection patch of a two-set diagram
            patch_id = ""
            for _, is_selected in clicked_metric_ids.items():
                patch_id = patch_id + ("1" if is_selected is True else "0")

            # The click event was not on any patch, no further actions need to be done
            if patch_id in ["00", "000"]:
                return

            # reduce the opacity of all patches
            for patch in venn.patches:
                if patch is not None:
                    patch.set_alpha(0.10)

            # set the opacity of the selected patch
            patch = venn.get_patch_by_id(patch_id)
            patch.set_alpha(1)
            self.selected_patch_id = (ax, patch_id)

            # Determine the selected record ids based on the patch id:
            # first intersect the violation sets of all selected circles...
            violated_record_ids = set()
            for i in range(min(len(patch_id), len(labels))):
                if patch_id[i] == "1":
                    if len(violated_record_ids) == 0:  # First record to be added
                        violated_record_ids = self.violation_sets.get(
                            labels[i], set()
                        )
                    else:
                        violated_record_ids = violated_record_ids.intersection(
                            self.violation_sets.get(labels[i], set())
                        )

            # ...then subtract the violation sets of the unselected circles
            for i in range(min(len(patch_id), len(labels))):
                if patch_id[i] == "0":
                    violated_record_ids = (
                        violated_record_ids
                        - self.violation_sets.get(labels[i], set())
                    )

            # Check how many violated records under each metric id from the clicked venn diagram
            metric_ids_violation_count = {}
            for metric_id in labels:
                metric_ids_violation_count[metric_id] = len(
                    violated_record_ids.intersection(
                        self.violation_sets[metric_id])
                )

            self.logger.info(
                f"Updated venn diagram. selected_patch_id: {self.selected_patch_id}, metric_ids_violation_count: {metric_ids_violation_count}"
            )

            # Update the UI based on the clicked section of the venn diagram
            self.print_violation_summary(metric_ids_violation_count)
            self.show_violations_table_by_violation_ids(
                list(violated_record_ids))

    # Register matplotlib callback to handle all clicks on the plots
    self.venn_diagram_callback_id = plt.gcf().canvas.mpl_connect(
        "button_press_event", venn_callback
    )

    plt.show()
def __build_venn(self, filters: dict[str, any], ax: Axes):
    """
    Helper function to generate a single venn diagram and implement its styling.

    Args:
        filters: dict of metric_id -> bool; only True entries that also have
            recorded violations contribute a set to the diagram.
        ax: the matplotlib axes to draw the diagram on.

    Returns:
        The matplotlib_venn diagram object, or None when no metric was
        selected (0 sets) — callers must handle a None result.

    Raises:
        Exception: if the venn library fails to build the diagram.
    """
    self.logger.info(
        f"Building venn diagram. filters: {filters}, ax: {ax}")

    # Check the filters and processed violation sets to determine what violations we would add to the venn diagrams
    # items from filters object will be ignored if the metric id does not exist in the config, dataframe, or has no violations
    sets = []
    labels = []
    for key, value in filters.items():
        if key in self.violation_sets.keys() and value is True:
            if len(self.violation_sets[key]) > 0:
                sets.append(self.violation_sets.get(key, set()))
                labels.append(key)
    venn = None
    circles = []  # Store circles object to be able to style the borders
    try:
        if len(sets) == 1:
            # matplotlib_venn does not support diagrams with 1 set only. We need to
            # add an empty set and hide it in this case
            venn = venn2(
                [sets[0], set()],
                set_labels=labels,
                set_colors=self.COLORS[0:2],
                alpha=0.25,
                ax=ax,
            )
            circles = venn2_circles(
                subsets=[sets[0], set()], linewidth=1, ax=ax)

            # hide the 0 from the empty set and move the label to the center
            venn.hide_zeroes()
            label = venn.get_label_by_id("A")
            label.set_horizontalalignment("center")
        elif len(sets) == 2:
            # Fixed subset sizes keep the two circles equally sized/overlapping
            # regardless of the actual set cardinalities
            venn = venn2(
                sets,
                set_labels=labels,
                set_colors=self.COLORS[0:2],
                alpha=0.25,
                ax=ax,
                layout_algorithm=Venn2DefaultLayoutAlgorithm(
                    fixed_subset_sizes=(1, 1, 1)
                ),
            )
            circles = venn2_circles(
                subsets=sets,
                linewidth=1,
                ax=ax,
                layout_algorithm=Venn2DefaultLayoutAlgorithm(
                    fixed_subset_sizes=(1, 1, 1)
                ),
            )
        elif len(sets) == 3:
            venn = venn3(
                sets,
                set_labels=labels,
                set_colors=self.COLORS,
                alpha=0.25,
                ax=ax,
                layout_algorithm=Venn3DefaultLayoutAlgorithm(
                    fixed_subset_sizes=(1, 1, 1, 1, 1, 1, 1)
                ),
            )
            circles = venn3_circles(
                subsets=sets,
                linewidth=1,
                ax=ax,
                layout_algorithm=Venn3DefaultLayoutAlgorithm(
                    fixed_subset_sizes=(1, 1, 1, 1, 1, 1, 1)
                ),
            )
        else:
            self.logger.warning(
                "No metrics were selected for the venn diagram")
            print("you must select 1 to 3 metrics to display the venn diagram")

        # Set the circles borders
        for circle, color in zip(circles, self.COLORS):
            circle.set_edgecolor(color)
    except Exception as e:
        message = f"Failed to build venn diagrams. {e}"
        self.logger.error(message)
        raise Exception(message)

    return venn
def __get_faithfulness_highlight(self, score: float):
    """
    Map a numeric faithfulness score to its display label.

    Scores of 0.75 and above are "Faithful", scores in [0.3, 0.75) are
    "Somewhat faithful", and everything below 0.3 is "Unfaithful".
    """
    if score >= 0.75:
        return "Faithful"
    # Anything reaching this point is already known to be below 0.75
    if score >= 0.3:
        return "Somewhat faithful"
    return "Unfaithful"
def __highlight_faithfulness(self, input: str, attributions: list[tuple[str, float]]) -> str:
    """
    Helper to highlight sections of the input based on a list of substrings and their scores.
    This is intended to highlight the faithfulness attributions in both answers and contexts.
    Note: this helper does not handle attributions overlapping.

    Args:
        input: the text (answer or context) to decorate with <mark> tags.
        attributions: (substring, faithfulness score) pairs; each substring
            found in the whitespace-normalized input is wrapped in a
            color-coded <mark> element with a tooltip showing the score.

    Returns:
        The whitespace-normalized input with matching substrings replaced by
        HTML highlight markup.
    """
    # Remove unwanted whitespaces (collapse runs of whitespace to single spaces)
    result = " ".join(input.split())

    # Go over each attribution and highlight in the context based on its score
    for attribution in attributions:
        # Remove unwanted whitespaces so the substring matches the
        # normalized input above
        attribution_value = " ".join(attribution[0].split())

        # Determine the highlight color (same banding as
        # __get_faithfulness_highlight: >=0.75 green, [0.3, 0.75) yellow,
        # below 0.3 red)
        color = ""
        if attribution[1] >= 0.75:
            color = "green"
        elif attribution[1] < 0.75 and attribution[1] >= 0.3:
            color = "yellow"
        else:
            color = "red"

        # Find the attribution in the context and highlight. Note: replace()
        # substitutes every occurrence of the substring, not just the first.
        result = result.replace(
            attribution_value,
            f"""
            <mark style='background-color: {color}' class='tooltip'>{attribution[0]}<span class='tooltiptext'>faithfulness score: {attribution[1]}</span></mark>
            """
        )
    return result
def render_faithfulness_attributions(self, selected_violation):
    """
    This function will render a table of each faithfulness attribution of the answer with its score. When
    a row is selected, the contexts will be listed with each attribution highlighted and color coded based on its score.

    Args:
        selected_violation: a single row of the violations dataframe; must
            contain a "faithfulness_attributions" entry (either a parsed
            object or its string repr when loaded from csv).
    """
    # The object is converted to a string in the dataframe if it was loaded as a csv, we need to parse it back to a dict
    if isinstance(selected_violation["faithfulness_attributions"], str):
        faithfulness_attributions = ast.literal_eval(
            selected_violation["faithfulness_attributions"]
        )
    else:
        faithfulness_attributions = selected_violation["faithfulness_attributions"]

    attributions_df = pd.DataFrame.from_dict(faithfulness_attributions)

    attributions_table = ITable(
        # only display certain columns
        df=attributions_df[["output_text", "faithfulness_score"]],
        caption="Faithfulness attributions",
        classes="display wrap compact",
        select="single",
    )
    attributions_output = widgets.Output()

    @attributions_output.capture()
    def on_row_clicked(change):
        """
        Callback handler when a row is selected. It will list all the context with highlighting which sections of the context
        attributed to the answer and its faithfulness score
        """
        attributions_output.clear_output()

        try:
            # Check if we do not need to render the attributions, this would be in these cases:
            # - The update is to deselect a record
            # - The faithfulness attributions is not provided in the dataframe
            if (
                len(change["new"]) < 1
                or "faithfulness_attributions" not in self.df.columns
            ):
                return

            # Go over all the attributions and build a dict for the data that will be rendered
            # NOTE(review): the per-attribution tuple building below is taken
            # to run inside this loop (it reads `attribution`) — confirm
            # against the original formatting.
            attributions_data = {}
            for attribution in faithfulness_attributions[change["new"][0]]["attributions"]:
                attributions_data[attribution["feature_name"]
                                  ] = selected_violation[attribution["feature_name"]]

                # Create a list of tuples that contain the attribution text and its score, this will be used to
                # highlight the sections in the context
                attrib_tuple = []
                for feature_value, faithfulness_score in zip(attribution["feature_values"], attribution["faithfulness_scores"]):
                    attrib_tuple.append(
                        (feature_value, faithfulness_score))

                attributions_data[attribution["feature_name"]] = self.__highlight_faithfulness(
                    attributions_data[attribution["feature_name"]], attrib_tuple)

            # Render each configured context column, highlighted when
            # attribution data exists for it, raw otherwise
            html = ""
            for context_column in self.configuration.context_fields:
                context = attributions_data.get(
                    context_column, selected_violation[context_column])
                html += f"<h3>{context_column}</h3>"
                html += f"<p>{context}</p>"

            display(HTML(html))
        except Exception as e:
            message = f"Failed to render faithfulness attributions. {e}"
            self.logger.error(message)
            raise Exception(message)

    # Connect row selection callback
    attributions_table.observe(on_row_clicked, names=["selected_rows"])

    display(attributions_table, attributions_output)
def render_question_and_answer_faithfulness(self, selected_violation):
    """
    Function to parse the faithfulness attributions, build html code, and display it.

    Renders the question (first configured input field), the answer (first
    configured output field), and the faithfulness label + score.

    Args:
        selected_violation: a single row of the violations dataframe; must
            contain the configured input/output fields and a "faithfulness"
            score.

    Raises:
        Exception: if the HTML could not be displayed.
    """
    self.logger.info(
        f"Rendering question and answer faithfulness. Selected violation: {selected_violation}"
    )

    try:
        display(
            HTML(
                f"""
                <div>
                    <h2>Question</h2>
                    <p>{selected_violation[self.configuration.input_fields[0]]}</p>
                    <h2>Answer</h2>
                    <ul>
                        <li>{selected_violation[self.configuration.output_fields[0]]}</li>
                        <li>{self.__get_faithfulness_highlight(selected_violation['faithfulness'])} {selected_violation['faithfulness']}</li>
                    </ul>
                <div>
                """
            )
        )
    except Exception as e:
        message = f"Failed to render faithfulness attributions. {e}"
        self.logger.error(message)
        raise Exception(message)
def show_violations_table_by_violation_ids(self, violation_ids: list[int]):
    """
    Function to display records by an ids list.

    Builds an interactive table of the dataframe rows whose index is in
    ``violation_ids`` and wires a row-selection callback that renders the
    faithfulness attribution details for the selected record.

    Args:
        violation_ids: dataframe index values of the violated records.

    Raises:
        Exception: if the table could not be created or displayed.
    """
    self.logger.info(
        f"Displaying violation table by violation ids. Total violations: {len(violation_ids)}"
    )

    try:
        violations_table = ITable(
            # Select violated records by id
            df=self.df[self.df.index.isin(violation_ids)],
            caption="Violated Records",
            buttons=[{"extend": "csvHtml5", "text": "Download"}],
            classes="display nowrap compact violations_table",
            select="single",
        )
    except Exception as e:
        message = f"Failed to create violation table. {e}"
        self.logger.error(message)
        raise Exception(message)

    @self.faithfulness_attributions_output.capture()
    def on_row_clicked(change):
        """
        Callback handler when a row is selected. This will display the record faithfulness attribution if it exists.
        """
        # Reset the faithfulness attributions section
        self.faithfulness_attributions_output.clear_output()

        self.logger.info(
            f"Violation table row selected. Event: {change}")

        # Check if we do not need to render the attributions, this would be in these cases:
        # - The update is to deselect a record
        # - The faithfulness attributions is not provided in the dataframe
        if (
            len(change["new"]) < 1
            or "faithfulness_attributions" not in self.df.columns
        ):
            return

        # Pass all columns of the selected row to be rendered in the attributions section
        self.render_question_and_answer_faithfulness(
            violations_table.df.iloc[change["new"][0]]
        )
        self.render_faithfulness_attributions(
            violations_table.df.iloc[change["new"][0]]
        )

    # Connect row selection callback
    violations_table.observe(on_row_clicked, names=["selected_rows"])

    # Display the table and the faithfulness attributions below it
    try:
        display(violations_table, self.faithfulness_attributions_output)
    except Exception as e:
        message = f"Failed to render violation table. {e}"
        self.logger.error(message)
        raise Exception(message)
def __reset_venn_diagram(self):
    """
    Reset the venn diagram state.

    Clears the matplotlib figure, disconnects the click callback that was
    registered by render_venn_diagrams, and forgets the selected patch.
    """
    self.logger.info("Resetting Venn Diagrams.")
    plt.clf()
    # Detach the button_press_event handler from the current figure canvas
    canvas = plt.gcf().canvas
    canvas.mpl_disconnect(self.venn_diagram_callback_id)
    self.selected_patch_id = None
def print_violation_summary(self, metric_ids_violation_count):
    """
    Helper method to format and display the violated records summary. This will highlight this information:
    - metric id
    - configured threshold
    - number of violated records

    Args:
        metric_ids_violation_count: dict of metric_id -> number of violated
            records for that metric.

    Raises:
        Exception: if the HTML summary could not be displayed.
    """
    self.logger.info(
        f"Printing violation summary. Metric ids violation count: {metric_ids_violation_count}"
    )

    html_violations_list = []
    for metric_id, count in metric_ids_violation_count.items():
        # NOTE(review): self.metric_config is not initialized anywhere in
        # this view of the class (only config_metric_ids is) — confirm it is
        # populated before this method runs, otherwise this raises here.
        html_violations_list.append(
            f"""
            <li>{metric_id} ({self.metric_config[metric_id]['threshold']})
                <ul>
                    <li>{count} violated records</li>
                </ul>
            </li>
            """
        )

    try:
        display(
            HTML(
                f"""
                <div>
                    <h3>Violations:</h3>
                    <ul>
                        {''.join(html_violations_list)}
                    </ul>
                </div>
                """
            )
        )
    except Exception as e:
        message = f"Failed to render violation summary. {e}"
        self.logger.error(message)
        raise Exception(message)
|
|
911
|
+
def __print_rca(self, metric_ids_violation_count: dict[str, int]):
    """
    Function to print the root cause analysis to the user
    Note: This depends on ibm_metrics_plugin

    Args:
        metric_ids_violation_count: mapping of metric id -> number of violated records.

    Raises:
        Exception: always, immediately — RCA is currently disabled (see NOTE below).
    """
    # NOTE(review): this unconditional raise disables the feature and makes
    # every line below it unreachable dead code. If RCA is meant to stay
    # disabled, the remainder of the body could be removed; if not, this
    # raise should be dropped. TODO confirm intent.
    raise Exception("RCA is not supported.")
    self.logger.info(
        f"Printing RCA. Metric ids violation count: {metric_ids_violation_count}"
    )
    # Based on the count, build the argument generate the RCA and build the html metric RCA list
    # Each metric contributes a "<id>:eq:<low|high>," token to the analysis
    # argument and a matching <li> entry for the rendered metric list.
    evaluation_analysis_argument = ""
    rca_metrics_html = ""
    for metric_id, count in metric_ids_violation_count.items():
        evaluation_analysis_argument += (
            f"{metric_id}:eq:{'low' if count > 0 else 'high'},"
        )
        rca_metrics_html += (
            f"<li>{'Low' if count > 0 else 'High'}: {metric_id}</li>"
        )

    try:
        # Generate the RCA using the metrics plugin
        rca = EvalAnalysisProvider().get_metrics_eval_analysis(
            evaluation_analysis_argument
        )
    except Exception as e:
        message = f"Failed to get metric evaluation analysis. {e}"
        self.logger.error(message)
        raise Exception(message)

    # Build the html based on the generated RCA values
    # presumably rca is a dict with "causes", "recommendations" and
    # "description" keys — TODO confirm against the plugin's contract.
    causes_html = ""
    for cause in rca["causes"]:
        causes_html += f"<li>{cause}</li>"

    # Build the accordion for the recommendations section, this needs to be added into
    # an output widget to then be displayed in the accordion
    recommendations_html = ""
    for recommendation in rca["recommendations"]:
        recommendations_html += f"<li>{recommendation}</li>"
    recommendations_output = widgets.Output()
    with recommendations_output:
        try:
            display(
                HTML(
                    f"""
                    <h2>Recommendations</h2>
                    <ul>
                    {recommendations_html}
                    </ul>
                    """
                )
            )
        except Exception as e:
            message = f"Failed to render recommendations. {e}"
            self.logger.error(message)
            raise Exception(message)

    # Recommendations are collapsed behind an accordion so the main RCA
    # text stays compact.
    recommendations_accordion = widgets.Accordion(
        children=[recommendations_output], titles=[
            "See recommended actions"]
    )

    try:
        display(
            HTML(
                f"""
                <h1>Root cause analysis</h1>
                <ul>
                {rca_metrics_html}
                </ul>
                <h3>What does this mean?</h3>
                <p>{rca['description']}</p>
                <h3>What could be the cause?</h3>
                <ul>
                {causes_html}
                </ul>"""
            ),
            recommendations_accordion,
        )
    except Exception as e:
        message = f"failed to render RCA. {e}"
        self.logger.error(message)
        raise Exception(message)
|
996
|
+
def __get_metric_id_description(self, metric_id: str) -> widgets.Output:
    """
    Build an output widget holding an info icon whose tooltip carries the
    description of the given metric id. The widget stays empty when no
    description is known for the metric.
    """
    icon_container = widgets.Output(layout={'align_self': 'center'})
    description = metric_description_mapping.get(metric_id)

    if not description:
        # No description available for this metric id: return the empty container.
        return icon_container

    with icon_container:
        # Circled-i glyph rendered through a disabled text input so that the
        # metric description shows up as a tooltip on hover.
        info_icon = widgets.Text(value="\u24D8", tooltip=description)
        info_icon.add_class("reset_input_style")
        info_icon.disabled = True
        info_icon.layout = widgets.Layout(width='35px')
        display(info_icon)

    return icon_container
|
1015
|
+
def show_all_metrics_dropdown(self):
    """
    Function to render the widget UI. This will render the following:
    - Dropdown component to select metrics
    - Default selected metrics based on the top metrics with violated records
    - Venn diagrams of the selected metrics

    Note: For the venn diagrams to be interactive `ipympl` backend should by enabled, this can be done by:
    - installing ipympl Jupyter extension
    - explicitly enable `ipympl` backend by adding this line to the notebook `%matplotlib ipympl`
    """
    self.logger.info("Displaying interactive metric id drop down view")

    # Create an output widget for each component, this helps in customizing the layout of the ui
    dropdown_output = widgets.Output()
    checkbox_output = widgets.Output()
    venn_output = widgets.Output()

    # Sort the metric based on the number of violated records
    # (each entry is a dict carrying at least "metric_id" and "violation_count")
    sorted_metrics = sorted(
        self.config_metric_ids, key=lambda d: d["violation_count"], reverse=True
    )

    # Define the dropdown widget to select the metric ids
    dropdown = widgets.Dropdown(
        options=[metric["metric_id"] for metric in sorted_metrics],
        description="Metrics",
    )

    # Select the top metrics with violation based on the configured limit
    # NOTE(review): the limit is hard-coded to 3 here despite the comment
    # above mentioning a configured limit — TODO confirm whether this should
    # read from configuration.
    selected_metrics = [metric["metric_id"]
                        for metric in sorted_metrics[0:3]]

    self.logger.info(
        f"Dropdown metrics: {sorted_metrics}, selected metrics: {selected_metrics}"
    )

    def add_to_checkboxes(metric_id: str):
        """
        Callback handler to add metrics to the checkbox list, this will be called when selecting a metric from the dropdown.
        """
        self.logger.info(
            f"Metric id: {metric_id} is being added the checkboxes list"
        )

        checkbox_metrics = {}
        metric_descriptions = {}
        checkbox_output.clear_output()

        # Add the metric id to the list if it is not there already
        # (selected_metrics is shared, closure state mutated here and in
        # on_checkbox_updated below)
        if metric_id not in selected_metrics:
            selected_metrics.append(metric_id)

        self.logger.info(
            f"Updated selected metrics: {selected_metrics}")

        # create the checkbox widgets based on the selected metric ids
        for metric in selected_metrics:
            checkbox_metrics[metric] = widgets.Checkbox(
                value=True, description=metric
            )
            metric_descriptions[metric] = self.__get_metric_id_description(
                metric)

        def on_checkbox_updated(**kwargs):
            """
            Callback handler that gets triggered by adding / removing items from the checkbox list. This handler will update the venn
            diagram on any change on the metric list
            This will be triggered by these two cases:
            - If a new metric is selected from the dropdown
            - If a metric got unselected from the checkbox
            """
            self.logger.info(
                f"Checkboxes are updated. kwargs {kwargs}")

            # Clear the venn diagrams before updating them to show the new selection
            self.__reset_venn_diagram()
            venn_output.clear_output()

            # Find which metrics got deselected and remove them from the UI
            for k, v in kwargs.items():
                if v is False:
                    try:
                        if k in selected_metrics:
                            selected_metrics.remove(k)
                        checkbox_metrics[k].close()
                        metric_description = metric_descriptions.pop(
                            k, None)
                        if metric_description:
                            metric_description.close()
                    except Exception as e:
                        message = f"Failed to remove checkbox from the list. {e}"
                        self.logger.error(message)
                        raise Exception(message)

            # Reset the current groups and regenerate them based on the new selection
            self.metric_groups = []
            self.__find_metric_grouping(selected_metrics)
            with venn_output:
                self.render_venn_diagrams()

        # Connect the call back to update the checkboxes when an item is deselected
        interactive_checkboxes = widgets.interactive_output(
            on_checkbox_updated, checkbox_metrics
        )
        with checkbox_output:
            try:
                checkboxes_list = []
                for checkbox, metric_description in zip(list(checkbox_metrics.values()), list(metric_descriptions.values())):
                    checkboxes_list.append(
                        widgets.HBox([checkbox, metric_description]))
                ui = widgets.VBox(checkboxes_list)
                display(ui, interactive_checkboxes)
            except Exception as e:
                message = f"Failed to display checkboxes. {e}"
                self.logger.error(message)
                raise Exception(message)

    # Connect the callback to update the checkboxes when a metric id is selected from the dropdown
    with dropdown_output:
        widgets.interact(add_to_checkboxes, metric_id=dropdown)

    try:
        display(
            widgets.HBox(
                [
                    venn_output,
                    widgets.VBox(
                        [dropdown_output, checkbox_output],
                        layout=widgets.Layout(margin="33px 0 0 0"),
                    ),
                ]
            ),
            self.violation_summary_and_table_output,
        )
    except Exception as e:
        message = f"Failed to display dropdown menu and checkboxes. {e}"
        self.logger.error(message)
        raise Exception(message)
|
1155
|
+
def show_checkboxes_with_venn(self, metric_group_index: int):
    """
    Display venn diagram for the selected metric group along with checkboxes
    controlling which of the group's metrics are drawn.
    """
    # Guard against an index that does not map to any known metric group.
    if metric_group_index >= len(self.metric_groups):
        message = f"Metric group index ({metric_group_index}) is out of bound"
        self.logger.error(message)
        raise Exception(message)

    self.logger.info(
        f"Showing venn diagram with metric group index: {metric_group_index}, metric ids {self.metric_groups[metric_group_index]}"
    )

    # One pre-ticked checkbox plus one description icon per metric in the group.
    group_metrics = self.metric_groups[metric_group_index]
    checkbox_metrics = {
        metric: widgets.Checkbox(value=True, description=metric)
        for metric in group_metrics
    }
    metric_descriptions = {
        metric: self.__get_metric_id_description(metric)
        for metric in group_metrics
    }
    diagram_pane = widgets.Output()
    controls_pane = widgets.Output()

    def on_checkbox_updated(**kwargs):
        """
        Helper to handle checkboxes updates. This will trigger rerendering the venn diagram based on the new selection.
        """
        self.logger.info(f"Checkboxes updated: {kwargs}")
        diagram_pane.clear_output()
        with diagram_pane:
            self.render_venn_diagrams(metric_group_index, filters=kwargs)

    # Wire the checkboxes to the rerender callback so deselecting a metric
    # redraws the diagram.
    interactive_checkboxes = widgets.interactive_output(
        on_checkbox_updated, checkbox_metrics
    )
    with controls_pane:
        try:
            rows = [
                widgets.HBox([box, icon])
                for box, icon in zip(checkbox_metrics.values(), metric_descriptions.values())
            ]
            display(widgets.VBox(rows), interactive_checkboxes)
        except Exception as e:
            message = f"Failed to display interactive checkboxes. {e}"
            self.logger.error(message)
            raise Exception(message)

    controls_pane.layout = widgets.Layout(margin="33px 0 0 0")

    try:
        display(widgets.HBox([diagram_pane, controls_pane]))
    except Exception as e:
        message = f"Failed to display venn diagram and checkboxes output. {e}"
        self.logger.error(message)
        raise Exception(message)
|
1215
|
+
def display_metrics(self, metrics_result: pd.DataFrame):
    """Method to display ModelInsights

    Renders the metric violations as tabbed Venn diagrams: one tab per
    detected metric group, plus an extra custom-selection tab when more
    than one group exists. Prints a short message and returns early when
    no violations were found.

    Args:
        metrics_result (pd.DataFrame): evaluation results to process and display.

    Raises:
        Exception: when rendering any of the widget outputs fails.
    """
    # Process the DataFrame
    self.__reset_state()
    self.__process_df(metrics_result)

    # Check if there were no violations
    if len(self.violations) == 0:
        print("No violations were detected.")
        return

    # Check if we need to display the custom metrics tab,
    # this is needed when we have more than one metric group
    show_custom_metrics_tab: bool = len(self.metric_groups) > 1

    # The number of tabs should be the number of found groups, if we have more than one
    # metric group an extra tab is added for custom metric selection
    tabs_count = len(self.metric_groups) + 1 \
        if show_custom_metrics_tab else len(self.metric_groups)

    # BUG FIX: the original message lacked the f-prefix, so the literal text
    # "{tabs_count}" was logged instead of the actual tab count.
    self.logger.info(
        f"Displaying venn diagrams using tabs. Total tab count: {tabs_count}"
    )

    # create tabs with the the length of the groups
    tabs = widgets.Tab()
    tab_output = widgets.Output()  # Reuse the same output for all tabs..
    tabs_content = [tab_output for _ in range(tabs_count)]
    tabs_titles = [str(i + 1) for i in range(tabs_count)]
    tabs.children = tabs_content
    tabs.titles = tabs_titles

    # render content for the default
    with tab_output:
        self.show_checkboxes_with_venn(0)
        try:
            display(self.violation_summary_and_table_output)
        except Exception as e:
            message = f"Failed to display violation summary and table output. {e}"
            self.logger.error(message)
            raise Exception(message)

    @tab_output.capture()
    def on_tab_change(event):
        """
        Callback handler to render the content on tab change.
        """
        self.logger.info(f"Tab changed. event {event}")

        # We are only interested in tab change events
        if event["name"] != "selected_index":
            return

        # Clear all the content of the tab
        tab_output.clear_output()
        self.violation_summary_and_table_output.clear_output()
        self.faithfulness_attributions_output.clear_output()
        self.__reset_venn_diagram()

        # If the last tab is selected and we have custom metric tab then render it
        if show_custom_metrics_tab and event["new"] == tabs_count - 1:
            self.show_all_metrics_dropdown()
        else:
            # If the previous tab was the custom tab, re compute the metric groups
            if show_custom_metrics_tab and event["old"] == tabs_count - 1:
                self.metric_groups = []
                self.__find_metric_grouping()

            # Render the venn diagram based which metric group corresponds to the selected tab
            self.show_checkboxes_with_venn(event["new"])
            try:
                display(self.violation_summary_and_table_output)
            except Exception as e:
                message = f"Failed to display violation summary and table output. {e}"
                self.logger.error(message)
                raise Exception(message)

    # Register callback handler for tabs events
    tabs.observe(on_tab_change)

    try:
        display(tabs)
    except Exception as e:
        message = f"Failed to display tabs. {e}"
        self.logger.error(message)
        raise Exception(message)