ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_gov/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
- ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
- ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
- ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
- ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
- ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
- ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
- ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
- ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
- ibm_watsonx_gov/clients/__init__.py +0 -0
- ibm_watsonx_gov/clients/api_client.py +99 -0
- ibm_watsonx_gov/clients/segment_client.py +46 -0
- ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
- ibm_watsonx_gov/config/__init__.py +14 -0
- ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
- ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
- ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
- ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
- ibm_watsonx_gov/entities/__init__.py +8 -0
- ibm_watsonx_gov/entities/agentic_app.py +209 -0
- ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
- ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
- ibm_watsonx_gov/entities/ai_experiment.py +419 -0
- ibm_watsonx_gov/entities/base_classes.py +134 -0
- ibm_watsonx_gov/entities/container.py +54 -0
- ibm_watsonx_gov/entities/credentials.py +633 -0
- ibm_watsonx_gov/entities/criteria.py +508 -0
- ibm_watsonx_gov/entities/enums.py +274 -0
- ibm_watsonx_gov/entities/evaluation_result.py +444 -0
- ibm_watsonx_gov/entities/foundation_model.py +490 -0
- ibm_watsonx_gov/entities/llm_judge.py +44 -0
- ibm_watsonx_gov/entities/locale.py +17 -0
- ibm_watsonx_gov/entities/mapping.py +49 -0
- ibm_watsonx_gov/entities/metric.py +211 -0
- ibm_watsonx_gov/entities/metric_threshold.py +36 -0
- ibm_watsonx_gov/entities/model_provider.py +329 -0
- ibm_watsonx_gov/entities/model_risk_result.py +43 -0
- ibm_watsonx_gov/entities/monitor.py +71 -0
- ibm_watsonx_gov/entities/prompt_setup.py +40 -0
- ibm_watsonx_gov/entities/state.py +22 -0
- ibm_watsonx_gov/entities/utils.py +99 -0
- ibm_watsonx_gov/evaluators/__init__.py +26 -0
- ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
- ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
- ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
- ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
- ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
- ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
- ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
- ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
- ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
- ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
- ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
- ibm_watsonx_gov/metrics/__init__.py +74 -0
- ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
- ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
- ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
- ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
- ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
- ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
- ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
- ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
- ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
- ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
- ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
- ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
- ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
- ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
- ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
- ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
- ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
- ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
- ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
- ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
- ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
- ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
- ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
- ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
- ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
- ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
- ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
- ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
- ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
- ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
- ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
- ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
- ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
- ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
- ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
- ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
- ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
- ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
- ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
- ibm_watsonx_gov/metrics/status/__init__.py +0 -0
- ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
- ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
- ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
- ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
- ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
- ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
- ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
- ibm_watsonx_gov/metrics/utils.py +440 -0
- ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
- ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
- ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
- ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
- ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
- ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
- ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
- ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
- ibm_watsonx_gov/providers/__init__.py +8 -0
- ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/detectors_provider.py +415 -0
- ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
- ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
- ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
- ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
- ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
- ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
- ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/__init__.py +10 -0
- ibm_watsonx_gov/tools/clients/__init__.py +11 -0
- ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
- ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
- ibm_watsonx_gov/tools/core/__init__.py +8 -0
- ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
- ibm_watsonx_gov/tools/entities/__init__.py +8 -0
- ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
- ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
- ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
- ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
- ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
- ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
- ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
- ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
- ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
- ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
- ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
- ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
- ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
- ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
- ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
- ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
- ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
- ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
- ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
- ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
- ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
- ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
- ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
- ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
- ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
- ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
- ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
- ibm_watsonx_gov/tools/utils/__init__.py +14 -0
- ibm_watsonx_gov/tools/utils/constants.py +69 -0
- ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
- ibm_watsonx_gov/tools/utils/environment.py +108 -0
- ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
- ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
- ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
- ibm_watsonx_gov/traces/__init__.py +8 -0
- ibm_watsonx_gov/traces/span_exporter.py +195 -0
- ibm_watsonx_gov/traces/span_node.py +251 -0
- ibm_watsonx_gov/traces/span_util.py +153 -0
- ibm_watsonx_gov/traces/trace_utils.py +1074 -0
- ibm_watsonx_gov/utils/__init__.py +8 -0
- ibm_watsonx_gov/utils/aggregation_util.py +346 -0
- ibm_watsonx_gov/utils/async_util.py +62 -0
- ibm_watsonx_gov/utils/authenticator.py +144 -0
- ibm_watsonx_gov/utils/constants.py +15 -0
- ibm_watsonx_gov/utils/errors.py +40 -0
- ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
- ibm_watsonx_gov/utils/insights_generator.py +1285 -0
- ibm_watsonx_gov/utils/python_utils.py +425 -0
- ibm_watsonx_gov/utils/rest_util.py +73 -0
- ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
- ibm_watsonx_gov/utils/singleton_meta.py +25 -0
- ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/utils/validation_util.py +126 -0
- ibm_watsonx_gov/visualizations/__init__.py +13 -0
- ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
- ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
- ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
- ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
- ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
- ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
import copy
|
|
11
|
+
from collections import Counter, defaultdict
|
|
12
|
+
from typing import List
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from ibm_watsonx_gov.entities.agentic_app import Node
|
|
17
|
+
from ibm_watsonx_gov.entities.agentic_evaluation_result import \
|
|
18
|
+
AgenticEvaluationResult
|
|
19
|
+
from ibm_watsonx_gov.entities.enums import MetricGroup, MetricValueType
|
|
20
|
+
from ibm_watsonx_gov.entities.evaluation_result import (
|
|
21
|
+
AgentMetricResult, AggregateAgentMetricResult, RecordMetricResult)
|
|
22
|
+
from ibm_watsonx_gov.entities.metric_threshold import MetricThreshold
|
|
23
|
+
from ibm_watsonx_gov.metrics.llm_validation.llm_validation_metric import \
|
|
24
|
+
LLMValidationMetric
|
|
25
|
+
from ibm_watsonx_gov.utils.gov_sdk_logger import GovSDKLogger
|
|
26
|
+
|
|
27
|
+
logger = GovSDKLogger.get_logger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_aggregated_thresholds(metric_results: List[AgentMetricResult]) -> List[MetricThreshold]:
    """
    Pick the thresholds to report for a group of results of the same metric.

    The thresholds of the first result are used when every result in the group carries the
    same set of thresholds (order is ignored). A single result is returned as is, without
    any comparison.

    Parameters:
        metric_results (List[AgentMetricResult]): The results whose thresholds are compared.

    Returns:
        List[MetricThreshold]: The thresholds of the first result, or an empty list when the
        input is empty or the results do not agree on their thresholds.
    """
    if not metric_results:
        return []

    reference, *others = metric_results
    if not others:
        return reference.thresholds

    expected = set(reference.thresholds)
    # any() stops at the first disagreeing result, so at most one warning is logged.
    if any(set(other.thresholds) != expected for other in others):
        logger.warning(
            f"Did not get matching thresholds for {metric_results[0].name} metric. Not aggregating.")
        return []

    return reference.thresholds
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def __get_aggregation_result(metric_results: List[AgentMetricResult]) -> AggregateAgentMetricResult | None:
    """
    Aggregate the individual results of one metric into a single AggregateAgentMetricResult.

    The mean, minimum and maximum are computed over the results whose value is not None.
    Percentiles (25, 50, 75, 90, 95 and 99) are computed only when more than one value is
    available. The descriptive fields (name, value type, display name, method, provider,
    node name, applies_to and group) are copied from the first result, and the thresholds
    come from get_aggregated_thresholds.

    Parameters:
        metric_results (List[AgentMetricResult]): The individual results of a single metric.

    Returns:
        AggregateAgentMetricResult | None: The aggregated result, or None when there are no
        results to aggregate.
    """
    # Without this guard an empty group fails with IndexError on metric_results[0] below,
    # although the signature promises None and callers already check for a falsy result.
    if not metric_results:
        return None

    values = [r.value for r in metric_results if r.value is not None]

    mean, min_val, max_val, percentiles = None, None, None, None
    if values:
        mean = sum(values) / len(values)
        min_val = min(values)
        max_val = max(values)
        if len(values) > 1:
            # Calculate all percentiles in a single call
            percentile_keys = ("25", "50", "75", "90", "95", "99")
            percentile_values = np.percentile(
                values, [25, 50, 75, 90, 95, 99])
            percentiles = dict(zip(percentile_keys, percentile_values))

    # NOTE(review): per-label counts are not part of the aggregate built here; confirm
    # whether AggregateAgentMetricResult is expected to carry them.

    combined_thresholds = get_aggregated_thresholds(
        metric_results=metric_results)
    first_metric_result = metric_results[0]
    return AggregateAgentMetricResult(name=first_metric_result.name,
                                      value_type=first_metric_result.value_type,
                                      display_name=first_metric_result.display_name,
                                      thresholds=combined_thresholds,
                                      method=first_metric_result.method,
                                      provider=first_metric_result.provider,
                                      node_name=first_metric_result.node_name,
                                      applies_to=first_metric_result.applies_to,
                                      group=first_metric_result.group,
                                      value=mean,
                                      min=min_val,
                                      max=max_val,
                                      # count covers every result, including those without a value
                                      count=len(metric_results),
                                      percentiles=percentiles,
                                      individual_results=metric_results)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def __compute_aggregated_metrics_results(metrics_result: List[AgentMetricResult],
                                         nodes: List[Node],
                                         include_individual_results: bool = True) -> List[AggregateAgentMetricResult]:
    """
    Compute the aggregated results for node, message and conversation level metrics.

    Parameters:
        metrics_result (List[AgentMetricResult]): The individual metric results to aggregate.
        nodes (List[Node]): The nodes whose metric configurations are used for node level metrics.
        include_individual_results (bool): Forwarded to the node level aggregation.

    Returns:
        List[AggregateAgentMetricResult]: Node level aggregates first, followed by message
        level and then conversation level aggregates.
    """
    by_node, by_message, by_conversation = __get_grouped_metrics_result(
        metrics_result)

    # Unpacking keeps the node -> message -> conversation ordering of the output.
    return [
        *__get_aggregated_node_metrics(
            include_individual_results, nodes, by_node),
        *__get_aggregated_metrics(by_message),
        *__get_aggregated_metrics(by_conversation),
    ]
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def __get_aggregated_metrics(message_results):
|
|
131
|
+
aggregated_results = []
|
|
132
|
+
# Aggregate message or conversation level metrics
|
|
133
|
+
for values in list(message_results.values()):
|
|
134
|
+
aggregated_result = __get_aggregation_result(
|
|
135
|
+
values)
|
|
136
|
+
if aggregated_result:
|
|
137
|
+
aggregated_results.append(aggregated_result)
|
|
138
|
+
return aggregated_results
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def __get_grouped_metrics_result(metrics_result):
    """
    Group the metrics results based on node and message, and derive conversation level totals.

    Results are keyed by "<name>_<method>" when a method is set, otherwise by the metric name.
    Node level results are grouped per node name and key, message level results per key.
    For the message level "duration", "cost", "input_token_count" and "output_token_count"
    metrics the values are summed per conversation and emitted as new conversation level
    AgentMetricResult objects.

    Parameters:
        metrics_result: Iterable of AgentMetricResult objects.

    Returns:
        tuple: (nodes_result_map, message_result_map, conversation_result_map), where
        nodes_result_map maps node name -> key -> list of results, message_result_map maps
        key -> list of results, and conversation_result_map maps metric name -> list of
        conversation level results.
    """
    summed_metric_keys = ("duration", "cost",
                          "input_token_count", "output_token_count")
    nodes_result_map, message_result_map = {}, {}
    conversation_metrics = defaultdict(lambda: defaultdict(float))

    for mr in metrics_result:
        key = mr.name + "_" + mr.method if mr.method else mr.name
        if mr.applies_to == "node":
            nodes_result_map.setdefault(
                mr.node_name, {}).setdefault(key, []).append(mr)
        elif mr.applies_to == "message":
            message_result_map.setdefault(key, []).append(mr)
            # NOTE(review): totals are built from message level results only - confirm
            # against the upstream file.
            # A result may carry no value (the aggregation code in this module checks for
            # None as well); adding None to the running total would raise TypeError.
            if key in summed_metric_keys and mr.value is not None:
                conversation_metrics[mr.conversation_id][key] += mr.value

    conversation_result_map = defaultdict(list)
    for conversation_id, metric_value in conversation_metrics.items():
        for metric, value in metric_value.items():
            conversation_result_map[metric].append(AgentMetricResult(name=metric,
                                                                     value=value,
                                                                     display_name=metric,
                                                                     group=MetricGroup.PERFORMANCE.value if metric == "duration" else MetricGroup.USAGE.value,
                                                                     message_id=None,
                                                                     applies_to="conversation",
                                                                     conversation_id=conversation_id))

    return nodes_result_map, message_result_map, dict(conversation_result_map)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def __get_aggregated_node_metrics(include_individual_results, nodes, nodes_results):
    """
    Aggregate the node level metric results.

    Args:
        include_individual_results: Forwarded to the LLM validation aggregation helper.
        nodes: Node definitions. Each exposes ``name`` and ``metrics_configurations``;
            every configuration exposes ``metrics`` whose items expose ``id``.
        nodes_results: Mapping of node name -> metric key -> list of metric results.

    Returns:
        list: The truthy aggregated results, in iteration order of ``nodes_results``.
    """
    # node name -> metric id -> configured metric object, for quick lookup below
    metrics_by_node = {
        node.name: {
            metric.id: metric
            for configuration in node.metrics_configurations
            for metric in configuration.metrics
        }
        for node in nodes
    }

    collected = []
    for node_name, results_by_key in nodes_results.items():
        for metric_key, values in results_by_key.items():
            metric_obj = metrics_by_node.get(node_name, {}).get(metric_key)

            if isinstance(metric_obj, LLMValidationMetric):
                # LLM validation metrics aggregate through the metric object itself
                aggregated = __get_llm_validation_metric_aggregation_result(
                    include_individual_results, values, metric_obj)
            else:
                aggregated = __get_aggregation_result(values)

            if aggregated:
                collected.append(aggregated)
    return collected
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def __get_llm_validation_metric_aggregation_result(include_individual_results, values, metric_obj):
    """
    Aggregate the results of an LLM validation metric using the metric object itself.

    Each value is converted to a ``RecordMetricResult`` (``record_id`` taken from its
    ``message_id``), aggregated through
    ``metric_obj.get_aggregated_results_from_individual_results`` and the outcome is
    converted to an ``AggregateAgentMetricResult``.

    Args:
        include_individual_results: When truthy, a deep copy of the individual results
            is attached to the aggregate; otherwise an empty list is attached.
        values: Metric results of one metric; the first one supplies name, method,
            provider, node name, applies_to and group of the aggregate.
        metric_obj: Metric exposing ``get_aggregated_results_from_individual_results``.

    Returns:
        The ``AggregateAgentMetricResult``, or the falsy value returned by the metric
        when it produced no aggregate.
    """
    record_level_metrics = [RecordMetricResult(
        **v.__dict__, record_id=v.message_id) for v in values]
    metric_aggregate = metric_obj.get_aggregated_results_from_individual_results(
        record_level_metrics)

    # Bail out before touching any attribute: the emptiness check used to come after
    # the first attribute access, so a missing aggregate raised AttributeError.
    if not metric_aggregate:
        return metric_aggregate

    # convert updated record results to AgentMetricResult; fields of the updated
    # record result take precedence over the original agent result
    agent_individual_results = [
        AgentMetricResult(**{**agent_result.__dict__, **record_result.__dict__})
        for record_result, agent_result in zip(metric_aggregate.record_level_metrics, values)
    ]

    # convert AggregateMetricResult to AggregateAgentMetricResult
    mv = values[0]

    # Calculate percentiles only if there is more than one data point
    percentiles = None
    if len(agent_individual_results) > 1:
        valid_values = [
            r.value for r in agent_individual_results if r.value is not None]
        if valid_values and all(isinstance(v, (int, float)) for v in valid_values):
            # Calculate all percentiles in a single call
            percentile_values = np.percentile(
                valid_values, [25, 50, 75, 90, 95, 99])
            percentiles = {
                "25": percentile_values[0],
                "50": percentile_values[1],
                "75": percentile_values[2],
                "90": percentile_values[3],
                "95": percentile_values[4],
                "99": percentile_values[5]
            }

    return AggregateAgentMetricResult(
        name=mv.name,
        method=mv.method,
        provider=mv.provider,
        node_name=mv.node_name,
        applies_to=mv.applies_to,
        group=mv.group,
        value=metric_aggregate.mean,
        min=metric_aggregate.min,
        max=metric_aggregate.max,
        count=metric_aggregate.total_records,
        percentiles=percentiles,
        individual_results=copy.deepcopy(
            agent_individual_results) if include_individual_results else [],
        additional_info=copy.deepcopy(
            metric_aggregate.additional_info)
    )
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def get_agentic_evaluation_result(metrics_result: list[AgentMetricResult], nodes: list[Node] = None) -> AgenticEvaluationResult:
    """
    Build the agentic evaluation result from the individual metric results.

    Args:
        metrics_result (list[AgentMetricResult]): The individual metric results to aggregate.
        nodes (list[Node], optional): The nodes whose metric configurations are used
            during aggregation. Defaults to no nodes.

    Returns:
        AgenticEvaluationResult: Holds the aggregated metric results together with the
            individual results collected from those aggregates.
    """
    # Use a fresh list per call rather than a mutable default shared across calls.
    nodes = [] if nodes is None else nodes

    aggregated_metrics_results = __compute_aggregated_metrics_results(
        metrics_result, nodes)

    # The individual results are taken from the aggregates, not from the input list.
    individual_results = [
        result
        for amr in aggregated_metrics_results
        for result in amr.individual_results
    ]

    return AgenticEvaluationResult(metrics_results=individual_results,
                                   aggregated_metrics_results=aggregated_metrics_results)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def get_summaries(individual_metric_values: list):
    """
    Calculates statistical summaries for a list of numeric metric values.

    Args:
        individual_metric_values (list): A list of numeric values representing individual
            metrics. May contain None values which will be filtered out. ``None`` is
            treated like an empty list.

    Returns:
        dict: A dictionary containing the following statistical summaries:
            - "metric_value" (float): Mean of the values (same as "mean")
            - "mean" (float): Arithmetic mean of the values
            - "min" (float): Minimum value in the dataset
            - "max" (float): Maximum value in the dataset
            - "std" (float): Standard deviation of the values
            - "percentiles" (dict): Percentile values keyed by "25", "50", "75",
              "90", "95" and "99"

        If input is None, empty or contains only None values, every summary
        (including each percentile) is 0.
    """
    percentile_ranks = [25, 50, 75, 90, 95, 99]

    # Handle a missing list before filtering; checking for None after the filter
    # could never succeed because iterating None already raised TypeError.
    if individual_metric_values is None:
        values = []
    else:
        values = [ele for ele in individual_metric_values if ele is not None]

    if len(values) == 0:
        return {
            "metric_value": 0,
            "mean": 0,
            "min": 0,
            "max": 0,
            "std": 0,
            "percentiles": {str(rank): 0 for rank in percentile_ranks}
        }

    # Calculate all percentiles in a single call
    percentile_values = np.percentile(values, percentile_ranks)
    # The mean backs both "metric_value" and "mean", so compute it once
    mean = np.mean(values).item()

    return {
        "metric_value": mean,
        "mean": mean,
        "min": np.min(values).item(),
        "max": np.max(values).item(),
        "std": np.std(values).item(),
        "percentiles": {
            str(rank): value.item()
            for rank, value in zip(percentile_ranks, percentile_values)
        }
    }
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
from typing import Any, Awaitable, Iterable
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def run_in_event_loop(task, *args, **kwargs):
    """
    Run the given async task in an event loop, safely handling loop reuse.

    Args:
        task: Coroutine function; it is called as ``task(*args, **kwargs)``.
        *args: Positional arguments for ``task``.
        **kwargs: Keyword arguments for ``task``.

    Returns:
        Whatever the coroutine returns.

    Raises:
        Any exception raised by the task is propagated unchanged.
    """
    # Only the loop lookup is guarded. Guarding the task execution as well would
    # turn a RuntimeError raised by the task into a second run in a new loop.
    try:
        event_loop = asyncio.get_running_loop()
    except RuntimeError:
        event_loop = None

    if event_loop is not None:
        # Use existing event loop and wait for the task to be executed.
        # nest_asyncio patches the loop so it can be re-entered while running.
        import nest_asyncio
        nest_asyncio.apply()
        return event_loop.run_until_complete(task(*args, **kwargs))

    # No running loop, create one and close it when done
    event_loop = asyncio.new_event_loop()
    try:
        return event_loop.run_until_complete(task(*args, **kwargs))
    finally:
        event_loop.close()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
async def gather_with_concurrency(
    coros: Iterable[Awaitable],
    return_exceptions: bool = False,
    max_concurrency: int = 10,
) -> Any:
    """
    Await all the given awaitables while limiting how many run at the same time.

    Args:
        coros (Iterable[Awaitable]): The awaitables to run.
        return_exceptions (bool): Passed through to ``asyncio.gather``.
        max_concurrency (int): Size of the semaphore guarding the awaitables.

    Returns:
        Any: The result of ``asyncio.gather`` over the scheduled tasks.

    Raises:
        Exception: Re-raised from ``asyncio.gather`` after all scheduled tasks
            have been asked to cancel.
    """
    limiter = asyncio.Semaphore(max_concurrency)

    async def run_limited(awaitable):
        # A semaphore slot is held for as long as the awaitable is being awaited.
        async with limiter:
            return await awaitable

    scheduled = []
    for awaitable in coros:
        scheduled.append(asyncio.create_task(run_limited(awaitable)))

    try:
        results = await asyncio.gather(
            *scheduled, return_exceptions=return_exceptions)
    except Exception as ex:
        # Stop whatever is still pending before reporting the failure.
        for pending in scheduled:
            pending.cancel()
        raise ex
    return results
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def start_event_loop_run_func(func, data):
    """
    Call ``func(data)`` with a freshly created event loop installed as the current loop.

    This wrapper exists to start the event loop in the thread, as unitxt
    LiteLLMInference fails without it.

    Args:
        func: Callable invoked with ``data`` as its only argument.
        data: The argument handed to ``func``.

    Returns:
        The value returned by ``func(data)``.
    """
    thread_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(thread_loop)
    try:
        outcome = func(data)
    finally:
        # Always release the loop and clear the current loop, even when func raises.
        thread_loop.close()
        asyncio.set_event_loop(None)
    return outcome
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
|
|
12
|
+
from ibm_watsonx_gov.utils.gov_sdk_logger import GovSDKLogger
|
|
13
|
+
from ibm_watsonx_gov.utils.rest_util import RestUtil
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Authenticator:
    """
    Helper class to authenticate with IBM Cloud and CPD
    """

    def __init__(self, credentials: dict, use_cpd: bool, use_ssl: bool) -> None:
        """
        Initialize the authenticator object

        Args:
            credentials (dict): A dictionary containing the necessary credentials for authentication.
                The IBM Cloud flow reads "apikey" and "iam_url"; the CPD flow reads
                "username", "url" and either "password" or "api_key".
            use_cpd (bool): A boolean indicating whether to authenticate with CPD or IBM Cloud.
            use_ssl (bool): Passed as the ``verify`` argument of the token requests.
        """
        self.__credentials: dict[str, str] = credentials
        self.__use_cpd: bool = use_cpd
        # Stays None until authenticate() has completed successfully
        self.__iam_token: str = None
        self.__use_ssl: bool = use_ssl
        self.logger = GovSDKLogger.get_logger(__name__)

    def authenticate(self) -> str:
        """
        Function to complete the authentication flow with either IBM Cloud or CPD based
        on the configuration. This will set self.__iam_token and return the token to the user

        Returns:
            str: The token obtained from the authentication endpoint.

        Raises:
            Exception: If the request fails or its response cannot be parsed.
        """
        self.logger.info("Authenticating the client")
        if self.__use_cpd:
            self.logger.info("Authenticating the client with CPD")
            self.__iam_token = self.__get_iam_token_cpd()
        else:
            self.logger.info("Authenticating the client with ibm cloud")
            self.__iam_token = self.__get_iam_token_cloud()

        self.logger.info("Client authenticated successfully")
        return self.__iam_token

    def get_iam_token(self) -> str:
        """
        This function retrieves an IAM token from the instance variables. If the token does not exist, it raises an exception.

        Returns:
            str: IAM token

        Raises:
            Exception: If no token has been stored yet.
        """
        if not self.__iam_token:
            message = "Not authenticated yet."
            self.logger.error(message)
            raise Exception(message)
        return self.__iam_token

    def __get_iam_token_cloud(self) -> str:
        """
        Method to authenticate the client with ibm cloud.

        Returns:
            str: The "access_token" field of the token response.

        Raises:
            Exception: If the request fails or the response cannot be parsed.
        """
        self.logger.info("Authenticating using cloud credentials")

        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Accept": "application/json",
        }

        data = {
            "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
            "response_type": "cloud_iam",
            "apikey": self.__credentials["apikey"],
        }

        try:
            response = RestUtil.request_with_retry().post(
                url=f"{self.__credentials['iam_url']}/identity/token",
                data=data,
                headers=headers,
                allow_redirects=True,
                verify=self.__use_ssl,
            )
            response.raise_for_status()
        except Exception as e:
            message = f"Failed to authenticate. {e}"
            self.logger.error(message)
            # Chain the cause explicitly so the original traceback stays attached
            raise Exception(message) from e

        try:
            json_response = response.json()
            return json_response["access_token"]
        except Exception as e:
            message = f"Failed to parse authentication response. {e}"
            self.logger.error(message)
            raise Exception(message) from e

    def __get_iam_token_cpd(self) -> str:
        """
        Method to authenticate the client with CPD.

        Returns:
            str: The "token" field of the authorization response.

        Raises:
            Exception: If the request fails or the response cannot be parsed.
        """
        self.logger.info("Authenticating using cpd credentials")

        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

        data = {
            "username": self.__credentials["username"],
        }

        # Check if the authentication is done using password or api key and add it to the payload.
        # The password wins when both are present.
        if "password" in self.__credentials:
            data["password"] = self.__credentials["password"]
        elif "api_key" in self.__credentials:
            data["api_key"] = self.__credentials["api_key"]

        try:
            response = RestUtil.request_with_retry().post(
                url=f"{self.__credentials['url']}/icp4d-api/v1/authorize",
                data=json.dumps(data).encode("utf-8"),
                headers=headers,
                allow_redirects=True,
                verify=self.__use_ssl,
            )
            response.raise_for_status()
        except Exception as e:
            message = f"Failed to authenticate. {e}"
            self.logger.error(message)
            # Chain the cause explicitly so the original traceback stays attached
            raise Exception(message) from e

        try:
            json_response = response.json()
            return json_response["token"]
        except Exception as e:
            message = f"Failed to parse authentication response. {e}"
            self.logger.error(message)
            raise Exception(message) from e
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from typing import Literal
|
|
12
|
+
|
|
13
|
+
# Literal type that only admits the string "ootb" (presumably "out of the box" - confirm).
OOTB_TYPE = Literal["ootb"]
|
|
14
|
+
# Literal type that only admits the string "custom".
CUSTOM_TYPE = Literal["custom"]
|
|
15
|
+
# Literal type admitting the two strings "numeric" and "categorical".
METRIC_VALUE_TYPES = Literal["numeric", "categorical"]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
|
|
2
|
+
# ----------------------------------------------------------------------------------------------------
|
|
3
|
+
# IBM Confidential
|
|
4
|
+
# Licensed Materials - Property of IBM
|
|
5
|
+
# 5737-H76, 5900-A3Q
|
|
6
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
7
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
8
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
9
|
+
# ----------------------------------------------------------------------------------------------------
|
|
10
|
+
|
|
11
|
+
import sys
|
|
12
|
+
from http import HTTPStatus
|
|
13
|
+
|
|
14
|
+
from ibm_watsonx_gov.utils.gov_sdk_logger import GovSDKLogger
|
|
15
|
+
|
|
16
|
+
# Module-level logger obtained from GovSDKLogger; ClientError writes its debug record to it.
logger = GovSDKLogger.get_logger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ClientError(Exception):
    """
    Base error carrying a code, a message and an optional reason.

    The string form is "<code>: <message>", followed by a "Reason: <reason>" line
    when a reason was supplied.
    """

    def __init__(self, code, message, reason=None):
        """
        Args:
            code: Error code; rendered with ``str`` in the string form.
            message: Description of the error.
            reason: Optional underlying cause, e.g. the exception that triggered this error.
        """
        self.code = code
        self.message = message
        self.reason = reason
        logger.debug(str(self))

    def __str__(self):
        text = str(self.code) + ": " + str(self.message)
        # Decide on the reason itself. Consulting sys.exc_info() made the text depend
        # on whether some exception happened to be in flight: a supplied reason was
        # dropped outside an except block and "Reason: None" appeared inside one.
        if self.reason is not None:
            text += '\nReason: ' + str(self.reason)
        return text
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class AuthorizationError(ClientError, ValueError):
    """Client error that is also a ``ValueError``; takes the same code, message and reason."""

    def __init__(self, code, message, reason=None):
        # ClientError is next in the MRO, so this reaches ClientError.__init__
        super().__init__(code=code, message=message, reason=reason)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class UnsupportedOperationError(ClientError, ValueError):
    """Client error whose code is always ``HTTPStatus.NOT_IMPLEMENTED``."""

    def __init__(self, message, reason=None):
        # ClientError is next in the MRO, so this reaches ClientError.__init__
        super().__init__(code=HTTPStatus.NOT_IMPLEMENTED,
                         message=message,
                         reason=reason)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class GovSDKLogger:
    """
    Logger for watsonx governance SDK
    """
    DEFAULT_LOG_LEVEL = logging.WARN

    @staticmethod
    def get_logger(name):
        """
        Return the logger registered under ``name``, configuring it on first use.

        Params:
            name (str): The name of the logger.
        Returns:
            logging.Logger: A logger object
        """
        sdk_logger = logging.getLogger(name)
        # Propagation is switched off before the hasHandlers() check: with propagation
        # disabled the check stops at this logger instead of walking up to its parents.
        sdk_logger.propagate = False
        if sdk_logger.hasHandlers():
            # Already configured, do not attach a second handler
            return sdk_logger

        sdk_logger.setLevel(GovSDKLogger.DEFAULT_LOG_LEVEL)
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(logging.Formatter(
            "[%(asctime)s]-[%(name)s]-[ %(levelname)s ]-[Line %(lineno)d] ~~> %(message)s"
        ))
        sdk_logger.addHandler(stream_handler)
        return sdk_logger
|