ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_gov/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
- ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
- ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
- ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
- ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
- ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
- ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
- ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
- ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
- ibm_watsonx_gov/clients/__init__.py +0 -0
- ibm_watsonx_gov/clients/api_client.py +99 -0
- ibm_watsonx_gov/clients/segment_client.py +46 -0
- ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
- ibm_watsonx_gov/config/__init__.py +14 -0
- ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
- ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
- ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
- ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
- ibm_watsonx_gov/entities/__init__.py +8 -0
- ibm_watsonx_gov/entities/agentic_app.py +209 -0
- ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
- ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
- ibm_watsonx_gov/entities/ai_experiment.py +419 -0
- ibm_watsonx_gov/entities/base_classes.py +134 -0
- ibm_watsonx_gov/entities/container.py +54 -0
- ibm_watsonx_gov/entities/credentials.py +633 -0
- ibm_watsonx_gov/entities/criteria.py +508 -0
- ibm_watsonx_gov/entities/enums.py +274 -0
- ibm_watsonx_gov/entities/evaluation_result.py +444 -0
- ibm_watsonx_gov/entities/foundation_model.py +490 -0
- ibm_watsonx_gov/entities/llm_judge.py +44 -0
- ibm_watsonx_gov/entities/locale.py +17 -0
- ibm_watsonx_gov/entities/mapping.py +49 -0
- ibm_watsonx_gov/entities/metric.py +211 -0
- ibm_watsonx_gov/entities/metric_threshold.py +36 -0
- ibm_watsonx_gov/entities/model_provider.py +329 -0
- ibm_watsonx_gov/entities/model_risk_result.py +43 -0
- ibm_watsonx_gov/entities/monitor.py +71 -0
- ibm_watsonx_gov/entities/prompt_setup.py +40 -0
- ibm_watsonx_gov/entities/state.py +22 -0
- ibm_watsonx_gov/entities/utils.py +99 -0
- ibm_watsonx_gov/evaluators/__init__.py +26 -0
- ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
- ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
- ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
- ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
- ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
- ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
- ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
- ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
- ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
- ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
- ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
- ibm_watsonx_gov/metrics/__init__.py +74 -0
- ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
- ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
- ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
- ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
- ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
- ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
- ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
- ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
- ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
- ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
- ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
- ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
- ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
- ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
- ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
- ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
- ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
- ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
- ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
- ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
- ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
- ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
- ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
- ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
- ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
- ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
- ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
- ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
- ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
- ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
- ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
- ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
- ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
- ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
- ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
- ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
- ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
- ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
- ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
- ibm_watsonx_gov/metrics/status/__init__.py +0 -0
- ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
- ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
- ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
- ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
- ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
- ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
- ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
- ibm_watsonx_gov/metrics/utils.py +440 -0
- ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
- ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
- ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
- ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
- ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
- ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
- ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
- ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
- ibm_watsonx_gov/providers/__init__.py +8 -0
- ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/detectors_provider.py +415 -0
- ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
- ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
- ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
- ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
- ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
- ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
- ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/__init__.py +10 -0
- ibm_watsonx_gov/tools/clients/__init__.py +11 -0
- ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
- ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
- ibm_watsonx_gov/tools/core/__init__.py +8 -0
- ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
- ibm_watsonx_gov/tools/entities/__init__.py +8 -0
- ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
- ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
- ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
- ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
- ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
- ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
- ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
- ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
- ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
- ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
- ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
- ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
- ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
- ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
- ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
- ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
- ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
- ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
- ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
- ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
- ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
- ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
- ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
- ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
- ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
- ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
- ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
- ibm_watsonx_gov/tools/utils/__init__.py +14 -0
- ibm_watsonx_gov/tools/utils/constants.py +69 -0
- ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
- ibm_watsonx_gov/tools/utils/environment.py +108 -0
- ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
- ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
- ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
- ibm_watsonx_gov/traces/__init__.py +8 -0
- ibm_watsonx_gov/traces/span_exporter.py +195 -0
- ibm_watsonx_gov/traces/span_node.py +251 -0
- ibm_watsonx_gov/traces/span_util.py +153 -0
- ibm_watsonx_gov/traces/trace_utils.py +1074 -0
- ibm_watsonx_gov/utils/__init__.py +8 -0
- ibm_watsonx_gov/utils/aggregation_util.py +346 -0
- ibm_watsonx_gov/utils/async_util.py +62 -0
- ibm_watsonx_gov/utils/authenticator.py +144 -0
- ibm_watsonx_gov/utils/constants.py +15 -0
- ibm_watsonx_gov/utils/errors.py +40 -0
- ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
- ibm_watsonx_gov/utils/insights_generator.py +1285 -0
- ibm_watsonx_gov/utils/python_utils.py +425 -0
- ibm_watsonx_gov/utils/rest_util.py +73 -0
- ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
- ibm_watsonx_gov/utils/singleton_meta.py +25 -0
- ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/utils/validation_util.py +126 -0
- ibm_watsonx_gov/visualizations/__init__.py +13 -0
- ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
- ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
- ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
- ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
- ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
- ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADP Schedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
from typing import Annotated, Callable, Dict, Optional, Union
|
|
11
|
+
|
|
12
|
+
from pydantic import Field, model_validator
|
|
13
|
+
from typing_extensions import Self
|
|
14
|
+
|
|
15
|
+
from ibm_watsonx_gov.entities.base_classes import BaseConfiguration
|
|
16
|
+
from ibm_watsonx_gov.entities.enums import TaskType
|
|
17
|
+
from ibm_watsonx_gov.entities.llm_judge import LLMJudge
|
|
18
|
+
from ibm_watsonx_gov.entities.locale import Locale
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class GenAIConfiguration(BaseConfiguration):
    """
    Configuration for evaluating generative AI tasks.

    Declares which fields of the data hold the model inputs, contexts, outputs and
    references, together with the other configuration parameters needed for evaluation.

    Examples:
        1. Create configuration with default parameters
            .. code-block:: python

                configuration = GenAIConfiguration()

        2. Create configuration with parameters
            .. code-block:: python

                configuration = GenAIConfiguration(input_fields=["input"],
                                                   output_fields=["output"])

        3. Create configuration with dict parameters
            .. code-block:: python

                config = {"input_fields": ["input"],
                          "output_fields": ["output"],
                          "context_fields": ["contexts"],
                          "reference_fields": ["reference"]}
                configuration = GenAIConfiguration(**config)
    """

    # --- Task and data field mappings ---
    task_type: Annotated[
        Optional[TaskType],
        Field(
            title="Task Type",
            description="The generative task type. Default value is None.",
            default=None,
            examples=[TaskType.RAG],
        ),
    ]
    input_fields: Annotated[
        list[str],
        Field(
            title="Input Fields",
            description="The list of model input fields in the data. Default value is ['input_text'].",
            examples=[["question"]],
            default=["input_text"],
        ),
    ]
    context_fields: Annotated[
        list[str],
        Field(
            title="Context Fields",
            description="The list of context fields in the input fields. Default value is ['context'].",
            default=["context"],
            examples=[["context1", "context2"]],
        ),
    ]
    output_fields: Annotated[
        list[str],
        Field(
            title="Output Fields",
            description="The list of model output fields in the data. Default value is ['generated_text'].",
            default=["generated_text"],
            examples=[["output"]],
        ),
    ]
    reference_fields: Annotated[
        list[str],
        Field(
            title="Reference Fields",
            description="The list of reference fields in the data. Default value is ['ground_truth'].",
            default=["ground_truth"],
            examples=[["reference"]],
        ),
    ]
    locale: Annotated[
        Optional[Locale],
        Field(
            title="Locale",
            description="The language locale of the input, output and reference fields in the data.",
            default=None,
        ),
    ]

    # --- Tool calling ---
    tools: Annotated[
        Union[list[Callable], list[Dict]],
        Field(
            title="Tools",
            description="The list of tools used by the LLM.",
            default=[],
            examples=[["function1", "function2"]],
        ),
    ]
    tool_calls_field: Annotated[
        Optional[str],
        Field(
            title="Tool Calls Field",
            description="The tool calls field in the input fields. Default value is 'tool_calls'.",
            default="tool_calls",
            examples=["tool_calls"],
        ),
    ]
    available_tools_field: Annotated[
        Optional[str],
        Field(
            title="Available Tools Field",
            description="The tool inventory field in the data. Default value is 'available_tools'.",
            default="available_tools",
            examples=["available_tools"],
        ),
    ]

    # --- Judge model and prompt ---
    llm_judge: Annotated[
        Optional[LLMJudge],
        Field(
            title="LLM Judge",
            description="LLM as Judge Model details.",
            default=None,
        ),
    ]
    prompt_field: Annotated[
        Optional[str],
        Field(
            title="Model Prompt Field",
            description="The prompt field in the input fields. Default value is 'model_prompt'.",
            default="model_prompt",
            examples=["model_prompt"],
        ),
    ]

    # --- Span attribute mappings ---
    start_time_field: Annotated[
        Optional[str],
        Field(
            title="Span Start Time Field ",
            description="The start time field in span attributes.",
            default=None,
            examples=["start_time"],
        ),
    ]
    end_time_field: Annotated[
        Optional[str],
        Field(
            title="Span End Time Field",
            description="The end time field in span attributes.",
            default=None,
            examples=["end_time"],
        ),
    ]
    model_usage_detail_fields: Annotated[
        Optional[list[str]],
        Field(
            title="Model Usage Detail Field",
            description="The model usage detail field in span attributes.This field should provide information on model name, input_token_count and output_token_count",
            default=[],
        ),
    ]
    input_token_count_fields: Annotated[
        Optional[list[str]],
        Field(
            title="Input Token Count Field",
            description="The input token count field in span attributes.",
            default=[],
            examples=[["prompt_tokens"]],
        ),
    ]
    output_token_count_fields: Annotated[
        Optional[list[str]],
        Field(
            title="Output Token Count Field",
            description="The output token count field in span attributes.",
            default=["completion_tokens"],
            examples=[["completion_tokens"]],
        ),
    ]
    status_field: Annotated[
        Optional[str],
        Field(
            title="Span Status Field ",
            description="The status field in span attributes.",
            default="status",
            examples=["status"],
        ),
    ]
    user_id_field: Annotated[
        Optional[str],
        Field(
            title="User Id Field ",
            description="The user id field in span attributes.",
            default="user_id",
            examples=["user_id"],
        ),
    ]

    @model_validator(mode="after")
    def validate_fields(self) -> Self:
        """
        Check the field mappings once the model has been built.

        Returns:
            Self: The validated configuration instance.

        Raises:
            ValueError: If the task type is RAG and either ``input_fields`` or
                ``context_fields`` is empty.
        """
        # Only the RAG task type carries extra requirements.
        if self.task_type != TaskType.RAG:
            return self

        if self.input_fields and self.context_fields:
            return self

        raise ValueError(
            "input_fields and context_fields are required for RAG task type.")
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADP Schedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
from typing import Annotated, List, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, ConfigDict, Field, PositiveInt, field_validator
|
|
13
|
+
|
|
14
|
+
from ibm_watsonx_gov.entities.credentials import WxGovConsoleCredentials
|
|
15
|
+
from ibm_watsonx_gov.entities.foundation_model import FoundationModel
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class WxGovConsoleConfiguration(BaseModel):
    """
    Defines the WxGovConsoleConfiguration class.

    Holds what is needed to store model risk evaluation results in the watsonx Governance Console:
    the console identifier of the target model and the credentials used to authenticate.

    Examples:
        1. Create configuration with explicit credentials:
            .. code-block:: python

                credentials = WxGovConsoleCredentials(
                    url="https://governance-console.example.com",
                    username="admin",
                    password="securepassword",
                    api_key="optional-api-key"
                )
                configuration = WxGovConsoleConfiguration(
                    model_id="model-12345",
                    credentials=credentials
                )
    """
    # `model_id` starts with pydantic's protected "model_" prefix; clearing the
    # protected namespaces lets the field be declared under that name.
    model_config = ConfigDict(protected_namespaces=())

    # Console identifier of the model that receives the risk result.
    model_id: Annotated[str, Field(
        description="The watsonx Governance Console identifier of the model to store the model risk result.")]

    # Credentials used to connect to the console.
    credentials: Annotated[WxGovConsoleCredentials, Field(
        description="The watsonx Governance Console credentials.")]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ModelRiskConfiguration(BaseModel):
    """
    Defines the ModelRiskConfiguration class.

    Groups the parameters of a model risk evaluation: the foundation model under evaluation,
    the risk dimensions to assess, the sample size limit, the percentile thresholds, and the
    optional reporting targets (PDF report path and watsonx Governance Console).

    Examples:
        1. Create a basic configuration:
            .. code-block:: python

                model_details = WxAIFoundationModel(
                    model_name="mymodel_flan",
                    model_id="ibm/granite-3-3-8b-instruct",
                    project_id="project_id")

                model_config = ModelRiskConfiguration(
                    model_details=model_details,
                    risk_dimensions=["hallucination"],
                    max_sample_size=500,
                    thresholds=(20, 80),
                    pdf_report_output_path="/reports"
                )

        2. Include watsonx Governance Console integration:
            .. code-block:: python

                model_details = WxAIFoundationModel(
                    model_name="mymodel_flan",
                    model_id="ibm/granite-3-3-8b-instruct",
                    project_id="project_id")

                wx_gc_credentials = WxGovConsoleCredentials(
                    url="https://governance.example.com",
                    username="admin",
                    password="securepass",
                    api_key="console API key"
                )

                wx_config = WxGovConsoleConfiguration(
                    model_id="model-abc123",
                    credentials=wx_gc_credentials
                )

                model_config = ModelRiskConfiguration(
                    model_details=model_details,
                    risk_dimensions=["hallucination"],
                    max_sample_size=500,
                    thresholds=(20, 80),
                    wx_gc_configuration=wx_config,
                    pdf_report_output_path="/reports"
                )

    Validators:
        - `thresholds`: Ensures that the threshold values are between 0 and 100, and that the lower value is less than the upper value.
    """
    # `model_details` starts with pydantic's protected "model_" prefix; clearing
    # the protected namespaces lets the field be declared under that name.
    model_config = ConfigDict(protected_namespaces=())

    # Required: the foundation model being evaluated.
    model_details: Annotated[FoundationModel, Field(
        title="Foundation Model Details",
        description="The details of the foundation model being evaluated.",
    )]

    # Optional: risk categories to evaluate; no default selection is made here.
    risk_dimensions: Annotated[Optional[List[str]], Field(
        title="Risk Dimensions",
        description="A list of risk categories to be evaluated for the model. These could include hallucination, jailbreaking etc.",
        default=None,
        examples=[["hallucination", "jailbreaking", "harmful-code-generation"]],
    )]

    # Optional: upper bound on the number of samples; PositiveInt rejects values <= 0.
    max_sample_size: Annotated[Optional[PositiveInt], Field(
        title="Maximum Sample Size",
        description="The maximum number of samples to be used during the evaluation process. Must be a positive integer.",
        default=None,
        examples=[50],
    )]

    # Optional: console integration settings.
    wx_gc_configuration: Annotated[Optional[WxGovConsoleConfiguration], Field(
        title="watsonx Governance Console Configuration",
        description="Optional configuration for storing results in watsonx Governance Console.",
        default=None,
    )]

    # Optional: where the PDF report is written.
    pdf_report_output_path: Annotated[Optional[str], Field(
        title="PDF Report Output Path",
        description="The output file path to store the model risk evaluation PDF report.",
        default=None,
        examples=["/reports/"],
    )]

    # (lower, upper) percentile pair; checked by `validate_thresholds` below.
    thresholds: Annotated[Optional[Tuple[int, int]], Field(
        title="Performance Thresholds",
        description="A tuple representing the percentile-based threshold configuration used for categorizing LLM performance. The first element is the lower percentile threshold, and the second is the upper percentile threshold",
        default=(25, 75),
        examples=[(25, 75)],
    )]

    @field_validator("thresholds")
    @classmethod
    def validate_thresholds(cls, v):
        """
        Validate the ``thresholds`` pair.

        Args:
            v: The ``(lower, upper)`` threshold tuple, or None.

        Returns:
            The value unchanged when it is None or satisfies ``0 <= lower < upper <= 100``.

        Raises:
            ValueError: If the pair is out of range or not strictly increasing.
        """
        # None means "no thresholds supplied"; there is nothing to check.
        if v is None:
            return v

        lower, upper = v
        is_valid_range = 0 <= lower < upper <= 100
        if not is_valid_range:
            raise ValueError(
                "Thresholds must be between 0 and 100, and the first must be less than the second.")
        return v
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from ibm_watsonx_gov.entities.base_classes import BaseConfiguration
|
|
12
|
+
from ibm_watsonx_gov.entities.enums import InputDataType, ProblemType
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PredictiveAIConfiguration(BaseConfiguration):
    # Configuration for predictive AI; adds the fields below on top of
    # whatever BaseConfiguration already declares.

    # Required fields.
    problem_type: ProblemType
    input_data_type: InputDataType
    feature_fields: list[str]

    # Optional field lists; both default to an empty list.
    # NOTE(review): presumably these name subsets of `feature_fields` - confirm.
    categorical_fields: list[str] = []
    text_fields: list[str] = []
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
|
|
2
|
+
# ----------------------------------------------------------------------------------------------------
|
|
3
|
+
# IBM Confidential
|
|
4
|
+
# Licensed Materials - Property of IBM
|
|
5
|
+
# 5737-H76, 5900-A3Q
|
|
6
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
7
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
8
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
9
|
+
# ----------------------------------------------------------------------------------------------------
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
from json import loads
|
|
13
|
+
from typing import Annotated, Optional
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, Field, TypeAdapter, field_serializer
|
|
16
|
+
|
|
17
|
+
from ibm_watsonx_gov.config.agentic_ai_configuration import \
|
|
18
|
+
AgenticAIConfiguration
|
|
19
|
+
from ibm_watsonx_gov.entities.enums import MetricGroup
|
|
20
|
+
from ibm_watsonx_gov.entities.foundation_model import FoundationModelInfo
|
|
21
|
+
from ibm_watsonx_gov.entities.metric import GenAIMetric, Mapping
|
|
22
|
+
from ibm_watsonx_gov.metrics import METRICS_UNION
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class MetricsConfiguration(BaseModel):
    """
    The class representing the metrics to be computed and the configuration details required for them.

    Examples:
        1. Create MetricsConfiguration with default agentic ai configuration
            .. code-block:: python

                metrics_configuration = MetricsConfiguration(metrics=[ContextRelevanceMetric()],
                                                             metric_groups=[MetricGroup.RETRIEVAL_QUALITY])

        2. Create MetricsConfiguration by specifying agentic ai configuration
            .. code-block:: python

                config = {
                    "input_fields": ["input"],
                    "context_fields": ["contexts"]
                }
                metrics_configuration = MetricsConfiguration(configuration=AgenticAIConfiguration(**config),
                                                             metrics=[ContextRelevanceMetric()],
                                                             metric_groups=[MetricGroup.RETRIEVAL_QUALITY])
    """
    # Field names to read when computing the metrics; defaults to a default
    # AgenticAIConfiguration.
    configuration: Annotated[AgenticAIConfiguration,
                             Field(title="Metrics configuration",
                                   description="The configuration of the metrics to compute. The configuration contains the fields names to be read when computing the metrics.",
                                   default=AgenticAIConfiguration())]
    # Individual metrics to compute.
    metrics: Annotated[Optional[list[GenAIMetric]],
                       Field(title="Metrics",
                             description="The list of metrics to compute.",
                             default=[])]
    # Whole metric groups to compute.
    metric_groups: Annotated[Optional[list[MetricGroup]],
                             Field(title="Metric Groups",
                                   description="The list of metric groups to compute.",
                                   default=[])]

    @classmethod
    def model_validate(cls, obj, **kwargs):
        """
        Validate ``obj`` into a MetricsConfiguration.

        When ``obj`` is a dict with a non-null "metrics" entry, each metric is first
        validated against ``METRICS_UNION`` and the result is handed to the regular
        pydantic validation.

        Args:
            obj: The object to validate, typically a dict.
            **kwargs: Passed through to ``BaseModel.model_validate``.

        Returns:
            The validated MetricsConfiguration instance.
        """
        # Non-dict inputs and a missing/None "metrics" entry go straight to pydantic,
        # which reports problems as a ValidationError instead of a bare TypeError.
        if isinstance(obj, dict) and obj.get("metrics") is not None:
            # Build the adapter once rather than once per metric.
            metric_adapter = TypeAdapter(METRICS_UNION)
            # Work on a shallow copy so the caller's dict is not modified.
            obj = {**obj,
                   "metrics": [metric_adapter.validate_python(m)
                               for m in obj["metrics"]]}
        return super().model_validate(obj, **kwargs)

    @field_serializer("metrics", when_used="json")
    def metrics_serializer(self, metrics: Optional[list[GenAIMetric]]):
        """
        Serialize the metrics for JSON output by dumping each metric in JSON mode.

        Args:
            metrics: The metrics to serialize; may be None because the field is optional.

        Returns:
            A list with one JSON-mode dump per metric, or None when ``metrics`` is None.
        """
        # The field is Optional; iterating None would raise a TypeError.
        if metrics is None:
            return None
        return [metric.model_dump(mode="json") for metric in metrics]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class Node(BaseModel):
    """
    The class representing a node in an agentic application.

    Examples:
        1. Create Node with metrics configuration and default agentic ai configuration
            .. code-block:: python

                metrics_configurations = [MetricsConfiguration(metrics=[ContextRelevanceMetric()],
                                                               metric_groups=[MetricGroup.RETRIEVAL_QUALITY])]
                node = Node(name="Retrieval Node",
                            metrics_configurations=metrics_configurations)

        2. Create Node with metrics configuration and specifying agentic ai configuration
            .. code-block:: python

                node_config = {"input_fields": ["input"],
                               "output_fields": ["output"],
                               "context_fields": ["contexts"],
                               "reference_fields": ["reference"]}
                metrics_configurations = [MetricsConfiguration(configuration=AgenticAIConfiguration(**node_config),
                                                               metrics=[ContextRelevanceMetric()],
                                                               metric_groups=[MetricGroup.RETRIEVAL_QUALITY])]
                node = Node(name="Retrieval Node",
                            metrics_configurations=metrics_configurations)
    """
    # Required: the node name.
    name: Annotated[str,
                    Field(title="Name",
                          description="The name of the node.")]
    # Optional: name of the function implementing the node.
    func_name: Annotated[Optional[str],
                         Field(title="Node function name",
                               description="The name of the node function.",
                               default=None)]
    # Metrics (and their configuration) to compute for this node.
    metrics_configurations: Annotated[list[MetricsConfiguration],
                                      Field(title="Metrics configuration",
                                            description="The list of metrics and their configuration details.",
                                            default=[])]
    # Foundation models invoked by this node.
    foundation_models: Annotated[
        list[FoundationModelInfo],
        Field(
            description="The Foundation models invoked by the node",
            default=[],
        ),
    ]

    @classmethod
    def model_validate(cls, obj, **kwargs):
        """
        Validate ``obj`` into a Node.

        When ``obj`` is a dict with a non-null "metrics_configurations" entry, each
        element is first validated with ``MetricsConfiguration.model_validate`` and the
        result is handed to the regular pydantic validation.

        Args:
            obj: The object to validate, typically a dict.
            **kwargs: Passed through to ``BaseModel.model_validate``.

        Returns:
            The validated Node instance.
        """
        # Non-dict inputs and a missing/None entry go straight to pydantic, which
        # reports problems as a ValidationError instead of a bare TypeError.
        if isinstance(obj, dict) and obj.get("metrics_configurations") is not None:
            # Work on a shallow copy so the caller's dict is not modified.
            obj = {**obj,
                   "metrics_configurations": [MetricsConfiguration.model_validate(m)
                                              for m in obj["metrics_configurations"]]}
        return super().model_validate(obj, **kwargs)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class AgenticApp(BaseModel):
    """
    The configuration class representing an agentic application.
    An agent is composed of a set of nodes.
    The metrics to be computed at the agent or message level should be specified in the metrics_configuration and the metrics to be computed for the node level should be specified in the nodes list.

    Examples:
        1. Create AgenticApp with agent level metrics configuration.
            .. code-block:: python

                # Below example provides the agent configuration to compute the AnswerRelevanceMetric and all the metrics in Content Safety group on agent or message level.
                agentic_app = AgenticApp(name="Agentic App",
                                         metrics_configuration=MetricsConfiguration(metrics=[AnswerRelevanceMetric()],
                                                                                    metric_groups=[MetricGroup.CONTENT_SAFETY]))
                agentic_evaluator = AgenticEvaluator(agentic_app=agentic_app)
                ...

        2. Create AgenticApp with agent and node level metrics configuration and default agentic ai configuration for metrics.
            .. code-block:: python

                # Below example provides the node configuration to compute the ContextRelevanceMetric and all the metrics in Retrieval Quality group.
                nodes = [Node(name="Retrieval Node",
                              metrics_configurations=[MetricsConfiguration(metrics=[ContextRelevanceMetric()],
                                                                           metric_groups=[MetricGroup.RETRIEVAL_QUALITY])])]

                # Below example provides the agent configuration to compute the AnswerRelevanceMetric and all the metrics in Content Safety group on agent or message level.
                agentic_app = AgenticApp(name="Agentic App",
                                         metrics_configuration=MetricsConfiguration(metrics=[AnswerRelevanceMetric()],
                                                                                    metric_groups=[MetricGroup.CONTENT_SAFETY]),
                                         nodes=nodes)
                agentic_evaluator = AgenticEvaluator(agentic_app=agentic_app)
                ...

        3. Create AgenticApp with agent and node level metrics configuration and with agentic ai configuration for metrics.
            .. code-block:: python

                # Below example provides the node configuration to compute the ContextRelevanceMetric and all the metrics in Retrieval Quality group.
                node_fields_config = {
                    "input_fields": ["input"],
                    "context_fields": ["web_context"]
                }
                nodes = [Node(name="Retrieval Node",
                              metrics_configurations=[MetricsConfiguration(configuration=AgenticAIConfiguration(**node_fields_config),
                                                                           metrics=[ContextRelevanceMetric()],
                                                                           metric_groups=[MetricGroup.RETRIEVAL_QUALITY])])]

                # Below example provides the agent configuration to compute the AnswerRelevanceMetric and all the metrics in Content Safety group on agent or message level.
                agent_fields_config = {
                    "input_fields": ["input"],
                    "output_fields": ["output"]
                }
                agentic_app = AgenticApp(name="Agentic App",
                                         metrics_configuration=MetricsConfiguration(configuration=AgenticAIConfiguration(**agent_fields_config),
                                                                                    metrics=[AnswerRelevanceMetric()],
                                                                                    metric_groups=[MetricGroup.CONTENT_SAFETY]),
                                         nodes=nodes)
                agentic_evaluator = AgenticEvaluator(agentic_app=agentic_app)
                ...
    """
    # Name of the application; defaults to "Agentic App".
    name: Annotated[str, Field(title="Agentic application name",
                               description="The name of the agentic application.",
                               default="Agentic App")]
    # Optional mapping of the message input and output.
    message_io_mapping: Annotated[Optional[Mapping],
                                  Field(title="Message IO mapping",
                                        description="The message input and output mapping.",
                                        default=None)]
    # Agent/message level metrics; defaults to an empty MetricsConfiguration.
    metrics_configuration: Annotated[Optional[MetricsConfiguration],
                                     Field(title="Metrics configuration",
                                           description="The list of metrics to be computed on the agentic application and their configuration details.",
                                           default=MetricsConfiguration())]
    # Node level details, including each node's own metrics configurations.
    nodes: Annotated[Optional[list[Node]],
                     Field(title="Node details",
                           description="The nodes details.",
                           default=[])]

    @classmethod
    def model_validate_json(cls, json_data, **kwargs):
        """
        Build an AgenticApp from a JSON document.

        The JSON is parsed first; the "metrics_configuration" and "nodes" entries are then
        validated through ``MetricsConfiguration.model_validate`` and ``Node.model_validate``
        before the whole payload is validated as an AgenticApp.

        Args:
            json_data: The JSON document (str, bytes or bytearray) to parse.
            **kwargs: Passed through to ``model_validate``.

        Returns:
            The validated AgenticApp instance.
        """
        data = loads(json_data)
        # Only a JSON object has entries to pre-validate; any other document is left
        # for model_validate to reject.
        if isinstance(data, dict):
            # Both fields are Optional, so an explicit JSON null is kept as None
            # instead of being passed to the nested validators.
            if data.get("metrics_configuration") is not None:
                data["metrics_configuration"] = MetricsConfiguration.model_validate(
                    data["metrics_configuration"])
            if data.get("nodes") is not None:
                data["nodes"] = [Node.model_validate(node)
                                 for node in data["nodes"]]
        return cls.model_validate(data, **kwargs)
|