ibm-watsonx-gov 1.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_gov/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
- ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
- ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
- ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
- ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
- ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
- ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
- ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
- ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
- ibm_watsonx_gov/clients/__init__.py +0 -0
- ibm_watsonx_gov/clients/api_client.py +99 -0
- ibm_watsonx_gov/clients/segment_client.py +46 -0
- ibm_watsonx_gov/clients/usage_client.cp313-win_amd64.pyd +0 -0
- ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
- ibm_watsonx_gov/config/__init__.py +14 -0
- ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
- ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
- ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
- ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
- ibm_watsonx_gov/entities/__init__.py +8 -0
- ibm_watsonx_gov/entities/agentic_app.py +209 -0
- ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
- ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
- ibm_watsonx_gov/entities/ai_experiment.py +419 -0
- ibm_watsonx_gov/entities/base_classes.py +134 -0
- ibm_watsonx_gov/entities/container.py +54 -0
- ibm_watsonx_gov/entities/credentials.py +633 -0
- ibm_watsonx_gov/entities/criteria.py +508 -0
- ibm_watsonx_gov/entities/enums.py +274 -0
- ibm_watsonx_gov/entities/evaluation_result.py +444 -0
- ibm_watsonx_gov/entities/foundation_model.py +490 -0
- ibm_watsonx_gov/entities/llm_judge.py +44 -0
- ibm_watsonx_gov/entities/locale.py +17 -0
- ibm_watsonx_gov/entities/mapping.py +49 -0
- ibm_watsonx_gov/entities/metric.py +211 -0
- ibm_watsonx_gov/entities/metric_threshold.py +36 -0
- ibm_watsonx_gov/entities/model_provider.py +329 -0
- ibm_watsonx_gov/entities/model_risk_result.py +43 -0
- ibm_watsonx_gov/entities/monitor.py +71 -0
- ibm_watsonx_gov/entities/prompt_setup.py +40 -0
- ibm_watsonx_gov/entities/state.py +22 -0
- ibm_watsonx_gov/entities/utils.py +99 -0
- ibm_watsonx_gov/evaluators/__init__.py +26 -0
- ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
- ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
- ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
- ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cp313-win_amd64.pyd +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cp313-win_amd64.pyd +0 -0
- ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
- ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
- ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
- ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
- ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
- ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
- ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
- ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
- ibm_watsonx_gov/metrics/__init__.py +74 -0
- ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
- ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
- ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
- ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
- ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
- ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
- ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
- ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
- ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
- ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
- ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
- ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
- ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
- ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
- ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
- ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
- ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
- ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
- ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
- ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
- ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
- ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
- ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
- ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
- ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
- ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
- ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
- ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
- ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
- ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
- ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
- ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
- ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
- ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
- ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
- ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
- ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
- ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
- ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
- ibm_watsonx_gov/metrics/status/__init__.py +0 -0
- ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
- ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
- ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
- ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
- ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
- ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
- ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
- ibm_watsonx_gov/metrics/utils.py +440 -0
- ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
- ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
- ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
- ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
- ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
- ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
- ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
- ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
- ibm_watsonx_gov/providers/__init__.py +8 -0
- ibm_watsonx_gov/providers/detectors_provider.cp313-win_amd64.pyd +0 -0
- ibm_watsonx_gov/providers/detectors_provider.py +415 -0
- ibm_watsonx_gov/providers/eval_assist_provider.cp313-win_amd64.pyd +0 -0
- ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
- ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
- ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
- ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
- ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
- ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
- ibm_watsonx_gov/providers/unitxt_provider.cp313-win_amd64.pyd +0 -0
- ibm_watsonx_gov/tools/__init__.py +10 -0
- ibm_watsonx_gov/tools/clients/__init__.py +11 -0
- ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
- ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
- ibm_watsonx_gov/tools/core/__init__.py +8 -0
- ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
- ibm_watsonx_gov/tools/entities/__init__.py +8 -0
- ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
- ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
- ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
- ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
- ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
- ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
- ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
- ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
- ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
- ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
- ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
- ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
- ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
- ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
- ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
- ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
- ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
- ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
- ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
- ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
- ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
- ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
- ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
- ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
- ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
- ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
- ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
- ibm_watsonx_gov/tools/utils/__init__.py +14 -0
- ibm_watsonx_gov/tools/utils/constants.py +69 -0
- ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
- ibm_watsonx_gov/tools/utils/environment.py +108 -0
- ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
- ibm_watsonx_gov/tools/utils/platform_url_mapping.cp313-win_amd64.pyd +0 -0
- ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
- ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
- ibm_watsonx_gov/traces/__init__.py +8 -0
- ibm_watsonx_gov/traces/span_exporter.py +195 -0
- ibm_watsonx_gov/traces/span_node.py +251 -0
- ibm_watsonx_gov/traces/span_util.py +153 -0
- ibm_watsonx_gov/traces/trace_utils.py +1074 -0
- ibm_watsonx_gov/utils/__init__.py +8 -0
- ibm_watsonx_gov/utils/aggregation_util.py +346 -0
- ibm_watsonx_gov/utils/async_util.py +62 -0
- ibm_watsonx_gov/utils/authenticator.py +144 -0
- ibm_watsonx_gov/utils/constants.py +15 -0
- ibm_watsonx_gov/utils/errors.py +40 -0
- ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
- ibm_watsonx_gov/utils/insights_generator.py +1285 -0
- ibm_watsonx_gov/utils/python_utils.py +425 -0
- ibm_watsonx_gov/utils/rest_util.py +73 -0
- ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
- ibm_watsonx_gov/utils/singleton_meta.py +25 -0
- ibm_watsonx_gov/utils/url_mapping.cp313-win_amd64.pyd +0 -0
- ibm_watsonx_gov/utils/validation_util.py +126 -0
- ibm_watsonx_gov/visualizations/__init__.py +13 -0
- ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
- ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
- ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
- ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
- ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
- ibm_watsonx_gov-1.3.3.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import functools
|
|
12
|
+
from abc import abstractmethod
|
|
13
|
+
from typing import (TYPE_CHECKING, Annotated, List, Literal, Optional, Self,
|
|
14
|
+
Union)
|
|
15
|
+
|
|
16
|
+
import pandas as pd
|
|
17
|
+
from pydantic import (BaseModel, Field, computed_field, field_serializer,
|
|
18
|
+
field_validator, model_validator)
|
|
19
|
+
|
|
20
|
+
from ibm_watsonx_gov.entities.base_classes import BaseMetric
|
|
21
|
+
from ibm_watsonx_gov.entities.enums import MetricGroup, TaskType
|
|
22
|
+
from ibm_watsonx_gov.entities.metric_threshold import MetricThreshold
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from ibm_watsonx_gov.config import (AgenticAIConfiguration,
|
|
26
|
+
GenAIConfiguration)
|
|
27
|
+
from ibm_watsonx_gov.entities.evaluation_result import (
|
|
28
|
+
AggregateMetricResult, RecordMetricResult)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class MappingItem(BaseModel):
    """The mapping details to be used for reading the values from the data."""
    # NOTE: the class docstring text is kept as-is; pydantic publishes it as
    # the model description in the generated JSON schema.
    #
    # A mapping item names one logical field, tags its role through the
    # "type" alias, and records where the value lives: a column for tabular
    # data, or a span/attribute (optionally narrowed by a JSON path) for
    # trace data.

    # Logical name of the field being mapped.
    name: Annotated[
        str,
        Field(
            title="Name",
            description="The name of the item.",
            examples=["input_text", "generated_text", "context", "ground_truth"],
        ),
    ]

    # Role of the field. Declared as ``type_`` on the model, but read and
    # written as "type" through the alias / serialization alias.
    type_: Annotated[
        Literal[
            "input",
            "output",
            "reference",
            "context",
            "tool_call",
            "start_time",
            "end_time",
            "input_token_count",
            "output_token_count",
            "model_usage_details",
            "status",
            "user_id",
            "target_component",
            "available_tools",
        ],
        Field(
            title="Type",
            description="The type of the item.",
            examples=["input"],
            alias="type",
            serialization_alias="type",
        ),
    ]

    # Tabular source: column holding the value.
    column_name: Annotated[
        str | None,
        Field(
            title="Column Name",
            description="The column name in the tabular data to be used for reading the field value. Applicable for tabular source.",
            default=None,
        ),
    ]

    # Trace source: span holding the value.
    span_name: Annotated[
        str | None,
        Field(
            title="Span Name",
            description="The span name in the trace data to be used for reading the field value. Applicable for trace source.",
            default=None,
        ),
    ]

    # Trace source: attribute on the span holding the value.
    attribute_name: Annotated[
        str | None,
        Field(
            title="Attribute Name",
            description="The attribute name in the trace to be used for reading the field value. Applicable for trace source.",
            default=None,
        ),
    ]

    # Trace source: optional JSON path applied to the attribute value.
    json_path: Annotated[
        str | None,
        Field(
            title="Json Path",
            description="The json path to be used for reading the field value from the attribute value. Applicable for trace source. If not provided, the span attribute value is read as the field value.",
            default=None,
        ),
    ]

    # Trace source: whether child spans are searched as well.
    lookup_child_spans: Annotated[
        bool | None,
        Field(
            title="Look up child spans",
            description="The flag to indicate if all the child spans should be searched for the attribute value. Applicable for trace source.",
            default=False,
        ),
    ]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Mapping(BaseModel):
    """Defines the field mapping details to be used for computing a metric."""
    # NOTE: the class docstring text is kept as-is; pydantic publishes it as
    # the model description in the generated JSON schema.

    # Where the mapped values come from; "trace" unless stated otherwise.
    source: Annotated[
        Literal["trace", "tabular"],
        Field(
            title="Source",
            description="The source type of the data. Use trace if the data should be read from span in trace. Use tabular if the data is passed as a dataframe.",
            default="trace",
            examples=["trace", "tabular"],
        ),
    ]

    # Required: one MappingItem per field the metric needs to read.
    items: Annotated[
        list[MappingItem],
        Field(
            title="Mapping Items",
            description="The list of mapping items for the field. They are used to read the data from trace or tabular data for computing the metric.",
        ),
    ]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class TargetComponent(BaseModel):
    # Identifies the application component a metric targets. ``type`` says how
    # the component is referenced and ``value`` carries the reference itself;
    # the validator below keeps the two consistent.
    # (Comments are used instead of a class docstring so the generated JSON
    # schema is unchanged.)

    type: Literal["string", "mapping"] = Field(
        description="How the component is referenced. By `string` for directly providing the node names or by `mapping` for reading it from span attributes",
        examples=["string", "mapping"],
    )
    value: str | MappingItem = Field(
        description="The component’s value, either a node name represented as a string, or a MappingItem containing span and attribute details.",
    )

    @field_validator("value")
    @classmethod
    def validate_value_based_on_type(cls, v, info):
        """Check that ``value`` matches the kind announced by ``type``.

        Args:
            v: The already-parsed ``value`` field.
            info: Validation info; ``info.data`` holds the fields validated
                before ``value``.

        Returns:
            ``v`` unchanged when it is consistent with ``type``.

        Raises:
            ValueError: If ``type`` is "string" and ``v`` is not a ``str``, or
                ``type`` is "mapping" and ``v`` is not a ``MappingItem``.
        """
        # ``type`` is absent from info.data when it failed its own validation;
        # in that case neither branch applies and the value passes through.
        declared_kind = info.data.get("type")
        if declared_kind == "string":
            if not isinstance(v, str):
                raise ValueError(
                    "Value must be a string when type is 'string'")
        elif declared_kind == "mapping":
            if not isinstance(v, MappingItem):
                raise ValueError(
                    "Value must be a MappingItem when type is 'mapping'")
        return v
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class GenAIMetric(BaseMetric):
|
|
99
|
+
"""Defines the Generative AI metric interface"""
|
|
100
|
+
thresholds: Annotated[list[MetricThreshold],
|
|
101
|
+
Field(description="The list of thresholds", default=[])]
|
|
102
|
+
tasks: Annotated[list[TaskType], Field(
|
|
103
|
+
description="The task types this metric is associated with.", frozen=True, default=[])]
|
|
104
|
+
group: Annotated[MetricGroup | None, Field(
|
|
105
|
+
description="The metric group this metric belongs to.", frozen=True, default=None)]
|
|
106
|
+
is_reference_free: Annotated[bool, Field(
|
|
107
|
+
description="Decides whether this metric needs a reference for computation", frozen=True, default=True)]
|
|
108
|
+
method: Annotated[
|
|
109
|
+
str | None,
|
|
110
|
+
Field(description="The method used to compute the metric.",
|
|
111
|
+
default=None)]
|
|
112
|
+
metric_dependencies: Annotated[list["GenAIMetric"], Field(
|
|
113
|
+
description="Metrics that needs to be evaluated first", default=[])]
|
|
114
|
+
applies_to: Annotated[Optional[str],
|
|
115
|
+
Field(title="Applies to",
|
|
116
|
+
description="The tag to indicate for which the metric is applied to. Used for agentic application metric computation.",
|
|
117
|
+
examples=["message",
|
|
118
|
+
"conversation", "sub_agent"],
|
|
119
|
+
default="message")]
|
|
120
|
+
target_component: Annotated[Optional[TargetComponent],
|
|
121
|
+
Field(
|
|
122
|
+
title="Target Component",
|
|
123
|
+
description="The specific application component (node) where this metric is computed. Used for agentic application metric computation.",
|
|
124
|
+
examples=["Retrieval Node", "Context Node", "Generation Node"],
|
|
125
|
+
default=None
|
|
126
|
+
)]
|
|
127
|
+
mapping: Annotated[Optional[Mapping],
|
|
128
|
+
Field(title="Mapping",
|
|
129
|
+
description="The data mapping details for the metric which are used to read the values needed to compute the metric.",
|
|
130
|
+
default=None,
|
|
131
|
+
examples=Mapping(items=[MappingItem(name="input_text",
|
|
132
|
+
type="input",
|
|
133
|
+
span_name="LangGraph.workflow",
|
|
134
|
+
attribute_name="traceloop.entity.input",
|
|
135
|
+
json_path="$.inputs.input_text"),
|
|
136
|
+
MappingItem(name="generated_text",
|
|
137
|
+
type="output",
|
|
138
|
+
span_name="LangGraph.workflow",
|
|
139
|
+
attribute_name="traceloop.entity.output",
|
|
140
|
+
json_path="$.outputs.generated_text")])
|
|
141
|
+
)]
|
|
142
|
+
|
|
143
|
+
@field_serializer("metric_dependencies", when_used="json")
def metric_dependencies_serializer(self, metric_dependencies: list["GenAIMetric"]):
    """Serialize each dependency metric with ``model_dump(mode="json")``.

    Registered for the ``metric_dependencies`` field and only used for JSON
    serialization.

    Args:
        metric_dependencies: The dependency metrics held by this metric.

    Returns:
        A list with one JSON-mode dump per dependency, in the same order.
    """
    dumped = []
    for dependency in metric_dependencies:
        dumped.append(dependency.model_dump(mode="json"))
    return dumped
|
|
146
|
+
|
|
147
|
+
@computed_field(return_type=str)
@property
def id(self):
    """Return the metric id, building and caching it on first access.

    The id is the metric name, followed by ``_<method>`` when a method is set.
    """
    if self._id is not None:
        return self._id
    suffix = f"_{self.method}" if self.method else ""
    self._id = self.name + suffix
    return self._id
|
|
153
|
+
|
|
154
|
+
@model_validator(mode="after")
def validate(self) -> Self:
    """Fill in ``display_name`` from ``name`` when no display name is set.

    The name is split on underscores and each part is capitalized, then the
    parts are joined with single spaces.
    """
    if self.display_name:
        return self
    self.display_name = ' '.join(
        part.capitalize() for part in self.name.split('_'))
    return self
|
|
161
|
+
|
|
162
|
+
@abstractmethod
|
|
163
|
+
def evaluate(self, data: pd.DataFrame | dict,
|
|
164
|
+
configuration: "GenAIConfiguration | AgenticAIConfiguration",
|
|
165
|
+
**kwargs) -> "AggregateMetricResult":
|
|
166
|
+
raise NotImplementedError
|
|
167
|
+
|
|
168
|
+
async def evaluate_async(
|
|
169
|
+
self,
|
|
170
|
+
data: pd.DataFrame | dict,
|
|
171
|
+
configuration: "GenAIConfiguration | AgenticAIConfiguration",
|
|
172
|
+
**kwargs,
|
|
173
|
+
) -> "AggregateMetricResult":
|
|
174
|
+
loop = asyncio.get_event_loop()
|
|
175
|
+
# If called as async, run it in a separate thread
|
|
176
|
+
return await loop.run_in_executor(
|
|
177
|
+
None,
|
|
178
|
+
functools.partial(
|
|
179
|
+
self.evaluate,
|
|
180
|
+
data=data,
|
|
181
|
+
configuration=configuration,
|
|
182
|
+
**kwargs,
|
|
183
|
+
)
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
def info(self):
    """Placeholder hook; does nothing and returns ``None``."""
    return None
|
|
188
|
+
|
|
189
|
+
def get_aggregated_results_from_individual_results(self, record_results: List["RecordMetricResult"]):
    """Aggregate record-level results into one ``AggregateMetricResult``.

    The name, method, provider and group are copied from the first record;
    min, max and mean are computed over the ``value`` of every record, and
    the mean is also used as the aggregate ``value``.

    Note: an empty ``record_results`` raises ``IndexError`` when the first
    record is read.
    """
    # Imported here rather than at module level, as in the original code.
    from ibm_watsonx_gov.entities.evaluation_result import \
        AggregateMetricResult

    values = [item.value for item in record_results]
    first = record_results[0]
    record_count = len(values)
    average = sum(values) / record_count
    return AggregateMetricResult(
        name=first.name,
        method=first.method,
        provider=first.provider,
        group=first.group,
        value=average,
        total_records=len(record_results),
        record_level_metrics=record_results,
        min=min(values),
        max=max(values),
        mean=average,
    )
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class PredictiveAIMetric(BaseMetric):
    # Marker subclass of BaseMetric; it adds no fields or behavior of its own.
    pass
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
from typing import Annotated, Literal
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, Field
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MetricThreshold(BaseModel):
    """
    The class that defines the threshold for a metric.
    """
    # Kind of limit the value represents.
    type: Annotated[Literal["lower_limit", "upper_limit"], Field(
        description="Threshold type. One of 'lower_limit', 'upper_limit'")]
    # Numeric limit; defaults to 0.
    value: Annotated[float, Field(
        title="Threshold value", description="The value of metric threshold", default=0)]

    def __eq__(self, other):
        """Return True only for a MetricThreshold with the same type and value."""
        if isinstance(other, MetricThreshold):
            same_type = self.type == other.type
            return same_type and self.value == other.value
        return False

    def __ne__(self, other):
        """Return the negation of ``__eq__``."""
        is_equal = self.__eq__(other)
        return not is_equal

    def __hash__(self):
        """Hash on (type, value) so instances can be set members and dict keys."""
        key = (self.type, self.value)
        return hash(key)
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
from typing import Annotated
|
|
11
|
+
|
|
12
|
+
from ibm_watsonx_gov.entities.credentials import (AWSBedrockCredentials,
|
|
13
|
+
AzureOpenAICredentials,
|
|
14
|
+
GoogleAIStudioCredentials,
|
|
15
|
+
OpenAICredentials,
|
|
16
|
+
PortKeyCredentials,
|
|
17
|
+
RITSCredentials,
|
|
18
|
+
VertexAICredentials,
|
|
19
|
+
WxAICredentials,
|
|
20
|
+
WxoAIGatewayCredentials)
|
|
21
|
+
from ibm_watsonx_gov.entities.enums import ModelProviderType
|
|
22
|
+
from pydantic import BaseModel, Field, model_validator
|
|
23
|
+
from typing_extensions import Self
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ModelProvider(BaseModel):
    # Base model for provider configurations; subclasses give `type` a
    # provider-specific default.
    type: Annotated[ModelProviderType,
                    Field(description="The type of model provider.")]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class WxAIModelProvider(ModelProvider):
    """
    This class represents a model provider configuration for IBM watsonx.ai. It includes the provider type and
    credentials required to authenticate and interact with the watsonx.ai platform. If credentials are not explicitly
    provided, it attempts to load them from environment variables.

    Examples:
        1. Create provider using credentials object:
            .. code-block:: python

                credentials = WxAICredentials(
                    url="https://us-south.ml.cloud.ibm.com",
                    api_key="your-api-key"
                )
                provider = WxAIModelProvider(credentials=credentials)

        2. Create provider using environment variables:
            .. code-block:: python

                import os

                os.environ['WATSONX_URL'] = "https://us-south.ml.cloud.ibm.com"
                os.environ['WATSONX_APIKEY'] = "your_api_key"

                provider = WxAIModelProvider()
    """

    # Provider discriminator; frozen, defaults to IBM_WATSONX_AI.
    type: Annotated[ModelProviderType,
                    Field(description="The type of model provider.",
                          default=ModelProviderType.IBM_WATSONX_AI,
                          frozen=True)]
    # Optional credentials; filled from the environment when omitted.
    credentials: Annotated[WxAICredentials | None,
                           Field(default=None,
                                 description="The credentials used to authenticate with watsonx.ai. If not provided, they will be loaded from environment variables.")]

    @model_validator(mode="after")
    def create_credentials_from_env(self) -> Self:
        """Load credentials from the environment when none were supplied.

        Unlike the sibling providers, a ``ValueError`` raised by
        ``WxAICredentials.create_from_env`` is tolerated and the credentials
        are left as ``None``.
        """
        if self.credentials is not None:
            return self
        try:
            self.credentials = WxAICredentials.create_from_env()
        except ValueError:
            # Best effort: keep the provider usable without credentials.
            self.credentials = None
        return self
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class OpenAIModelProvider(ModelProvider):
    # Provider discriminator; frozen, defaults to OPENAI.
    type: Annotated[ModelProviderType, Field(
        description="The type of model provider.",
        default=ModelProviderType.OPENAI,
        frozen=True,
    )]
    # Optional credentials; filled from the environment when omitted.
    credentials: Annotated[OpenAICredentials | None, Field(
        description="OpenAI credentials. This can also be set by using `OPENAI_API_KEY` environment variable.",
        default=None,
    )]

    @model_validator(mode="after")
    def create_credentials_from_env(self) -> Self:
        """Fall back to ``OpenAICredentials.create_from_env()`` when no credentials were given."""
        if self.credentials is not None:
            return self
        self.credentials = OpenAICredentials.create_from_env()
        return self
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class PortKeyModelProvider(ModelProvider):
    # Provider discriminator; frozen, defaults to PORTKEY.
    type: Annotated[ModelProviderType, Field(
        description="The type of model provider.",
        default=ModelProviderType.PORTKEY,
        frozen=True,
    )]
    # Optional credentials; filled from the environment when omitted.
    credentials: Annotated[PortKeyCredentials | None, Field(
        description="PortKey credentials.",
        default=None,
    )]

    @model_validator(mode="after")
    def create_credentials_from_env(self) -> Self:
        """Fall back to ``PortKeyCredentials.create_from_env()`` when no credentials were given."""
        if self.credentials is not None:
            return self
        self.credentials = PortKeyCredentials.create_from_env()
        return self
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class AzureOpenAIModelProvider(ModelProvider):
    # Provider discriminator; frozen, defaults to AZURE_OPENAI.
    type: Annotated[ModelProviderType, Field(
        description="The type of model provider.",
        default=ModelProviderType.AZURE_OPENAI,
        frozen=True,
    )]
    # Optional credentials; filled from the environment when omitted.
    credentials: Annotated[AzureOpenAICredentials | None, Field(
        description="Azure OpenAI credentials.",
        default=None,
    )]

    @model_validator(mode="after")
    def create_credentials_from_env(self) -> Self:
        """Fall back to ``AzureOpenAICredentials.create_from_env()`` when no credentials were given."""
        if self.credentials is not None:
            return self
        self.credentials = AzureOpenAICredentials.create_from_env()
        return self
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class RITSModelProvider(ModelProvider):
    # Provider discriminator; frozen, defaults to RITS.
    type: Annotated[ModelProviderType, Field(
        description="The type of model provider.",
        default=ModelProviderType.RITS,
        frozen=True,
    )]
    # Optional credentials; filled from the environment when omitted.
    credentials: Annotated[RITSCredentials | None, Field(
        description="RITS credentials.",
        default=None,
    )]

    @model_validator(mode="after")
    def create_credentials_from_env(self) -> Self:
        """Fall back to ``RITSCredentials.create_from_env()`` when no credentials were given."""
        if self.credentials is not None:
            return self
        self.credentials = RITSCredentials.create_from_env()
        return self
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class VertexAIModelProvider(ModelProvider):
    """
    Represents a model provider using Vertex AI.

    Examples:
        1. Create provider using credentials object:
            .. code-block:: python

                provider = VertexAIModelProvider(
                    credentials=VertexAICredentials(
                        credentials_path="path/to/key.json",
                        project_id="your-project",
                        location="us-central1"
                    )
                )

        2. Create provider using environment variables:
            .. code-block:: python

                import os

                os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "/path/to/service_account.json"
                os.environ['GOOGLE_CLOUD_PROJECT'] = "your-project"
                os.environ['GOOGLE_CLOUD_LOCATION'] = "us-central1" # This is optional field, by default us-central1 location is selected

                provider = VertexAIModelProvider()
    """

    # Provider discriminator; frozen, defaults to VERTEX_AI.
    type: Annotated[ModelProviderType, Field(
        description="The type of model provider.",
        default=ModelProviderType.VERTEX_AI,
        frozen=True
    )]
    # Optional credentials; filled from the environment when omitted.
    credentials: Annotated[VertexAICredentials | None, Field(
        description="Vertex AI credentials.", default=None
    )]

    @model_validator(mode="after")
    def create_credentials_from_env(self) -> Self:
        """Fall back to ``VertexAICredentials.create_from_env()`` when no credentials were given.

        Returns ``Self`` (like the sibling providers) so subclasses keep
        their own type.
        """
        if self.credentials is None:
            self.credentials = VertexAICredentials.create_from_env()
        return self
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class GoogleAIStudioModelProvider(ModelProvider):
    """
    Represents a model provider using Google AI Studio.

    Examples:
        1. Create provider using credentials object:
            .. code-block:: python

                provider = GoogleAIStudioModelProvider(
                    credentials=GoogleAIStudioCredentials(api_key="api-key")
                )

        2. Create provider using environment variables:
            .. code-block:: python

                import os

                os.environ['GOOGLE_API_KEY'] = "your_api_key"

                provider = GoogleAIStudioModelProvider()
    """

    # Provider discriminator; frozen, defaults to GOOGLE_AI_STUDIO.
    type: Annotated[ModelProviderType, Field(
        description="The type of model provider.",
        default=ModelProviderType.GOOGLE_AI_STUDIO,
        frozen=True
    )]
    # Optional credentials; filled from the environment when omitted.
    credentials: Annotated[GoogleAIStudioCredentials | None, Field(
        description="Google AI Studio credentials.", default=None
    )]

    @model_validator(mode="after")
    def create_credentials_from_env(self) -> Self:
        """Fall back to ``GoogleAIStudioCredentials.create_from_env()`` when no credentials were given.

        Returns ``Self`` (like the sibling providers) so subclasses keep
        their own type.
        """
        if self.credentials is None:
            self.credentials = GoogleAIStudioCredentials.create_from_env()
        return self
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class AWSBedrockModelProvider(ModelProvider):
    """
    Represents a model provider using Amazon Bedrock.

    Examples:
        1. Create provider using credentials object:
            .. code-block:: python

                provider = AWSBedrockModelProvider(
                    credentials=AWSBedrockCredentials(
                        aws_access_key_id="your-access-key-id",
                        aws_secret_access_key="your-secret-access-key",
                        aws_region_name="us-east-1",
                        aws_session_token="optional-session-token"
                    )
                )

        2. Create provider using environment variables:
            .. code-block:: python

                import os

                os.environ['AWS_ACCESS_KEY_ID'] = "your-access-key-id"
                os.environ['AWS_SECRET_ACCESS_KEY'] = "your-secret-access-key"
                os.environ['AWS_SESSION_TOKEN'] = "optional-session-token"  # Optional
                os.environ['AWS_DEFAULT_REGION'] = "us-east-1"
                provider = AWSBedrockModelProvider()
    """

    # Provider discriminator; frozen, defaults to AWS_BEDROCK.
    type: Annotated[ModelProviderType, Field(
        description="The type of model provider.",
        default=ModelProviderType.AWS_BEDROCK,
        frozen=True
    )]
    # Optional credentials; filled from the environment when omitted.
    credentials: Annotated[AWSBedrockCredentials | None, Field(
        description="AWS Bedrock credentials.", default=None
    )]

    @model_validator(mode="after")
    def create_credentials_from_env(self) -> Self:
        """Fall back to ``AWSBedrockCredentials.create_from_env()`` when no credentials were given.

        Returns ``Self`` (like the sibling providers) so subclasses keep
        their own type.
        """
        if self.credentials is None:
            self.credentials = AWSBedrockCredentials.create_from_env()
        return self
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class CustomModelProvider(ModelProvider):
    """
    Defines the CustomModelProvider class.

    This class represents a custom model provider, typically used when integrating with non-standard or user-defined
    model backends. It sets the provider type to `CUSTOM` by default.

    Examples:
        1. Create a custom model provider:
            .. code-block:: python

                provider = CustomModelProvider()

        2. Use with a custom foundation model:
            .. code-block:: python

                custom_model = CustomFoundationModel(
                    scoring_fn=my_scoring_function,
                    provider=CustomModelProvider()
                )

    Attributes:
        type (ModelProviderType): The type of model provider. Always set to `ModelProviderType.CUSTOM`.
    """
    # Defaults to CUSTOM; unlike the other providers the field is not frozen.
    type: Annotated[ModelProviderType,
                    Field(description="The type of model provider.",
                          default=ModelProviderType.CUSTOM)]
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class WxoAIGatewayModelProvider(ModelProvider):
    """
    This class represents a model provider configuration for WXO AI Gateway Interface. It includes the provider type and
    credentials required to authenticate and interact with the WXO AI Gateway. If credentials are not explicitly
    provided, it attempts to load them from environment variables.

    Examples:
        1. Create provider using credentials object:
            .. code-block:: python

                credentials = WxoAIGatewayCredentials(
                    url="wxo-gateway-url",
                    api_key="your-api-key"
                )
                provider = WxoAIGatewayModelProvider(credentials=credentials)

        2. Create provider using environment variables:
            .. code-block:: python

                import os

                os.environ['WXO_AI_GATEWAY_URL'] = "wxo-gateway-url"
                os.environ['WATSONX_APIKEY'] = "your_api_key"

                provider = WxoAIGatewayModelProvider()
    """
    # Provider discriminator; frozen, defaults to WXO_AI_GATEWAY.
    type: Annotated[ModelProviderType, Field(
        description="The type of model provider.",
        default=ModelProviderType.WXO_AI_GATEWAY,
        frozen=True,
    )]
    # Optional credentials; filled from the environment when omitted.
    credentials: Annotated[WxoAIGatewayCredentials | None, Field(
        description="WXO AI Gateway credentials.",
        default=None,
    )]

    @model_validator(mode="after")
    def create_credentials_from_env(self) -> Self:
        """Fall back to ``WxoAIGatewayCredentials.create_from_env()`` when no credentials were given."""
        if self.credentials is not None:
            return self
        self.credentials = WxoAIGatewayCredentials.create_from_env()
        return self
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
from typing import List
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
from pydantic import BaseModel
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RiskMetric(BaseModel):
    # One named metric value: a float, a string, or a list of floats.
    name: str
    value: float | str | List[float]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Benchmark(BaseModel):
    # A named benchmark together with the metrics reported for it.
    name: str
    metrics: list[RiskMetric]

    def get_metric_df(self) -> pd.DataFrame:
        """Return the benchmark's metrics as a DataFrame, one row per metric."""
        dumped = self.model_dump()
        metric_rows = dumped["metrics"]
        return pd.DataFrame(metric_rows)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Risk(BaseModel):
    # A named risk and the benchmarks recorded for it.
    name: str
    benchmarks: list[Benchmark]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ModelRiskResult(BaseModel):
    # The risks in the result and an optional output file path.
    risks: list[Risk]
    output_file_path: str | None = None

    def to_json(self, **kwargs):
        """
        Serialize the model risk result to a JSON string.

        All keyword arguments are forwarded unchanged to pydantic's
        ``model_dump_json``, so any option it supports can be used.
        """
        json_text = self.model_dump_json(**kwargs)
        return json_text
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# OCO Source Materials
|
|
4
|
+
# 5900-A3Q, 5737-H76
|
|
5
|
+
# Copyright IBM Corp. 2025
|
|
6
|
+
# The source code for this program is not published or other-wise divested of its trade
|
|
7
|
+
# secrets, irrespective of what has been deposited with the U.S.Copyright Office.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
14
|
+
from typing_extensions import Annotated
|
|
15
|
+
|
|
16
|
+
from ibm_watsonx_gov.entities.metric import MetricThreshold
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MonitorThreshold(MetricThreshold):
    # A MetricThreshold tagged with the id of the metric it applies to.
    # NOTE(review): the inherited __eq__/__hash__ compare only type and value,
    # so metric_id does not take part in equality — confirm this is intended.
    metric_id: Annotated[str,
                         Field(description="Metric id",
                               examples=["faithfulness"])]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class BaseMonitor(BaseModel):
    # Common shape of a monitor: a name plus optional thresholds and parameters.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    monitor_name: Annotated[str, Field(
        description="Monitor name",
        examples=["generative_ai_quality", "drift_v2"],
    )]
    thresholds: Annotated[list[MonitorThreshold] | None, Field(
        default=None,
        description="List of metric thresholds",
    )]
    parameters: Annotated[dict[str, Any] | None, Field(
        default=None,
        description="Monitor parameters",
    )]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class GenerativeAIQualityMonitor(BaseMonitor):
    # Monitor preset: default parameters list each metric with an empty configuration.
    monitor_name: str = "generative_ai_quality"
    parameters: dict[str, Any] = {
        "metrics_configuration": {
            metric_name: {}
            for metric_name in (
                "rouge_score",
                "exact_match",
                "bleu",
                "unsuccessful_requests",
                "hap_input_score",
                "hap_score",
                "pii",
                "pii_input",
            )
        },
    }
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class DriftV2Monitor(BaseMonitor):
    # Monitor preset that only fixes the monitor name.
    monitor_name: str = "drift_v2"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class QualityMonitor(BaseMonitor):
    # Monitor preset with a default minimum feedback data size of 10.
    monitor_name: str = "quality"
    parameters: dict[str, Any] = {"min_feedback_data_size": 10}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class FairnessMonitor(BaseMonitor):
    # Monitor preset that only fixes the monitor name.
    monitor_name: str = "fairness"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class DriftMonitor(BaseMonitor):
    # Monitor preset that only fixes the monitor name.
    monitor_name: str = "drift"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ModelHealthMonitor(BaseMonitor):
    # Monitor preset that only fixes the monitor name.
    monitor_name: str = "model_health"
|